diff --git a/.bumpversion.toml b/.bumpversion.toml
index face748..8ebe12e 100644
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -33,7 +33,7 @@ search = 'version = "{current_version}"'
replace = 'version = "{new_version}"'
[[tool.bumpversion.files]]
-filename = "src/__version__.py"
+filename = "src/codebase_rag/__version__.py"
search = '__version__ = "{current_version}"'
replace = '__version__ = "{new_version}"'
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index 1117c02..ebb3172 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -30,7 +30,7 @@ jobs:
echo "pyproject.toml version: $PROJECT_VERSION"
# Get version from __version__.py
- VERSION_PY=$(grep '__version__ = ' src/__version__.py | cut -d'"' -f2)
+ VERSION_PY=$(grep '__version__ = ' src/codebase_rag/__version__.py | cut -d'"' -f2)
echo "__version__.py version: $VERSION_PY"
# Validate Python version file
@@ -70,7 +70,7 @@ jobs:
bun-version: latest
- name: Build Frontend
- run: ./build-frontend.sh
+ run: ./scripts/build-frontend.sh
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -121,7 +121,7 @@ jobs:
bun-version: latest
- name: Build Frontend
- run: ./build-frontend.sh
+ run: ./scripts/build-frontend.sh
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
@@ -172,7 +172,7 @@ jobs:
bun-version: latest
- name: Build Frontend
- run: ./build-frontend.sh
+ run: ./scripts/build-frontend.sh
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
diff --git a/.github/workflows/docs-deploy.yml b/.github/workflows/docs-deploy.yml
index 875323f..bcd490d 100644
--- a/.github/workflows/docs-deploy.yml
+++ b/.github/workflows/docs-deploy.yml
@@ -70,4 +70,4 @@ jobs:
- name: Notify deployment
run: |
echo "๐ Documentation deployed successfully!"
- echo "๐ URL: https://code-graph.vantagecraft.dev"
+ echo "๐ URL: https://vantagecraft.dev/docs/code-graph/"
diff --git a/Dockerfile b/Dockerfile
index 09cfbef..be73276 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -50,10 +50,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Copy application source code for local package installation
COPY pyproject.toml README.md ./
-COPY api ./api
-COPY core ./core
-COPY services ./services
-COPY mcp_tools ./mcp_tools
+COPY src ./src
COPY *.py ./
# Install local package (without dependencies, already installed)
@@ -95,7 +92,7 @@ COPY --from=builder /usr/local/bin/uvicorn /usr/local/bin/
COPY --chown=appuser:appuser . .
# Copy pre-built frontend (if exists)
-# Run ./build-frontend.sh before docker build to generate frontend/dist
+# Run ./scripts/build-frontend.sh before docker build to generate frontend/dist
# If frontend/dist doesn't exist, the app will run as API-only (no web UI)
RUN if [ -d frontend/dist ]; then \
mkdir -p static && \
@@ -103,7 +100,7 @@ RUN if [ -d frontend/dist ]; then \
echo "โ
Frontend copied to static/"; \
else \
echo "โ ๏ธ No frontend/dist found - running as API-only"; \
- echo " Run ./build-frontend.sh to build frontend"; \
+ echo " Run ./scripts/build-frontend.sh to build frontend"; \
fi
# Switch to non-root user
@@ -129,6 +126,6 @@ EXPOSE 8000 8080
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8080/api/v1/health || exit 1
-# Default command - starts HTTP API (not MCP)
-# For MCP service, run on host: python start_mcp.py
-CMD ["python", "start.py"]
+# Default command - starts both MCP and Web services (dual-port mode)
+# Alternative: python -m codebase_rag --mcp (MCP only) or --web (Web only)
+CMD ["python", "-m", "codebase_rag"]
diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base
index ee7c6ea..d29e2f0 100644
--- a/docker/Dockerfile.base
+++ b/docker/Dockerfile.base
@@ -2,7 +2,7 @@
# Base Docker image for Code Graph Knowledge System
#
# IMPORTANT: Frontend MUST be pre-built before docker build:
-# ./build-frontend.sh
+# ./scripts/build-frontend.sh
#
# This Dockerfile expects frontend/dist/ to exist
@@ -53,7 +53,7 @@ COPY --chown=appuser:appuser services ./services
COPY --chown=appuser:appuser mcp_tools ./mcp_tools
COPY --chown=appuser:appuser start.py start_mcp.py mcp_server.py config.py main.py ./
-# Copy pre-built frontend (MUST exist - run ./build-frontend.sh first)
+# Copy pre-built frontend (MUST exist - run ./scripts/build-frontend.sh first)
COPY --chown=appuser:appuser frontend/dist ./static
USER appuser
diff --git a/docker/Dockerfile.full b/docker/Dockerfile.full
index 6c4cf9e..4393b4d 100644
--- a/docker/Dockerfile.full
+++ b/docker/Dockerfile.full
@@ -2,7 +2,7 @@
# Full Docker image - All features (LLM + Embedding required)
#
# IMPORTANT: Frontend MUST be pre-built before docker build:
-# ./build-frontend.sh
+# ./scripts/build-frontend.sh
#
# This Dockerfile expects frontend/dist/ to exist
@@ -48,13 +48,9 @@ COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/pytho
COPY --from=builder /usr/local/bin/uvicorn /usr/local/bin/
# Copy application code
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser core ./core
-COPY --chown=appuser:appuser services ./services
-COPY --chown=appuser:appuser mcp_tools ./mcp_tools
-COPY --chown=appuser:appuser start.py start_mcp.py mcp_server.py config.py main.py ./
+COPY --chown=appuser:appuser src ./src
-# Copy pre-built frontend (MUST exist - run ./build-frontend.sh first)
+# Copy pre-built frontend (MUST exist - run ./scripts/build-frontend.sh first)
COPY --chown=appuser:appuser frontend/dist ./static
USER appuser
@@ -67,4 +63,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8080/api/v1/health || exit 1
# Start application (dual-port mode)
-CMD ["python", "main.py"]
+CMD ["python", "-m", "codebase_rag"]
diff --git a/docker/Dockerfile.minimal b/docker/Dockerfile.minimal
index a711734..3b64626 100644
--- a/docker/Dockerfile.minimal
+++ b/docker/Dockerfile.minimal
@@ -2,7 +2,7 @@
# Minimal Docker image - Code Graph only (No LLM required)
#
# IMPORTANT: Frontend MUST be pre-built before docker build:
-# ./build-frontend.sh
+# ./scripts/build-frontend.sh
#
# This Dockerfile expects frontend/dist/ to exist
@@ -48,13 +48,9 @@ COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/pytho
COPY --from=builder /usr/local/bin/uvicorn /usr/local/bin/
# Copy application code
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser core ./core
-COPY --chown=appuser:appuser services ./services
-COPY --chown=appuser:appuser mcp_tools ./mcp_tools
-COPY --chown=appuser:appuser start.py start_mcp.py mcp_server.py config.py main.py ./
+COPY --chown=appuser:appuser src ./src
-# Copy pre-built frontend (MUST exist - run ./build-frontend.sh first)
+# Copy pre-built frontend (MUST exist - run ./scripts/build-frontend.sh first)
COPY --chown=appuser:appuser frontend/dist ./static
USER appuser
@@ -67,4 +63,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8080/api/v1/health || exit 1
# Start application (dual-port mode)
-CMD ["python", "main.py"]
+CMD ["python", "-m", "codebase_rag"]
diff --git a/docker/Dockerfile.standard b/docker/Dockerfile.standard
index df53260..f461b19 100644
--- a/docker/Dockerfile.standard
+++ b/docker/Dockerfile.standard
@@ -2,7 +2,7 @@
# Standard Docker image - Code Graph + Memory Store (Embedding required)
#
# IMPORTANT: Frontend MUST be pre-built before docker build:
-# ./build-frontend.sh
+# ./scripts/build-frontend.sh
#
# This Dockerfile expects frontend/dist/ to exist
@@ -48,13 +48,9 @@ COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/pytho
COPY --from=builder /usr/local/bin/uvicorn /usr/local/bin/
# Copy application code
-COPY --chown=appuser:appuser api ./api
-COPY --chown=appuser:appuser core ./core
-COPY --chown=appuser:appuser services ./services
-COPY --chown=appuser:appuser mcp_tools ./mcp_tools
-COPY --chown=appuser:appuser start.py start_mcp.py mcp_server.py config.py main.py ./
+COPY --chown=appuser:appuser src ./src
-# Copy pre-built frontend (MUST exist - run ./build-frontend.sh first)
+# Copy pre-built frontend (MUST exist - run ./scripts/build-frontend.sh first)
COPY --chown=appuser:appuser frontend/dist ./static
USER appuser
@@ -67,4 +63,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
CMD curl -f http://localhost:8080/api/v1/health || exit 1
# Start application (dual-port mode)
-CMD ["python", "main.py"]
+CMD ["python", "-m", "codebase_rag"]
diff --git a/docs/CNAME b/docs/CNAME
deleted file mode 100644
index 0e88ad8..0000000
--- a/docs/CNAME
+++ /dev/null
@@ -1 +0,0 @@
-code-graph.vantagecraft.dev
diff --git a/docs/api/mcp-tools.md b/docs/api/mcp-tools.md
index dac4011..eda0b2a 100644
--- a/docs/api/mcp-tools.md
+++ b/docs/api/mcp-tools.md
@@ -30,7 +30,7 @@ The MCP server provides AI assistants (like Claude Desktop, VS Code with MCP, et
```bash
# Using start script
-python start_mcp.py
+python -m codebase_rag --mcp
# Using uv (recommended)
uv run mcp_client
diff --git a/docs/api/python-sdk.md b/docs/api/python-sdk.md
index 47aaa52..d1235eb 100644
--- a/docs/api/python-sdk.md
+++ b/docs/api/python-sdk.md
@@ -106,12 +106,12 @@ OPENROUTER_MODEL=anthropic/claude-3-opus
### Import Services
```python
-from services.neo4j_knowledge_service import Neo4jKnowledgeService
-from services.memory_store import MemoryStore, memory_store
-from services.graph_service import Neo4jGraphService, graph_service
-from services.code_ingestor import CodeIngestor, get_code_ingestor
-from services.task_queue import TaskQueue, task_queue
-from config import settings
+from src.codebase_rag.services.knowledge import Neo4jKnowledgeService
+from src.codebase_rag.services.memory import MemoryStore, memory_store
+from src.codebase_rag.services.code import Neo4jGraphService, graph_service
+from src.codebase_rag.services.code import CodeIngestor, get_code_ingestor
+from src.codebase_rag.services.tasks import TaskQueue, task_queue
+from src.codebase_rag.config import settings
```
### Service Initialization Pattern
@@ -148,7 +148,7 @@ Primary service for knowledge graph operations with LlamaIndex integration.
### Initialization
```python
-from services.neo4j_knowledge_service import Neo4jKnowledgeService
+from src.codebase_rag.services.knowledge import Neo4jKnowledgeService
# Create instance
knowledge_service = Neo4jKnowledgeService()
@@ -405,7 +405,7 @@ Project memory persistence for AI agents.
### Initialization
```python
-from services.memory_store import memory_store
+from src.codebase_rag.services.memory import memory_store
# Initialize (async)
await memory_store.initialize()
@@ -627,7 +627,7 @@ Low-level Neo4j graph operations.
### Initialization
```python
-from services.graph_service import graph_service
+from src.codebase_rag.services.code import graph_service
# Connect to Neo4j
await graph_service.connect()
@@ -791,8 +791,8 @@ Repository code ingestion service.
### Initialization
```python
-from services.code_ingestor import get_code_ingestor
-from services.graph_service import graph_service
+from src.codebase_rag.services.code import get_code_ingestor
+from src.codebase_rag.services.code import graph_service
# Initialize graph service first
await graph_service.connect()
@@ -882,7 +882,7 @@ Asynchronous task queue management.
### Initialization
```python
-from services.task_queue import task_queue, TaskStatus
+from src.codebase_rag.services.tasks import task_queue, TaskStatus
# Start task queue
await task_queue.start()
@@ -921,7 +921,7 @@ async def submit_task(
**Example**:
```python
-from services.task_processors import process_document_task
+from src.codebase_rag.services.tasks import process_document_task
task_id = await task_queue.submit_task(
task_func=process_document_task,
@@ -1005,7 +1005,7 @@ def get_queue_stats() -> Dict[str, int]:
Access configuration settings.
```python
-from config import settings
+from src.codebase_rag.config import settings
# Neo4j settings
print(settings.neo4j_uri)
@@ -1034,7 +1034,7 @@ print(settings.top_k)
### Get Current Model Info
```python
-from config import get_current_model_info
+from src.codebase_rag.config import get_current_model_info
model_info = get_current_model_info()
print(f"LLM: {model_info['llm']}")
@@ -1049,7 +1049,7 @@ print(f"Embedding: {model_info['embedding']}")
```python
import asyncio
-from services.neo4j_knowledge_service import Neo4jKnowledgeService
+from src.codebase_rag.services.knowledge import Neo4jKnowledgeService
async def main():
# Initialize service
@@ -1087,7 +1087,7 @@ asyncio.run(main())
```python
import asyncio
-from services.memory_store import memory_store
+from src.codebase_rag.services.memory import memory_store
async def main():
# Initialize
@@ -1128,9 +1128,9 @@ asyncio.run(main())
```python
import asyncio
-from services.graph_service import graph_service
-from services.code_ingestor import get_code_ingestor
-from services.git_utils import git_utils
+from src.codebase_rag.services.code import graph_service
+from src.codebase_rag.services.code import get_code_ingestor
+from src.codebase_rag.services.git_utils import git_utils
async def main():
# Connect to Neo4j
@@ -1178,8 +1178,8 @@ asyncio.run(main())
```python
import asyncio
-from services.task_queue import task_queue, TaskStatus
-from services.task_processors import process_document_task
+from src.codebase_rag.services.tasks import task_queue, TaskStatus
+from src.codebase_rag.services.tasks import process_document_task
async def main():
# Start task queue
@@ -1318,7 +1318,7 @@ result = await session.run("MATCH (n) RETURN n LIMIT 10")
### 4. Set Appropriate Timeouts
```python
-from config import settings
+from src.codebase_rag.config import settings
# Adjust timeouts for large operations
settings.operation_timeout = 300 # 5 minutes
@@ -1439,7 +1439,7 @@ for item in items:
```python
# 60x faster for updates
-from services.git_utils import git_utils
+from src.codebase_rag.services.git_utils import git_utils
if git_utils.is_git_repo(repo_path):
changed_files = git_utils.get_changed_files(repo_path)
diff --git a/docs/architecture/components.md b/docs/architecture/components.md
index 1925b68..8bdafb8 100644
--- a/docs/architecture/components.md
+++ b/docs/architecture/components.md
@@ -1482,7 +1482,7 @@ Critical for avoiding circular dependencies:
```python
# 1. Configuration (no dependencies)
-from config import settings
+from src.codebase_rag.config import settings
# 2. Storage layer (no app dependencies)
neo4j_connection = Neo4jGraphStore(...)
@@ -1555,7 +1555,7 @@ class Settings(BaseSettings):
Components access configuration:
```python
-from config import settings
+from src.codebase_rag.config import settings
# Use in service
self.timeout = settings.operation_timeout
diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md
new file mode 100644
index 0000000..1c9553a
--- /dev/null
+++ b/docs/architecture/overview.md
@@ -0,0 +1,537 @@
+# Architecture Overview
+
+## Introduction
+
+Code Graph Knowledge System is a **hybrid intelligence platform** that serves both human users and AI agents through multiple interfaces. This document explains the system architecture, deployment modes, and how different components work together.
+
+## System Architecture
+
+### Dual-Server Design
+
+The system operates on **two independent ports**, each serving different purposes:
+
+```mermaid
+graph TB
+ subgraph "Port 8000 - MCP SSE Service (PRIMARY)"
+ MCP[MCP Server]
+ SSE[SSE Streaming]
+ MCP_TOOLS[25+ MCP Tools]
+ end
+
+ subgraph "Port 8080 - Web UI + REST API (SECONDARY)"
+ WEB[React Frontend]
+ REST[REST API]
+ METRICS[Prometheus Metrics]
+ end
+
+ subgraph "Shared Backend Services"
+ NEO4J[Neo4j Knowledge Store]
+ TASK[Task Queue]
+ MEMORY[Memory Store]
+ CODE[Code Graph]
+ end
+
+ AI[AI Assistants
Claude Desktop, Cursor]
+ USERS[Human Users
Developers, Admins]
+ PROGRAMS[External Systems
CI/CD, Scripts]
+
+ AI -->|stdio/SSE| MCP
+ USERS -->|Browser| WEB
+ PROGRAMS -->|HTTP| REST
+
+ MCP --> NEO4J
+ MCP --> TASK
+ MCP --> MEMORY
+ MCP --> CODE
+
+ WEB --> NEO4J
+ REST --> TASK
+ REST --> MEMORY
+ REST --> CODE
+
+ SSE -.->|Real-time updates| WEB
+
+ style MCP fill:#e1f5e1
+ style WEB fill:#e3f2fd
+ style REST fill:#fff9e6
+```
+
+### Port 8000: MCP SSE Service
+
+**Purpose**: AI assistant integration and real-time communication
+
+**Components**:
+- **MCP Protocol Server**: stdio-based communication for AI tools
+- **SSE Endpoint** (`/sse`): Server-Sent Events for real-time updates
+- **Message Endpoint** (`/messages/`): Async message handling
+
+**Primary Users**:
+- AI assistants (Claude Desktop, Cursor, etc.)
+- Development tools with MCP support
+
+**Key Features**:
+- 25+ MCP tools for code intelligence
+- Real-time task monitoring via SSE
+- Bi-directional communication with AI agents
+
+### Port 8080: Web UI + REST API
+
+**Purpose**: Human interaction and programmatic access
+
+**Components**:
+- **React Frontend**: Task monitoring, file upload, batch processing
+- **REST API** (`/api/v1/*`): Full HTTP API for all system features
+- **Prometheus Metrics** (`/metrics`): System health and performance
+
+**Primary Users**:
+- Developers (via web browser)
+- System administrators
+- External applications (via HTTP API)
+- CI/CD pipelines
+- Custom integrations
+
+**Key Features**:
+- Visual task monitoring dashboard
+- Document upload and management
+- System configuration and health monitoring
+- Programmatic API access
+
+---
+
+## Understanding the REST API
+
+### What is the REST API?
+
+The REST API provides **HTTP-based programmatic access** to all system capabilities. It allows external applications, scripts, and services to interact with the knowledge system without requiring MCP protocol support.
+
+### Why Do We Need REST API?
+
+While MCP protocol serves AI assistants, REST API enables broader integration scenarios:
+
+#### 1. **System Integration**
+Connect Code Graph with existing enterprise tools:
+
+```mermaid
+graph LR
+ A[CI/CD Pipeline
GitHub Actions] -->|POST /ingest/repo| API[REST API]
+ B[Slack Bot] -->|POST /knowledge/query| API
+ C[IDE Plugin] -->|GET /graph/related| API
+ D[Monitoring Dashboard] -->|GET /health| API
+
+ API --> SERVICES[Backend Services]
+
+ style API fill:#fff9e6
+```
+
+**Example**: Automatically analyze code on every commit:
+```yaml
+# .github/workflows/analyze.yml
+- name: Analyze Code
+ run: |
+ curl -X POST http://code-graph:8080/api/v1/ingest/repo \
+ -H "Content-Type: application/json" \
+ -d '{"local_path": ".", "mode": "incremental"}'
+```
+
+#### 2. **Custom Application Development**
+
+Build your own interfaces on top of Code Graph:
+
+```javascript
+// Internal chatbot
+async function askCodeQuestion(question) {
+ const response = await fetch('http://code-graph:8080/api/v1/knowledge/query', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ question, mode: 'hybrid' })
+ });
+ return await response.json();
+}
+```
+
+#### 3. **Automation and Scripting**
+
+Automate repetitive tasks:
+
+```python
+# Daily documentation sync script
+import httpx
+
+async def sync_docs():
+ # Upload latest docs to knowledge base
+ response = await httpx.post(
+ "http://code-graph:8080/api/v1/documents/directory",
+ json={
+ "directory_path": "/company/docs",
+ "recursive": True
+ }
+ )
+ return response.json()
+```
+
+#### 4. **Cross-Language Support**
+
+Access from any programming language:
+
+```go
+// Go client
+func searchCode(query string) ([]Result, error) {
+ resp, err := http.Post(
+ "http://code-graph:8080/api/v1/knowledge/search",
+ "application/json",
+ bytes.NewBuffer([]byte(fmt.Sprintf(`{"query":"%s"}`, query))),
+ )
+ // Parse and return results
+}
+```
+
+### REST API vs MCP Protocol
+
+| Feature | REST API | MCP Protocol |
+|---------|----------|--------------|
+| **Transport** | HTTP/HTTPS | stdio / SSE |
+| **Format** | JSON over HTTP | JSON-RPC |
+| **Client** | Any language | AI assistants |
+| **Authentication** | API keys (future) | N/A |
+| **Use Case** | System integration | AI context enhancement |
+| **Examples** | curl, Python, JS | Claude Desktop |
+
+**When to use REST API**:
+- โ
Integrating with CI/CD
+- โ
Building custom UIs
+- โ
Scripting and automation
+- โ
Cross-language access
+- โ
Webhook integrations
+
+**When to use MCP Protocol**:
+- โ
AI assistant integration
+- โ
IDE plugin development
+- โ
Real-time AI interactions
+
+---
+
+## Deployment Modes
+
+### Three Usage Scenarios
+
+The system provides three startup modes for different scenarios:
+
+#### 1. MCP Server Only (`codebase-rag-mcp`)
+
+**Purpose**: AI assistant integration
+
+**What it starts**:
+- MCP protocol server (stdio)
+- Direct communication with AI tools
+
+**Use when**:
+- Using with Claude Desktop
+- Developing MCP-compatible tools
+- AI-only workflows
+
+**Example**:
+```bash
+# Start MCP server
+codebase-rag-mcp
+
+# Configure Claude Desktop
+{
+ "mcpServers": {
+ "code-graph": {
+ "command": "codebase-rag-mcp"
+ }
+ }
+}
+```
+
+#### 2. Web Server (`codebase-rag-web`)
+
+**Purpose**: Full-featured deployment for human users and applications
+
+**What it starts**:
+- Port 8000: MCP SSE service
+- Port 8080: React frontend + REST API
+
+**Use when**:
+- Deploying for team usage
+- Need visual monitoring
+- Require programmatic access
+- Production environments
+
+**Example**:
+```bash
+# Start web server
+codebase-rag-web
+
+# Access:
+# - Web UI: http://localhost:8080
+# - REST API: http://localhost:8080/api/v1/
+# - MCP SSE: http://localhost:8000/sse
+```
+
+#### 3. Complete Service (`codebase-rag`)
+
+**Purpose**: Development and comprehensive deployment
+
+**What it starts**:
+- Everything from web server mode
+- Full system capabilities
+- All interfaces available
+
+**Use when**:
+- Local development
+- Testing all features
+- Production deployment with all services
+
+---
+
+## Component Architecture
+
+### Backend Services
+
+All backend services are shared across both ports:
+
+#### 1. **Neo4j Knowledge Store**
+- Graph database for code relationships
+- Native vector index for semantic search
+- Hybrid query engine
+
+#### 2. **Task Queue**
+- Asynchronous processing for heavy operations
+- Real-time progress tracking
+- Retry and error handling
+
+#### 3. **Memory Store**
+- Project knowledge persistence
+- Decision and preference tracking
+- Temporal knowledge management
+
+#### 4. **Code Graph Service**
+- Repository ingestion and analysis
+- Symbol relationship tracking
+- Impact analysis engine
+
+### Frontend Components
+
+#### React Web UI
+- **Task Monitor**: Real-time progress visualization
+- **Document Upload**: File and directory processing
+- **System Dashboard**: Health and statistics
+- **Configuration**: System settings management
+
+Built with:
+- **React** + **TanStack Router**: Modern SPA
+- **TanStack Query**: Data fetching and caching
+- **Tailwind CSS**: Responsive design
+- **Recharts**: Data visualization
+
+---
+
+## Data Flow
+
+### Typical Request Flows
+
+#### AI Assistant Query Flow
+
+```mermaid
+sequenceDiagram
+ participant AI as AI Assistant
+ participant MCP as MCP Server :8000
+ participant Services as Backend Services
+ participant Neo4j as Neo4j Database
+
+ AI->>MCP: MCP Tool Call
query_knowledge
+ MCP->>Services: Process Query
+ Services->>Neo4j: Graph + Vector Search
+ Neo4j-->>Services: Results
+ Services-->>MCP: Formatted Response
+ MCP-->>AI: Tool Result
+```
+
+#### REST API Request Flow
+
+```mermaid
+sequenceDiagram
+ participant Client as HTTP Client
+ participant REST as REST API :8080
+ participant Queue as Task Queue
+ participant Services as Backend Services
+ participant Neo4j as Neo4j Database
+
+ Client->>REST: POST /api/v1/ingest/repo
+ REST->>Queue: Submit Task
+ Queue-->>REST: Task ID
+ REST-->>Client: 202 Accepted
{task_id: "..."}
+
+ Queue->>Services: Process Repository
+ Services->>Neo4j: Store Code Graph
+ Neo4j-->>Services: Success
+ Services-->>Queue: Complete
+
+ Client->>REST: GET /api/v1/tasks/{task_id}
+ REST-->>Client: Task Status
{status: "SUCCESS"}
+```
+
+#### Real-time Monitoring Flow
+
+```mermaid
+sequenceDiagram
+ participant Browser as Web Browser
+ participant Frontend as React App :8080
+ participant SSE as SSE Endpoint :8000
+ participant Queue as Task Queue
+
+ Browser->>Frontend: Open Task Monitor
+ Frontend->>SSE: Connect SSE
GET /sse/tasks
+ SSE-->>Frontend: Connection Established
+
+ loop Real-time Updates
+ Queue->>SSE: Task Progress Event
+ SSE-->>Frontend: data: {...}
+ Frontend->>Browser: Update UI
+ end
+```
+
+---
+
+## Technology Stack
+
+### Backend
+- **Python 3.13+**: Core runtime
+- **FastAPI**: Web framework
+- **Neo4j 5.x**: Graph database
+- **LlamaIndex**: LLM integration framework
+- **Prometheus**: Metrics and monitoring
+
+### Frontend
+- **React 18**: UI framework
+- **TypeScript**: Type safety
+- **Bun**: Package manager and bundler
+- **TanStack Router**: Client-side routing
+- **Tailwind CSS**: Styling
+
+### Integration
+- **MCP Protocol**: AI assistant communication
+- **Server-Sent Events**: Real-time updates
+- **REST API**: HTTP-based access
+
+### Storage
+- **Neo4j**: Primary data store
+ - Document storage
+ - Vector embeddings
+ - Graph relationships
+ - Memory persistence
+
+---
+
+## Scalability Considerations
+
+### Horizontal Scaling
+
+The system supports horizontal scaling:
+
+```mermaid
+graph TB
+ LB[Load Balancer]
+
+ subgraph "Web Servers"
+ W1[Server 1:8080]
+ W2[Server 2:8080]
+ W3[Server N:8080]
+ end
+
+ subgraph "MCP Servers"
+ M1[Server 1:8000]
+ M2[Server 2:8000]
+ M3[Server N:8000]
+ end
+
+ subgraph "Shared State"
+ NEO4J[(Neo4j Cluster)]
+ REDIS[(Redis Cache)]
+ end
+
+ LB --> W1
+ LB --> W2
+ LB --> W3
+
+ LB --> M1
+ LB --> M2
+ LB --> M3
+
+ W1 --> NEO4J
+ W2 --> NEO4J
+ W3 --> NEO4J
+
+ M1 --> NEO4J
+ M2 --> NEO4J
+ M3 --> NEO4J
+
+ W1 -.-> REDIS
+ W2 -.-> REDIS
+ W3 -.-> REDIS
+```
+
+### Performance Optimization
+
+1. **Task Queue**: Offload heavy operations
+2. **Caching**: Redis for frequently accessed data
+3. **Connection Pooling**: Efficient database connections
+4. **Incremental Processing**: Only process changed files
+
+---
+
+## Security Architecture
+
+### Current Security Model
+
+**Authentication**: Currently no authentication required (development mode)
+
+**Network Security**:
+- Bind to localhost by default
+- Configurable host/port via environment variables
+
+**Data Security**:
+- No sensitive data storage by default
+- User responsible for network security
+
+### Future Enhancements
+
+Planned security features:
+
+1. **API Authentication**:
+ - JWT token authentication
+ - API key management
+ - Role-based access control (RBAC)
+
+2. **Data Encryption**:
+ - TLS/HTTPS support
+ - At-rest encryption for sensitive data
+
+3. **Audit Logging**:
+ - Request logging
+ - Access tracking
+ - Change history
+
+---
+
+## Summary
+
+Code Graph Knowledge System is a multi-interface platform that serves:
+
+1. **AI Assistants**: Via MCP protocol on port 8000
+2. **Human Users**: Via React UI on port 8080
+3. **External Systems**: Via REST API on port 8080
+
+This architecture enables:
+- โ
Flexible deployment modes
+- โ
Broad integration possibilities
+- โ
Scalable multi-user support
+- โ
Real-time monitoring and feedback
+
+Choose your deployment mode based on your needs:
+- **MCP only**: AI assistant integration
+- **Web server**: Team collaboration + API access
+- **Complete service**: Full-featured deployment
+
+For detailed API documentation, see [REST API Reference](../api/rest.md).
diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md
index c8417f9..cf21d1e 100644
--- a/docs/deployment/docker.md
+++ b/docs/deployment/docker.md
@@ -81,7 +81,7 @@ http://localhost:8080/api/v1/
curl -fsSL https://bun.sh/install | bash
# Build frontend
-./build-frontend.sh
+./scripts/build-frontend.sh
```
This pre-builds the React frontend and generates static files in `frontend/dist/`, which are then copied into the Docker image. The production image does not include Node.js, npm, or any frontend build tools (~405MB savings).
@@ -94,7 +94,7 @@ git clone https://github.com/royisme/codebase-rag.git
cd codebase-rag
# Build frontend first (REQUIRED)
-./build-frontend.sh
+./scripts/build-frontend.sh
# Build minimal
docker build -f docker/Dockerfile.minimal -t my-codebase-rag:minimal .
diff --git a/docs/development/contributing.md b/docs/development/contributing.md
index 864b6dc..74eb77d 100644
--- a/docs/development/contributing.md
+++ b/docs/development/contributing.md
@@ -188,8 +188,8 @@ from fastapi import FastAPI, HTTPException
from neo4j import GraphDatabase
# Local imports
-from services.neo4j_knowledge_service import Neo4jKnowledgeService
-from core.config import settings
+from src.codebase_rag.services.knowledge import Neo4jKnowledgeService
+from src.codebase_rag.core.config import settings
```
**Type Hints:**
diff --git a/docs/development/migration-guide.md b/docs/development/migration-guide.md
new file mode 100644
index 0000000..88cd2a5
--- /dev/null
+++ b/docs/development/migration-guide.md
@@ -0,0 +1,413 @@
+# Migration Guide: v0.7.x to v0.8.0
+
+Complete guide for migrating from the old directory structure to the new src-layout.
+
+**Release Date**: 2025-11-06
+**Breaking Changes**: Yes
+**Migration Effort**: Low (15-30 minutes)
+
+---
+
+## ๐ Summary of Changes
+
+Version 0.8.0 introduces a complete restructuring to adopt Python's standard src-layout. This brings better organization, clearer package boundaries, and follows Python best practices.
+
+### Major Changes
+
+1. **All code moved to `src/codebase_rag/`**
+2. **All old entry scripts removed**
+3. **Import paths updated**
+4. **New standardized entry points**
+5. **Backward compatibility removed**
+
+---
+
+## ๐จ Breaking Changes
+
+### 1. Entry Scripts Removed
+
+**Old** (❌ No longer works):
+```bash
+python start.py
+python start_mcp.py
+python main.py
+```
+
+**New** (✅ Use these instead):
+```bash
+# Direct module invocation
+python -m codebase_rag # Start both services
+python -m codebase_rag --web # Web only
+python -m codebase_rag --mcp # MCP only
+python -m codebase_rag --version
+
+# After installation (pip install -e .)
+codebase-rag # Main CLI
+codebase-rag-web # Web server
+codebase-rag-mcp # MCP server
+```
+
+### 2. Import Paths Changed
+
+**Old** (❌ No longer works):
+```python
+from config import settings
+from services.neo4j_knowledge_service import Neo4jKnowledgeService
+from services.memory_store import MemoryStore
+from core.app import create_app
+from api.routes import router
+from mcp_tools.utils import some_function
+```
+
+**New** (✅ Use these instead):
+```python
+from codebase_rag.config import settings
+from codebase_rag.services.knowledge import Neo4jKnowledgeService
+from codebase_rag.services.memory import MemoryStore
+from codebase_rag.core.app import create_app
+from codebase_rag.api.routes import router
+from codebase_rag.mcp.utils import some_function
+```
+
+### 3. Directory Structure Changed
+
+**Old Structure** (❌ Removed):
+```
+codebase-rag/
+├── api/                    # ❌ Deleted
+├── core/                   # ❌ Deleted
+├── services/               # ❌ Deleted
+├── mcp_tools/              # ❌ Deleted
+├── config.py               # ❌ Deleted
+├── main.py                 # ❌ Deleted
+├── start.py                # ❌ Deleted
+└── start_mcp.py            # ❌ Deleted
+```
+
+**New Structure** (✅ Current):
+```
+codebase-rag/
+├── src/
+│   └── codebase_rag/       # ✅ All code here
+│       ├── __init__.py
+│       ├── __main__.py
+│       ├── config/
+│       ├── server/
+│       ├── core/
+│       ├── api/
+│       ├── services/
+│       └── mcp/            # Renamed from mcp_tools
+├── pyproject.toml          # ✅ Updated
+├── docs/
+├── tests/
+└── ...
+```
+
+### 4. Docker Changes
+
+**Dockerfile CMD** changed:
+
+```dockerfile
+# Old
+CMD ["python", "start.py"]
+
+# New
+CMD ["python", "-m", "codebase_rag"]
+```
+
+---
+
+## ๐ Migration Steps
+
+### For End Users (Docker Deployment)
+
+If you're using Docker, **no changes needed**! Just pull the new image:
+
+```bash
+# Pull latest
+docker pull royisme/codebase-rag:latest
+
+# Or rebuild
+docker-compose down
+docker-compose pull
+docker-compose up -d
+```
+
+### For Developers (Local Development)
+
+#### Step 1: Update Repository
+
+```bash
+# Pull latest changes
+git pull origin main
+
+# Or if on a branch
+git fetch origin
+git rebase origin/main
+```
+
+#### Step 2: Reinstall Package
+
+```bash
+# Remove old installation
+pip uninstall code-graph -y
+
+# Reinstall with new structure
+pip install -e .
+
+# Or with uv
+uv pip install -e .
+```
+
+#### Step 3: Update Your Code
+
+**Update all import statements** in your custom scripts/tools:
+
+```python
+# Old imports (need to update)
+from config import settings
+from services.xxx import Yyy
+
+# New imports
+from codebase_rag.config import settings
+from codebase_rag.services.xxx import Yyy
+```
+
+**Find all files to update:**
+```bash
+# Search for old imports in your codebase
+grep -r "from config import" .
+grep -r "from services\." .
+grep -r "from core\." .
+grep -r "from api\." .
+grep -r "from mcp_tools\." .
+```
+
+#### Step 4: Update Entry Scripts
+
+If you have custom scripts that call the server:
+
+```python
+# Old
+if __name__ == "__main__":
+ from start import main
+ main()
+
+# New
+if __name__ == "__main__":
+    from codebase_rag.server.web import main
+ main()
+```
+
+Or better, use the standard module invocation:
+
+```python
+import subprocess
+subprocess.run(["python", "-m", "codebase_rag"])
+```
+
+#### Step 5: Update MCP Configurations
+
+If using MCP (Claude Desktop, Cursor, etc.):
+
+**Old** `claude_desktop_config.json`:
+```json
+{
+ "mcpServers": {
+ "codebase-rag": {
+ "command": "python",
+ "args": ["/path/to/codebase-rag/start_mcp.py"]
+ }
+ }
+}
+```
+
+**New**:
+```json
+{
+ "mcpServers": {
+ "codebase-rag": {
+ "command": "python",
+ "args": ["-m", "codebase_rag", "--mcp"],
+ "cwd": "/path/to/codebase-rag"
+ }
+ }
+}
+```
+
+Or after installation:
+```json
+{
+ "mcpServers": {
+ "codebase-rag": {
+ "command": "codebase-rag-mcp"
+ }
+ }
+}
+```
+
+---
+
+## ๐งช Testing Your Migration
+
+After migration, test all functionality:
+
+### 1. Test Import Paths
+
+```python
+# Test configuration import
+from codebase_rag.config import settings
+print(f"✅ Config: {settings.app_name}")
+
+# Test service imports
+from codebase_rag.services.knowledge import Neo4jKnowledgeService
+print("✅ Services import successful")
+```
+
+### 2. Test Entry Points
+
+```bash
+# Test version
+python -m codebase_rag --version
+# Should output: codebase-rag version 0.8.0
+
+# Test help
+python -m codebase_rag --help
+
+# Test web server (Ctrl+C to stop)
+python -m codebase_rag --web
+```
+
+### 3. Test Docker
+
+```bash
+# Build test image
+docker build -t codebase-rag:test .
+
+# Run test container
+docker run -p 8000:8000 -p 8080:8080 codebase-rag:test
+
+# Check health
+curl http://localhost:8080/api/v1/health
+```
+
+### 4. Run Tests
+
+```bash
+# Run test suite
+pytest tests/ -v
+
+# Run with coverage
+pytest tests/ --cov=src/codebase_rag --cov-report=html
+```
+
+---
+
+## ๐ Common Issues
+
+### Issue 1: ModuleNotFoundError
+
+**Error:**
+```
+ModuleNotFoundError: No module named 'config'
+```
+
+**Solution:**
+Update import to new path:
+```python
+from codebase_rag.config import settings
+```
+
+### Issue 2: start.py not found
+
+**Error:**
+```
+python: can't open file 'start.py': [Errno 2] No such file or directory
+```
+
+**Solution:**
+Use new entry point:
+```bash
+python -m codebase_rag
+```
+
+### Issue 3: Old imports in tests
+
+**Error:**
+```
+ImportError: cannot import name 'Neo4jKnowledgeService' from 'services.neo4j_knowledge_service'
+```
+
+**Solution:**
+Update test imports:
+```python
+from codebase_rag.services.knowledge import Neo4jKnowledgeService
+```
+
+### Issue 4: Docker container fails to start
+
+**Error:**
+```
+python: can't open file 'start.py'
+```
+
+**Solution:**
+Rebuild Docker image:
+```bash
+docker-compose down
+docker-compose build --no-cache
+docker-compose up -d
+```
+
+---
+
+## ๐ฏ Benefits of New Structure
+
+### 1. Standard Python Package
+
+- ✅ Follows PyPA src-layout recommendations
+- ✅ Proper package namespace (`codebase_rag`)
+- ✅ Cleaner imports
+
+### 2. Better Organization
+
+- ✅ All source code in `src/`
+- ✅ Clear separation of concerns
+- ✅ Logical service grouping
+
+### 3. Easier Development
+
+- ✅ Standard entry points (`python -m codebase_rag`)
+- ✅ Proper console scripts after installation
+- ✅ No confusion about root vs package code
+
+### 4. Improved Maintainability
+
+- ✅ No duplicate code
+- ✅ Clear module boundaries
+- ✅ Easier to navigate for new contributors
+
+---
+
+## ๐ Additional Resources
+
+- [Python Packaging Guide](https://packaging.python.org/en/latest/tutorials/packaging-projects/)
+- [src-layout vs flat-layout](https://setuptools.pypa.io/en/latest/userguide/package_discovery.html#src-layout)
+- [Development Setup](./setup.md)
+- [Python SDK Guide](../api/python-sdk.md)
+
+---
+
+## ๐ Need Help?
+
+If you encounter issues not covered in this guide:
+
+1. Check [Troubleshooting](../troubleshooting.md)
+2. Check [FAQ](../faq.md)
+3. Open an issue on GitHub
+4. Ask in Discussions
+
+---
+
+**Last Updated**: 2025-11-06
+**Next Version**: 0.9.0 (planned)
diff --git a/docs/development/setup.md b/docs/development/setup.md
index bae3e3e..ee2507c 100644
--- a/docs/development/setup.md
+++ b/docs/development/setup.md
@@ -593,7 +593,7 @@ ollama list
```bash
# Start the application
-python start.py
+python -m codebase_rag
# You should see:
# โ All service health checks passed
diff --git a/docs/development/testing.md b/docs/development/testing.md
index a2aab34..2db7287 100644
--- a/docs/development/testing.md
+++ b/docs/development/testing.md
@@ -81,7 +81,7 @@ import pytest
@pytest.mark.unit
async def test_parse_memory_type():
"""Test memory type parsing logic."""
- from services.memory_store import parse_memory_type
+    from codebase_rag.services.memory import parse_memory_type
result = parse_memory_type("decision")
assert result == "decision"
@@ -513,7 +513,7 @@ def test_with_env_vars(mocker):
'NEO4J_PASSWORD': 'testpass'
})
- from core.config import settings
+    from codebase_rag.core.config import settings
assert settings.neo4j_uri == 'bolt://test:7687'
```
@@ -678,7 +678,7 @@ and memory relationships.
import pytest
from typing import Dict, Any
-from services.memory_store import MemoryStore
+from codebase_rag.services.memory import MemoryStore
class TestMemoryStore:
diff --git a/docs/faq.md b/docs/faq.md
index e983278..959ff11 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -158,7 +158,7 @@ pip install -e .
# Follow: https://neo4j.com/docs/operations-manual/current/installation/
# Configure and run
-python start.py
+python -m codebase_rag
```
**Note**: Docker is recommended for easier setup and isolation.
@@ -396,7 +396,7 @@ OPENAI_EMBEDDING_MODEL=text-embedding-3-small
# Restart
docker-compose restart api
# or
-pkill -f start.py && python start.py
+pkill -f codebase_rag && python -m codebase_rag
```
No data migration needed - embeddings are recalculated automatically.
@@ -887,7 +887,7 @@ jobs:
```bash
# In your build.sh
python -c "
-from services.memory_store import MemoryStore
+from codebase_rag.services.memory import MemoryStore
# Auto-extract memories after build
"
```
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
index a3f42b5..c72d4cb 100644
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -119,10 +119,10 @@ cp env.example .env
nano .env
# Start MCP server
-python start_mcp.py
+python -m codebase_rag --mcp
# Or start FastAPI server
-python start.py
+python -m codebase_rag
```
## Verify Installation
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
index 5a96189..446e3e2 100644
--- a/docs/getting-started/quickstart.md
+++ b/docs/getting-started/quickstart.md
@@ -2,7 +2,25 @@
Get Code Graph Knowledge System up and running in 5 minutes!
-## ๐ฏ Choose Your Path
+## ๐ฏ Choose Your Deployment Mode
+
+Code Graph Knowledge System offers **three deployment modes** based on which features you need:
+
+| Mode | Description | Ports | LLM Required | Use Case |
+|------|-------------|-------|--------------|----------|
+| **Minimal** | Code Graph only | 7474, 7687, 8000, 8080 | ❌ No | Static code analysis, repository exploration |
+| **Standard** | Code Graph + Memory Store | 7474, 7687, 8000, 8080 | Embedding only | Project knowledge tracking, AI agent memory |
+| **Full** | All Features + Knowledge RAG | 7474, 7687, 8000, 8080 | LLM + Embedding | Complete intelligent knowledge management |
+
+!!! info "What's Running?"
+ All modes start **two servers**:
+
+ - **Port 8000**: MCP SSE Service (for AI assistants)
+ - **Port 8080**: Web UI + REST API (for humans & programs)
+
+ See [Architecture Overview](../architecture/overview.md) to understand how these work together.
+
+## ๐ Choose Your Path
=== "Minimal (Recommended)"
**Code Graph only** - No LLM required
@@ -74,6 +92,54 @@ You should see:
- โ
API running at http://localhost:8000
- โ
API docs at http://localhost:8000/docs
+## ๐ก Understanding the Interfaces
+
+After starting the services, you have **three ways** to interact with the system:
+
+### 1. REST API (Port 8080)
+
+**For**: Programmatic access, scripts, CI/CD integration
+
+```bash
+# Health check
+curl http://localhost:8080/api/v1/health
+
+# Query knowledge
+curl -X POST http://localhost:8080/api/v1/knowledge/query \
+ -H "Content-Type: application/json" \
+ -d '{"question": "How does authentication work?"}'
+```
+
+**Use cases**:
+- Automation scripts
+- CI/CD pipelines
+- Custom applications
+- Testing and monitoring
+
+[Full REST API Documentation](../api/rest.md)
+
+### 2. Web UI (Port 8080)
+
+**For**: Human users, visual monitoring
+
+Open in browser: http://localhost:8080
+
+Features:
+- ๐ Task monitoring dashboard
+- ๐ File and directory upload
+- ๐ System health and statistics
+- โ๏ธ Configuration management
+
+### 3. MCP Protocol (Port 8000)
+
+**For**: AI assistants (Claude Desktop, Cursor, etc.)
+
+Configure your AI tool to connect via MCP. The system provides 25+ tools for code intelligence.
+
+[MCP Integration Guide](../guide/mcp/overview.md)
+
+---
+
## ๐ First Steps
### 1. Access Neo4j Browser
diff --git a/docs/guide/code-graph/ingestion.md b/docs/guide/code-graph/ingestion.md
index 6f49482..db57474 100644
--- a/docs/guide/code-graph/ingestion.md
+++ b/docs/guide/code-graph/ingestion.md
@@ -863,8 +863,8 @@ ORDER BY count DESC
For complex workflows, use the Python API directly:
```python
-from services.graph_service import graph_service
-from services.code_ingestor import CodeIngestor
+from codebase_rag.services.code import graph_service
+from codebase_rag.services.code import CodeIngestor
# Initialize
await graph_service.connect()
diff --git a/docs/guide/code-graph/overview.md b/docs/guide/code-graph/overview.md
index 1ace079..91bda4c 100644
--- a/docs/guide/code-graph/overview.md
+++ b/docs/guide/code-graph/overview.md
@@ -205,10 +205,10 @@ POST /api/v1/code-graph/context-pack - Build context pack
For custom integrations, use Python services directly:
```python
-from services.graph_service import graph_service
-from services.code_ingestor import code_ingestor
-from services.ranker import ranker
-from services.pack_builder import pack_builder
+from codebase_rag.services.code import graph_service
+from codebase_rag.services.code import code_ingestor
+from codebase_rag.services.ranker import ranker
+from codebase_rag.services.code import pack_builder
```
## Deployment Modes
diff --git a/docs/guide/mcp/claude-desktop.md b/docs/guide/mcp/claude-desktop.md
index f5c687c..cbadfd7 100644
--- a/docs/guide/mcp/claude-desktop.md
+++ b/docs/guide/mcp/claude-desktop.md
@@ -33,7 +33,7 @@ You need a running instance:
docker-compose -f docker/docker-compose.full.yml up -d
# Option 2: Local development
-python start_mcp.py
+python -m codebase_rag --mcp
# Verify it's running
ps aux | grep start_mcp.py
@@ -288,7 +288,7 @@ If tools don't appear:
tail -f /path/to/codebase-rag/mcp_server.log
# Enable debug mode
-MCP_LOG_LEVEL=DEBUG python start_mcp.py
+MCP_LOG_LEVEL=DEBUG python -m codebase_rag --mcp
```
**Claude Desktop Logs**:
@@ -544,7 +544,7 @@ After tool calls:
```bash
# Test the command manually
cd /path/to/codebase-rag
- python start_mcp.py
+ python -m codebase_rag --mcp
```
4. **Review MCP server logs**:
diff --git a/docs/guide/mcp/overview.md b/docs/guide/mcp/overview.md
index 74aa91c..37a27b4 100644
--- a/docs/guide/mcp/overview.md
+++ b/docs/guide/mcp/overview.md
@@ -370,13 +370,13 @@ ENABLE_MEMORY_STORE=true
```bash
# Direct execution
-python start_mcp.py
+python -m codebase_rag --mcp
# Using uv
uv run mcp_server
# With custom config
-MCP_LOG_LEVEL=DEBUG python start_mcp.py
+MCP_LOG_LEVEL=DEBUG python -m codebase_rag --mcp
```
### Client Configuration
@@ -575,7 +575,7 @@ REQUEST_TIMEOUT=30 # seconds
tail -f mcp_server.log
# Enable debug logging
-MCP_LOG_LEVEL=DEBUG python start_mcp.py
+MCP_LOG_LEVEL=DEBUG python -m codebase_rag --mcp
```
### Tool Call Tracing
diff --git a/docs/guide/mcp/vscode.md b/docs/guide/mcp/vscode.md
index 7651a43..22e67fd 100644
--- a/docs/guide/mcp/vscode.md
+++ b/docs/guide/mcp/vscode.md
@@ -50,7 +50,7 @@ Ensure the MCP server is accessible:
```bash
# Running locally
cd /path/to/codebase-rag
-python start_mcp.py
+python -m codebase_rag --mcp
# Or via Docker
docker-compose -f docker/docker-compose.full.yml up -d
@@ -186,7 +186,7 @@ uv pip install -e .
"command": "ssh",
"args": [
"user@remote-server",
- "cd /path/to/codebase-rag && python start_mcp.py"
+ "cd /path/to/codebase-rag && python -m codebase_rag --mcp"
]
}
}
@@ -666,7 +666,7 @@ For multiple projects, use workspace folders:
2. **Verify command works**:
```bash
cd /path/to/codebase-rag
- python start_mcp.py
+ python -m codebase_rag --mcp
# Should not exit immediately
```
diff --git a/docs/guide/memory/extraction.md b/docs/guide/memory/extraction.md
index 289d6e1..9d27520 100644
--- a/docs/guide/memory/extraction.md
+++ b/docs/guide/memory/extraction.md
@@ -106,7 +106,7 @@ curl -X POST http://localhost:8000/api/v1/memory/extract/conversation \
**Python Service**:
```python
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.memory import memory_extractor
result = await memory_extractor.extract_from_conversation(
project_id="my-project",
@@ -284,7 +284,7 @@ curl -X POST http://localhost:8000/api/v1/memory/extract/commit \
**Python Service**:
```python
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.memory import memory_extractor
result = await memory_extractor.extract_from_git_commit(
project_id="my-project",
@@ -457,7 +457,7 @@ curl -X POST http://localhost:8000/api/v1/memory/extract/comments \
**Python Service**:
```python
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.memory import memory_extractor
result = await memory_extractor.extract_from_code_comments(
project_id="my-project",
@@ -637,7 +637,7 @@ curl -X POST http://localhost:8000/api/v1/memory/suggest \
**Python Service**:
```python
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.memory import memory_extractor
result = await memory_extractor.suggest_memory_from_query(
project_id="my-project",
@@ -689,8 +689,8 @@ if result['should_save']:
### Integration with Knowledge Service
```python
-from services.neo4j_knowledge_service import knowledge_service
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.knowledge import knowledge_service
+from codebase_rag.services.memory import memory_extractor
async def query_with_memory_suggestion(
project_id: str,
@@ -768,7 +768,7 @@ curl -X POST http://localhost:8000/api/v1/memory/extract/batch \
**Python Service**:
```python
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.memory import memory_extractor
result = await memory_extractor.batch_extract_from_repository(
project_id="my-project",
@@ -951,7 +951,7 @@ import subprocess
import sys
sys.path.insert(0, '/path/to/project')
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.memory import memory_extractor
async def main():
# Get commit details
@@ -1035,7 +1035,7 @@ OPENAI_API_KEY=your-key
Adjust auto-save threshold (default: 0.7):
```python
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.memory import memory_extractor
# Lower threshold (more auto-saves)
memory_extractor.confidence_threshold = 0.6
@@ -1049,7 +1049,7 @@ memory_extractor.confidence_threshold = 0.8
Adjust processing limits:
```python
-from services.memory_extractor import MemoryExtractor
+from codebase_rag.services.memory import MemoryExtractor
# Custom limits
MemoryExtractor.MAX_COMMITS_TO_PROCESS = 30
diff --git a/docs/guide/memory/manual.md b/docs/guide/memory/manual.md
index bf0ff63..3379198 100644
--- a/docs/guide/memory/manual.md
+++ b/docs/guide/memory/manual.md
@@ -69,7 +69,7 @@ curl -X POST http://localhost:8000/api/v1/memory/add \
**Python Service**:
```python
-from services.memory_store import memory_store
+from codebase_rag.services.memory import memory_store
result = await memory_store.add_memory(
project_id="my-project",
diff --git a/docs/guide/memory/overview.md b/docs/guide/memory/overview.md
index 4ca0268..45afc19 100644
--- a/docs/guide/memory/overview.md
+++ b/docs/guide/memory/overview.md
@@ -376,7 +376,7 @@ curl -X POST http://localhost:8000/api/v1/memory/search \
For Python applications:
```python
-from services.memory_store import memory_store
+from codebase_rag.services.memory import memory_store
import asyncio
async def main():
diff --git a/docs/guide/memory/search.md b/docs/guide/memory/search.md
index 85c3f1a..ec25428 100644
--- a/docs/guide/memory/search.md
+++ b/docs/guide/memory/search.md
@@ -59,7 +59,7 @@ curl -X POST http://localhost:8000/api/v1/memory/search \
**Python Service**:
```python
-from services.memory_store import memory_store
+from codebase_rag.services.memory import memory_store
result = await memory_store.search_memories(
project_id="my-project",
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index e0df1b1..6762e13 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -51,7 +51,7 @@ curl http://localhost:8000/api/v1/health
```bash
# Check all services
-python start.py --check
+python -m codebase_rag --check
# Check logs
tail -f logs/application.log
@@ -778,10 +778,10 @@ Error: MCP server failed to start
```bash
# Try starting manually
-python start_mcp.py
+python -m codebase_rag --mcp
# Check logs
-python start_mcp.py 2>&1 | tee mcp.log
+python -m codebase_rag --mcp 2>&1 | tee mcp.log
```
**Solutions:**
diff --git a/config/sky.yml b/examples/configs/sky.yml
similarity index 100%
rename from config/sky.yml
rename to examples/configs/sky.yml
diff --git a/mcp_tools/README.md b/mcp_tools/README.md
deleted file mode 100644
index 48ba31b..0000000
--- a/mcp_tools/README.md
+++ /dev/null
@@ -1,141 +0,0 @@
-# MCP Tools - Modular Structure
-
-This directory contains the modularized MCP Server v2 implementation. The code has been split from a single 1454-line file into logical, maintainable modules.
-
-## Directory Structure
-
-```
-mcp_tools/
-โโโ __init__.py # Package exports for all handlers and utilities
-โโโ tool_definitions.py # Tool definitions (495 lines)
-โโโ utils.py # Utility functions (140 lines)
-โโโ knowledge_handlers.py # Knowledge base handlers (135 lines)
-โโโ code_handlers.py # Code graph handlers (173 lines)
-โโโ memory_handlers.py # Memory store handlers (168 lines)
-โโโ task_handlers.py # Task management handlers (245 lines)
-โโโ system_handlers.py # System handlers (73 lines)
-โโโ resources.py # Resource handlers (84 lines)
-โโโ prompts.py # Prompt handlers (91 lines)
-```
-
-## Module Descriptions
-
-### `__init__.py`
-Central import point for the package. Exports all handlers, utilities, and definitions for use in the main server file.
-
-### `tool_definitions.py`
-Contains the `get_tool_definitions()` function that returns all 25 tool definitions organized by category:
-- Knowledge Base (5 tools)
-- Code Graph (4 tools)
-- Memory Store (7 tools)
-- Task Management (6 tools)
-- System (3 tools)
-
-### `utils.py`
-Contains the `format_result()` function that formats handler results for display, with specialized formatting for:
-- Query results with answers
-- Search results
-- Memory search results
-- Code graph results
-- Context packs
-- Task lists
-- Queue statistics
-
-### `knowledge_handlers.py`
-Handlers for knowledge base operations:
-- `handle_query_knowledge()` - Query using GraphRAG
-- `handle_search_similar_nodes()` - Vector similarity search
-- `handle_add_document()` - Add document (sync/async based on size)
-- `handle_add_file()` - Add single file
-- `handle_add_directory()` - Add directory (async)
-
-### `code_handlers.py`
-Handlers for code graph operations:
-- `handle_code_graph_ingest_repo()` - Ingest repository (full/incremental)
-- `handle_code_graph_related()` - Find related files
-- `handle_code_graph_impact()` - Analyze impact/dependencies
-- `handle_context_pack()` - Build context pack for AI agents
-
-### `memory_handlers.py`
-Handlers for memory store operations:
-- `handle_add_memory()` - Add new memory
-- `handle_search_memories()` - Search with filters
-- `handle_get_memory()` - Get by ID
-- `handle_update_memory()` - Update existing
-- `handle_delete_memory()` - Soft delete
-- `handle_supersede_memory()` - Replace with history
-- `handle_get_project_summary()` - Project overview
-
-### `task_handlers.py`
-Handlers for task queue operations:
-- `handle_get_task_status()` - Get single task status
-- `handle_watch_task()` - Monitor task until completion
-- `handle_watch_tasks()` - Monitor multiple tasks
-- `handle_list_tasks()` - List with filters
-- `handle_cancel_task()` - Cancel task
-- `handle_get_queue_stats()` - Queue statistics
-
-### `system_handlers.py`
-Handlers for system operations:
-- `handle_get_graph_schema()` - Get Neo4j schema
-- `handle_get_statistics()` - Get KB statistics
-- `handle_clear_knowledge_base()` - Clear all data (dangerous)
-
-### `resources.py`
-MCP resource handlers:
-- `get_resource_list()` - List available resources
-- `read_resource_content()` - Read resource content (config, status)
-
-### `prompts.py`
-MCP prompt handlers:
-- `get_prompt_list()` - List available prompts
-- `get_prompt_content()` - Get prompt content (suggest_queries)
-
-## Service Injection Pattern
-
-All handlers use dependency injection for services. Services are passed as parameters from the main server file:
-
-```python
-# Example from knowledge_handlers.py
-async def handle_query_knowledge(args: Dict, knowledge_service) -> Dict:
- result = await knowledge_service.query(
- question=args["question"],
- mode=args.get("mode", "hybrid")
- )
- return result
-
-# Called from mcp_server_v2.py
-result = await handle_query_knowledge(arguments, knowledge_service)
-```
-
-This pattern:
-- Keeps handlers testable (easy to mock services)
-- Makes dependencies explicit
-- Allows handlers to be pure functions
-- Enables better code organization
-
-## Main Server File
-
-The main `mcp_server_v2.py` (310 lines) is now much cleaner:
-- Imports all handlers from `mcp_tools`
-- Initializes services
-- Routes tool calls to appropriate handlers
-- Handles resources and prompts
-
-## Benefits of Modularization
-
-1. **Maintainability**: Each module has a single responsibility
-2. **Readability**: Easier to find and understand code
-3. **Testability**: Modules can be tested independently
-4. **Scalability**: Easy to add new handlers without cluttering main file
-5. **Reusability**: Handlers can potentially be reused in other contexts
-
-## Usage
-
-The modularization is transparent to users. The server is used exactly the same way:
-
-```bash
-python start_mcp_v2.py
-```
-
-All tools, resources, and prompts work identically to the previous implementation.
diff --git a/mcp_tools/__init__.py b/mcp_tools/__init__.py
deleted file mode 100644
index a47defd..0000000
--- a/mcp_tools/__init__.py
+++ /dev/null
@@ -1,119 +0,0 @@
-"""
-MCP Tools Package
-
-This package contains modularized handlers for MCP Server v2.
-All tool handlers, utilities, and definitions are organized into logical modules.
-"""
-
-# Knowledge base handlers
-from .knowledge_handlers import (
- handle_query_knowledge,
- handle_search_similar_nodes,
- handle_add_document,
- handle_add_file,
- handle_add_directory,
-)
-
-# Code graph handlers
-from .code_handlers import (
- handle_code_graph_ingest_repo,
- handle_code_graph_related,
- handle_code_graph_impact,
- handle_context_pack,
-)
-
-# Memory store handlers
-from .memory_handlers import (
- handle_add_memory,
- handle_search_memories,
- handle_get_memory,
- handle_update_memory,
- handle_delete_memory,
- handle_supersede_memory,
- handle_get_project_summary,
- # v0.7 Automatic extraction
- handle_extract_from_conversation,
- handle_extract_from_git_commit,
- handle_extract_from_code_comments,
- handle_suggest_memory_from_query,
- handle_batch_extract_from_repository,
-)
-
-# Task management handlers
-from .task_handlers import (
- handle_get_task_status,
- handle_watch_task,
- handle_watch_tasks,
- handle_list_tasks,
- handle_cancel_task,
- handle_get_queue_stats,
-)
-
-# System handlers
-from .system_handlers import (
- handle_get_graph_schema,
- handle_get_statistics,
- handle_clear_knowledge_base,
-)
-
-# Tool definitions
-from .tool_definitions import get_tool_definitions
-
-# Utilities
-from .utils import format_result
-
-# Resources
-from .resources import get_resource_list, read_resource_content
-
-# Prompts
-from .prompts import get_prompt_list, get_prompt_content
-
-
-__all__ = [
- # Knowledge handlers
- "handle_query_knowledge",
- "handle_search_similar_nodes",
- "handle_add_document",
- "handle_add_file",
- "handle_add_directory",
- # Code handlers
- "handle_code_graph_ingest_repo",
- "handle_code_graph_related",
- "handle_code_graph_impact",
- "handle_context_pack",
- # Memory handlers
- "handle_add_memory",
- "handle_search_memories",
- "handle_get_memory",
- "handle_update_memory",
- "handle_delete_memory",
- "handle_supersede_memory",
- "handle_get_project_summary",
- # v0.7 Extraction handlers
- "handle_extract_from_conversation",
- "handle_extract_from_git_commit",
- "handle_extract_from_code_comments",
- "handle_suggest_memory_from_query",
- "handle_batch_extract_from_repository",
- # Task handlers
- "handle_get_task_status",
- "handle_watch_task",
- "handle_watch_tasks",
- "handle_list_tasks",
- "handle_cancel_task",
- "handle_get_queue_stats",
- # System handlers
- "handle_get_graph_schema",
- "handle_get_statistics",
- "handle_clear_knowledge_base",
- # Tool definitions
- "get_tool_definitions",
- # Utilities
- "format_result",
- # Resources
- "get_resource_list",
- "read_resource_content",
- # Prompts
- "get_prompt_list",
- "get_prompt_content",
-]
diff --git a/mkdocs.yml b/mkdocs.yml
index 9bc4a4e..b88e061 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,10 +1,12 @@
site_name: Code Graph Knowledge System
-site_url: https://code-graph.vantagecraft.dev
+site_url: https://vantagecraft.dev/docs/code-graph/
site_description: AI-powered code intelligence and knowledge management system
site_author: VantageCraft
repo_url: https://github.com/royisme/codebase-rag
repo_name: codebase-rag
edit_uri: edit/main/docs/
+use_directory_urls: true
+
theme:
name: material
@@ -170,6 +172,7 @@ nav:
- MCP Tools: api/mcp-tools.md
- Python SDK: api/python-sdk.md
- Architecture:
+ - Overview: architecture/overview.md
- System Design: architecture/design.md
- Components: architecture/components.md
- Data Flow: architecture/dataflow.md
@@ -177,6 +180,7 @@ nav:
- Contributing: development/contributing.md
- Development Setup: development/setup.md
- Testing: development/testing.md
+ - Migration Guide (v0.8.0): development/migration-guide.md
- Version Management: development/version-management.md
- Changelog Automation: development/changelog-automation.md
- Release Process: development/release.md
diff --git a/pyproject.toml b/pyproject.toml
index 75a1d1a..f41e2e9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,12 +50,15 @@ dev = [
]
[project.scripts]
-server = "start:main"
-mcp_client = "start_mcp:main"
+codebase-rag = "codebase_rag.__main__:main"
+codebase-rag-web = "codebase_rag.server.web:main"
+codebase-rag-mcp = "codebase_rag.server.mcp:main"
[tool.setuptools]
-packages = ["api", "core", "services", "mcp_tools"]
-py-modules = ["start", "start_mcp", "mcp_server", "config", "main"]
+packages = {find = {where = ["src"]}}
+
+[tool.setuptools.package-data]
+codebase_rag = ["py.typed"]
[tool.pytest.ini_options]
minversion = "6.0"
@@ -73,7 +76,7 @@ asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
[tool.coverage.run]
-source = ["mcp_tools", "services", "api", "core"]
+source = ["src/codebase_rag"]
omit = [
"*/tests/*",
"*/test_*.py",
diff --git a/build-frontend.sh b/scripts/build-frontend.sh
similarity index 98%
rename from build-frontend.sh
rename to scripts/build-frontend.sh
index 42e3f44..043ab2f 100755
--- a/build-frontend.sh
+++ b/scripts/build-frontend.sh
@@ -8,7 +8,7 @@
# to the Docker image's /app/static directory.
#
# Usage:
-# ./build-frontend.sh [--clean]
+# ./scripts/build-frontend.sh [--clean]
#
# Options:
# --clean Clean node_modules and dist before building
diff --git a/scripts/bump-version.sh b/scripts/bump-version.sh
index f8453c4..491d7c0 100755
--- a/scripts/bump-version.sh
+++ b/scripts/bump-version.sh
@@ -81,11 +81,11 @@ if [[ -z "$DRY_RUN" ]]; then
echo -e "${YELLOW}This will:${NC}"
if [[ "$GENERATE_CHANGELOG" == true ]]; then
echo " 1. Generate changelog from git commits"
- echo " 2. Update version in pyproject.toml, src/__version__.py"
+ echo " 2. Update version in pyproject.toml, src/codebase_rag/__version__.py"
echo " 3. Create a git commit"
echo " 4. Create a git tag v$NEW_VERSION"
else
- echo " 1. Update version in pyproject.toml, src/__version__.py"
+ echo " 1. Update version in pyproject.toml, src/codebase_rag/__version__.py"
echo " 2. Create a git commit"
echo " 3. Create a git tag v$NEW_VERSION"
fi
diff --git a/docker-start.sh b/scripts/docker-start.sh
similarity index 97%
rename from docker-start.sh
rename to scripts/docker-start.sh
index 0930b59..24560d5 100755
--- a/docker-start.sh
+++ b/scripts/docker-start.sh
@@ -148,5 +148,5 @@ echo -e "${YELLOW}Useful commands:${NC}"
echo -e " View logs: docker compose logs -f"
echo -e " Stop services: docker compose down"
echo -e " Restart: docker compose restart"
-echo -e " Bootstrap Neo4j: docker compose exec app python -c 'from services.graph_service import graph_service; graph_service._setup_schema()'"
+echo -e " Bootstrap Neo4j: docker compose exec app python -c 'from src.codebase_rag.services.graph import graph_service; graph_service._setup_schema()'"
echo ""
diff --git a/docker-stop.sh b/scripts/docker-stop.sh
similarity index 100%
rename from docker-stop.sh
rename to scripts/docker-stop.sh
diff --git a/scripts/neo4j_bootstrap.sh b/scripts/neo4j_bootstrap.sh
index 9760ef9..64862a2 100755
--- a/scripts/neo4j_bootstrap.sh
+++ b/scripts/neo4j_bootstrap.sh
@@ -17,7 +17,7 @@ NC='\033[0m' # No Color
# Script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
-SCHEMA_FILE="$PROJECT_ROOT/services/graph/schema.cypher"
+SCHEMA_FILE="$PROJECT_ROOT/src/codebase_rag/services/graph/schema.cypher"
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Neo4j Schema Bootstrap${NC}"
@@ -152,7 +152,7 @@ if __name__ == "__main__":
user = os.getenv("NEO4J_USER", "neo4j")
password = os.getenv("NEO4J_PASSWORD", "password")
database = os.getenv("NEO4J_DATABASE", "neo4j")
- schema_file = sys.argv[1] if len(sys.argv) > 1 else "services/graph/schema.cypher"
+ schema_file = sys.argv[1] if len(sys.argv) > 1 else "src/codebase_rag/services/graph/schema.cypher"
print(f"Connecting to {uri} as {user}...")
apply_schema(uri, user, password, database, schema_file)
diff --git a/services/__init__.py b/services/__init__.py
deleted file mode 100644
index 3a86e8d..0000000
--- a/services/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Services module initialization
\ No newline at end of file
diff --git a/src/codebase_rag/__init__.py b/src/codebase_rag/__init__.py
new file mode 100644
index 0000000..115c339
--- /dev/null
+++ b/src/codebase_rag/__init__.py
@@ -0,0 +1,26 @@
+"""
+Codebase RAG - Code Knowledge Graph and RAG System.
+
+A comprehensive system for code analysis, knowledge extraction, and RAG-based querying.
+Supports MCP protocol for AI assistant integration.
+"""
+
+from codebase_rag.__version__ import (
+ __version__,
+ __version_info__,
+ get_version,
+ get_version_info,
+ get_features,
+ FEATURES,
+ DEPLOYMENT_MODES,
+)
+
+__all__ = [
+ "__version__",
+ "__version_info__",
+ "get_version",
+ "get_version_info",
+ "get_features",
+ "FEATURES",
+ "DEPLOYMENT_MODES",
+]
diff --git a/src/codebase_rag/__main__.py b/src/codebase_rag/__main__.py
new file mode 100644
index 0000000..042137a
--- /dev/null
+++ b/src/codebase_rag/__main__.py
@@ -0,0 +1,56 @@
+"""
+Main entry point for codebase-rag package.
+
+Usage:
+ python -m codebase_rag [--web|--mcp|--version]
+"""
+
+import sys
+import argparse
+
+
+def main():
+ """Main entry point for the package."""
+ parser = argparse.ArgumentParser(
+ description="Codebase RAG - Code Knowledge Graph and RAG System"
+ )
+ parser.add_argument(
+ "--version",
+ action="store_true",
+ help="Show version information",
+ )
+ parser.add_argument(
+ "--web",
+ action="store_true",
+ help="Start web server (FastAPI)",
+ )
+ parser.add_argument(
+ "--mcp",
+ action="store_true",
+ help="Start MCP server",
+ )
+
+ args = parser.parse_args()
+
+ if args.version:
+ from codebase_rag import __version__
+ print(f"codebase-rag version {__version__}")
+ return 0
+
+ if args.mcp:
+ # Run MCP server
+ print("Starting MCP server...")
+ from codebase_rag.server.mcp import main as mcp_main
+ return mcp_main()
+
+ if args.web or not any([args.web, args.mcp, args.version]):
+ # Default: start web server
+ print("Starting web server...")
+ from codebase_rag.server.web import main as web_main
+ return web_main()
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/src/__version__.py b/src/codebase_rag/__version__.py
similarity index 100%
rename from src/__version__.py
rename to src/codebase_rag/__version__.py
diff --git a/api/__init__.py b/src/codebase_rag/api/__init__.py
similarity index 100%
rename from api/__init__.py
rename to src/codebase_rag/api/__init__.py
diff --git a/api/memory_routes.py b/src/codebase_rag/api/memory_routes.py
similarity index 99%
rename from api/memory_routes.py
rename to src/codebase_rag/api/memory_routes.py
index 0445b68..ae779c4 100644
--- a/api/memory_routes.py
+++ b/src/codebase_rag/api/memory_routes.py
@@ -11,8 +11,7 @@
from pydantic import BaseModel, Field
from typing import Optional, List, Dict, Any, Literal
-from services.memory_store import memory_store
-from services.memory_extractor import memory_extractor
+from codebase_rag.services.memory import memory_store, memory_extractor
from loguru import logger
diff --git a/api/neo4j_routes.py b/src/codebase_rag/api/neo4j_routes.py
similarity index 98%
rename from api/neo4j_routes.py
rename to src/codebase_rag/api/neo4j_routes.py
index dfd011c..326c5d4 100644
--- a/api/neo4j_routes.py
+++ b/src/codebase_rag/api/neo4j_routes.py
@@ -8,7 +8,7 @@
import tempfile
import os
-from services.neo4j_knowledge_service import neo4j_knowledge_service
+from codebase_rag.services.knowledge import neo4j_knowledge_service
router = APIRouter(prefix="/neo4j-knowledge", tags=["Neo4j Knowledge Graph"])
diff --git a/api/routes.py b/src/codebase_rag/api/routes.py
similarity index 98%
rename from api/routes.py
rename to src/codebase_rag/api/routes.py
index 072acd7..2bdc710 100644
--- a/api/routes.py
+++ b/src/codebase_rag/api/routes.py
@@ -5,17 +5,12 @@
import uuid
from datetime import datetime
-from services.sql_parser import sql_analyzer
-from services.graph_service import graph_service
-from services.neo4j_knowledge_service import Neo4jKnowledgeService
-from services.universal_sql_schema_parser import parse_sql_schema_smart
-from services.task_queue import task_queue
-from services.code_ingestor import get_code_ingestor
-from services.git_utils import git_utils
-from services.ranker import ranker
-from services.pack_builder import pack_builder
-from services.metrics import metrics_service
-from config import settings
+from codebase_rag.services.sql import sql_analyzer, parse_sql_schema_smart
+from codebase_rag.services.code import graph_service, get_code_ingestor, pack_builder
+from codebase_rag.services.knowledge import Neo4jKnowledgeService
+from codebase_rag.services.tasks import task_queue
+from codebase_rag.services.utils import git_utils, ranker, metrics_service
+from codebase_rag.config import settings
from loguru import logger
# create router
diff --git a/api/sse_routes.py b/src/codebase_rag/api/sse_routes.py
similarity index 99%
rename from api/sse_routes.py
rename to src/codebase_rag/api/sse_routes.py
index 9e123ad..84c1921 100644
--- a/api/sse_routes.py
+++ b/src/codebase_rag/api/sse_routes.py
@@ -9,7 +9,7 @@
from fastapi.responses import StreamingResponse
from loguru import logger
-from services.task_queue import task_queue, TaskStatus
+from codebase_rag.services.tasks import task_queue, TaskStatus
router = APIRouter(prefix="/sse", tags=["SSE"])
diff --git a/api/task_routes.py b/src/codebase_rag/api/task_routes.py
similarity index 99%
rename from api/task_routes.py
rename to src/codebase_rag/api/task_routes.py
index 9956272..1e86e6a 100644
--- a/api/task_routes.py
+++ b/src/codebase_rag/api/task_routes.py
@@ -9,10 +9,9 @@
from pydantic import BaseModel
from datetime import datetime
-from services.task_queue import task_queue, TaskStatus
-from services.task_storage import TaskType
+from codebase_rag.services.tasks import task_queue, TaskStatus, TaskType
from loguru import logger
-from config import settings
+from codebase_rag.config import settings
router = APIRouter(prefix="/tasks", tags=["Task Management"])
diff --git a/api/websocket_routes.py b/src/codebase_rag/api/websocket_routes.py
similarity index 99%
rename from api/websocket_routes.py
rename to src/codebase_rag/api/websocket_routes.py
index 9531d47..94a80bd 100644
--- a/api/websocket_routes.py
+++ b/src/codebase_rag/api/websocket_routes.py
@@ -9,7 +9,7 @@
import json
from loguru import logger
-from services.task_queue import task_queue
+from codebase_rag.services.tasks import task_queue
router = APIRouter()
diff --git a/src/codebase_rag/config/__init__.py b/src/codebase_rag/config/__init__.py
new file mode 100644
index 0000000..188a239
--- /dev/null
+++ b/src/codebase_rag/config/__init__.py
@@ -0,0 +1,28 @@
+"""
+Configuration module for Codebase RAG.
+
+This module exports all configuration-related objects and functions.
+"""
+
+from codebase_rag.config.settings import Settings, settings
+from codebase_rag.config.validation import (
+ validate_neo4j_connection,
+ validate_ollama_connection,
+ validate_openai_connection,
+ validate_gemini_connection,
+ validate_openrouter_connection,
+ get_current_model_info,
+)
+
+__all__ = [
+ # Settings
+ "Settings",
+ "settings",
+ # Validation functions
+ "validate_neo4j_connection",
+ "validate_ollama_connection",
+ "validate_openai_connection",
+ "validate_gemini_connection",
+ "validate_openrouter_connection",
+ "get_current_model_info",
+]
diff --git a/config.py b/src/codebase_rag/config/settings.py
similarity index 58%
rename from config.py
rename to src/codebase_rag/config/settings.py
index b1625b8..ab9cf0f 100644
--- a/config.py
+++ b/src/codebase_rag/config/settings.py
@@ -1,7 +1,15 @@
+"""
+Configuration settings for Codebase RAG.
+
+This module defines all application settings using Pydantic Settings.
+Settings can be configured via environment variables or .env file.
+"""
+
from pydantic_settings import BaseSettings
from pydantic import Field
from typing import Optional, Literal
+
class Settings(BaseSettings):
# Application Settings
app_name: str = "Code Graph Knowledge Service"
@@ -19,197 +27,92 @@ class Settings(BaseSettings):
# Vector Search Settings (using Neo4j built-in vector index)
vector_index_name: str = Field(default="knowledge_vectors", description="Neo4j vector index name")
vector_dimension: int = Field(default=384, description="Vector embedding dimension")
-
+
# Neo4j Graph Database
neo4j_uri: str = Field(default="bolt://localhost:7687", description="Neo4j connection URI", alias="NEO4J_URI")
neo4j_username: str = Field(default="neo4j", description="Neo4j username", alias="NEO4J_USER")
neo4j_password: str = Field(default="password", description="Neo4j password", alias="NEO4J_PASSWORD")
neo4j_database: str = Field(default="neo4j", description="Neo4j database name")
-
+
# LLM Provider Configuration
llm_provider: Literal["ollama", "openai", "gemini", "openrouter"] = Field(
- default="ollama",
- description="LLM provider to use",
+ default="ollama",
+ description="LLM provider to use",
alias="LLM_PROVIDER"
)
-
+
# Ollama LLM Service
ollama_base_url: str = Field(default="http://localhost:11434", description="Ollama service URL", alias="OLLAMA_HOST")
ollama_model: str = Field(default="llama2", description="Ollama model name", alias="OLLAMA_MODEL")
-
+
# OpenAI Configuration
openai_api_key: Optional[str] = Field(default=None, description="OpenAI API key", alias="OPENAI_API_KEY")
openai_model: str = Field(default="gpt-3.5-turbo", description="OpenAI model name", alias="OPENAI_MODEL")
openai_base_url: Optional[str] = Field(default=None, description="OpenAI API base URL", alias="OPENAI_BASE_URL")
-
+
# Google Gemini Configuration
google_api_key: Optional[str] = Field(default=None, description="Google API key", alias="GOOGLE_API_KEY")
gemini_model: str = Field(default="gemini-pro", description="Gemini model name", alias="GEMINI_MODEL")
-
+
# OpenRouter Configuration
openrouter_api_key: Optional[str] = Field(default=None, description="OpenRouter API key", alias="OPENROUTER_API_KEY")
openrouter_base_url: str = Field(default="https://openrouter.ai/api/v1", description="OpenRouter API base URL", alias="OPENROUTER_BASE_URL")
openrouter_model: Optional[str] = Field(default="openai/gpt-3.5-turbo", description="OpenRouter model", alias="OPENROUTER_MODEL")
openrouter_max_tokens: int = Field(default=2048, description="OpenRouter max tokens for completion", alias="OPENROUTER_MAX_TOKENS")
-
+
# Embedding Provider Configuration
embedding_provider: Literal["ollama", "openai", "gemini", "huggingface", "openrouter"] = Field(
- default="ollama",
- description="Embedding provider to use",
+ default="ollama",
+ description="Embedding provider to use",
alias="EMBEDDING_PROVIDER"
)
-
+
# Ollama Embedding
ollama_embedding_model: str = Field(default="nomic-embed-text", description="Ollama embedding model", alias="OLLAMA_EMBEDDING_MODEL")
-
+
# OpenAI Embedding
openai_embedding_model: str = Field(default="text-embedding-ada-002", description="OpenAI embedding model", alias="OPENAI_EMBEDDING_MODEL")
-
+
# Gemini Embedding
gemini_embedding_model: str = Field(default="models/embedding-001", description="Gemini embedding model", alias="GEMINI_EMBEDDING_MODEL")
-
+
# HuggingFace Embedding
huggingface_embedding_model: str = Field(default="BAAI/bge-small-en-v1.5", description="HuggingFace embedding model", alias="HF_EMBEDDING_MODEL")
-
+
# OpenRouter Embedding
openrouter_embedding_model: str = Field(default="text-embedding-ada-002", description="OpenRouter embedding model", alias="OPENROUTER_EMBEDDING_MODEL")
-
+
# Model Parameters
temperature: float = Field(default=0.1, description="LLM temperature")
max_tokens: int = Field(default=2048, description="Maximum tokens for LLM response")
-
+
# RAG Settings
chunk_size: int = Field(default=512, description="Text chunk size for processing")
chunk_overlap: int = Field(default=50, description="Chunk overlap size")
top_k: int = Field(default=5, description="Top K results for retrieval")
-
+
# Timeout Settings
connection_timeout: int = Field(default=30, description="Connection timeout in seconds")
operation_timeout: int = Field(default=120, description="Operation timeout in seconds")
large_document_timeout: int = Field(default=300, description="Large document processing timeout in seconds")
-
+
# Document Processing Settings
max_document_size: int = Field(default=10 * 1024 * 1024, description="Maximum document size in bytes (10MB)")
max_payload_size: int = Field(default=50 * 1024 * 1024, description="Maximum task payload size for storage (50MB)")
-
+
# API Settings
cors_origins: list = Field(default=["*"], description="CORS allowed origins")
api_key: Optional[str] = Field(default=None, description="API authentication key")
-
+
# logging
log_file: Optional[str] = Field(default="app.log", description="Log file path")
log_level: str = Field(default="INFO", description="Log level")
-
+
class Config:
env_file = ".env"
env_file_encoding = "utf-8"
- extra = "ignore" # ๅฟฝ็ฅ้ขๅค็ๅญๆฎต๏ผ้ฟๅ
้ช่ฏ้่ฏฏ
+ extra = "ignore" # Ignore extra fields to avoid validation errors
+
# Global settings instance
settings = Settings()
-
-# Validation functions
-
-def validate_neo4j_connection():
- """Validate Neo4j connection parameters"""
- try:
- from neo4j import GraphDatabase
- driver = GraphDatabase.driver(
- settings.neo4j_uri,
- auth=(settings.neo4j_username, settings.neo4j_password)
- )
- with driver.session() as session:
- session.run("RETURN 1")
- driver.close()
- return True
- except Exception as e:
- print(f"Neo4j connection failed: {e}")
- return False
-
-def validate_ollama_connection():
- """Validate Ollama service connection"""
- try:
- import httpx
- response = httpx.get(f"{settings.ollama_base_url}/api/tags")
- return response.status_code == 200
- except Exception as e:
- print(f"Ollama connection failed: {e}")
- return False
-
-def validate_openai_connection():
- """Validate OpenAI API connection"""
- if not settings.openai_api_key:
- print("OpenAI API key not provided")
- return False
- try:
- import openai
- client = openai.OpenAI(
- api_key=settings.openai_api_key,
- base_url=settings.openai_base_url
- )
- # Test with a simple completion
- response = client.chat.completions.create(
- model=settings.openai_model,
- messages=[{"role": "user", "content": "test"}],
- max_tokens=1
- )
- return True
- except Exception as e:
- print(f"OpenAI connection failed: {e}")
- return False
-
-def validate_gemini_connection():
- """Validate Google Gemini API connection"""
- if not settings.google_api_key:
- print("Google API key not provided")
- return False
- try:
- import google.generativeai as genai
- genai.configure(api_key=settings.google_api_key)
- model = genai.GenerativeModel(settings.gemini_model)
- # Test with a simple generation
- response = model.generate_content("test")
- return True
- except Exception as e:
- print(f"Gemini connection failed: {e}")
- return False
-
-def validate_openrouter_connection():
- """Validate OpenRouter API connection"""
- if not settings.openrouter_api_key:
- print("OpenRouter API key not provided")
- return False
- try:
- import httpx
- # We'll use the models endpoint to check the connection
- headers = {
- "Authorization": f"Bearer {settings.openrouter_api_key}",
- # OpenRouter requires these headers for identification
- "HTTP-Referer": "CodeGraphKnowledgeService",
- "X-Title": "CodeGraph Knowledge Service"
- }
- response = httpx.get("https://openrouter.ai/api/v1/models", headers=headers)
- return response.status_code == 200
- except Exception as e:
- print(f"OpenRouter connection failed: {e}")
- return False
-
-def get_current_model_info():
- """Get information about currently configured models"""
- return {
- "llm_provider": settings.llm_provider,
- "llm_model": {
- "ollama": settings.ollama_model,
- "openai": settings.openai_model,
- "gemini": settings.gemini_model,
- "openrouter": settings.openrouter_model
- }.get(settings.llm_provider),
- "embedding_provider": settings.embedding_provider,
- "embedding_model": {
- "ollama": settings.ollama_embedding_model,
- "openai": settings.openai_embedding_model,
- "gemini": settings.gemini_embedding_model,
- "huggingface": settings.huggingface_embedding_model,
- "openrouter": settings.openrouter_embedding_model
- }.get(settings.embedding_provider)
- }
diff --git a/src/codebase_rag/config/validation.py b/src/codebase_rag/config/validation.py
new file mode 100644
index 0000000..087bec1
--- /dev/null
+++ b/src/codebase_rag/config/validation.py
@@ -0,0 +1,118 @@
+"""
+Validation functions for configuration settings.
+
+This module provides functions to validate connections to various services
+like Neo4j, Ollama, OpenAI, Gemini, and OpenRouter.
+"""
+
+from codebase_rag.config.settings import settings
+
+
+def validate_neo4j_connection() -> bool:
+ """Validate Neo4j connection parameters"""
+ try:
+ from neo4j import GraphDatabase
+ driver = GraphDatabase.driver(
+ settings.neo4j_uri,
+ auth=(settings.neo4j_username, settings.neo4j_password)
+ )
+ with driver.session() as session:
+ session.run("RETURN 1")
+ driver.close()
+ return True
+ except Exception as e:
+ print(f"Neo4j connection failed: {e}")
+ return False
+
+
+def validate_ollama_connection() -> bool:
+ """Validate Ollama service connection"""
+ try:
+ import httpx
+ response = httpx.get(f"{settings.ollama_base_url}/api/tags")
+ return response.status_code == 200
+ except Exception as e:
+ print(f"Ollama connection failed: {e}")
+ return False
+
+
+def validate_openai_connection() -> bool:
+ """Validate OpenAI API connection"""
+ if not settings.openai_api_key:
+ print("OpenAI API key not provided")
+ return False
+ try:
+ import openai
+ client = openai.OpenAI(
+ api_key=settings.openai_api_key,
+ base_url=settings.openai_base_url
+ )
+ # Test with a simple completion
+ response = client.chat.completions.create(
+ model=settings.openai_model,
+ messages=[{"role": "user", "content": "test"}],
+ max_tokens=1
+ )
+ return True
+ except Exception as e:
+ print(f"OpenAI connection failed: {e}")
+ return False
+
+
+def validate_gemini_connection() -> bool:
+ """Validate Google Gemini API connection"""
+ if not settings.google_api_key:
+ print("Google API key not provided")
+ return False
+ try:
+ import google.generativeai as genai
+ genai.configure(api_key=settings.google_api_key)
+ model = genai.GenerativeModel(settings.gemini_model)
+ # Test with a simple generation
+ response = model.generate_content("test")
+ return True
+ except Exception as e:
+ print(f"Gemini connection failed: {e}")
+ return False
+
+
+def validate_openrouter_connection() -> bool:
+ """Validate OpenRouter API connection"""
+ if not settings.openrouter_api_key:
+ print("OpenRouter API key not provided")
+ return False
+ try:
+ import httpx
+ # We'll use the models endpoint to check the connection
+ headers = {
+ "Authorization": f"Bearer {settings.openrouter_api_key}",
+ # OpenRouter requires these headers for identification
+ "HTTP-Referer": "CodeGraphKnowledgeService",
+ "X-Title": "CodeGraph Knowledge Service"
+ }
+ response = httpx.get("https://openrouter.ai/api/v1/models", headers=headers)
+ return response.status_code == 200
+ except Exception as e:
+ print(f"OpenRouter connection failed: {e}")
+ return False
+
+
+def get_current_model_info() -> dict:
+ """Get information about currently configured models"""
+ return {
+ "llm_provider": settings.llm_provider,
+ "llm_model": {
+ "ollama": settings.ollama_model,
+ "openai": settings.openai_model,
+ "gemini": settings.gemini_model,
+ "openrouter": settings.openrouter_model
+ }.get(settings.llm_provider),
+ "embedding_provider": settings.embedding_provider,
+ "embedding_model": {
+ "ollama": settings.ollama_embedding_model,
+ "openai": settings.openai_embedding_model,
+ "gemini": settings.gemini_embedding_model,
+ "huggingface": settings.huggingface_embedding_model,
+ "openrouter": settings.openrouter_embedding_model
+ }.get(settings.embedding_provider)
+ }
diff --git a/core/__init__.py b/src/codebase_rag/core/__init__.py
similarity index 100%
rename from core/__init__.py
rename to src/codebase_rag/core/__init__.py
diff --git a/core/app.py b/src/codebase_rag/core/app.py
similarity index 99%
rename from core/app.py
rename to src/codebase_rag/core/app.py
index 82475ac..2e4cc75 100644
--- a/core/app.py
+++ b/src/codebase_rag/core/app.py
@@ -15,7 +15,7 @@
from loguru import logger
import os
-from config import settings
+from codebase_rag.config import settings
from .exception_handlers import setup_exception_handlers
from .middleware import setup_middleware
from .routes import setup_routes
diff --git a/core/exception_handlers.py b/src/codebase_rag/core/exception_handlers.py
similarity index 96%
rename from core/exception_handlers.py
rename to src/codebase_rag/core/exception_handlers.py
index 97aa766..80c4d67 100644
--- a/core/exception_handlers.py
+++ b/src/codebase_rag/core/exception_handlers.py
@@ -6,7 +6,7 @@
from fastapi.responses import JSONResponse
from loguru import logger
-from config import settings
+from codebase_rag.config import settings
def setup_exception_handlers(app: FastAPI) -> None:
diff --git a/core/lifespan.py b/src/codebase_rag/core/lifespan.py
similarity index 90%
rename from core/lifespan.py
rename to src/codebase_rag/core/lifespan.py
index 0a35c49..cf81b1d 100644
--- a/core/lifespan.py
+++ b/src/codebase_rag/core/lifespan.py
@@ -6,10 +6,9 @@
from fastapi import FastAPI
from loguru import logger
-from services.neo4j_knowledge_service import neo4j_knowledge_service
-from services.task_queue import task_queue
-from services.task_processors import processor_registry
-from services.memory_store import memory_store
+from codebase_rag.services.knowledge import neo4j_knowledge_service
+from codebase_rag.services.tasks import task_queue, processor_registry
+from codebase_rag.services.memory import memory_store
@asynccontextmanager
diff --git a/core/logging.py b/src/codebase_rag/core/logging.py
similarity index 96%
rename from core/logging.py
rename to src/codebase_rag/core/logging.py
index 5725a9b..104a6e3 100644
--- a/core/logging.py
+++ b/src/codebase_rag/core/logging.py
@@ -5,7 +5,7 @@
import sys
from loguru import logger
-from config import settings
+from codebase_rag.config import settings
def setup_logging():
diff --git a/core/mcp_sse.py b/src/codebase_rag/core/mcp_sse.py
similarity index 100%
rename from core/mcp_sse.py
rename to src/codebase_rag/core/mcp_sse.py
diff --git a/core/middleware.py b/src/codebase_rag/core/middleware.py
similarity index 93%
rename from core/middleware.py
rename to src/codebase_rag/core/middleware.py
index 7c921e1..c6cc80d 100644
--- a/core/middleware.py
+++ b/src/codebase_rag/core/middleware.py
@@ -6,7 +6,7 @@
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
-from config import settings
+from codebase_rag.config import settings
def setup_middleware(app: FastAPI) -> None:
diff --git a/core/routes.py b/src/codebase_rag/core/routes.py
similarity index 59%
rename from core/routes.py
rename to src/codebase_rag/core/routes.py
index 6818e04..373c3f0 100644
--- a/core/routes.py
+++ b/src/codebase_rag/core/routes.py
@@ -4,12 +4,12 @@
from fastapi import FastAPI
-from api.routes import router
-from api.neo4j_routes import router as neo4j_router
-from api.task_routes import router as task_router
-from api.websocket_routes import router as ws_router
-from api.sse_routes import router as sse_router
-from api.memory_routes import router as memory_router
+from codebase_rag.api.routes import router
+from codebase_rag.api.neo4j_routes import router as neo4j_router
+from codebase_rag.api.task_routes import router as task_router
+from codebase_rag.api.websocket_routes import router as ws_router
+from codebase_rag.api.sse_routes import router as sse_router
+from codebase_rag.api.memory_routes import router as memory_router
def setup_routes(app: FastAPI) -> None:
diff --git a/src/codebase_rag/mcp/__init__.py b/src/codebase_rag/mcp/__init__.py
new file mode 100644
index 0000000..55814f3
--- /dev/null
+++ b/src/codebase_rag/mcp/__init__.py
@@ -0,0 +1,9 @@
+"""
+MCP (Model Context Protocol) implementation for Codebase RAG.
+
+This module provides the MCP server and handlers for AI assistant integration.
+"""
+
+from codebase_rag.mcp import handlers, tools, resources, prompts, utils
+
+__all__ = ["handlers", "tools", "resources", "prompts", "utils"]
diff --git a/src/codebase_rag/mcp/handlers/__init__.py b/src/codebase_rag/mcp/handlers/__init__.py
new file mode 100644
index 0000000..914b688
--- /dev/null
+++ b/src/codebase_rag/mcp/handlers/__init__.py
@@ -0,0 +1,11 @@
+"""MCP request handlers."""
+
+from codebase_rag.mcp.handlers import (
+ knowledge,
+ code,
+ memory,
+ tasks,
+ system,
+)
+
+__all__ = ["knowledge", "code", "memory", "tasks", "system"]
diff --git a/mcp_tools/code_handlers.py b/src/codebase_rag/mcp/handlers/code.py
similarity index 100%
rename from mcp_tools/code_handlers.py
rename to src/codebase_rag/mcp/handlers/code.py
diff --git a/mcp_tools/knowledge_handlers.py b/src/codebase_rag/mcp/handlers/knowledge.py
similarity index 100%
rename from mcp_tools/knowledge_handlers.py
rename to src/codebase_rag/mcp/handlers/knowledge.py
diff --git a/mcp_tools/memory_handlers.py b/src/codebase_rag/mcp/handlers/memory.py
similarity index 100%
rename from mcp_tools/memory_handlers.py
rename to src/codebase_rag/mcp/handlers/memory.py
diff --git a/mcp_tools/system_handlers.py b/src/codebase_rag/mcp/handlers/system.py
similarity index 100%
rename from mcp_tools/system_handlers.py
rename to src/codebase_rag/mcp/handlers/system.py
diff --git a/mcp_tools/task_handlers.py b/src/codebase_rag/mcp/handlers/tasks.py
similarity index 100%
rename from mcp_tools/task_handlers.py
rename to src/codebase_rag/mcp/handlers/tasks.py
diff --git a/mcp_tools/prompts.py b/src/codebase_rag/mcp/prompts.py
similarity index 100%
rename from mcp_tools/prompts.py
rename to src/codebase_rag/mcp/prompts.py
diff --git a/mcp_tools/resources.py b/src/codebase_rag/mcp/resources.py
similarity index 100%
rename from mcp_tools/resources.py
rename to src/codebase_rag/mcp/resources.py
diff --git a/mcp_server.py b/src/codebase_rag/mcp/server.py
similarity index 83%
rename from mcp_server.py
rename to src/codebase_rag/mcp/server.py
index ea4e6c1..7f8f6c0 100644
--- a/mcp_server.py
+++ b/src/codebase_rag/mcp/server.py
@@ -39,17 +39,17 @@
from loguru import logger
# Import services
-from services.neo4j_knowledge_service import Neo4jKnowledgeService
-from services.memory_store import memory_store
-from services.memory_extractor import memory_extractor
-from services.task_queue import task_queue, TaskStatus, submit_document_processing_task, submit_directory_processing_task
-from services.task_processors import processor_registry
-from services.graph_service import graph_service
-from services.code_ingestor import get_code_ingestor
-from services.ranker import ranker
-from services.pack_builder import pack_builder
-from services.git_utils import git_utils
-from config import settings, get_current_model_info
+from codebase_rag.services.knowledge import Neo4jKnowledgeService
+from codebase_rag.services.memory import memory_store
+from codebase_rag.services.memory import memory_extractor
+from codebase_rag.services.tasks import task_queue, TaskStatus, submit_document_processing_task, submit_directory_processing_task
+from codebase_rag.services.tasks import processor_registry
+from codebase_rag.services.code import graph_service
+from codebase_rag.services.code import get_code_ingestor
+from codebase_rag.services.utils import ranker
+from codebase_rag.services.code import pack_builder
+from codebase_rag.services.utils import git_utils
+from codebase_rag.config import settings, get_current_model_info
# Import MCP tools modules
from mcp_tools import (
@@ -366,84 +366,9 @@ async def main():
notification_options=None,
experimental_capabilities={}
)
-
- if search_results:
- ranked = ranker.rank_files(
- files=search_results,
- query=keyword,
- limit=10
- )
-
- for file in ranked:
- all_nodes.append({
- "type": "file",
- "path": file["path"],
- "lang": file["lang"],
- "score": file["score"],
- "ref": ranker.generate_ref_handle(path=file["path"])
- })
-
- # Add focus files with high priority
- if focus_list:
- for focus_path in focus_list:
- all_nodes.append({
- "type": "file",
- "path": focus_path,
- "lang": "unknown",
- "score": 10.0, # High priority
- "ref": ranker.generate_ref_handle(path=focus_path)
- })
-
- # Build context pack
- if ctx:
- await ctx.info(f"Packing {len(all_nodes)} candidate files into context...")
-
- context_result = pack_builder.build_context_pack(
- nodes=all_nodes,
- budget=budget,
- stage=stage,
- repo_id=repo_id,
- file_limit=8,
- symbol_limit=12,
- enable_deduplication=True
+ )
)
- # Format items
- items = []
- for item in context_result.get("items", []):
- items.append({
- "kind": item.get("kind", "file"),
- "title": item.get("title", "Unknown"),
- "summary": item.get("summary", ""),
- "ref": item.get("ref", ""),
- "extra": {
- "lang": item.get("extra", {}).get("lang"),
- "score": item.get("extra", {}).get("score", 0.0)
- }
- })
-
- if ctx:
- await ctx.info(f"Context pack built: {len(items)} items, {context_result.get('budget_used', 0)} tokens")
-
- return {
- "success": True,
- "items": items,
- "budget_used": context_result.get("budget_used", 0),
- "budget_limit": budget,
- "stage": stage,
- "repo_id": repo_id,
- "category_counts": context_result.get("category_counts", {})
- }
-
- except Exception as e:
- error_msg = f"Context pack generation failed: {str(e)}"
- logger.error(error_msg)
- if ctx:
- await ctx.error(error_msg)
- return {
- "success": False,
- "error": error_msg
- }
# ===================================
# MCP Resources
diff --git a/mcp_tools/tool_definitions.py b/src/codebase_rag/mcp/tools.py
similarity index 100%
rename from mcp_tools/tool_definitions.py
rename to src/codebase_rag/mcp/tools.py
diff --git a/mcp_tools/utils.py b/src/codebase_rag/mcp/utils.py
similarity index 100%
rename from mcp_tools/utils.py
rename to src/codebase_rag/mcp/utils.py
diff --git a/src/codebase_rag/server/__init__.py b/src/codebase_rag/server/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/codebase_rag/server/cli.py b/src/codebase_rag/server/cli.py
new file mode 100644
index 0000000..639b0ed
--- /dev/null
+++ b/src/codebase_rag/server/cli.py
@@ -0,0 +1,87 @@
+"""
+CLI utilities and helper functions for Codebase RAG servers.
+"""
+
+import sys
+import time
+from pathlib import Path
+from loguru import logger
+
+from codebase_rag.config import (
+ settings,
+ validate_neo4j_connection,
+ validate_ollama_connection,
+ validate_openrouter_connection,
+ get_current_model_info,
+)
+
+
+def check_dependencies():
+ """Check service dependencies"""
+ logger.info("Checking service dependencies...")
+
+ checks = [
+ ("Neo4j", validate_neo4j_connection),
+ ]
+
+ # Conditionally add Ollama if it is the selected LLM or embedding provider
+ if settings.llm_provider == "ollama" or settings.embedding_provider == "ollama":
+ checks.append(("Ollama", validate_ollama_connection))
+
+ # Conditionally add OpenRouter if it is the selected LLM or embedding provider
+ if settings.llm_provider == "openrouter" or settings.embedding_provider == "openrouter":
+ checks.append(("OpenRouter", validate_openrouter_connection))
+
+ all_passed = True
+ for service_name, check_func in checks:
+ try:
+ if check_func():
+ logger.info(f"✅ {service_name} connection successful")
+ else:
+ logger.error(f"❌ {service_name} connection failed")
+ all_passed = False
+ except Exception as e:
+ logger.error(f"❌ {service_name} check error: {e}")
+ all_passed = False
+
+ return all_passed
+
+
+def wait_for_services(max_retries=30, retry_interval=2):
+ """Wait for services to start"""
+ logger.info("Waiting for services to start...")
+
+ for attempt in range(1, max_retries + 1):
+ logger.info(f"Attempt {attempt}/{max_retries}...")
+
+ if check_dependencies():
+ logger.info("All services are ready!")
+ return True
+
+ if attempt < max_retries:
+ logger.info(f"Waiting {retry_interval} seconds before retry...")
+ time.sleep(retry_interval)
+
+ logger.error("Service startup timeout!")
+ return False
+
+
+def print_startup_info():
+ """Print startup information"""
+ print("\n" + "="*60)
+ print("Code Graph Knowledge Service")
+ print("="*60)
+ print(f"Version: {settings.app_version}")
+ print(f"Host: {settings.host}:{settings.port}")
+ print(f"Debug mode: {settings.debug}")
+ print()
+ print("Service configuration:")
+ print(f" Neo4j: {settings.neo4j_uri}")
+ print(f" Ollama: {settings.ollama_base_url}")
+ print()
+ model_info = get_current_model_info()
+ print("Model configuration:")
+ print(f" LLM: {model_info['llm_model']}")
+ print(f" Embedding: {model_info['embedding_model']}")
+ print("="*60)
+ print()
diff --git a/src/codebase_rag/server/mcp.py b/src/codebase_rag/server/mcp.py
new file mode 100644
index 0000000..16a1ba0
--- /dev/null
+++ b/src/codebase_rag/server/mcp.py
@@ -0,0 +1,45 @@
+"""
+MCP Server entry point for Codebase RAG.
+
+This module provides the MCP (Model Context Protocol) server implementation.
+"""
+
+import asyncio
+import sys
+from pathlib import Path
+from loguru import logger
+
+# Configure logging
+logger.remove() # Remove default handler
+logger.add(
+ sys.stderr,
+ level="INFO",
+ format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {message}"
+)
+
+
+def main():
+ """Main entry point for MCP server"""
+ try:
+ logger.info("=" * 70)
+ logger.info("MCP Server - Codebase RAG")
+ logger.info("=" * 70)
+ logger.info(f"Python: {sys.version}")
+ logger.info(f"Working directory: {Path.cwd()}")
+
+ # Import and run the server from mcp/server.py
+ from codebase_rag.mcp.server import main as server_main
+
+ logger.info("Starting MCP server...")
+ asyncio.run(server_main())
+
+ except KeyboardInterrupt:
+ logger.info("\nServer stopped by user")
+ sys.exit(0)
+ except Exception as e:
+ logger.exception(f"Server failed to start: {e}")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/main.py b/src/codebase_rag/server/web.py
similarity index 87%
rename from main.py
rename to src/codebase_rag/server/web.py
index f3e489e..a1726bc 100644
--- a/main.py
+++ b/src/codebase_rag/server/web.py
@@ -1,5 +1,5 @@
"""
-ไธปๅบ็จๅ
ฅๅฃๆไปถ
+Web server entry point for Codebase RAG.
ARCHITECTURE (Two-Port Setup):
- Port 8000: MCP SSE Service (PRIMARY)
@@ -11,10 +11,10 @@
from loguru import logger
from multiprocessing import Process
-from config import settings
-from core.app import create_app
-from core.logging import setup_logging
-from core.mcp_sse import create_mcp_sse_app
+from codebase_rag.config import settings
+from codebase_rag.core.app import create_app
+from codebase_rag.core.logging import setup_logging
+from codebase_rag.core.mcp_sse import create_mcp_sse_app
# setup logging
setup_logging()
@@ -23,13 +23,13 @@
app = create_app() # Web UI + REST API
mcp_app = create_mcp_sse_app() # MCP SSE
-# start server (legacy - single port)
+
def start_server_legacy():
"""start server (legacy mode - all services on one port)"""
logger.info(f"Starting server on {settings.host}:{settings.port}")
uvicorn.run(
- "main:app",
+ "codebase_rag.server.web:app",
host=settings.host,
port=settings.port,
reload=settings.debug,
@@ -37,7 +37,7 @@ def start_server_legacy():
access_log=settings.debug
)
-# start MCP SSE server
+
def start_mcp_server():
"""Start MCP SSE server"""
logger.info("="*70)
@@ -48,14 +48,14 @@ def start_mcp_server():
logger.info("="*70)
uvicorn.run(
- "main:mcp_app",
+ "codebase_rag.server.web:mcp_app",
host=settings.host,
port=settings.mcp_port, # From config: MCP_PORT (default 8000)
log_level="info" if not settings.debug else "debug",
access_log=False # Reduce noise
)
-# start Web UI + REST API server
+
def start_web_server():
"""Start Web UI + REST API server"""
logger.info("="*70)
@@ -67,7 +67,7 @@ def start_web_server():
logger.info("="*70)
uvicorn.run(
- "main:app",
+ "codebase_rag.server.web:app",
host=settings.host,
port=settings.web_ui_port, # From config: WEB_UI_PORT (default 8080)
reload=settings.debug,
@@ -75,6 +75,7 @@ def start_web_server():
access_log=settings.debug
)
+
def start_server():
"""Start both servers (two-port mode)"""
logger.info("\n" + "="*70)
@@ -110,5 +111,11 @@ def start_server():
web_process.join()
logger.info("Servers stopped")
+
+def main():
+ """Main entry point for web server"""
+ start_server()
+
+
if __name__ == "__main__":
- start_server()
\ No newline at end of file
+ main()
diff --git a/src/codebase_rag/services/__init__.py b/src/codebase_rag/services/__init__.py
new file mode 100644
index 0000000..297bcf6
--- /dev/null
+++ b/src/codebase_rag/services/__init__.py
@@ -0,0 +1,31 @@
+"""
+Services module for Codebase RAG.
+
+This module provides all business logic services organized into logical subpackages:
+- knowledge: Neo4j knowledge graph services
+- memory: Conversation memory and extraction
+- code: Code analysis and ingestion
+- sql: SQL parsing and schema analysis
+- tasks: Task queue and processing
+- utils: Utility functions (git, ranking, metrics)
+- pipeline: Data processing pipeline
+- graph: Graph schema and utilities
+
+Note: Subpackages are not eagerly imported to avoid triggering heavy dependencies.
+Import specific services from their subpackages as needed:
+ from codebase_rag.services.code import Neo4jGraphService
+ from codebase_rag.services.knowledge import Neo4jKnowledgeService
+ from codebase_rag.services.memory import MemoryStore
+"""
+
+# Declare subpackages without eager importing to avoid dependency issues
+__all__ = [
+ "knowledge",
+ "memory",
+ "code",
+ "sql",
+ "tasks",
+ "utils",
+ "pipeline",
+ "graph",
+]
diff --git a/src/codebase_rag/services/code/__init__.py b/src/codebase_rag/services/code/__init__.py
new file mode 100644
index 0000000..ca08fed
--- /dev/null
+++ b/src/codebase_rag/services/code/__init__.py
@@ -0,0 +1,7 @@
+"""Code analysis and ingestion services."""
+
+from codebase_rag.services.code.code_ingestor import CodeIngestor, get_code_ingestor
+from codebase_rag.services.code.graph_service import Neo4jGraphService, graph_service
+from codebase_rag.services.code.pack_builder import PackBuilder, pack_builder
+
+__all__ = ["CodeIngestor", "get_code_ingestor", "Neo4jGraphService", "PackBuilder", "graph_service", "pack_builder"]
diff --git a/services/code_ingestor.py b/src/codebase_rag/services/code/code_ingestor.py
similarity index 100%
rename from services/code_ingestor.py
rename to src/codebase_rag/services/code/code_ingestor.py
diff --git a/services/graph_service.py b/src/codebase_rag/services/code/graph_service.py
similarity index 99%
rename from services/graph_service.py
rename to src/codebase_rag/services/code/graph_service.py
index afb8971..8341d45 100644
--- a/services/graph_service.py
+++ b/src/codebase_rag/services/code/graph_service.py
@@ -2,7 +2,7 @@
from typing import List, Dict, Optional, Any, Union
from pydantic import BaseModel
from loguru import logger
-from config import settings
+from codebase_rag.config import settings
import json
class GraphNode(BaseModel):
diff --git a/services/pack_builder.py b/src/codebase_rag/services/code/pack_builder.py
similarity index 100%
rename from services/pack_builder.py
rename to src/codebase_rag/services/code/pack_builder.py
diff --git a/src/codebase_rag/services/graph/__init__.py b/src/codebase_rag/services/graph/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/services/graph/schema.cypher b/src/codebase_rag/services/graph/schema.cypher
similarity index 100%
rename from services/graph/schema.cypher
rename to src/codebase_rag/services/graph/schema.cypher
diff --git a/src/codebase_rag/services/knowledge/__init__.py b/src/codebase_rag/services/knowledge/__init__.py
new file mode 100644
index 0000000..c1a909e
--- /dev/null
+++ b/src/codebase_rag/services/knowledge/__init__.py
@@ -0,0 +1,8 @@
+"""Knowledge services for Neo4j-based knowledge graph."""
+
+from codebase_rag.services.knowledge.neo4j_knowledge_service import (
+ Neo4jKnowledgeService,
+ neo4j_knowledge_service,
+)
+
+__all__ = ["Neo4jKnowledgeService", "neo4j_knowledge_service"]
diff --git a/services/neo4j_knowledge_service.py b/src/codebase_rag/services/knowledge/neo4j_knowledge_service.py
similarity index 99%
rename from services/neo4j_knowledge_service.py
rename to src/codebase_rag/services/knowledge/neo4j_knowledge_service.py
index 301f0b3..31184f6 100644
--- a/services/neo4j_knowledge_service.py
+++ b/src/codebase_rag/services/knowledge/neo4j_knowledge_service.py
@@ -36,7 +36,7 @@
# Core components
from llama_index.core.node_parser import SimpleNodeParser
-from config import settings
+from codebase_rag.config import settings
class Neo4jKnowledgeService:
"""knowledge graph service based on Neo4j's native vector index"""
diff --git a/src/codebase_rag/services/memory/__init__.py b/src/codebase_rag/services/memory/__init__.py
new file mode 100644
index 0000000..c213027
--- /dev/null
+++ b/src/codebase_rag/services/memory/__init__.py
@@ -0,0 +1,6 @@
+"""Memory services for conversation memory and extraction."""
+
+from codebase_rag.services.memory.memory_store import MemoryStore, memory_store
+from codebase_rag.services.memory.memory_extractor import MemoryExtractor, memory_extractor
+
+__all__ = ["MemoryStore", "MemoryExtractor", "memory_store", "memory_extractor"]
diff --git a/services/memory_extractor.py b/src/codebase_rag/services/memory/memory_extractor.py
similarity index 99%
rename from services/memory_extractor.py
rename to src/codebase_rag/services/memory/memory_extractor.py
index 1423268..a3e5efb 100644
--- a/services/memory_extractor.py
+++ b/src/codebase_rag/services/memory/memory_extractor.py
@@ -20,7 +20,7 @@
from llama_index.core import Settings
from loguru import logger
-from services.memory_store import memory_store
+from .memory_store import memory_store
class MemoryExtractor:
diff --git a/services/memory_store.py b/src/codebase_rag/services/memory/memory_store.py
similarity index 99%
rename from services/memory_store.py
rename to src/codebase_rag/services/memory/memory_store.py
index 9638aff..1c0ac02 100644
--- a/services/memory_store.py
+++ b/src/codebase_rag/services/memory/memory_store.py
@@ -18,7 +18,7 @@
from loguru import logger
from neo4j import AsyncGraphDatabase
-from config import settings
+from codebase_rag.config import settings
class MemoryStore:
diff --git a/services/pipeline/__init__.py b/src/codebase_rag/services/pipeline/__init__.py
similarity index 100%
rename from services/pipeline/__init__.py
rename to src/codebase_rag/services/pipeline/__init__.py
diff --git a/services/pipeline/base.py b/src/codebase_rag/services/pipeline/base.py
similarity index 100%
rename from services/pipeline/base.py
rename to src/codebase_rag/services/pipeline/base.py
diff --git a/services/pipeline/embeddings.py b/src/codebase_rag/services/pipeline/embeddings.py
similarity index 100%
rename from services/pipeline/embeddings.py
rename to src/codebase_rag/services/pipeline/embeddings.py
diff --git a/services/pipeline/loaders.py b/src/codebase_rag/services/pipeline/loaders.py
similarity index 100%
rename from services/pipeline/loaders.py
rename to src/codebase_rag/services/pipeline/loaders.py
diff --git a/services/pipeline/pipeline.py b/src/codebase_rag/services/pipeline/pipeline.py
similarity index 100%
rename from services/pipeline/pipeline.py
rename to src/codebase_rag/services/pipeline/pipeline.py
diff --git a/services/pipeline/storers.py b/src/codebase_rag/services/pipeline/storers.py
similarity index 100%
rename from services/pipeline/storers.py
rename to src/codebase_rag/services/pipeline/storers.py
diff --git a/services/pipeline/transformers.py b/src/codebase_rag/services/pipeline/transformers.py
similarity index 100%
rename from services/pipeline/transformers.py
rename to src/codebase_rag/services/pipeline/transformers.py
diff --git a/src/codebase_rag/services/sql/__init__.py b/src/codebase_rag/services/sql/__init__.py
new file mode 100644
index 0000000..5c8171e
--- /dev/null
+++ b/src/codebase_rag/services/sql/__init__.py
@@ -0,0 +1,10 @@
+"""SQL parsing and schema analysis services."""
+
+from codebase_rag.services.sql.sql_parser import SQLParser, sql_analyzer
+from codebase_rag.services.sql.sql_schema_parser import SQLSchemaParser
+from codebase_rag.services.sql.universal_sql_schema_parser import (
+ UniversalSQLSchemaParser,
+ parse_sql_schema_smart,
+)
+
+__all__ = ["SQLParser", "SQLSchemaParser", "UniversalSQLSchemaParser", "sql_analyzer", "parse_sql_schema_smart"]
diff --git a/services/sql_parser.py b/src/codebase_rag/services/sql/sql_parser.py
similarity index 100%
rename from services/sql_parser.py
rename to src/codebase_rag/services/sql/sql_parser.py
diff --git a/services/sql_schema_parser.py b/src/codebase_rag/services/sql/sql_schema_parser.py
similarity index 100%
rename from services/sql_schema_parser.py
rename to src/codebase_rag/services/sql/sql_schema_parser.py
diff --git a/services/universal_sql_schema_parser.py b/src/codebase_rag/services/sql/universal_sql_schema_parser.py
similarity index 100%
rename from services/universal_sql_schema_parser.py
rename to src/codebase_rag/services/sql/universal_sql_schema_parser.py
diff --git a/src/codebase_rag/services/tasks/__init__.py b/src/codebase_rag/services/tasks/__init__.py
new file mode 100644
index 0000000..c2f8c9e
--- /dev/null
+++ b/src/codebase_rag/services/tasks/__init__.py
@@ -0,0 +1,7 @@
+"""Task queue and processing services."""
+
+from codebase_rag.services.tasks.task_queue import TaskQueue, task_queue, TaskStatus
+from codebase_rag.services.tasks.task_storage import TaskStorage, TaskType
+from codebase_rag.services.tasks.task_processors import TaskProcessor, processor_registry
+
+__all__ = ["TaskQueue", "TaskStorage", "TaskProcessor", "task_queue", "TaskStatus", "TaskType", "processor_registry"]
diff --git a/services/task_processors.py b/src/codebase_rag/services/tasks/task_processors.py
similarity index 100%
rename from services/task_processors.py
rename to src/codebase_rag/services/tasks/task_processors.py
diff --git a/services/task_queue.py b/src/codebase_rag/services/tasks/task_queue.py
similarity index 100%
rename from services/task_queue.py
rename to src/codebase_rag/services/tasks/task_queue.py
diff --git a/services/task_storage.py b/src/codebase_rag/services/tasks/task_storage.py
similarity index 99%
rename from services/task_storage.py
rename to src/codebase_rag/services/tasks/task_storage.py
index 5b78c8c..41efe9b 100644
--- a/services/task_storage.py
+++ b/src/codebase_rag/services/tasks/task_storage.py
@@ -13,7 +13,7 @@
from dataclasses import dataclass, asdict
from pathlib import Path
from loguru import logger
-from config import settings
+from codebase_rag.config import settings
from .task_queue import TaskResult, TaskStatus
diff --git a/src/codebase_rag/services/utils/__init__.py b/src/codebase_rag/services/utils/__init__.py
new file mode 100644
index 0000000..6287d6f
--- /dev/null
+++ b/src/codebase_rag/services/utils/__init__.py
@@ -0,0 +1,7 @@
+"""Utility services for git, ranking, and metrics."""
+
+from codebase_rag.services.utils.git_utils import GitUtils, git_utils
+from codebase_rag.services.utils.ranker import Ranker, ranker
+from codebase_rag.services.utils.metrics import MetricsCollector, metrics_service
+
+__all__ = ["GitUtils", "Ranker", "MetricsCollector", "git_utils", "ranker", "metrics_service"]
diff --git a/services/git_utils.py b/src/codebase_rag/services/utils/git_utils.py
similarity index 100%
rename from services/git_utils.py
rename to src/codebase_rag/services/utils/git_utils.py
diff --git a/services/metrics.py b/src/codebase_rag/services/utils/metrics.py
similarity index 99%
rename from services/metrics.py
rename to src/codebase_rag/services/utils/metrics.py
index 9bc3eaf..798cd04 100644
--- a/services/metrics.py
+++ b/src/codebase_rag/services/utils/metrics.py
@@ -7,7 +7,7 @@
import time
from functools import wraps
from loguru import logger
-from config import settings
+from codebase_rag.config import settings
# Create a custom registry to avoid conflicts
registry = CollectorRegistry()
diff --git a/services/ranker.py b/src/codebase_rag/services/utils/ranker.py
similarity index 100%
rename from services/ranker.py
rename to src/codebase_rag/services/utils/ranker.py
diff --git a/start.py b/start.py
deleted file mode 100644
index b3f1004..0000000
--- a/start.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-"""
-Code Graph Knowledge Service
-"""
-
-import asyncio
-import sys
-import time
-from pathlib import Path
-
-# add project root to path
-sys.path.insert(0, str(Path(__file__).parent))
-
-from config import settings, validate_neo4j_connection, validate_ollama_connection, validate_openrouter_connection, get_current_model_info
-from loguru import logger
-
-def check_dependencies():
- """check service dependencies"""
- logger.info("check service dependencies...")
-
- checks = [
- ("Neo4j", validate_neo4j_connection),
- ]
-
- # Conditionally add Ollama if it is the selected LLM or embedding provider
- if settings.llm_provider == "ollama" or settings.embedding_provider == "ollama":
- checks.append(("Ollama", validate_ollama_connection))
-
- # Conditionally add OpenRouter if it is the selected LLM or embedding provider
- if settings.llm_provider == "openrouter" or settings.embedding_provider == "openrouter":
- checks.append(("OpenRouter", validate_openrouter_connection))
-
- all_passed = True
- for service_name, check_func in checks:
- try:
- if check_func():
- logger.info(f"โ {service_name} connection successful")
- else:
- logger.error(f"โ {service_name} connection failed")
- all_passed = False
- except Exception as e:
- logger.error(f"โ {service_name} check error: {e}")
- all_passed = False
-
- return all_passed
-
-def wait_for_services(max_retries=30, retry_interval=2):
- """wait for services to start"""
- logger.info("wait for services to start...")
-
- for attempt in range(1, max_retries + 1):
- logger.info(f"try {attempt}/{max_retries}...")
-
- if check_dependencies():
- logger.info("all services are ready!")
- return True
-
- if attempt < max_retries:
- logger.info(f"wait {retry_interval} seconds and retry...")
- time.sleep(retry_interval)
-
- logger.error("service startup timeout!")
- return False
-
-def print_startup_info():
- """print startup info"""
- print("\n" + "="*60)
- print("Code Graph Knowledge Service")
- print("="*60)
- print(f"version: {settings.app_version}")
- print(f"host: {settings.host}:{settings.port}")
- print(f"debug mode: {settings.debug}")
- print()
- print("service config:")
- print(f" Neo4j: {settings.neo4j_uri}")
- print(f" Ollama: {settings.ollama_base_url}")
- print()
- model_info = get_current_model_info()
- print("model config:")
- print(f" LLM: {model_info['llm_model']}")
- print(f" Embedding: {model_info['embedding_model']}")
- print("="*60)
- print()
-
-def main():
- """main function"""
- print_startup_info()
-
- # check Python version
- if sys.version_info < (3, 8):
- logger.error("Python 3.8 or higher is required")
- sys.exit(1)
-
- # check environment variables
- logger.info("check environment config...")
-
- # optional: wait for services to start (useful in development)
- if not settings.debug or input("skip service dependency check? (y/N): ").lower().startswith('y'):
- logger.info("skip service dependency check")
- else:
- if not wait_for_services():
- logger.error("service dependency check failed, continue startup may encounter problems")
- if not input("continue startup? (y/N): ").lower().startswith('y'):
- sys.exit(1)
-
- # start application
- logger.info("start FastAPI application...")
-
- try:
- from main import start_server
- start_server()
- except KeyboardInterrupt:
- logger.info("service interrupted by user")
- except Exception as e:
- logger.error(f"start failed: {e}")
- sys.exit(1)
-
-if __name__ == "__main__":
- main()
diff --git a/start_mcp.py b/start_mcp.py
deleted file mode 100644
index 3a7b9bd..0000000
--- a/start_mcp.py
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-MCP Server v2 Startup Script
-
-Starts the official MCP SDK-based server with enhanced features:
-- Session management
-- Streaming responses (ready for future use)
-- Multi-transport support
-- Focus on Memory Store tools
-
-Usage:
- python start_mcp_v2.py
-
-Configuration:
- Add to Claude Desktop config:
- {
- "mcpServers": {
- "codebase-rag-memory-v2": {
- "command": "python",
- "args": ["/path/to/start_mcp_v2.py"],
- "env": {}
- }
- }
- }
-"""
-
-import asyncio
-import sys
-from pathlib import Path
-
-from loguru import logger
-
-# Configure logging
-logger.remove() # Remove default handler
-logger.add(
- sys.stderr,
- level="INFO",
- format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {message}"
-)
-
-# Add project root to path
-project_root = Path(__file__).parent
-sys.path.insert(0, str(project_root))
-
-
-def main():
- """Main entry point"""
- try:
- logger.info("=" * 70)
- logger.info("MCP Server v2 (Official SDK) - Memory Store")
- logger.info("=" * 70)
- logger.info(f"Python: {sys.version}")
- logger.info(f"Working directory: {Path.cwd()}")
-
- # Import and run the server
- from mcp_server_v2 import main as server_main
-
- logger.info("Starting server...")
- asyncio.run(server_main())
-
- except KeyboardInterrupt:
- logger.info("\nServer stopped by user")
- sys.exit(0)
- except Exception as e:
- logger.error(f"Server failed to start: {e}", exc_info=True)
- sys.exit(1)
-
-
-if __name__ == "__main__":
- main()
diff --git a/tests/conftest.py b/tests/conftest.py
index 110c231..ad97d98 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -11,7 +11,7 @@
sys.path.insert(0, str(Path(__file__).parent.parent))
from fastapi.testclient import TestClient
-from services.graph_service import Neo4jGraphService
+from src.codebase_rag.services.code import Neo4jGraphService
@pytest.fixture(scope="session")
@@ -81,7 +81,7 @@ def graph_service():
@pytest.fixture(scope="module")
def test_client():
"""FastAPI test client"""
- from main import app
+ from src.codebase_rag.server.web import app
return TestClient(app)
diff --git a/tests/test_context_pack.py b/tests/test_context_pack.py
index 80e5edb..78f2636 100644
--- a/tests/test_context_pack.py
+++ b/tests/test_context_pack.py
@@ -3,7 +3,7 @@
Tests GET /context/pack endpoint
"""
import pytest
-from services.pack_builder import PackBuilder
+from src.codebase_rag.services.code import PackBuilder
class TestPackBuilder:
diff --git a/tests/test_ingest.py b/tests/test_ingest.py
index 7b5092f..a3efbcb 100644
--- a/tests/test_ingest.py
+++ b/tests/test_ingest.py
@@ -3,8 +3,7 @@
Tests POST /ingest/repo endpoint
"""
import pytest
-from services.code_ingestor import CodeIngestor
-from services.graph_service import Neo4jGraphService
+from src.codebase_rag.services.code import CodeIngestor, Neo4jGraphService
class TestCodeIngestor:
diff --git a/tests/test_mcp_handlers.py b/tests/test_mcp_handlers.py
index c5a03d3..c031563 100644
--- a/tests/test_mcp_handlers.py
+++ b/tests/test_mcp_handlers.py
@@ -17,20 +17,20 @@
import asyncio
# Import handlers
-from mcp_tools.knowledge_handlers import (
+from src.codebase_rag.mcp.handlers.knowledge import (
handle_query_knowledge,
handle_search_similar_nodes,
handle_add_document,
handle_add_file,
handle_add_directory,
)
-from mcp_tools.code_handlers import (
+from src.codebase_rag.mcp.handlers.code import (
handle_code_graph_ingest_repo,
handle_code_graph_related,
handle_code_graph_impact,
handle_context_pack,
)
-from mcp_tools.memory_handlers import (
+from src.codebase_rag.mcp.handlers.memory import (
handle_add_memory,
handle_search_memories,
handle_get_memory,
@@ -39,7 +39,7 @@
handle_supersede_memory,
handle_get_project_summary,
)
-from mcp_tools.task_handlers import (
+from src.codebase_rag.mcp.handlers.tasks import (
handle_get_task_status,
handle_watch_task,
handle_watch_tasks,
@@ -47,7 +47,7 @@
handle_cancel_task,
handle_get_queue_stats,
)
-from mcp_tools.system_handlers import (
+from src.codebase_rag.mcp.handlers.system import (
handle_get_graph_schema,
handle_get_statistics,
handle_clear_knowledge_base,
diff --git a/tests/test_mcp_integration.py b/tests/test_mcp_integration.py
index 4297ad4..eaa506b 100644
--- a/tests/test_mcp_integration.py
+++ b/tests/test_mcp_integration.py
@@ -15,9 +15,9 @@
from unittest.mock import AsyncMock, Mock, patch
import json
-from mcp_tools.tool_definitions import get_tool_definitions
-from mcp_tools.resources import get_resource_list, read_resource_content
-from mcp_tools.prompts import get_prompt_list, get_prompt_content
+from src.codebase_rag.mcp.tools import get_tool_definitions
+from src.codebase_rag.mcp.resources import get_resource_list, read_resource_content
+from src.codebase_rag.mcp.prompts import get_prompt_list, get_prompt_content
class TestToolDefinitions:
@@ -309,7 +309,7 @@ class TestToolExecutionRouting:
@pytest.mark.asyncio
async def test_knowledge_tool_routing(self, mock_knowledge_service):
"""Test that knowledge tools route to correct service"""
- from mcp_tools.knowledge_handlers import handle_query_knowledge
+ from src.codebase_rag.mcp.handlers.knowledge import handle_query_knowledge
mock_knowledge_service.query.return_value = {
"success": True,
@@ -327,7 +327,7 @@ async def test_knowledge_tool_routing(self, mock_knowledge_service):
@pytest.mark.asyncio
async def test_memory_tool_routing(self, mock_memory_store):
"""Test that memory tools route to correct service"""
- from mcp_tools.memory_handlers import handle_add_memory
+ from src.codebase_rag.mcp.handlers.memory import handle_add_memory
mock_memory_store.add_memory.return_value = {
"success": True,
@@ -350,7 +350,7 @@ async def test_memory_tool_routing(self, mock_memory_store):
@pytest.mark.asyncio
async def test_task_tool_routing(self, mock_task_queue, mock_task_status):
"""Test that task tools route to correct service"""
- from mcp_tools.task_handlers import handle_get_queue_stats
+ from src.codebase_rag.mcp.handlers.tasks import handle_get_queue_stats
mock_task_queue.get_stats.return_value = {
"pending": 5,
@@ -368,7 +368,7 @@ async def test_task_tool_routing(self, mock_task_queue, mock_task_status):
@pytest.mark.asyncio
async def test_system_tool_routing(self, mock_knowledge_service):
"""Test that system tools route to correct service"""
- from mcp_tools.system_handlers import handle_get_statistics
+ from src.codebase_rag.mcp.handlers.system import handle_get_statistics
mock_knowledge_service.get_statistics.return_value = {
"success": True,
@@ -390,7 +390,7 @@ class TestErrorHandlingPatterns:
@pytest.mark.asyncio
async def test_knowledge_service_error(self, mock_knowledge_service):
"""Test knowledge service error handling"""
- from mcp_tools.knowledge_handlers import handle_query_knowledge
+ from src.codebase_rag.mcp.handlers.knowledge import handle_query_knowledge
mock_knowledge_service.query.return_value = {
"success": False,
@@ -408,7 +408,7 @@ async def test_knowledge_service_error(self, mock_knowledge_service):
@pytest.mark.asyncio
async def test_memory_store_error(self, mock_memory_store):
"""Test memory store error handling"""
- from mcp_tools.memory_handlers import handle_get_memory
+ from src.codebase_rag.mcp.handlers.memory import handle_get_memory
mock_memory_store.get_memory.return_value = {
"success": False,
@@ -426,7 +426,7 @@ async def test_memory_store_error(self, mock_memory_store):
@pytest.mark.asyncio
async def test_task_queue_error(self, mock_task_queue, mock_task_status):
"""Test task queue error handling"""
- from mcp_tools.task_handlers import handle_get_task_status
+ from src.codebase_rag.mcp.handlers.tasks import handle_get_task_status
mock_task_queue.get_task.return_value = None
@@ -442,7 +442,7 @@ async def test_task_queue_error(self, mock_task_queue, mock_task_status):
@pytest.mark.asyncio
async def test_code_handler_exception(self, mock_code_ingestor, mock_git_utils):
"""Test code handler exception handling"""
- from mcp_tools.code_handlers import handle_code_graph_ingest_repo
+ from src.codebase_rag.mcp.handlers.code import handle_code_graph_ingest_repo
mock_git_utils.is_git_repo.side_effect = Exception("Git error")
@@ -462,7 +462,7 @@ class TestAsyncTaskHandling:
@pytest.mark.asyncio
async def test_large_document_async_processing(self, mock_knowledge_service, mock_submit_document_task):
"""Test large documents trigger async processing"""
- from mcp_tools.knowledge_handlers import handle_add_document
+ from src.codebase_rag.mcp.handlers.knowledge import handle_add_document
mock_submit_document_task.return_value = "task-123"
large_content = "x" * 15000 # 15KB
@@ -481,7 +481,7 @@ async def test_large_document_async_processing(self, mock_knowledge_service, moc
@pytest.mark.asyncio
async def test_directory_always_async(self, mock_submit_directory_task):
"""Test directory processing always uses async"""
- from mcp_tools.knowledge_handlers import handle_add_directory
+ from src.codebase_rag.mcp.handlers.knowledge import handle_add_directory
mock_submit_directory_task.return_value = "task-456"
@@ -497,7 +497,7 @@ async def test_directory_always_async(self, mock_submit_directory_task):
@pytest.mark.asyncio
async def test_watch_task_monitors_progress(self, mock_task_queue, mock_task_status):
"""Test watch_task monitors task until completion"""
- from mcp_tools.task_handlers import handle_watch_task
+ from src.codebase_rag.mcp.handlers.tasks import handle_watch_task
# Simulate task completing immediately
mock_task = Mock()
@@ -525,7 +525,7 @@ class TestDataValidation:
@pytest.mark.asyncio
async def test_clear_knowledge_base_requires_confirmation(self, mock_knowledge_service):
"""Test clear_knowledge_base requires explicit confirmation"""
- from mcp_tools.system_handlers import handle_clear_knowledge_base
+ from src.codebase_rag.mcp.handlers.system import handle_clear_knowledge_base
# Without confirmation
result = await handle_clear_knowledge_base(
@@ -555,7 +555,7 @@ async def test_clear_knowledge_base_requires_confirmation(self, mock_knowledge_s
@pytest.mark.asyncio
async def test_memory_importance_defaults(self, mock_memory_store):
"""Test memory importance has sensible default"""
- from mcp_tools.memory_handlers import handle_add_memory
+ from src.codebase_rag.mcp.handlers.memory import handle_add_memory
mock_memory_store.add_memory.return_value = {
"success": True,
@@ -580,7 +580,7 @@ async def test_memory_importance_defaults(self, mock_memory_store):
@pytest.mark.asyncio
async def test_search_top_k_defaults(self, mock_knowledge_service):
"""Test search top_k has sensible default"""
- from mcp_tools.knowledge_handlers import handle_search_similar_nodes
+ from src.codebase_rag.mcp.handlers.knowledge import handle_search_similar_nodes
mock_knowledge_service.search_similar_nodes.return_value = {
"success": True,
diff --git a/tests/test_mcp_utils.py b/tests/test_mcp_utils.py
index 37c4881..39981bd 100644
--- a/tests/test_mcp_utils.py
+++ b/tests/test_mcp_utils.py
@@ -8,7 +8,7 @@
"""
import pytest
-from mcp_tools.utils import format_result
+from src.codebase_rag.mcp.utils import format_result
class TestFormatResult:
diff --git a/tests/test_memory_store.py b/tests/test_memory_store.py
index 16a9bff..2e69a10 100644
--- a/tests/test_memory_store.py
+++ b/tests/test_memory_store.py
@@ -7,7 +7,7 @@
import pytest
import asyncio
-from services.memory_store import MemoryStore
+from src.codebase_rag.services.memory import MemoryStore
# Test fixtures
diff --git a/tests/test_related.py b/tests/test_related.py
index f2ab078..2319200 100644
--- a/tests/test_related.py
+++ b/tests/test_related.py
@@ -3,7 +3,7 @@
Tests GET /graph/related endpoint
"""
import pytest
-from services.ranker import Ranker
+from src.codebase_rag.services.utils import Ranker
class TestRanker: