diff --git a/nova/core/ai_client.py b/nova/core/ai_client.py
index 37de893..a464498 100644
--- a/nova/core/ai_client.py
+++ b/nova/core/ai_client.py
@@ -4,7 +4,9 @@
 import logging
 from abc import ABC, abstractmethod
 from collections.abc import AsyncGenerator
+from datetime import datetime
+from nova.core.metrics import ContextAnalysis, get_metrics_collector
 from nova.models.config import AIModelConfig
 
 logger = logging.getLogger(__name__)
 
@@ -39,6 +41,7 @@ class BaseAIClient(ABC):
 
     def __init__(self, config: AIModelConfig):
         self.config = config
+        self.metrics_collector = get_metrics_collector()
 
     @abstractmethod
    async def generate_response(self, messages: list[dict[str, str]], **kwargs) -> str:
@@ -62,6 +65,29 @@ async def list_models(self) -> list[str]:
         """List available models for this provider"""
         pass
 
+    def _analyze_context(self, messages: list[dict[str, str]]) -> ContextAnalysis:
+        """Analyze context window usage for this provider"""
+        # Get max context tokens for this model (provider-specific)
+        max_context = self._get_max_context_tokens()
+
+        context_analysis = self.metrics_collector.analyze_context(
+            messages=messages, max_context_tokens=max_context
+        )
+
+        # Print warnings if context is getting full
+        self.metrics_collector.print_context_warning(context_analysis)
+
+        return context_analysis
+
+    @abstractmethod
+    def _get_max_context_tokens(self) -> int:
+        """Get maximum context tokens for the current model"""
+        pass
+
+    def _get_provider_name(self) -> str:
+        """Get the provider name for metrics"""
+        return self.__class__.__name__.replace("Client", "").lower()
+
 
 class OpenAIClient(BaseAIClient):
     """OpenAI API client"""
@@ -85,8 +111,25 @@ def validate_config(self) -> bool:
             return False
         return True
 
-    async def generate_response(self, messages: list[dict[str, str]], **kwargs) -> str:
+    async def generate_response(
+        self, messages: list[dict[str, str]], conversation_id: str = "unknown", **kwargs
+    ) -> str:
         """Generate response using OpenAI API"""
+        # Analyze context before making request
+        context_analysis = self._analyze_context(messages)
+
+        # Log request details
+        self.metrics_collector.log_request(
+            provider=self._get_provider_name(),
+            model=self.config.model_name,
+            messages=messages,
+            temperature=self.config.temperature,
+            max_tokens=self.config.max_tokens,
+            **kwargs,
+        )
+
+        request_start = datetime.now()
+
         try:
             response = await self.client.chat.completions.create(
                 model=self.config.model_name,
@@ -95,7 +138,41 @@ async def generate_response(self, messages: list[dict[str, str]], **kwargs) -> s
                 temperature=self.config.temperature,
                 **kwargs,
             )
-            return response.choices[0].message.content
+
+            response_complete = datetime.now()
+            response_content = response.choices[0].message.content
+
+            # Extract token usage if available
+            usage = response.usage
+            input_tokens = usage.prompt_tokens if usage else 0
+            output_tokens = usage.completion_tokens if usage else 0
+
+            # Create metrics
+            metrics = self.metrics_collector.create_metrics(
+                conversation_id=conversation_id,
+                provider=self._get_provider_name(),
+                model=self.config.model_name,
+                messages=messages,
+                response=response_content,
+                request_start=request_start,
+                response_complete=response_complete,
+                context_analysis=context_analysis,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                temperature=self.config.temperature,
+                max_tokens=self.config.max_tokens,
+                **kwargs,
+            )
+
+            # Log response details
+            self.metrics_collector.log_response(
+                provider=self._get_provider_name(),
+                model=self.config.model_name,
+                response=response_content,
+                metrics=metrics,
+            )
+
+            return response_content
         except Exception as e:
             self._handle_api_error(e)
@@ -129,6 +206,18 @@ async def list_models(self) -> list[str]:
         except Exception as e:
             self._handle_api_error(e)
 
+    def _get_max_context_tokens(self) -> int:
+        """Get maximum context tokens for OpenAI models"""
+        model_limits = {
+            "gpt-4": 8192,
+            "gpt-4-32k": 32768,
+            "gpt-4-turbo": 128000,
+            "gpt-4o": 128000,
+            "gpt-3.5-turbo": 4096,
+            "gpt-3.5-turbo-16k": 16384,
+        }
+        return model_limits.get(self.config.model_name, 8192)  # Default to GPT-4 limit
+
     def _handle_api_error(self, error: Exception) -> None:
         """Convert OpenAI errors to our standard errors"""
         import openai
@@ -167,8 +256,25 @@ def validate_config(self) -> bool:
             return False
         return True
 
-    async def generate_response(self, messages: list[dict[str, str]], **kwargs) -> str:
+    async def generate_response(
+        self, messages: list[dict[str, str]], conversation_id: str = "unknown", **kwargs
+    ) -> str:
         """Generate response using Anthropic API"""
+        # Analyze context before making request
+        context_analysis = self._analyze_context(messages)
+
+        # Log request details
+        self.metrics_collector.log_request(
+            provider=self._get_provider_name(),
+            model=self.config.model_name,
+            messages=messages,
+            temperature=self.config.temperature,
+            max_tokens=self.config.max_tokens,
+            **kwargs,
+        )
+
+        request_start = datetime.now()
+
         try:
             # Convert messages to Anthropic format
             anthropic_messages = self._convert_messages(messages)
@@ -180,7 +286,41 @@ async def generate_response(self, messages: list[dict[str, str]], **kwargs) -> s
                 temperature=self.config.temperature,
                 **kwargs,
             )
-            return response.content[0].text
+
+            response_complete = datetime.now()
+            response_content = response.content[0].text
+
+            # Extract token usage if available
+            usage = response.usage
+            input_tokens = usage.input_tokens if usage else 0
+            output_tokens = usage.output_tokens if usage else 0
+
+            # Create metrics
+            metrics = self.metrics_collector.create_metrics(
+                conversation_id=conversation_id,
+                provider=self._get_provider_name(),
+                model=self.config.model_name,
+                messages=messages,
+                response=response_content,
+                request_start=request_start,
+                response_complete=response_complete,
+                context_analysis=context_analysis,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                temperature=self.config.temperature,
+                max_tokens=self.config.max_tokens,
+                **kwargs,
+            )
+
+            # Log response details
+            self.metrics_collector.log_response(
+                provider=self._get_provider_name(),
+                model=self.config.model_name,
+                response=response_content,
+                metrics=metrics,
+            )
+
+            return response_content
         except Exception as e:
             self._handle_api_error(e)
@@ -231,6 +371,17 @@ def _convert_messages(self, messages: list[dict[str, str]]) -> list[dict[str, st
             )
         return converted
 
+    def _get_max_context_tokens(self) -> int:
+        """Get maximum context tokens for Anthropic models"""
+        model_limits = {
+            "claude-3-5-sonnet-20241022": 200000,
+            "claude-3-5-haiku-20241022": 200000,
+            "claude-3-opus-20240229": 200000,
+            "claude-3-sonnet-20240229": 200000,
+            "claude-3-haiku-20240307": 200000,
+        }
+        return model_limits.get(self.config.model_name, 200000)  # Default to 200k
+
     def _handle_api_error(self, error: Exception) -> None:
         """Convert Anthropic errors to our standard errors"""
         import anthropic
@@ -265,8 +416,25 @@ def validate_config(self) -> bool:
         # Ollama doesn't require API key, just check if server is reachable
         return True
 
-    async def generate_response(self, messages: list[dict[str, str]], **kwargs) -> str:
+    async def generate_response(
+        self, messages: list[dict[str, str]], conversation_id: str = "unknown", **kwargs
+    ) -> str:
         """Generate response using Ollama API"""
+        # Analyze context before making request
+        context_analysis = self._analyze_context(messages)
+
+        # Log request details
+        self.metrics_collector.log_request(
+            provider=self._get_provider_name(),
+            model=self.config.model_name,
+            messages=messages,
+            temperature=self.config.temperature,
+            max_tokens=self.config.max_tokens,
+            **kwargs,
+        )
+
+        request_start = datetime.now()
+
         try:
             response = await self.client.chat(
                 model=self.config.model_name,
@@ -277,7 +445,42 @@ async def generate_response(self, messages: list[dict[str, str]], **kwargs) -> s
                 },
                 **kwargs,
             )
-            return response["message"]["content"]
+
+            response_complete = datetime.now()
+            response_content = response["message"]["content"]
+
+            # Ollama doesn't provide token usage, so we estimate
+            input_tokens = self.metrics_collector._estimate_tokens(messages)
+            output_tokens = self.metrics_collector._estimate_tokens(
+                [{"content": response_content}]
+            )
+
+            # Create metrics
+            metrics = self.metrics_collector.create_metrics(
+                conversation_id=conversation_id,
+                provider=self._get_provider_name(),
+                model=self.config.model_name,
+                messages=messages,
+                response=response_content,
+                request_start=request_start,
+                response_complete=response_complete,
+                context_analysis=context_analysis,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                temperature=self.config.temperature,
+                max_tokens=self.config.max_tokens,
+                **kwargs,
+            )
+
+            # Log response details
+            self.metrics_collector.log_response(
+                provider=self._get_provider_name(),
+                model=self.config.model_name,
+                response=response_content,
+                metrics=metrics,
+            )
+
+            return response_content
         except Exception as e:
             self._handle_api_error(e)
@@ -313,6 +516,27 @@ async def list_models(self) -> list[str]:
         except Exception as e:
             self._handle_api_error(e)
 
+    def _get_max_context_tokens(self) -> int:
+        """Get maximum context tokens for Ollama models"""
+        # Common context limits for popular Ollama models
+        model_limits = {
+            "llama2": 4096,
+            "llama2:13b": 4096,
+            "llama2:70b": 4096,
+            "mistral": 8192,
+            "mistral:7b": 8192,
+            "codellama": 16384,
+            "codellama:13b": 16384,
+            "codellama:34b": 16384,
+            "vicuna": 2048,
+            "orca-mini": 2048,
+        }
+        # Extract base model name (remove size suffix)
+        base_model = self.config.model_name.split(":")[0]
+        return model_limits.get(
+            self.config.model_name, model_limits.get(base_model, 4096)
+        )
+
     def _handle_api_error(self, error: Exception) -> None:
         """Convert Ollama errors to our standard errors"""
         if "connection" in str(error).lower():
diff --git a/nova/core/metrics.py b/nova/core/metrics.py
new file mode 100644
index 0000000..5f6f131
--- /dev/null
+++ b/nova/core/metrics.py
@@ -0,0 +1,309 @@
+"""LLM interaction metrics collection and monitoring"""
+
+import json
+import logging
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+
+
+class ContextStatus(Enum):
+    """Context window utilization status"""
+
+    OPTIMAL = "optimal"  # < 70% utilization
+    WARNING = "warning"  # 70-90% utilization
+    CRITICAL = "critical"  # > 90% utilization
+    TRUNCATED = "truncated"  # Content was truncated
+
+
+class MetricLevel(Enum):
+    """Metric collection detail level"""
+
+    BASIC = "basic"  # Just essential metrics
+    DETAILED = "detailed"  # Include timing and context analysis
+    DEBUG = "debug"  # Full request/response logging
+
+
+@dataclass
+class TokenUsage:
+    """Token usage information"""
+
+    input_tokens: int
+    output_tokens: int
+    total_tokens: int
+
+    @property
+    def efficiency_ratio(self) -> float:
+        """Output tokens per input token (higher = more efficient)"""
+        return self.output_tokens / self.input_tokens if self.input_tokens > 0 else 0.0
+
+
+@dataclass
+class ContextAnalysis:
+    """Context window analysis and optimization info"""
+
+    total_messages: int
+    context_tokens: int
+    max_context_tokens: int
+    utilization_percent: float
+    status: ContextStatus
+    truncated_messages: int = 0
+    optimization_suggestions: list[str] | None = None
+
+    def __post_init__(self):
+        if self.optimization_suggestions is None:
+            self.optimization_suggestions = []
+
+
+@dataclass
+class LLMMetrics:
+    """Comprehensive LLM interaction metrics"""
+
+    # Request metadata
+    timestamp: datetime
+    conversation_id: str
+    provider: str
+    model: str
+
+    # Token usage
+    token_usage: TokenUsage
+
+    # Context analysis
+    context_analysis: ContextAnalysis
+
+    # Performance
+    request_start_time: datetime
+    response_complete_time: datetime
+
+    # Request details
+    message_count: int
+    response_length: int
+
+    # Optional fields with defaults
+    time_to_first_token: float | None = None  # For streaming responses
+    temperature: float = 0.0
+    max_tokens: int = 0
+    success: bool = True
+    error_message: str | None = None
+
+    @property
+    def total_response_time(self) -> float:
+        """Total time from request start to completion (seconds)"""
+        return (self.response_complete_time - self.request_start_time).total_seconds()
+
+    @property
+    def tokens_per_second(self) -> float:
+        """Output tokens generated per second"""
+        if self.total_response_time > 0 and self.token_usage.output_tokens > 0:
+            return self.token_usage.output_tokens / self.total_response_time
+        return 0.0
+
+
+class MetricsCollector:
+    """Collects and manages LLM interaction metrics"""
+
+    def __init__(
+        self, level: MetricLevel = MetricLevel.BASIC, debug_log_path: Path | None = None
+    ):
+        self.level = level
+        self.debug_log_path = debug_log_path
+        self.logger = logging.getLogger(__name__)
+
+        # Initialize debug logging if requested
+        if debug_log_path and level == MetricLevel.DEBUG:
+            self._setup_debug_logging()
+
+    def _setup_debug_logging(self):
+        """Setup debug file logging for LLM interactions"""
+        debug_handler = logging.FileHandler(self.debug_log_path)
+        debug_handler.setLevel(logging.DEBUG)
+        debug_formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        debug_handler.setFormatter(debug_formatter)
+
+        # Create debug logger specifically for LLM interactions
+        self.debug_logger = logging.getLogger(f"{__name__}.debug")
+        self.debug_logger.setLevel(logging.DEBUG)
+        self.debug_logger.addHandler(debug_handler)
+        self.debug_logger.propagate = False
+
+    def log_request(
+        self, provider: str, model: str, messages: list[dict[str, str]], **kwargs
+    ) -> None:
+        """Log LLM request details"""
+        if self.level != MetricLevel.DEBUG:
+            return
+
+        request_data = {
+            "type": "request",
+            "timestamp": datetime.now().isoformat(),
+            "provider": provider,
+            "model": model,
+            "message_count": len(messages),
+            "messages": (
+                messages
+                if self.level == MetricLevel.DEBUG
+                else [
+                    {"role": m.get("role"), "content_length": len(m.get("content", ""))}
+                    for m in messages
+                ]
+            ),
+            "parameters": kwargs,
+        }
+
+        if hasattr(self, "debug_logger"):
+            self.debug_logger.debug(
+                f"LLM_REQUEST: {json.dumps(request_data, indent=2, default=str)}"
+            )
+
+    def log_response(
+        self, provider: str, model: str, response: str, metrics: LLMMetrics
+    ) -> None:
+        """Log LLM response details"""
+        if self.level != MetricLevel.DEBUG:
+            return
+
+        response_data = {
+            "type": "response",
+            "timestamp": datetime.now().isoformat(),
+            "provider": provider,
+            "model": model,
+            "response_length": len(response),
+            "response": (
+                response
+                if self.level == MetricLevel.DEBUG
+                else f"[{len(response)} characters]"
+            ),
+            "metrics": asdict(metrics),
+        }
+
+        if hasattr(self, "debug_logger"):
+            self.debug_logger.debug(
+                f"LLM_RESPONSE: {json.dumps(response_data, indent=2, default=str)}"
+            )
+
+    def analyze_context(
+        self,
+        messages: list[dict[str, str]],
+        max_context_tokens: int,
+        estimated_tokens: int | None = None,
+    ) -> ContextAnalysis:
+        """Analyze context window usage and provide optimization suggestions"""
+
+        # Estimate tokens if not provided
+        if estimated_tokens is None:
+            estimated_tokens = self._estimate_tokens(messages)
+
+        utilization = (estimated_tokens / max_context_tokens) * 100
+
+        # Determine status
+        if utilization < 70:
+            status = ContextStatus.OPTIMAL
+        elif utilization < 90:
+            status = ContextStatus.WARNING
+        else:
+            status = ContextStatus.CRITICAL
+
+        # Generate optimization suggestions
+        suggestions = []
+        if utilization > 80:
+            suggestions.append("Consider summarizing older messages")
+        if utilization > 90:
+            suggestions.append("Context window nearly full - truncation likely")
+        if len(messages) > 20:
+            suggestions.append("Consider conversation pruning")
+
+        return ContextAnalysis(
+            total_messages=len(messages),
+            context_tokens=estimated_tokens,
+            max_context_tokens=max_context_tokens,
+            utilization_percent=utilization,
+            status=status,
+            optimization_suggestions=suggestions,
+        )
+
+    def _estimate_tokens(self, messages: list[dict[str, str]]) -> int:
+        """Rough token estimation for context analysis"""
+        total_chars = sum(len(msg.get("content", "")) for msg in messages)
+        # Rough estimate: ~4 characters per token for English text
+        return int(total_chars / 4)
+
+    def create_metrics(
+        self,
+        conversation_id: str,
+        provider: str,
+        model: str,
+        messages: list[dict[str, str]],
+        response: str,
+        request_start: datetime,
+        response_complete: datetime,
+        context_analysis: ContextAnalysis,
+        input_tokens: int = 0,
+        output_tokens: int = 0,
+        time_to_first_token: float | None = None,
+        **kwargs,
+    ) -> LLMMetrics:
+        """Create comprehensive metrics for an LLM interaction"""
+
+        token_usage = TokenUsage(
+            input_tokens=input_tokens or self._estimate_tokens(messages),
+            output_tokens=output_tokens
+            or self._estimate_tokens([{"content": response}]),
+            total_tokens=(input_tokens or 0) + (output_tokens or 0),
+        )
+
+        return LLMMetrics(
+            timestamp=datetime.now(),
+            conversation_id=conversation_id,
+            provider=provider,
+            model=model,
+            token_usage=token_usage,
+            context_analysis=context_analysis,
+            request_start_time=request_start,
+            response_complete_time=response_complete,
+            time_to_first_token=time_to_first_token,
+            message_count=len(messages),
+            response_length=len(response),
+            temperature=kwargs.get("temperature", 0.0),
+            max_tokens=kwargs.get("max_tokens", 0),
+            success=True,
+        )
+
+    def print_context_warning(self, analysis: ContextAnalysis) -> None:
+        """Print context utilization warnings to console"""
+        if analysis.status == ContextStatus.WARNING:
+            self.logger.warning(
+                f"Context window {analysis.utilization_percent:.1f}% full "
+                f"({analysis.context_tokens}/{analysis.max_context_tokens} tokens)"
+            )
+        elif analysis.status == ContextStatus.CRITICAL:
+            self.logger.error(
+                f"Context window {analysis.utilization_percent:.1f}% full - "
+                f"truncation likely ({analysis.context_tokens}/{analysis.max_context_tokens} tokens)"
+            )
+
+        if analysis.optimization_suggestions:
+            for suggestion in analysis.optimization_suggestions:
+                self.logger.info(f"💡 Optimization: {suggestion}")
+
+
+# Global metrics collector instance
+_metrics_collector: MetricsCollector | None = None
+
+
+def get_metrics_collector() -> MetricsCollector:
+    """Get or create the global metrics collector"""
+    global _metrics_collector
+    if _metrics_collector is None:
+        _metrics_collector = MetricsCollector()
+    return _metrics_collector
+
+
+def configure_metrics(
+    level: MetricLevel = MetricLevel.BASIC, debug_log_path: Path | None = None
+) -> None:
+    """Configure global metrics collection"""
+    global _metrics_collector
+    _metrics_collector = MetricsCollector(level=level, debug_log_path=debug_log_path)
diff --git a/nova/models/config.py b/nova/models/config.py
index 222ab76..1e709ff 100644
--- a/nova/models/config.py
+++ b/nova/models/config.py
@@ -5,6 +5,32 @@
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 
+class MonitoringConfig(BaseModel):
+    """Configuration for LLM monitoring and metrics"""
+
+    enabled: bool = Field(default=True, description="Enable LLM metrics collection")
+    level: str = Field(
+        default="basic", description="Monitoring level: basic, detailed, debug"
+    )
+    debug_log_file: Path | None = Field(
+        default=None, description="File path for debug logging (debug level only)"
+    )
+    context_warnings: bool = Field(
+        default=True, description="Show context window utilization warnings"
+    )
+    performance_metrics: bool = Field(
+        default=True, description="Collect response time and performance metrics"
+    )
+
+    @field_validator("level")
+    @classmethod
+    def validate_level(cls, v: str) -> str:
+        allowed_levels = {"basic", "detailed", "debug"}
+        if v not in allowed_levels:
+            raise ValueError(f"Level must be one of: {', '.join(allowed_levels)}")
+        return v
+
+
 class AIModelConfig(BaseModel):
     """Configuration for AI model settings"""
 
@@ -111,6 +137,7 @@ class NovaConfig(BaseModel):
 
     chat: ChatConfig = Field(default_factory=ChatConfig)
     search: SearchConfig = Field(default_factory=SearchConfig)
+    monitoring: MonitoringConfig = Field(default_factory=MonitoringConfig)
     profiles: dict[str, AIProfile] = Field(
         default_factory=dict, description="Named AI profiles"
     )
diff --git a/tests/unit/test_metrics.py b/tests/unit/test_metrics.py
new file mode 100644
index 0000000..d207e19
--- /dev/null
+++ b/tests/unit/test_metrics.py
@@ -0,0 +1,286 @@
+"""Tests for LLM metrics collection system"""
+
+import tempfile
+from datetime import datetime
+from pathlib import Path
+
+from nova.core.metrics import (
+    ContextAnalysis,
+    ContextStatus,
+    LLMMetrics,
+    MetricLevel,
+    MetricsCollector,
+    TokenUsage,
+    configure_metrics,
+    get_metrics_collector,
+)
+
+
+class TestTokenUsage:
+    """Test token usage calculations"""
+
+    def test_token_usage_creation(self):
+        """Test creating token usage"""
+        usage = TokenUsage(input_tokens=100, output_tokens=50, total_tokens=150)
+
+        assert usage.input_tokens == 100
+        assert usage.output_tokens == 50
+        assert usage.total_tokens == 150
+
+    def test_efficiency_ratio(self):
+        """Test efficiency ratio calculation"""
+        usage = TokenUsage(input_tokens=100, output_tokens=50, total_tokens=150)
+        assert usage.efficiency_ratio == 0.5
+
+        # Test with zero input tokens
+        usage_zero = TokenUsage(input_tokens=0, output_tokens=50, total_tokens=50)
+        assert usage_zero.efficiency_ratio == 0.0
+
+
+class TestContextAnalysis:
+    """Test context analysis functionality"""
+
+    def test_context_analysis_creation(self):
+        """Test creating context analysis"""
+        analysis = ContextAnalysis(
+            total_messages=10,
+            context_tokens=1000,
+            max_context_tokens=4096,
+            utilization_percent=24.4,
+            status=ContextStatus.OPTIMAL,
+            optimization_suggestions=["Test suggestion"],
+        )
+
+        assert analysis.total_messages == 10
+        assert analysis.context_tokens == 1000
+        assert analysis.utilization_percent == 24.4
+        assert analysis.status == ContextStatus.OPTIMAL
+        assert "Test suggestion" in analysis.optimization_suggestions
+
+    def test_default_optimization_suggestions(self):
+        """Test default empty optimization suggestions"""
+        analysis = ContextAnalysis(
+            total_messages=5,
+            context_tokens=500,
+            max_context_tokens=4096,
+            utilization_percent=12.2,
+            status=ContextStatus.OPTIMAL,
+        )
+
+        assert analysis.optimization_suggestions == []
+
+
+class TestMetricsCollector:
+    """Test metrics collector functionality"""
+
+    def test_basic_metrics_collector(self):
+        """Test basic metrics collector initialization"""
+        collector = MetricsCollector(level=MetricLevel.BASIC)
+        assert collector.level == MetricLevel.BASIC
+        assert collector.debug_log_path is None
+
+    def test_debug_metrics_collector(self):
+        """Test debug metrics collector with log file"""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            log_path = Path(temp_dir) / "debug.log"
+            collector = MetricsCollector(
+                level=MetricLevel.DEBUG, debug_log_path=log_path
+            )
+
+            assert collector.level == MetricLevel.DEBUG
+            assert collector.debug_log_path == log_path
+
+    def test_token_estimation(self):
+        """Test rough token estimation"""
+        collector = MetricsCollector()
+
+        messages = [
+            {"role": "user", "content": "Hello world! How are you today?"},
+            {"role": "assistant", "content": "I'm doing well, thank you for asking!"},
+        ]
+
+        estimated = collector._estimate_tokens(messages)
+        assert estimated > 0
+        # Rough estimate should be reasonable
+        assert 10 < estimated < 50
+
+    def test_context_analysis(self):
+        """Test context window analysis"""
+        collector = MetricsCollector()
+
+        messages = [{"role": "user", "content": "Short message"}]
+        max_tokens = 4096
+
+        analysis = collector.analyze_context(messages, max_tokens)
+
+        assert analysis.total_messages == 1
+        assert analysis.context_tokens > 0
+        assert analysis.max_context_tokens == max_tokens
+        assert analysis.utilization_percent >= 0
+        assert analysis.status == ContextStatus.OPTIMAL
+
+    def test_context_warning_status(self):
+        """Test context status determination"""
+        collector = MetricsCollector()
+
+        # Test optimal status (low utilization)
+        messages_low = [{"role": "user", "content": "Short"}]
+        analysis_low = collector.analyze_context(messages_low, 1000)
+        assert analysis_low.status == ContextStatus.OPTIMAL
+
+        # Test warning status (high utilization)
+        long_content = "x" * 3000  # Long message to trigger high utilization
+        messages_high = [{"role": "user", "content": long_content}]
+        analysis_high = collector.analyze_context(messages_high, 1000)
+        assert analysis_high.status in [ContextStatus.WARNING, ContextStatus.CRITICAL]
+
+    def test_optimization_suggestions(self):
+        """Test optimization suggestions generation"""
+        collector = MetricsCollector()
+
+        # Create scenario with many messages and high token usage
+        messages = [{"role": "user", "content": "x" * 100}] * 25  # 25 messages
+        analysis = collector.analyze_context(messages, 1000)
+
+        suggestions = analysis.optimization_suggestions
+        assert len(suggestions) > 0
+
+        # Should suggest conversation pruning for many messages
+        assert any("conversation pruning" in s.lower() for s in suggestions)
+
+    def test_create_metrics(self):
+        """Test comprehensive metrics creation"""
+        collector = MetricsCollector()
+
+        messages = [{"role": "user", "content": "Test message"}]
+        response = "Test response"
+        start_time = datetime.now()
+        end_time = datetime.now()
+
+        context_analysis = ContextAnalysis(
+            total_messages=1,
+            context_tokens=10,
+            max_context_tokens=4096,
+            utilization_percent=0.24,
+            status=ContextStatus.OPTIMAL,
+        )
+
+        metrics = collector.create_metrics(
+            conversation_id="test-123",
+            provider="openai",
+            model="gpt-4",
+            messages=messages,
+            response=response,
+            request_start=start_time,
+            response_complete=end_time,
+            context_analysis=context_analysis,
+            input_tokens=10,
+            output_tokens=5,
+            temperature=0.7,
+            max_tokens=2000,
+        )
+
+        assert metrics.conversation_id == "test-123"
+        assert metrics.provider == "openai"
+        assert metrics.model == "gpt-4"
+        assert metrics.token_usage.input_tokens == 10
+        assert metrics.token_usage.output_tokens == 5
+        assert metrics.temperature == 0.7
+        assert metrics.max_tokens == 2000
+        assert metrics.success is True
+
+    def test_metrics_performance_calculations(self):
+        """Test performance metric calculations"""
+        collector = MetricsCollector()
+
+        start_time = datetime(2024, 1, 1, 12, 0, 0)
+        end_time = datetime(2024, 1, 1, 12, 0, 2)  # 2 seconds later
+
+        context_analysis = ContextAnalysis(
+            total_messages=1,
+            context_tokens=10,
+            max_context_tokens=4096,
+            utilization_percent=0.24,
+            status=ContextStatus.OPTIMAL,
+        )
+
+        metrics = collector.create_metrics(
+            conversation_id="test-perf",
+            provider="openai",
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Test"}],
"Test"}], + response="Response", + request_start=start_time, + response_complete=end_time, + context_analysis=context_analysis, + output_tokens=100, + ) + + assert metrics.total_response_time == 2.0 + assert metrics.tokens_per_second == 50.0 # 100 tokens / 2 seconds + + +class TestGlobalMetricsCollector: + """Test global metrics collector functions""" + + def test_get_metrics_collector(self): + """Test getting global metrics collector""" + collector = get_metrics_collector() + assert isinstance(collector, MetricsCollector) + + # Should return same instance on subsequent calls + collector2 = get_metrics_collector() + assert collector is collector2 + + def test_configure_metrics(self): + """Test configuring global metrics collector""" + with tempfile.TemporaryDirectory() as temp_dir: + log_path = Path(temp_dir) / "test.log" + + configure_metrics(level=MetricLevel.DEBUG, debug_log_path=log_path) + collector = get_metrics_collector() + + assert collector.level == MetricLevel.DEBUG + assert collector.debug_log_path == log_path + + +class TestLLMMetrics: + """Test LLM metrics dataclass""" + + def test_llm_metrics_creation(self): + """Test creating LLM metrics""" + token_usage = TokenUsage(input_tokens=100, output_tokens=50, total_tokens=150) + + context_analysis = ContextAnalysis( + total_messages=5, + context_tokens=500, + max_context_tokens=4096, + utilization_percent=12.2, + status=ContextStatus.OPTIMAL, + ) + + start_time = datetime.now() + end_time = datetime.now() + + metrics = LLMMetrics( + timestamp=datetime.now(), + conversation_id="test-conv", + provider="openai", + model="gpt-4", + token_usage=token_usage, + context_analysis=context_analysis, + request_start_time=start_time, + response_complete_time=end_time, + message_count=2, + response_length=100, + temperature=0.7, + max_tokens=2000, + ) + + assert metrics.conversation_id == "test-conv" + assert metrics.provider == "openai" + assert metrics.model == "gpt-4" + assert metrics.token_usage == token_usage + assert metrics.context_analysis == context_analysis + assert metrics.message_count == 2 + assert metrics.response_length == 100