From 26cf9b5a2f609af6ee9802218d00ce0e89964bc9 Mon Sep 17 00:00:00 2001 From: MeMe Date: Wed, 29 Oct 2025 06:42:54 +0000 Subject: [PATCH 1/3] **Multi-LLM-Backend Support**: Integrates with multiple LLM providers, including OpenAI and Google Gemini. --- config.py | 4 + src/cli/main.py | 37 ++-- src/core/llm_client.py | 333 ++++++++++++++++++++++++------------ src/core/query_processor.py | 4 +- src/web/app.py | 10 +- 5 files changed, 244 insertions(+), 144 deletions(-) diff --git a/config.py b/config.py index e17f811..7ba40e9 100644 --- a/config.py +++ b/config.py @@ -18,6 +18,8 @@ class Config: # LLM API Settings llm_api_url: Optional[str] = None llm_api_key: Optional[str] = None + gemini_api_key: Optional[str] = None + llm_provider: str = "openai" llm_model_name: str = "gpt-3.5-turbo" llm_temperature: float = 0.7 llm_max_tokens: int = 2000 @@ -53,6 +55,8 @@ def from_env(cls) -> 'Config': # LLM API Settings llm_api_url=os.getenv('LLM_API_URL'), llm_api_key=os.getenv('LLM_API_KEY'), + gemini_api_key=os.getenv('GEMINI_API_KEY'), + llm_provider=os.getenv('LLM_PROVIDER', 'openai'), llm_model_name=os.getenv('LLM_MODEL_NAME', 'gpt-3.5-turbo'), llm_temperature=float(os.getenv('LLM_TEMPERATURE', '0.7')), llm_max_tokens=int(os.getenv('LLM_MAX_TOKENS', '2000')), diff --git a/src/cli/main.py b/src/cli/main.py index 72b8cc0..8353de5 100644 --- a/src/cli/main.py +++ b/src/cli/main.py @@ -29,7 +29,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from core.model_parser import ModelParser -from core.llm_client import LLMClient, LLMConfig +from core.llm_client import get_llm_client from core.query_processor import QueryProcessor, QueryResult # Initialize Rich console for pretty output @@ -64,44 +64,27 @@ def from_env(cls): def setup_processor(config: CLIConfig) -> Optional[QueryProcessor]: """Initialize the query processor with given configuration""" try: - # Load models if config.verbose: console.print(f"[blue]Loading thinking models from {config.models_dir}...[/blue]") - + model_parser = ModelParser(config.models_dir) models = model_parser.load_all_models() - + if config.verbose: console.print(f"[green]✓ Loaded {len(models)} thinking models[/green]") - - # Setup LLM client - if not config.api_url: - console.print("[red]Error: LLM_API_URL environment variable must be set[/red]") - return None - - llm_config = LLMConfig( - api_url=config.api_url, - api_key=config.api_key, - model_name=config.model_name, - temperature=config.temperature, - max_tokens=config.max_tokens - ) - - llm_client = LLMClient(llm_config) - - # Test connection if in verbose mode + + # Get LLM client from factory + llm_client = get_llm_client() + if config.verbose: console.print("[blue]Testing LLM connection...[/blue]") if llm_client.test_connection(): console.print("[green]✓ LLM connection successful[/green]") else: console.print("[yellow]⚠ LLM connection test failed, but continuing anyway[/yellow]") - - # Create query processor - processor = QueryProcessor(model_parser, llm_client) - - return processor - + + return QueryProcessor(model_parser, llm_client) + except Exception as e: console.print(f"[red]Error initializing processor: {str(e)}[/red]") return None diff --git a/src/core/llm_client.py b/src/core/llm_client.py index 60700c4..f2b31a7 100644 --- a/src/core/llm_client.py +++ b/src/core/llm_client.py @@ -1,7 +1,7 @@ """ LLM Client for ThinkingModels -This module provides a client for interacting with OpenAI-compatible LLM APIs +This module provides a client for interacting with LLM APIs with proper error handling, retry 
logic, and configuration support. """ @@ -9,6 +9,7 @@ import time import json import requests +from abc import ABC, abstractmethod from typing import Optional, Dict, List, Any from dataclasses import dataclass import logging @@ -25,28 +26,30 @@ class LLMConfig: """ api_url: str api_key: Optional[str] = None + gemini_api_key: Optional[str] = None model_name: str = "gpt-3.5-turbo" temperature: float = 0.7 max_tokens: int = 2000 timeout: int = 30 max_retries: int = 3 retry_delay: float = 1.0 - + @classmethod def from_env(cls) -> 'LLMConfig': """ Create config from environment variables - + Returns: LLMConfig instance with values from environment """ api_url = os.getenv('LLM_API_URL') if not api_url: raise ValueError("LLM_API_URL environment variable must be set") - + return cls( api_url=api_url, api_key=os.getenv('LLM_API_KEY'), + gemini_api_key=os.getenv('GEMINI_API_KEY'), model_name=os.getenv('LLM_MODEL_NAME', 'gpt-3.5-turbo'), temperature=float(os.getenv('LLM_TEMPERATURE', '0.7')), max_tokens=int(os.getenv('LLM_MAX_TOKENS', '2000')), @@ -55,45 +58,66 @@ def from_env(cls) -> 'LLMConfig': retry_delay=float(os.getenv('LLM_RETRY_DELAY', '1.0')) ) +class BaseLLMClient(ABC): + """ + Abstract base class for LLM clients. + """ + def __init__(self, config: LLMConfig): + self.config = config + + @abstractmethod + def generate_response(self, prompt: str, system_prompt: Optional[str] = None) -> str: + """ + Generate a response from the LLM. + """ + pass + + @abstractmethod + def request_model_selection(self, query: str, available_models: List[Dict[str, Any]]) -> List[str]: + """ + Request LLM to select relevant thinking models for a query. + """ + pass + + @abstractmethod + def request_solution(self, query: str, selected_models: List[Dict[str, Any]]) -> str: + """ + Request LLM to solve a problem using selected thinking models. + """ + pass + + @abstractmethod + def test_connection(self) -> bool: + """ + Test the connection to the LLM API. + """ + pass -class LLMClient: + +class OpenAIClient(BaseLLMClient): """ - Client for interacting with OpenAI-compatible LLM APIs + Client for interacting with OpenAI-compatible LLM APIs. """ - def __init__(self, config: Optional[LLMConfig] = None): """ - Initialize the LLM client - - Args: - config: LLM configuration. If None, loads from environment + Initialize the OpenAI client. """ - self.config = config or LLMConfig.from_env() + super().__init__(config or LLMConfig.from_env()) self.session = requests.Session() - + # Set up headers self.session.headers.update({ 'Content-Type': 'application/json', 'User-Agent': 'ThinkingModels/1.0' }) - + # Add API key if provided if self.config.api_key: self.session.headers['Authorization'] = f'Bearer {self.config.api_key}' - + def _make_request(self, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]: """ - Make a request to the LLM API with retry logic - - Args: - messages: List of message dictionaries - **kwargs: Additional parameters for the API call - - Returns: - API response dictionary - - Raises: - RuntimeError: If all retry attempts fail + Make a request to the LLM API with retry logic. 
""" payload = { 'model': self.config.model_name, @@ -102,87 +126,59 @@ def _make_request(self, messages: List[Dict[str, str]], **kwargs) -> Dict[str, A 'max_tokens': self.config.max_tokens, **kwargs } - + last_error = None - + for attempt in range(self.config.max_retries): try: - logger.info(f"Making LLM API request (attempt {attempt + 1}/{self.config.max_retries})") - + logger.info(f"Making OpenAI API request (attempt {attempt + 1}/{self.config.max_retries})") + # Determine the correct endpoint if self.config.api_url.endswith('/v1') or self.config.api_url.endswith('/v1/'): endpoint = f"{self.config.api_url.rstrip('/')}/chat/completions" else: endpoint = f"{self.config.api_url.rstrip('/')}/v1/chat/completions" - + response = self.session.post( endpoint, json=payload, timeout=self.config.timeout ) - + response.raise_for_status() result = response.json() - - logger.info("LLM API request successful") + + logger.info("OpenAI API request successful") return result - + except requests.exceptions.RequestException as e: last_error = e logger.warning(f"API request failed (attempt {attempt + 1}): {str(e)}") - + if attempt < self.config.max_retries - 1: - time.sleep(self.config.retry_delay * (2 ** attempt)) # Exponential backoff - - raise RuntimeError(f"LLM API request failed after {self.config.max_retries} attempts. Last error: {last_error}") - + time.sleep(self.config.retry_delay * (2 ** attempt)) + + raise RuntimeError(f"OpenAI API request failed after {self.config.max_retries} attempts. Last error: {last_error}") + def _extract_content(self, response: Dict[str, Any]) -> str: """ - Extract content from LLM API response - - Args: - response: API response dictionary - - Returns: - Extracted content string + Extract content from OpenAI API response. """ try: return response['choices'][0]['message']['content'] except (KeyError, IndexError) as e: raise ValueError(f"Invalid API response format: {e}") - + def generate_response(self, prompt: str, system_prompt: Optional[str] = None) -> str: - """ - Generate a response from the LLM - - Args: - prompt: User prompt - system_prompt: Optional system prompt to set context - - Returns: - Generated response text - """ messages = [] - if system_prompt: messages.append({"role": "system", "content": system_prompt}) - messages.append({"role": "user", "content": prompt}) - + response = self._make_request(messages) return self._extract_content(response) - + def request_model_selection(self, query: str, available_models: List[Dict[str, Any]]) -> List[str]: - """ - Request LLM to select relevant thinking models for a query - - Args: - query: User's query - available_models: List of available model data (with id and definition) - - Returns: - List of selected model IDs - """ system_prompt = """ You are an expert at selecting relevant thinking models for problem-solving. Given a user query and a list of available thinking models with their definitions, select only those that are potentially helpful. @@ -190,12 +186,11 @@ def request_model_selection(self, query: str, available_models: List[Dict[str, A Your response should be a JSON list of model IDs, like: ["model1", "model2", "model3"] Select between 0-3 of the most relevant models. Only select a model when it's truly useful. If no model fits, select none. 
""" - - # Format available models with definitions + models_text = "" for model in available_models: models_text += f"**{model['id']}**: {model['definition'][:300]}...\n\n" - + user_prompt = f""" User Query: {query} @@ -204,49 +199,34 @@ def request_model_selection(self, query: str, available_models: List[Dict[str, A Select the most relevant thinking models for this query: """ - + response = self.generate_response(user_prompt, system_prompt) - - # Try to parse the JSON response + try: - # Clean up the response to extract JSON response = response.strip() if response.startswith('```json'): response = response[7:-3].strip() elif response.startswith('```'): response = response[3:-3].strip() - + selected_models = json.loads(response) - - # Validate that selected models are in available models + available_model_ids = [model['id'] for model in available_models] valid_models = [model for model in selected_models if model in available_model_ids] - - # Limit to maximum 3 models + valid_models = valid_models[:3] - + if not valid_models: logger.warning("No valid models selected, returning empty list") return [] - + return valid_models - + except json.JSONDecodeError: logger.warning(f"Failed to parse model selection response: {response}") - # Fallback: return empty list if parsing fails return [] - + def request_solution(self, query: str, selected_models: List[Dict[str, Any]]) -> str: - """ - Request LLM to solve a problem using selected thinking models - - Args: - query: User's query - selected_models: List of selected thinking model data - - Returns: - Generated solution - """ system_prompt = """ You are an expert problem solver. You have been provided with thinking models that may assist in solving a user's query. Only use these thinking models as guidance if they are helpful. Otherwise, feel free to ignore them. @@ -258,20 +238,19 @@ def request_solution(self, query: str, selected_models: List[Dict[str, Any]]) -> Provide a clear, structured response that demonstrates thoughtful application when relevant, but don't force their use. """ - - # Format the thinking models for the prompt + models_text = "" for i, model in enumerate(selected_models, 1): models_text += f""" {i}. **{model['id']}** ({model['type']}) Definition: {model['definition']} - + Examples: """ for j, example in enumerate(model['examples'], 1): models_text += f" {j}. {example[:200]}...\n" models_text += "\n" - + user_prompt = f""" User Query: {query} @@ -280,20 +259,154 @@ def request_solution(self, query: str, selected_models: List[Dict[str, Any]]) -> Using these thinking models as guidance, provide a comprehensive solution to the user's query: """ - + return self.generate_response(user_prompt, system_prompt) - + def test_connection(self) -> bool: + try: + response = self.generate_response("Hello, please respond with 'OK' if you can hear me.") + return "OK" in response or "ok" in response.lower() + except Exception as e: + logger.error(f"Connection test failed: {e}") + return False + +import google.generativeai as genai + +class GeminiClient(BaseLLMClient): + """ + Client for interacting with Google Gemini LLM API. + """ + def __init__(self, config: Optional[LLMConfig] = None): """ - Test the connection to the LLM API - - Returns: - True if connection successful, False otherwise + Initialize the Gemini client. 
+ """ + super().__init__(config or LLMConfig.from_env()) + if not self.config.gemini_api_key: + raise ValueError("GEMINI_API_KEY must be set for GeminiClient") + genai.configure(api_key=self.config.gemini_api_key) + self.model = genai.GenerativeModel(self.config.model_name) + + def generate_response(self, prompt: str, system_prompt: Optional[str] = None) -> str: + """ + Generate a response from the Gemini LLM. + Note: Gemini API has a different way of handling system prompts. + It's usually part of the initial conversation history. """ + full_prompt = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt + + try: + logger.info(f"Making Gemini API request with model {self.config.model_name}") + response = self.model.generate_content(full_prompt) + logger.info("Gemini API request successful") + return response.text + except Exception as e: + logger.error(f"Gemini API request failed: {e}") + raise RuntimeError(f"Gemini API request failed: {e}") + + def request_model_selection(self, query: str, available_models: List[Dict[str, Any]]) -> List[str]: + # This implementation can be largely the same as OpenAI's, + # as the logic for prompt creation is abstract enough. + system_prompt = """ +You are an expert at selecting relevant thinking models for problem-solving. +Given a user query and a list of available thinking models with their definitions, select only those that are potentially helpful. + +Your response should be a JSON list of model IDs, like: ["model1", "model2", "model3"] +Select between 0-3 of the most relevant models. Only select a model when it's truly useful. If no model fits, select none. +""" + models_text = "" + for model in available_models: + models_text += f"**{model['id']}**: {model['definition'][:300]}...\n\n" + + user_prompt = f""" +User Query: {query} + +Available Thinking Models: +{models_text} + +Select the most relevant thinking models for this query: +""" + response_text = self.generate_response(user_prompt, system_prompt) + + try: + response_text = response_text.strip() + if response_text.startswith('```json'): + response_text = response_text[7:-3].strip() + elif response_text.startswith('```'): + response_text = response_text[3:-3].strip() + + selected_models = json.loads(response_text) + + available_model_ids = [m['id'] for m in available_models] + valid_models = [m for m in selected_models if m in available_model_ids][:3] + + if not valid_models: + logger.warning("No valid models selected by Gemini, returning empty list") + return valid_models + except (json.JSONDecodeError, TypeError): + logger.warning(f"Failed to parse model selection response from Gemini: {response_text}") + return [] + + def request_solution(self, query: str, selected_models: List[Dict[str, Any]]) -> str: + system_prompt = """ +You are an expert problem solver. You have been provided with thinking models that may assist in solving a user's query. +Only use these thinking models as guidance if they are helpful. Otherwise, feel free to ignore them. +Provide a clear, structured response. +""" + models_text = "" + for i, model in enumerate(selected_models, 1): + models_text += f"\n{i}. 
**{model['id']}** ({model['type']}): {model['definition']}" + + user_prompt = f""" +User Query: {query} + +Relevant Thinking Models: +{models_text} + +Using these thinking models as guidance, provide a comprehensive solution to the user's query: +""" + return self.generate_response(user_prompt, system_prompt) + + def test_connection(self) -> bool: try: response = self.generate_response("Hello, please respond with 'OK' if you can hear me.") return "OK" in response or "ok" in response.lower() except Exception as e: - logger.error(f"Connection test failed: {e}") + logger.error(f"Gemini connection test failed: {e}") return False +from config import get_config + +def get_llm_client() -> BaseLLMClient: + """ + Factory function to get the appropriate LLM client based on configuration. + """ + config = get_config() + + if config.llm_provider.lower() == 'gemini': + # For Gemini, the API key is passed differently + gemini_config = LLMConfig( + api_url="", # Not used by Gemini SDK + gemini_api_key=config.gemini_api_key, # Correctly assign to gemini_api_key + api_key=None, # Explicitly set OpenAI key to None + model_name=config.llm_model_name, + temperature=config.llm_temperature, + max_tokens=config.llm_max_tokens + ) + return GeminiClient(config=gemini_config) + + + # Default to OpenAI + openai_config = LLMConfig( + api_url=config.llm_api_url, + api_key=config.llm_api_key, + gemini_api_key=None, # Explicitly null for OpenAI + model_name=config.llm_model_name, + temperature=config.llm_temperature, + max_tokens=config.llm_max_tokens, + timeout=config.llm_timeout, + max_retries=config.llm_max_retries, + retry_delay=config.llm_retry_delay + ) + return OpenAIClient(config=openai_config) + + diff --git a/src/core/query_processor.py b/src/core/query_processor.py index 3372647..254adf6 100644 --- a/src/core/query_processor.py +++ b/src/core/query_processor.py @@ -8,7 +8,7 @@ from typing import Dict, List, Any, Optional from dataclasses import dataclass from src.core.model_parser import ModelParser, ThinkingModel -from src.core.llm_client import LLMClient +from src.core.llm_client import get_llm_client, BaseLLMClient import logging import time @@ -36,7 +36,7 @@ class QueryProcessor: - Parsing and validating LLM responses """ - def __init__(self, model_parser: ModelParser, llm_client: LLMClient): + def __init__(self, model_parser: ModelParser, llm_client: BaseLLMClient): self.model_parser = model_parser self.llm_client = llm_client diff --git a/src/web/app.py b/src/web/app.py index a65fca6..dcd3266 100644 --- a/src/web/app.py +++ b/src/web/app.py @@ -26,7 +26,7 @@ # Add parent directories to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) from core.model_parser import ModelParser -from core.llm_client import LLMClient, LLMConfig +from core.llm_client import get_llm_client from core.query_processor import QueryProcessor, QueryResult # Set up logging @@ -133,10 +133,10 @@ async def startup_event(): # Initialize LLM client (if configured) api_url = os.getenv('LLM_API_URL') - if api_url: - llm_config = LLMConfig.from_env() - llm_client = LLMClient(llm_config) - + gemini_api_key = os.getenv('GEMINI_API_KEY') + if api_url or gemini_api_key: + llm_client = get_llm_client() + # Initialize query processor query_processor = QueryProcessor(model_parser, llm_client) logger.info("Query processor initialized with LLM client") From e98cf6346b0d7888c3294402e90374ed3c73074e Mon Sep 17 00:00:00 2001 From: MeMe Date: Wed, 29 Oct 2025 06:48:28 +0000 Subject: [PATCH 2/3] Update readme --- README.md | 22 
+++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index febbe48..01abb2a 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ This two-phase approach ensures that the solutions are not just generic LLM resp - **140 Thinking Models**: A comprehensive library of thinking models, from SWOT analysis to second-order thinking. - **Two-Phase Query Processing**: Enhances LLM responses with structured problem-solving methodologies. -- **OpenAI-Compatible API**: Integrates with any OpenAI-compatible LLM API. +- **Multi-LLM-Backend Support**: Integrates with multiple LLM providers, including OpenAI and Google Gemini. - **CLI & Web Interfaces**: Access the system through a command-line interface or a modern web application. - **Real-time Updates**: Get live feedback during query processing via WebSockets. - **Model Browser**: Explore, search, and filter thinking models through the web UI. @@ -60,7 +60,7 @@ This two-phase approach ensures that the solutions are not just generic LLM resp - Python 3.8+ - `pip` for package management -- An OpenAI-compatible LLM API endpoint +- An API endpoint and key for an OpenAI-compatible service or Google Gemini. ### Installation @@ -79,9 +79,13 @@ This two-phase approach ensures that the solutions are not just generic LLM resp 3. **Configure your LLM API:** - Create a `.env` file in the project root and add your API credentials: + Create a `.env` file in the project root. Below are examples for configuring OpenAI and Gemini providers. + **For OpenAI-Compatible APIs:** ```env + # Set the provider to openai (this is the default) + LLM_PROVIDER=openai + # Required LLM_API_URL=https://your-llm-api-endpoint.com @@ -90,6 +94,18 @@ This two-phase approach ensures that the solutions are not just generic LLM resp LLM_MODEL_NAME=gpt-3.5-turbo ``` + **For Google Gemini:** + ```env + # Set the provider to gemini + LLM_PROVIDER=gemini + + # Required - your Gemini API key + GEMINI_API_KEY=your-gemini-api-key + + # Optional - specify a Gemini model + LLM_MODEL_NAME=gemini-1.5-flash + ``` + --- ## Usage From c90c473e0414d36cd9e23e304910d45cf87a7527 Mon Sep 17 00:00:00 2001 From: MeMe Date: Wed, 29 Oct 2025 06:56:26 +0000 Subject: [PATCH 3/3] Update requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6a9be37..1959c30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ # Core dependencies requests>=2.28.0 dataclasses-json>=0.5.7 +google-generativeai>=0.8.5 # CLI dependencies click>=8.0.0 @@ -9,7 +10,7 @@ rich>=13.0.0 # Web dependencies fastapi>=0.100.0 -uvicorn>=0.23.0 +uvicorn[standard]>=0.23.0 jinja2>=3.0.0 python-multipart>=0.0.5
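
After this series is applied, callers obtain a backend through the new `get_llm_client()` factory rather than constructing `LLMClient` directly. Below is a minimal usage sketch, not part of the patch: it assumes the project root and `src/` are both importable (e.g. via `PYTHONPATH`, as the CLI arranges for `src/`), that `config.get_config()` reads the environment at call time, and it uses placeholder values for the API key and model name.

```python
# Usage sketch for the new factory (assumptions: project root and src/ are on
# sys.path / PYTHONPATH; GEMINI_API_KEY below is a placeholder, not a real key).
import os

os.environ.setdefault("LLM_PROVIDER", "gemini")            # "openai" is the default
os.environ.setdefault("GEMINI_API_KEY", "your-gemini-api-key")
os.environ.setdefault("LLM_MODEL_NAME", "gemini-1.5-flash")

from core.llm_client import get_llm_client

client = get_llm_client()   # GeminiClient or OpenAIClient, chosen by LLM_PROVIDER
if client.test_connection():
    print(client.generate_response("Summarize second-order thinking in one sentence."))
```

With `LLM_PROVIDER=openai` (the default) the same call path returns an `OpenAIClient`, so callers such as the CLI and web app never branch on the backend themselves.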