92 changes: 82 additions & 10 deletions opto/optimizers/optoprimemulti.py
@@ -2,10 +2,9 @@
import json
from typing import List, Dict



from opto.trace.propagators import GraphPropagator
from opto.optimizers.optoprime import OptoPrime
from opto.utils.llm import LLMFactory

from concurrent.futures import ThreadPoolExecutor, as_completed

@@ -19,6 +18,8 @@ def __init__(
generation_technique: str = "temperature_variation",
selection_technique: str = "best_of_n",
experts_list: Optional[List[str]] = None,
llm_profiles: Optional[List[str]] = None, # List of LLM profiles to use
llm_weights: Optional[List[float]] = None, # Weights for each LLM (for weighted selection)
**kwargs,
):
super().__init__(*args, **kwargs)
@@ -31,6 +32,44 @@ def __init__(
self.selection_technique = selection_technique
self.experts_list = experts_list

# NEW: Multiple LLM support
self.llm_profiles = llm_profiles
self.llm_weights = (llm_weights or [1.0] * len(llm_profiles)) if llm_profiles else None
self._llm_instances = {} # Cache for LLM instances

def _get_llm_for_profile(self, profile: str = None):
"""Get LLM instance for a profile, with caching."""
if profile is None:
return self.llm # Use default LLM

if profile not in self._llm_instances:
try:
from opto.utils.llm import LLMFactory
self._llm_instances[profile] = LLMFactory.get_llm(profile)
except Exception as e:
# Fallback to default LLM if profile creation fails
import warnings
warnings.warn(f"Failed to create LLM for profile '{profile}': {e}. Using default LLM.")
return self.llm

return self._llm_instances[profile]

def _get_llms_for_generation(self, num_responses: int):
"""Get list of LLMs to use for generation."""
if self.llm_profiles is None or len(self.llm_profiles) == 0:
# Fallback to single LLM (existing behavior)
return [self.llm] * num_responses

# Distribute responses across multiple LLMs
llms = []
for i in range(num_responses):
profile_idx = i % len(self.llm_profiles)
profile = self.llm_profiles[profile_idx]
llm = self._get_llm_for_profile(profile)
llms.append(llm)

return llms

def call_llm(
self,
system_prompt: str,
@@ -39,20 +78,24 @@ def call_llm(
max_tokens: int = 4096,
num_responses: int = 1,
temperature: float = 0.0,
llm = None, # NEW: Optional specific LLM to use
) -> List[str]:
"""Call the LLM with a prompt and return multiple responses."""
"""Given a prompt, returns multiple candidate answers."""
# if verbose not in (False, "output"):
# print("Prompt\n", system_prompt + user_prompt)

# Use provided LLM or fall back to default
active_llm = llm or self.llm

messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
]

try:
if hasattr(self.llm, "create"):
if hasattr(active_llm, "create"):
# Standard OpenAI/LangChain style
response = self.llm.create(
response = active_llm.create(
messages=messages,
response_format={"type": "json_object"},
max_tokens=max_tokens,
Expand All @@ -62,7 +105,7 @@ def call_llm(
else:
# Fallback for LiteLLM (callable) or other interfaces
# e.g., LiteLLM(messages, max_tokens=…, n=…, temperature=…)
response = self.llm(
response = active_llm(
messages,
max_tokens=max_tokens,
n=num_responses,
@@ -165,6 +208,35 @@ def generate_candidates(

generation_technique = generation_technique.lower()

if self.llm_profiles is not None and len(self.llm_profiles) > 0 and generation_technique == "multi_llm":
llms = self._get_llms_for_generation(num_responses)

# Prepare arguments for parallel execution
arg_dicts = []
for i, llm in enumerate(llms):
profile_name = self.llm_profiles[i % len(self.llm_profiles)] if self.llm_profiles else "default"
modified_system_prompt = f"{system_prompt}\n\n[Using {profile_name} model for diverse perspective]"

arg_dicts.append(dict(
system_prompt=modified_system_prompt,
user_prompt=user_prompt,
verbose=verbose,
max_tokens=max_tokens,
num_responses=1,
temperature=temp_min,
llm=llm # Use specific LLM
))

# Execute in parallel
try:
parallel_results = self._parallel_call_llm(arg_dicts)
candidates.extend(parallel_results)
except Exception as e:
if verbose:
print(f"Error in multi_llm mode: {e} – falling back to temperature variation")
generation_technique = "temperature_variation"
candidates = []

if generation_technique == "self_refinement":
# Generate solutions by refining previous ones
for i in range(num_responses):
@@ -179,7 +251,7 @@ def generate_candidates(
verbose=verbose,
max_tokens=max_tokens,
num_responses=1,
temperature=0.0,
temperature=temp_min,
)

if response and len(response) > 0:
@@ -195,15 +267,15 @@ def generate_candidates(
f"CANDIDATE {idx + 1}: <<<\n{cand}\n>>>"
for idx, cand in enumerate(candidates)
)
meta_prompt = f"{system_prompt}\nGiven the following candidate solutions, propose a new alternative optimal solution to user's prompt using their same JSON format (suggest only trainable codes/variables to modify, never inputs):\n{previous_solutions}\n"
meta_prompt = f"{system_prompt}\nGiven the following prior CANDIDATE solutions, answer with a very different new CANDIDATE optimal solution to user's prompt using their same JSON format (suggest only trainable codes/variables to modify, never inputs):\n{previous_solutions}\n"

response = self.call_llm(
system_prompt=meta_prompt,
user_prompt=user_prompt,
verbose=verbose,
max_tokens=max_tokens,
num_responses=1,
temperature=0.0,
temperature=temp_min,
)

if response and len(response) > 0:
@@ -292,7 +364,7 @@ def generate_candidates(
print("Warning: Failed to generate any candidates")

if self.log is not None:
self.log.append({"system_prompt": system_prompt, "user_prompt": user_prompt, "response": candidates, "generation_technique": generation_technique})
self.log.append({"system_prompt": system_prompt, "user_prompt": user_prompt, "response": candidates, "generation_technique": generation_technique, "llm_profiles": self.llm_profiles})
# only build a problem instance if we actually have one
pi = self.problem_instance(summary) if summary is not None else {}
self.summary_log.append({"problem_instance": pi, "summary": summary})
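
For context, a minimal usage sketch of the new multi-LLM generation path added above: it mirrors the example in the llm.py docstring below, assumes the rest of the OptoPrimeMulti constructor is unchanged, and uses `parameters` as a placeholder for the usual trainable Trace parameters.

from opto.optimizers.optoprimemulti import OptoPrimeMulti
from opto.utils.llm import LLMFactory

# Optionally register an extra profile before constructing the optimizer
# (the profile name and model here are illustrative).
LLMFactory.register_profile("claude_sonnet", "LiteLLM",
                            model="claude-3-5-sonnet-latest", temperature=0.3)

optimizer = OptoPrimeMulti(
    parameters,                                          # trainable Trace parameters (placeholder)
    llm_profiles=["cheap", "premium", "claude_sonnet"],  # responses are distributed round-robin across these
    generation_technique="multi_llm",                    # activates the per-profile generation branch above
)
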
83 changes: 82 additions & 1 deletion opto/utils/llm.py
@@ -239,6 +239,80 @@ def create(self, **config: Any):
"CustomLLM": CustomLLM,
}

class LLMFactory:
"""Factory for creating LLM instances with predefined profiles.

The code comes with these built-in profiles:

llm_default = LLM(profile="default") # gpt-4o-mini
llm_premium = LLM(profile="premium") # gpt-4
llm_cheap = LLM(profile="cheap") # gpt-4o-mini
llm_fast = LLM(profile="fast") # gpt-3.5-turbo
llm_reasoning = LLM(profile="reasoning") # o1-mini

You can override those built-in profiles:

LLMFactory.register_profile("default", "LiteLLM", model="gpt-4o", temperature=0.5)
LLMFactory.register_profile("premium", "LiteLLM", model="o1-preview", max_tokens=8000)
LLMFactory.register_profile("cheap", "LiteLLM", model="gpt-3.5-turbo", temperature=0.9)
LLMFactory.register_profile("fast", "LiteLLM", model="gpt-3.5-turbo", max_tokens=500)
LLMFactory.register_profile("reasoning", "LiteLLM", model="o1-preview")

An example of using different backends:

# Register custom profiles for different use cases
LLMFactory.register_profile("advanced_reasoning", "LiteLLM", model="o1-preview", max_tokens=4000)
LLMFactory.register_profile("claude_sonnet", "LiteLLM", model="claude-3-5-sonnet-latest", temperature=0.3)
LLMFactory.register_profile("custom_server", "CustomLLM", model="llama-3.1-8b")

# Use in different contexts
reasoning_llm = LLM(profile="advanced_reasoning") # For complex reasoning
claude_llm = LLM(profile="claude_sonnet") # For Claude responses
local_llm = LLM(profile="custom_server") # For local deployment

# Single LLM optimizer with custom profile
optimizer1 = OptoPrime(parameters, llm=LLM(profile="advanced_reasoning"))

# Multi-LLM optimizer with multiple profiles
optimizer2 = OptoPrimeMulti(parameters, llm_profiles=["cheap", "premium", "claude_sonnet"], generation_technique="multi_llm")
"""

# Default profiles for different use cases
_profiles = {
'default': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4o-mini'}},
'premium': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4'}},
'cheap': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4o-mini'}},
'fast': {'backend': 'LiteLLM', 'params': {'model': 'gpt-3.5-turbo'}},
'reasoning': {'backend': 'LiteLLM', 'params': {'model': 'o1-mini'}},
}

@classmethod
def get_llm(cls, profile: str = 'default') -> AbstractModel:
"""Get an LLM instance for the specified profile."""
if profile not in cls._profiles:
raise ValueError(f"Unknown profile '{profile}'. Available profiles: {list(cls._profiles.keys())}")

config = cls._profiles[profile]
backend_cls = _LLM_REGISTRY[config['backend']]
return backend_cls(**config['params'])

@classmethod
def register_profile(cls, name: str, backend: str, **params):
"""Register a new LLM profile."""
cls._profiles[name] = {'backend': backend, 'params': params}

@classmethod
def list_profiles(cls):
"""List all available profiles."""
return list(cls._profiles.keys())

@classmethod
def get_profile_info(cls, profile: str = None):
"""Get information about a profile or all profiles."""
if profile:
return cls._profiles.get(profile)
return cls._profiles

class LLM:
"""
A unified entry point for all supported LLM backends.
@@ -248,8 +322,15 @@ class LLM:
llm = LLM()
# or override explicitly
llm = LLM(backend="AutoGen", config_list=my_configs)
# or use predefined profiles
llm = LLM(profile="premium") # Use premium model
llm = LLM(profile="cheap") # Use cheaper model
llm = LLM(profile="reasoning") # Use reasoning/thinking model
"""
def __new__(cls, *args, backend: str = None, **kwargs):
def __new__(cls, *args, profile: str = None, backend: str = None, **kwargs):
# New: if profile is specified, use LLMFactory
if profile:
return LLMFactory.get_llm(profile)
# Decide which backend to use
name = backend or os.getenv("TRACE_DEFAULT_LLM_BACKEND", "LiteLLM")
try:
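
Taken together, the factory and the new `profile` argument on `LLM` compose as in the short sketch below; it exercises only the methods added in this diff and assumes valid credentials for the chosen LiteLLM backend.

from opto.utils.llm import LLM, LLMFactory

# Inspect the built-in profiles and their configuration.
print(LLMFactory.list_profiles())              # ['default', 'premium', 'cheap', 'fast', 'reasoning']
print(LLMFactory.get_profile_info("premium"))  # {'backend': 'LiteLLM', 'params': {'model': 'gpt-4'}}

# Override a built-in profile or register a new one.
LLMFactory.register_profile("premium", "LiteLLM", model="o1-preview", max_tokens=8000)

# LLM(profile=...) now delegates to LLMFactory.get_llm and returns the backend instance.
premium_llm = LLM(profile="premium")
default_llm = LLM()  # unchanged behavior when no profile is given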