92 changes: 82 additions & 10 deletions opto/optimizers/optoprimemulti.py
@@ -2,10 +2,9 @@
import json
from typing import List, Dict



from opto.trace.propagators import GraphPropagator
from opto.optimizers.optoprime import OptoPrime
from opto.utils.llm import LLMFactory

from concurrent.futures import ThreadPoolExecutor, as_completed

@@ -19,6 +18,8 @@ def __init__(
generation_technique: str = "temperature_variation",
selection_technique: str = "best_of_n",
experts_list: Optional[List[str]] = None,
llm_profiles: Optional[List[str]] = None, # List of LLM profiles to use
llm_weights: Optional[List[float]] = None, # Weights for each LLM (for weighted selection)
**kwargs,
):
super().__init__(*args, **kwargs)
@@ -31,6 +32,44 @@ def __init__(
self.selection_technique = selection_technique
self.experts_list = experts_list

# NEW: Multiple LLM support
self.llm_profiles = llm_profiles
self.llm_weights = (llm_weights or [1.0] * len(llm_profiles)) if llm_profiles else None
self._llm_instances = {} # Cache for LLM instances

def _get_llm_for_profile(self, profile: str = None):
"""Get LLM instance for a profile, with caching."""
if profile is None:
return self.llm # Use default LLM

if profile not in self._llm_instances:
try:
from opto.utils.llm import LLMFactory
self._llm_instances[profile] = LLMFactory.get_llm(profile)
except Exception as e:
# Fallback to default LLM if profile creation fails
import warnings
warnings.warn(f"Failed to create LLM for profile '{profile}': {e}. Using default LLM.")
return self.llm

return self._llm_instances[profile]

def _get_llms_for_generation(self, num_responses: int):
"""Get list of LLMs to use for generation."""
if self.llm_profiles is None or len(self.llm_profiles) == 0:
# Fallback to single LLM (existing behavior)
return [self.llm] * num_responses

# Distribute responses across multiple LLMs
llms = []
for i in range(num_responses):
profile_idx = i % len(self.llm_profiles)
profile = self.llm_profiles[profile_idx]
llm = self._get_llm_for_profile(profile)
llms.append(llm)

return llms

def call_llm(
self,
system_prompt: str,
@@ -39,20 +78,24 @@ def call_llm(
max_tokens: int = 4096,
num_responses: int = 1,
temperature: float = 0.0,
llm = None, # NEW: Optional specific LLM to use
) -> List[str]:
"""Call the LLM with a prompt and return multiple responses."""
"""Given a prompt, returns multiple candidate answers."""
# if verbose not in (False, "output"):
# print("Prompt\n", system_prompt + user_prompt)

# Use provided LLM or fall back to default
active_llm = llm or self.llm

messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
]

try:
if hasattr(self.llm, "create"):
if hasattr(active_llm, "create"):
# Standard OpenAI/LangChain style
response = self.llm.create(
response = active_llm.create(
messages=messages,
response_format={"type": "json_object"},
max_tokens=max_tokens,
Expand All @@ -62,7 +105,7 @@ def call_llm(
else:
# Fallback for LiteLLM (callable) or other interfaces
# e.g., LiteLLM(messages, max_tokens=…, n=…, temperature=…)
response = self.llm(
response = active_llm(
messages,
max_tokens=max_tokens,
n=num_responses,
@@ -165,6 +208,35 @@ def generate_candidates(

generation_technique = generation_technique.lower()

if self.llm_profiles is not None and len(self.llm_profiles) > 0 and generation_technique == "multi_llm":
llms = self._get_llms_for_generation(num_responses)

# Prepare arguments for parallel execution
arg_dicts = []
for i, llm in enumerate(llms):
profile_name = self.llm_profiles[i % len(self.llm_profiles)] if self.llm_profiles else "default"
modified_system_prompt = f"{system_prompt}\n\n[Using {profile_name} model for diverse perspective]"

arg_dicts.append(dict(
system_prompt=modified_system_prompt,
user_prompt=user_prompt,
verbose=verbose,
max_tokens=max_tokens,
num_responses=1,
temperature=temp_min,
llm=llm # Use specific LLM
))

# Execute in parallel
try:
parallel_results = self._parallel_call_llm(arg_dicts)
candidates.extend(parallel_results)
except Exception as e:
if verbose:
print(f"Error in multi_llm mode: {e} – falling back to temperature variation")
generation_technique = "temperature_variation"
candidates = []

if generation_technique == "self_refinement":
# Generate solutions by refining previous ones
for i in range(num_responses):
@@ -179,7 +251,7 @@ def generate_candidates(
verbose=verbose,
max_tokens=max_tokens,
num_responses=1,
temperature=0.0,
temperature=temp_min,
)

if response and len(response) > 0:
@@ -195,15 +267,15 @@ def generate_candidates(
f"CANDIDATE {idx + 1}: <<<\n{cand}\n>>>"
for idx, cand in enumerate(candidates)
)
meta_prompt = f"{system_prompt}\nGiven the following candidate solutions, propose a new alternative optimal solution to user's prompt using their same JSON format (suggest only trainable codes/variables to modify, never inputs):\n{previous_solutions}\n"
meta_prompt = f"{system_prompt}\nGiven the following prior CANDIDATE solutions, answer with a very different new CANDIDATE optimal solution to user's prompt using their same JSON format (suggest only trainable codes/variables to modify, never inputs):\n{previous_solutions}\n"

response = self.call_llm(
system_prompt=meta_prompt,
user_prompt=user_prompt,
verbose=verbose,
max_tokens=max_tokens,
num_responses=1,
temperature=0.0,
temperature=temp_min,
)

if response and len(response) > 0:
@@ -292,7 +364,7 @@ def generate_candidates(
print("Warning: Failed to generate any candidates")

if self.log is not None:
self.log.append({"system_prompt": system_prompt, "user_prompt": user_prompt, "response": candidates, "generation_technique": generation_technique})
self.log.append({"system_prompt": system_prompt, "user_prompt": user_prompt, "response": candidates, "generation_technique": generation_technique, "llm_profiles": self.llm_profiles})
# only build a problem instance if we actually have one
pi = self.problem_instance(summary) if summary is not None else {}
self.summary_log.append({"problem_instance": pi, "summary": summary})
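
For context, a minimal usage sketch of the new multi-LLM generation path added above: it mirrors the example in the llm.py docstring below, assumes the rest of the OptoPrimeMulti constructor is unchanged, and uses `parameters` as a placeholder for the usual trainable Trace parameters.

from opto.optimizers.optoprimemulti import OptoPrimeMulti
from opto.utils.llm import LLMFactory

# Optionally register an extra profile before constructing the optimizer
# (the profile name and model here are illustrative).
LLMFactory.register_profile("claude_sonnet", "LiteLLM",
                            model="claude-3-5-sonnet-latest", temperature=0.3)

optimizer = OptoPrimeMulti(
    parameters,                                          # trainable Trace parameters (placeholder)
    llm_profiles=["cheap", "premium", "claude_sonnet"],  # responses are distributed round-robin across these
    generation_technique="multi_llm",                    # activates the per-profile generation branch above
)
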
83 changes: 82 additions & 1 deletion opto/utils/llm.py
@@ -239,6 +239,80 @@ def create(self, **config: Any):
"CustomLLM": CustomLLM,
}

class LLMFactory:
"""Factory for creating LLM instances with predefined profiles.

The code comes with these built-in profiles:

llm_default = LLM(profile="default") # gpt-4o-mini
llm_premium = LLM(profile="premium") # gpt-4
llm_cheap = LLM(profile="cheap") # gpt-4o-mini
llm_fast = LLM(profile="fast") # gpt-3.5-turbo
llm_reasoning = LLM(profile="reasoning") # o1-mini

You can override those built-in profiles:

LLMFactory.register_profile("default", "LiteLLM", model="gpt-4o", temperature=0.5)
LLMFactory.register_profile("premium", "LiteLLM", model="o1-preview", max_tokens=8000)
LLMFactory.register_profile("cheap", "LiteLLM", model="gpt-3.5-turbo", temperature=0.9)
LLMFactory.register_profile("fast", "LiteLLM", model="gpt-3.5-turbo", max_tokens=500)
LLMFactory.register_profile("reasoning", "LiteLLM", model="o1-preview")

An example of using different backends:

# Register custom profiles for different use cases
LLMFactory.register_profile("advanced_reasoning", "LiteLLM", model="o1-preview", max_tokens=4000)
LLMFactory.register_profile("claude_sonnet", "LiteLLM", model="claude-3-5-sonnet-latest", temperature=0.3)
LLMFactory.register_profile("custom_server", "CustomLLM", model="llama-3.1-8b")

# Use in different contexts
reasoning_llm = LLM(profile="advanced_reasoning") # For complex reasoning
claude_llm = LLM(profile="claude_sonnet") # For Claude responses
local_llm = LLM(profile="custom_server") # For local deployment

# Single LLM optimizer with custom profile
optimizer1 = OptoPrime(parameters, llm=LLM(profile="advanced_reasoning"))

# Multi-LLM optimizer with multiple profiles
optimizer2 = OptoPrimeMulti(parameters, llm_profiles=["cheap", "premium", "claude_sonnet"], generation_technique="multi_llm")
"""

# Default profiles for different use cases
_profiles = {
'default': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4o-mini'}},
'premium': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4'}},
'cheap': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4o-mini'}},
'fast': {'backend': 'LiteLLM', 'params': {'model': 'gpt-3.5-turbo'}},
'reasoning': {'backend': 'LiteLLM', 'params': {'model': 'o1-mini'}},
}

@classmethod
def get_llm(cls, profile: str = 'default') -> AbstractModel:
"""Get an LLM instance for the specified profile."""
if profile not in cls._profiles:
raise ValueError(f"Unknown profile '{profile}'. Available profiles: {list(cls._profiles.keys())}")

config = cls._profiles[profile]
backend_cls = _LLM_REGISTRY[config['backend']]
return backend_cls(**config['params'])

@classmethod
def register_profile(cls, name: str, backend: str, **params):
"""Register a new LLM profile."""
cls._profiles[name] = {'backend': backend, 'params': params}

@classmethod
def list_profiles(cls):
"""List all available profiles."""
return list(cls._profiles.keys())

@classmethod
def get_profile_info(cls, profile: str = None):
"""Get information about a profile or all profiles."""
if profile:
return cls._profiles.get(profile)
return cls._profiles

class LLM:
"""
A unified entry point for all supported LLM backends.
@@ -248,8 +322,15 @@ class LLM:
llm = LLM()
# or override explicitly
llm = LLM(backend="AutoGen", config_list=my_configs)
# or use predefined profiles
llm = LLM(profile="premium") # Use premium model
llm = LLM(profile="cheap") # Use cheaper model
llm = LLM(profile="reasoning") # Use reasoning/thinking model
"""
def __new__(cls, *args, backend: str = None, **kwargs):
def __new__(cls, *args, profile: str = None, backend: str = None, **kwargs):
# New: if profile is specified, use LLMFactory
if profile:
return LLMFactory.get_llm(profile)
# Decide which backend to use
name = backend or os.getenv("TRACE_DEFAULT_LLM_BACKEND", "LiteLLM")
try:
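
Taken together, the factory and the new `profile` argument on `LLM` compose as in the short sketch below; it exercises only the methods added in this diff and assumes valid credentials for the chosen LiteLLM backend.

from opto.utils.llm import LLM, LLMFactory

# Inspect the built-in profiles and their configuration.
print(LLMFactory.list_profiles())              # ['default', 'premium', 'cheap', 'fast', 'reasoning']
print(LLMFactory.get_profile_info("premium"))  # {'backend': 'LiteLLM', 'params': {'model': 'gpt-4'}}

# Override a built-in profile or register a new one.
LLMFactory.register_profile("premium", "LiteLLM", model="o1-preview", max_tokens=8000)

# LLM(profile=...) now delegates to LLMFactory.get_llm and returns the backend instance.
premium_llm = LLM(profile="premium")
default_llm = LLM()  # unchanged behavior when no profile is given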