diff --git a/opto/optimizers/optoprimemulti.py b/opto/optimizers/optoprimemulti.py index 6134824f..be7cfa30 100644 --- a/opto/optimizers/optoprimemulti.py +++ b/opto/optimizers/optoprimemulti.py @@ -2,10 +2,9 @@ import json from typing import List, Dict - - from opto.trace.propagators import GraphPropagator from opto.optimizers.optoprime import OptoPrime +from opto.utils.llm import LLMFactory from concurrent.futures import ThreadPoolExecutor, as_completed @@ -19,6 +18,8 @@ def __init__( generation_technique: str = "temperature_variation", selection_technique: str = "best_of_n", experts_list: Optional[List[str]] = None, + llm_profiles: Optional[List[str]] = None, # List of LLM profiles to use + llm_weights: Optional[List[float]] = None, # Weights for each LLM (for weighted selection) **kwargs, ): super().__init__(*args, **kwargs) @@ -31,6 +32,44 @@ def __init__( self.selection_technique = selection_technique self.experts_list = experts_list + # NEW: Multiple LLM support + self.llm_profiles = llm_profiles + self.llm_weights = llm_weights or [1.0] * len(llm_profiles) if llm_profiles else None + self._llm_instances = {} # Cache for LLM instances + + def _get_llm_for_profile(self, profile: str = None): + """Get LLM instance for a profile, with caching.""" + if profile is None: + return self.llm # Use default LLM + + if profile not in self._llm_instances: + try: + from opto.utils.llm import LLMFactory + self._llm_instances[profile] = LLMFactory.get_llm(profile) + except Exception as e: + # Fallback to default LLM if profile creation fails + import warnings + warnings.warn(f"Failed to create LLM for profile '{profile}': {e}. Using default LLM.") + return self.llm + + return self._llm_instances[profile] + + def _get_llms_for_generation(self, num_responses: int): + """Get list of LLMs to use for generation.""" + if self.llm_profiles is None or len(self.llm_profiles) == 0: + # Fallback to single LLM (existing behavior) + return [self.llm] * num_responses + + # Distribute responses across multiple LLMs + llms = [] + for i in range(num_responses): + profile_idx = i % len(self.llm_profiles) + profile = self.llm_profiles[profile_idx] + llm = self._get_llm_for_profile(profile) + llms.append(llm) + + return llms + def call_llm( self, system_prompt: str, @@ -39,20 +78,24 @@ def call_llm( max_tokens: int = 4096, num_responses: int = 1, temperature: float = 0.0, + llm = None, # NEW: Optional specific LLM to use ) -> List[str]: - """Call the LLM with a prompt and return multiple responses.""" + """Given a prompt, returns multiple candidate answers.""" # if verbose not in (False, "output"): # print("Prompt\n", system_prompt + user_prompt) + # Use provided LLM or fall back to default + active_llm = llm or self.llm + messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ] try: - if hasattr(self.llm, "create"): + if hasattr(active_llm, "create"): # Standard OpenAI/LangChain style - response = self.llm.create( + response = active_llm.create( messages=messages, response_format={"type": "json_object"}, max_tokens=max_tokens, @@ -62,7 +105,7 @@ def call_llm( else: # Fallback for LiteLLM (callable) or other interfaces # e.g., LiteLLM(messages, max_tokens=…, n=…, temperature=…) - response = self.llm( + response = active_llm( messages, max_tokens=max_tokens, n=num_responses, @@ -165,6 +208,35 @@ def generate_candidates( generation_technique = generation_technique.lower() + if self.llm_profiles is not None and len(self.llm_profiles) > 0 and generation_technique == 
"multi_llm": + llms = self._get_llms_for_generation(num_responses) + + # Prepare arguments for parallel execution + arg_dicts = [] + for i, llm in enumerate(llms): + profile_name = self.llm_profiles[i % len(self.llm_profiles)] if self.llm_profiles else "default" + modified_system_prompt = f"{system_prompt}\n\n[Using {profile_name} model for diverse perspective]" + + arg_dicts.append(dict( + system_prompt=modified_system_prompt, + user_prompt=user_prompt, + verbose=verbose, + max_tokens=max_tokens, + num_responses=1, + temperature=temp_min, + llm=llm # Use specific LLM + )) + + # Execute in parallel + try: + parallel_results = self._parallel_call_llm(arg_dicts) + candidates.extend(parallel_results) + except Exception as e: + if verbose: + print(f"Error in multi_llm mode: {e} – falling back to temperature variation") + generation_technique = "temperature_variation" + candidates = [] + if generation_technique == "self_refinement": # Generate solutions by refining previous ones for i in range(num_responses): @@ -179,7 +251,7 @@ def generate_candidates( verbose=verbose, max_tokens=max_tokens, num_responses=1, - temperature=0.0, + temperature=temp_min, ) if response and len(response) > 0: @@ -195,7 +267,7 @@ def generate_candidates( f"CANDIDATE {idx + 1}: <<<\n{cand}\n>>>" for idx, cand in enumerate(candidates) ) - meta_prompt = f"{system_prompt}\nGiven the following candidate solutions, propose a new alternative optimal solution to user's prompt using their same JSON format (suggest only trainable codes/variables to modify, never inputs):\n{previous_solutions}\n" + meta_prompt = f"{system_prompt}\nGiven the following prior CANDIDATE solutions, answer with a very different new CANDIDATE optimal solution to user's prompt using their same JSON format (suggest only trainable codes/variables to modify, never inputs):\n{previous_solutions}\n" response = self.call_llm( system_prompt=meta_prompt, @@ -203,7 +275,7 @@ def generate_candidates( verbose=verbose, max_tokens=max_tokens, num_responses=1, - temperature=0.0, + temperature=temp_min, ) if response and len(response) > 0: @@ -292,7 +364,7 @@ def generate_candidates( print("Warning: Failed to generate any candidates") if self.log is not None: - self.log.append({"system_prompt": system_prompt, "user_prompt": user_prompt, "response": candidates, "generation_technique": generation_technique}) + self.log.append({"system_prompt": system_prompt, "user_prompt": user_prompt, "response": candidates, "generation_technique": generation_technique, "llm_profiles": self.llm_profiles}) # only build a problem instance if we actually have one pi = self.problem_instance(summary) if summary is not None else {} self.summary_log.append({"problem_instance": pi, "summary": summary}) diff --git a/opto/utils/llm.py b/opto/utils/llm.py index a84e4865..9f419c26 100644 --- a/opto/utils/llm.py +++ b/opto/utils/llm.py @@ -239,6 +239,80 @@ def create(self, **config: Any): "CustomLLM": CustomLLM, } +class LLMFactory: + """Factory for creating LLM instances with predefined profiles. 
+
+    The code comes with these built-in profiles:
+
+    llm_default = LLM(profile="default")  # gpt-4o-mini
+    llm_premium = LLM(profile="premium")  # gpt-4
+    llm_cheap = LLM(profile="cheap")  # gpt-4o-mini
+    llm_fast = LLM(profile="fast")  # gpt-3.5-turbo
+    llm_reasoning = LLM(profile="reasoning")  # o1-mini
+
+    You can override those built-in profiles:
+
+    LLMFactory.register_profile("default", "LiteLLM", model="gpt-4o", temperature=0.5)
+    LLMFactory.register_profile("premium", "LiteLLM", model="o1-preview", max_tokens=8000)
+    LLMFactory.register_profile("cheap", "LiteLLM", model="gpt-3.5-turbo", temperature=0.9)
+    LLMFactory.register_profile("fast", "LiteLLM", model="gpt-3.5-turbo", max_tokens=500)
+    LLMFactory.register_profile("reasoning", "LiteLLM", model="o1-preview")
+
+    An example of using different backends:
+
+    # Register custom profiles for different use cases
+    LLMFactory.register_profile("advanced_reasoning", "LiteLLM", model="o1-preview", max_tokens=4000)
+    LLMFactory.register_profile("claude_sonnet", "LiteLLM", model="claude-3-5-sonnet-latest", temperature=0.3)
+    LLMFactory.register_profile("custom_server", "CustomLLM", model="llama-3.1-8b")
+
+    # Use in different contexts
+    reasoning_llm = LLM(profile="advanced_reasoning")  # For complex reasoning
+    claude_llm = LLM(profile="claude_sonnet")  # For Claude responses
+    local_llm = LLM(profile="custom_server")  # For local deployment
+
+    # Single LLM optimizer with custom profile
+    optimizer1 = OptoPrime(parameters, llm=LLM(profile="advanced_reasoning"))
+
+    # Multi-LLM optimizer with multiple profiles
+    optimizer2 = OptoPrimeMulti(parameters, llm_profiles=["cheap", "premium", "claude_sonnet"], generation_technique="multi_llm")
+    """
+
+    # Default profiles for different use cases
+    _profiles = {
+        'default': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4o-mini'}},
+        'premium': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4'}},
+        'cheap': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4o-mini'}},
+        'fast': {'backend': 'LiteLLM', 'params': {'model': 'gpt-3.5-turbo'}},
+        'reasoning': {'backend': 'LiteLLM', 'params': {'model': 'o1-mini'}},
+    }
+
+    @classmethod
+    def get_llm(cls, profile: str = 'default') -> AbstractModel:
+        """Get an LLM instance for the specified profile."""
+        if profile not in cls._profiles:
+            raise ValueError(f"Unknown profile '{profile}'. Available profiles: {list(cls._profiles.keys())}")
+
+        config = cls._profiles[profile]
+        backend_cls = _LLM_REGISTRY[config['backend']]
+        return backend_cls(**config['params'])
+
+    @classmethod
+    def register_profile(cls, name: str, backend: str, **params):
+        """Register a new LLM profile."""
+        cls._profiles[name] = {'backend': backend, 'params': params}
+
+    @classmethod
+    def list_profiles(cls):
+        """List all available profiles."""
+        return list(cls._profiles.keys())
+
+    @classmethod
+    def get_profile_info(cls, profile: str = None):
+        """Get information about a profile or all profiles."""
+        if profile:
+            return cls._profiles.get(profile)
+        return cls._profiles
+
 class LLM:
     """
     A unified entry point for all supported LLM backends.
@@ -248,8 +322,15 @@ class LLM: llm = LLM() # or override explicitly llm = LLM(backend="AutoGen", config_list=my_configs) + # or use predefined profiles + llm = LLM(profile="premium") # Use premium model + llm = LLM(profile="cheap") # Use cheaper model + llm = LLM(profile="reasoning") # Use reasoning/thinking model """ - def __new__(cls, *args, backend: str = None, **kwargs): + def __new__(cls, *args, profile: str = None, backend: str = None, **kwargs): + # New: if profile is specified, use LLMFactory + if profile: + return LLMFactory.get_llm(profile) # Decide which backend to use name = backend or os.getenv("TRACE_DEFAULT_LLM_BACKEND", "LiteLLM") try: diff --git a/tests/llm_optimizers_tests/test_optimizer_optoprimemulti.py b/tests/llm_optimizers_tests/test_optimizer_optoprimemulti.py index c9acd708..978ae302 100644 --- a/tests/llm_optimizers_tests/test_optimizer_optoprimemulti.py +++ b/tests/llm_optimizers_tests/test_optimizer_optoprimemulti.py @@ -1,6 +1,7 @@ import json import pytest from opto.optimizers.optoprimemulti import OptoPrimeMulti +from opto.utils.llm import LLMFactory from opto.trace.propagators import GraphPropagator from opto.trace.nodes import ParameterNode from opto.trace import bundle, node, GRAPH @@ -25,6 +26,18 @@ def __call__(self, messages, max_tokens=None, response_format=None): # fallback single-call (not used in multi) return self.create(messages, response_format, max_tokens, 1, 0) +class MockLLMFactory: + """Mock LLMFactory for testing multi-LLM functionality""" + @staticmethod + def get_llm(profile): + # Return different dummy LLMs for different profiles + profile_responses = { + 'cheap': [f"cheap_{profile}_response"], + 'premium': [f"premium_{profile}_response"], + 'default': [f"default_{profile}_response"], + } + return DummyLLM(responses=[profile_responses.get(profile, ["default_response"])]) + @pytest.fixture def parameter_node(): # Minimal dummy ParameterNode @@ -40,6 +53,16 @@ def default_optimizer(parameter_node): assert isinstance(opt.propagator, GraphPropagator) return opt +@pytest.fixture +def multi_llm_optimizer(parameter_node): + """Optimizer configured for multi-LLM testing""" + dummy = DummyLLM(responses=[["{\\\"suggestion\\\": {}}"]]) + opt = OptoPrimeMulti([parameter_node], + llm_profiles=['cheap', 'premium', 'default'], + generation_technique='multi_llm') + opt.llm = dummy + return opt + def test_call_llm_returns_list(default_optimizer): opt = default_optimizer # Prepare dummy response @@ -48,11 +71,25 @@ def test_call_llm_returns_list(default_optimizer): assert isinstance(results, list) assert results == ["resp1", "resp2"] +def test_call_llm_with_specific_llm(default_optimizer): + """Test that call_llm accepts and uses a specific LLM instance""" + opt = default_optimizer + specific_llm = DummyLLM(responses=[["specific_response"]]) + + # Call with specific LLM + results = opt.call_llm("sys", "usr", llm=specific_llm, num_responses=1) + assert results == ["specific_response"] + + # Verify specific_llm was called, not the default + assert len(specific_llm.call_args) == 1 + assert len(opt.llm.call_args) == 0 # Default LLM should not be called + @pytest.mark.parametrize("gen_tech", [ "temperature_variation", "self_refinement", "iterative_alternatives", - "multi_experts"] + "multi_experts", + "multi_llm"] ) def test_generate_candidates_length(default_optimizer, gen_tech, capsys): opt = default_optimizer @@ -65,6 +102,55 @@ def test_generate_candidates_length(default_optimizer, gen_tech, capsys): assert isinstance(cands, list) assert len(cands) == 3 
+def test_multi_llm_initialization(): + """Test OptoPrimeMulti initialization with multi-LLM parameters""" + param = ParameterNode(name='test', value=1) + profiles = ['cheap', 'premium', 'default'] + weights = [0.5, 1.5, 1.0] + + opt = OptoPrimeMulti([param], + llm_profiles=profiles, + llm_weights=weights, + generation_technique='multi_llm') + + assert opt.llm_profiles == profiles + assert opt.llm_weights == weights + assert opt._llm_instances == {} # Should start empty + +def test_get_llm_for_profile(multi_llm_optimizer, monkeypatch): + """Test LLM profile retrieval and caching""" + opt = multi_llm_optimizer + + # Mock LLMFactory + monkeypatch.setattr('opto.utils.llm.LLMFactory', MockLLMFactory) + + # First call should create and cache + llm1 = opt._get_llm_for_profile('cheap') + assert 'cheap' in opt._llm_instances + + # Second call should return cached instance + llm2 = opt._get_llm_for_profile('cheap') + assert llm1 is llm2 + + # None profile should return default LLM + default_llm = opt._get_llm_for_profile(None) + assert default_llm is opt.llm + +def test_get_llms_for_generation(multi_llm_optimizer, monkeypatch): + """Test LLM distribution for generation""" + opt = multi_llm_optimizer + # Patch the import location where it's actually used + monkeypatch.setattr('opto.optimizers.optoprimemulti.LLMFactory', MockLLMFactory) + + llms = opt._get_llms_for_generation(5) + assert len(llms) == 5 + + # Should cycle through profiles: cheap, premium, default, cheap, premium + expected_profiles = ['cheap', 'premium', 'default', 'cheap', 'premium'] + for i, llm in enumerate(llms): + expected_profile = expected_profiles[i] + assert expected_profile in opt._llm_instances + @pytest.mark.parametrize("sel_tech,method_name", [ ("moa", "_select_moa"), ("majority", "_select_majority"), @@ -85,6 +171,25 @@ def test_select_candidate_calls_correct_method(default_optimizer, sel_tech, meth result = opt.select_candidate(cands, selection_technique=sel_tech) assert result == "c" +def test_multi_llm_generation_fallback(multi_llm_optimizer, monkeypatch): + """Test that multi_llm generation falls back gracefully on error""" + opt = multi_llm_optimizer + + # Mock LLMFactory to raise exception + def failing_get_llm(profile): + raise Exception("LLM creation failed") + + monkeypatch.setattr(MockLLMFactory, 'get_llm', failing_get_llm) + monkeypatch.setattr('opto.utils.llm.LLMFactory', MockLLMFactory) + + # Should fall back to temperature_variation + responses = [["fallback1"], ["fallback2"], ["fallback3"]] + opt.llm = DummyLLM(responses=responses) + + cands = opt.generate_candidates(None, "sys", "usr", num_responses=3, + generation_technique="multi_llm", verbose=True) + assert len(cands) == 3 + def test_integration_step_updates(default_optimizer, parameter_node): opt = default_optimizer # Dummy parameter_node initial value @@ -105,6 +210,83 @@ def test_default_model_name(default_optimizer): assert 'gpt-4.1-nano' in model_name +def test_multi_llm_step_integration(multi_llm_optimizer, parameter_node, monkeypatch): + """Test full integration of multi-LLM optimization step""" + opt = multi_llm_optimizer + monkeypatch.setattr('opto.utils.llm.LLMFactory', MockLLMFactory) + + parameter_node._data = 0 + + # Mock multiple LLM responses for multi_llm generation + suggestion = {"x": 42} + response_str = json.dumps({"reasoning": "ok", "answer": "", "suggestion": suggestion}) + + # Each profile should return a response + cheap_llm = DummyLLM(responses=[[response_str]]) + premium_llm = DummyLLM(responses=[[response_str]]) + default_llm 
= DummyLLM(responses=[[response_str]]) + + opt._llm_instances = { + 'cheap': cheap_llm, + 'premium': premium_llm, + 'default': default_llm + } + + # Override _parallel_call_llm to return mock responses + def mock_parallel_call(arg_dicts): + return [response_str] * len(arg_dicts) + + opt._parallel_call_llm = mock_parallel_call + + # Run optimization step + update = opt._step(verbose=False, generation_technique='multi_llm') + assert isinstance(update, dict) + +def test_llm_weights_handling(): + """Test that LLM weights are properly handled""" + param = ParameterNode(name='test', value=1) + + # Test with explicit weights + profiles = ['cheap', 'premium'] + weights = [0.3, 0.7] + opt1 = OptoPrimeMulti([param], llm_profiles=profiles, llm_weights=weights) + assert opt1.llm_weights == weights + + # Test with automatic weights (should default to 1.0 for each profile) + opt2 = OptoPrimeMulti([param], llm_profiles=profiles) + assert opt2.llm_weights == [1.0, 1.0] + + # Test without profiles (should be None) + opt3 = OptoPrimeMulti([param]) + assert opt3.llm_weights is None + +def test_multi_llm_logging(multi_llm_optimizer, monkeypatch): + """Test that multi-LLM usage is properly logged""" + opt = multi_llm_optimizer + opt.log = [] # Enable logging + + # Manually set LLM instances to avoid import issues + opt._llm_instances = { + 'cheap': DummyLLM(responses=[["response1"]]), + 'premium': DummyLLM(responses=[["response2"]]), + 'default': DummyLLM(responses=[["response3"]]) + } + + # Override _parallel_call_llm to return mock responses + def mock_parallel_call(arg_dicts): + return ["response1", "response2", "response3"] + + opt._parallel_call_llm = mock_parallel_call + + cands = opt.generate_candidates(None, "sys", "usr", num_responses=3, + generation_technique="multi_llm") + + # Check that logging includes llm_profiles + assert len(opt.log) > 0 + log_entry = opt.log[-1] + assert 'llm_profiles' in log_entry + assert log_entry['llm_profiles'] == ['cheap', 'premium', 'default'] + def user_code(output): if output < 0: return "Success." @@ -115,7 +297,8 @@ def user_code(output): "temperature_variation", "self_refinement", "iterative_alternatives", - "multi_experts" + "multi_experts", + "multi_llm" ]) @pytest.mark.parametrize("sel_tech", [ "moa", @@ -150,3 +333,25 @@ def my_fun(x): print(f"Function updated: old value: {str(old_func_value)}, new value: {str(new_func_value)}") +def test_backwards_compatibility(): + """Test that existing OptoPrimeMulti usage continues to work without changes""" + param = ParameterNode(name='test', value=1) + + # Old-style initialization should work exactly as before + opt = OptoPrimeMulti([param], + num_responses=3, + generation_technique="temperature_variation", + selection_technique="best_of_n") + + # New attributes should have sensible defaults + assert opt.llm_profiles is None + assert opt.llm_weights is None + assert opt._llm_instances == {} + + # Should fall back to single LLM behavior + llms = opt._get_llms_for_generation(3) + assert len(llms) == 3 + assert all(llm is opt.llm for llm in llms) + + # Profile retrieval should return default LLM for None + assert opt._get_llm_for_profile(None) is opt.llm \ No newline at end of file
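
Usage sketch (Python): the snippet below only restates, in one self-contained place, the API this patch adds in opto/utils/llm.py and opto/optimizers/optoprimemulti.py. It assumes the built-in profiles ("default", "premium", "cheap", "fast", "reasoning") and a LiteLLM backend with credentials configured; the "claude_sonnet" profile, the parameter node, and the weights are illustrative placeholders rather than library defaults.

from opto.trace.nodes import ParameterNode
from opto.optimizers.optoprimemulti import OptoPrimeMulti
from opto.utils.llm import LLM, LLMFactory

# Register an extra profile; the name and model here are examples, only the
# built-in profiles ship with this patch.
LLMFactory.register_profile("claude_sonnet", "LiteLLM",
                            model="claude-3-5-sonnet-latest", temperature=0.3)
print(LLMFactory.list_profiles())

# A profile can back a single LLM handle...
premium_llm = LLM(profile="premium")

# ...or drive OptoPrimeMulti, which spreads candidate generation across the
# listed profiles when generation_technique="multi_llm" and falls back to the
# optimizer's default LLM if a profile cannot be constructed.
param = ParameterNode(name="x", value=1)  # placeholder trainable node, mirroring the tests
optimizer = OptoPrimeMulti(
    [param],
    llm_profiles=["cheap", "premium", "claude_sonnet"],
    llm_weights=[1.0, 1.0, 1.0],  # optional; defaults to 1.0 per profile
    generation_technique="multi_llm",
)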
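
The candidate-to-profile assignment in _get_llms_for_generation is a plain index-modulo round-robin, so the schedule can be previewed without any backend; a minimal illustration matching the expectation in test_get_llms_for_generation:

# Response i is served by profile i % len(llm_profiles).
profiles = ["cheap", "premium", "default"]
schedule = [profiles[i % len(profiles)] for i in range(5)]
assert schedule == ["cheap", "premium", "default", "cheap", "premium"]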