diff --git a/opto/optimizers/optoprimemulti.py b/opto/optimizers/optoprimemulti.py index 6134824f..be7cfa30 100644 --- a/opto/optimizers/optoprimemulti.py +++ b/opto/optimizers/optoprimemulti.py @@ -2,10 +2,9 @@ import json from typing import List, Dict - - from opto.trace.propagators import GraphPropagator from opto.optimizers.optoprime import OptoPrime +from opto.utils.llm import LLMFactory from concurrent.futures import ThreadPoolExecutor, as_completed @@ -19,6 +18,8 @@ def __init__( generation_technique: str = "temperature_variation", selection_technique: str = "best_of_n", experts_list: Optional[List[str]] = None, + llm_profiles: Optional[List[str]] = None, # List of LLM profiles to use + llm_weights: Optional[List[float]] = None, # Weights for each LLM (for weighted selection) **kwargs, ): super().__init__(*args, **kwargs) @@ -31,6 +32,44 @@ def __init__( self.selection_technique = selection_technique self.experts_list = experts_list + # NEW: Multiple LLM support + self.llm_profiles = llm_profiles + self.llm_weights = llm_weights or [1.0] * len(llm_profiles) if llm_profiles else None + self._llm_instances = {} # Cache for LLM instances + + def _get_llm_for_profile(self, profile: str = None): + """Get LLM instance for a profile, with caching.""" + if profile is None: + return self.llm # Use default LLM + + if profile not in self._llm_instances: + try: + from opto.utils.llm import LLMFactory + self._llm_instances[profile] = LLMFactory.get_llm(profile) + except Exception as e: + # Fallback to default LLM if profile creation fails + import warnings + warnings.warn(f"Failed to create LLM for profile '{profile}': {e}. Using default LLM.") + return self.llm + + return self._llm_instances[profile] + + def _get_llms_for_generation(self, num_responses: int): + """Get list of LLMs to use for generation.""" + if self.llm_profiles is None or len(self.llm_profiles) == 0: + # Fallback to single LLM (existing behavior) + return [self.llm] * num_responses + + # Distribute responses across multiple LLMs + llms = [] + for i in range(num_responses): + profile_idx = i % len(self.llm_profiles) + profile = self.llm_profiles[profile_idx] + llm = self._get_llm_for_profile(profile) + llms.append(llm) + + return llms + def call_llm( self, system_prompt: str, @@ -39,20 +78,24 @@ def call_llm( max_tokens: int = 4096, num_responses: int = 1, temperature: float = 0.0, + llm = None, # NEW: Optional specific LLM to use ) -> List[str]: - """Call the LLM with a prompt and return multiple responses.""" + """Given a prompt, returns multiple candidate answers.""" # if verbose not in (False, "output"): # print("Prompt\n", system_prompt + user_prompt) + # Use provided LLM or fall back to default + active_llm = llm or self.llm + messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ] try: - if hasattr(self.llm, "create"): + if hasattr(active_llm, "create"): # Standard OpenAI/LangChain style - response = self.llm.create( + response = active_llm.create( messages=messages, response_format={"type": "json_object"}, max_tokens=max_tokens, @@ -62,7 +105,7 @@ def call_llm( else: # Fallback for LiteLLM (callable) or other interfaces # e.g., LiteLLM(messages, max_tokens=…, n=…, temperature=…) - response = self.llm( + response = active_llm( messages, max_tokens=max_tokens, n=num_responses, @@ -165,6 +208,35 @@ def generate_candidates( generation_technique = generation_technique.lower() + if self.llm_profiles is not None and len(self.llm_profiles) > 0 and generation_technique == 
"multi_llm": + llms = self._get_llms_for_generation(num_responses) + + # Prepare arguments for parallel execution + arg_dicts = [] + for i, llm in enumerate(llms): + profile_name = self.llm_profiles[i % len(self.llm_profiles)] if self.llm_profiles else "default" + modified_system_prompt = f"{system_prompt}\n\n[Using {profile_name} model for diverse perspective]" + + arg_dicts.append(dict( + system_prompt=modified_system_prompt, + user_prompt=user_prompt, + verbose=verbose, + max_tokens=max_tokens, + num_responses=1, + temperature=temp_min, + llm=llm # Use specific LLM + )) + + # Execute in parallel + try: + parallel_results = self._parallel_call_llm(arg_dicts) + candidates.extend(parallel_results) + except Exception as e: + if verbose: + print(f"Error in multi_llm mode: {e} – falling back to temperature variation") + generation_technique = "temperature_variation" + candidates = [] + if generation_technique == "self_refinement": # Generate solutions by refining previous ones for i in range(num_responses): @@ -179,7 +251,7 @@ def generate_candidates( verbose=verbose, max_tokens=max_tokens, num_responses=1, - temperature=0.0, + temperature=temp_min, ) if response and len(response) > 0: @@ -195,7 +267,7 @@ def generate_candidates( f"CANDIDATE {idx + 1}: <<<\n{cand}\n>>>" for idx, cand in enumerate(candidates) ) - meta_prompt = f"{system_prompt}\nGiven the following candidate solutions, propose a new alternative optimal solution to user's prompt using their same JSON format (suggest only trainable codes/variables to modify, never inputs):\n{previous_solutions}\n" + meta_prompt = f"{system_prompt}\nGiven the following prior CANDIDATE solutions, answer with a very different new CANDIDATE optimal solution to user's prompt using their same JSON format (suggest only trainable codes/variables to modify, never inputs):\n{previous_solutions}\n" response = self.call_llm( system_prompt=meta_prompt, @@ -203,7 +275,7 @@ def generate_candidates( verbose=verbose, max_tokens=max_tokens, num_responses=1, - temperature=0.0, + temperature=temp_min, ) if response and len(response) > 0: @@ -292,7 +364,7 @@ def generate_candidates( print("Warning: Failed to generate any candidates") if self.log is not None: - self.log.append({"system_prompt": system_prompt, "user_prompt": user_prompt, "response": candidates, "generation_technique": generation_technique}) + self.log.append({"system_prompt": system_prompt, "user_prompt": user_prompt, "response": candidates, "generation_technique": generation_technique, "llm_profiles": self.llm_profiles}) # only build a problem instance if we actually have one pi = self.problem_instance(summary) if summary is not None else {} self.summary_log.append({"problem_instance": pi, "summary": summary}) diff --git a/opto/utils/llm.py b/opto/utils/llm.py index a84e4865..9f419c26 100644 --- a/opto/utils/llm.py +++ b/opto/utils/llm.py @@ -239,6 +239,80 @@ def create(self, **config: Any): "CustomLLM": CustomLLM, } +class LLMFactory: + """Factory for creating LLM instances with predefined profiles. 
+
+    The code comes with these built-in profiles:
+
+    llm_default = LLM(profile="default")  # gpt-4o-mini
+    llm_premium = LLM(profile="premium")  # gpt-4
+    llm_cheap = LLM(profile="cheap")  # gpt-4o-mini
+    llm_fast = LLM(profile="fast")  # gpt-3.5-turbo
+    llm_reasoning = LLM(profile="reasoning")  # o1-mini
+
+    You can override those built-in profiles:
+
+    LLMFactory.register_profile("default", "LiteLLM", model="gpt-4o", temperature=0.5)
+    LLMFactory.register_profile("premium", "LiteLLM", model="o1-preview", max_tokens=8000)
+    LLMFactory.register_profile("cheap", "LiteLLM", model="gpt-3.5-turbo", temperature=0.9)
+    LLMFactory.register_profile("fast", "LiteLLM", model="gpt-3.5-turbo", max_tokens=500)
+    LLMFactory.register_profile("reasoning", "LiteLLM", model="o1-preview")
+
+    An example of using different backends:
+
+    # Register custom profiles for different use cases
+    LLMFactory.register_profile("advanced_reasoning", "LiteLLM", model="o1-preview", max_tokens=4000)
+    LLMFactory.register_profile("claude_sonnet", "LiteLLM", model="claude-3-5-sonnet-latest", temperature=0.3)
+    LLMFactory.register_profile("custom_server", "CustomLLM", model="llama-3.1-8b")
+
+    # Use in different contexts
+    reasoning_llm = LLM(profile="advanced_reasoning")  # For complex reasoning
+    claude_llm = LLM(profile="claude_sonnet")  # For Claude responses
+    local_llm = LLM(profile="custom_server")  # For local deployment
+
+    # Single LLM optimizer with custom profile
+    optimizer1 = OptoPrime(parameters, llm=LLM(profile="advanced_reasoning"))
+
+    # Multi-LLM optimizer with multiple profiles
+    optimizer2 = OptoPrimeMulti(parameters, llm_profiles=["cheap", "premium", "claude_sonnet"], generation_technique="multi_llm")
+    """
+
+    # Default profiles for different use cases
+    _profiles = {
+        'default': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4o-mini'}},
+        'premium': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4'}},
+        'cheap': {'backend': 'LiteLLM', 'params': {'model': 'gpt-4o-mini'}},
+        'fast': {'backend': 'LiteLLM', 'params': {'model': 'gpt-3.5-turbo'}},
+        'reasoning': {'backend': 'LiteLLM', 'params': {'model': 'o1-mini'}},
+    }
+
+    @classmethod
+    def get_llm(cls, profile: str = 'default') -> AbstractModel:
+        """Get an LLM instance for the specified profile."""
+        if profile not in cls._profiles:
+            raise ValueError(f"Unknown profile '{profile}'. Available profiles: {list(cls._profiles.keys())}")
+
+        config = cls._profiles[profile]
+        backend_cls = _LLM_REGISTRY[config['backend']]
+        return backend_cls(**config['params'])
+
+    @classmethod
+    def register_profile(cls, name: str, backend: str, **params):
+        """Register a new LLM profile."""
+        cls._profiles[name] = {'backend': backend, 'params': params}
+
+    @classmethod
+    def list_profiles(cls):
+        """List all available profiles."""
+        return list(cls._profiles.keys())
+
+    @classmethod
+    def get_profile_info(cls, profile: str = None):
+        """Get information about a profile or all profiles."""
+        if profile:
+            return cls._profiles.get(profile)
+        return cls._profiles
+
 class LLM:
     """
     A unified entry point for all supported LLM backends.
@@ -248,8 +322,15 @@ class LLM: llm = LLM() # or override explicitly llm = LLM(backend="AutoGen", config_list=my_configs) + # or use predefined profiles + llm = LLM(profile="premium") # Use premium model + llm = LLM(profile="cheap") # Use cheaper model + llm = LLM(profile="reasoning") # Use reasoning/thinking model """ - def __new__(cls, *args, backend: str = None, **kwargs): + def __new__(cls, *args, profile: str = None, backend: str = None, **kwargs): + # New: if profile is specified, use LLMFactory + if profile: + return LLMFactory.get_llm(profile) # Decide which backend to use name = backend or os.getenv("TRACE_DEFAULT_LLM_BACKEND", "LiteLLM") try: diff --git a/tests/llm_optimizers_tests/test_optimizer_optoprimemulti.py b/tests/llm_optimizers_tests/test_optimizer_optoprimemulti.py index c9acd708..978ae302 100644 --- a/tests/llm_optimizers_tests/test_optimizer_optoprimemulti.py +++ b/tests/llm_optimizers_tests/test_optimizer_optoprimemulti.py @@ -1,6 +1,7 @@ import json import pytest from opto.optimizers.optoprimemulti import OptoPrimeMulti +from opto.utils.llm import LLMFactory from opto.trace.propagators import GraphPropagator from opto.trace.nodes import ParameterNode from opto.trace import bundle, node, GRAPH @@ -25,6 +26,18 @@ def __call__(self, messages, max_tokens=None, response_format=None): # fallback single-call (not used in multi) return self.create(messages, response_format, max_tokens, 1, 0) +class MockLLMFactory: + """Mock LLMFactory for testing multi-LLM functionality""" + @staticmethod + def get_llm(profile): + # Return different dummy LLMs for different profiles + profile_responses = { + 'cheap': [f"cheap_{profile}_response"], + 'premium': [f"premium_{profile}_response"], + 'default': [f"default_{profile}_response"], + } + return DummyLLM(responses=[profile_responses.get(profile, ["default_response"])]) + @pytest.fixture def parameter_node(): # Minimal dummy ParameterNode @@ -40,6 +53,16 @@ def default_optimizer(parameter_node): assert isinstance(opt.propagator, GraphPropagator) return opt +@pytest.fixture +def multi_llm_optimizer(parameter_node): + """Optimizer configured for multi-LLM testing""" + dummy = DummyLLM(responses=[["{\\\"suggestion\\\": {}}"]]) + opt = OptoPrimeMulti([parameter_node], + llm_profiles=['cheap', 'premium', 'default'], + generation_technique='multi_llm') + opt.llm = dummy + return opt + def test_call_llm_returns_list(default_optimizer): opt = default_optimizer # Prepare dummy response @@ -48,11 +71,25 @@ def test_call_llm_returns_list(default_optimizer): assert isinstance(results, list) assert results == ["resp1", "resp2"] +def test_call_llm_with_specific_llm(default_optimizer): + """Test that call_llm accepts and uses a specific LLM instance""" + opt = default_optimizer + specific_llm = DummyLLM(responses=[["specific_response"]]) + + # Call with specific LLM + results = opt.call_llm("sys", "usr", llm=specific_llm, num_responses=1) + assert results == ["specific_response"] + + # Verify specific_llm was called, not the default + assert len(specific_llm.call_args) == 1 + assert len(opt.llm.call_args) == 0 # Default LLM should not be called + @pytest.mark.parametrize("gen_tech", [ "temperature_variation", "self_refinement", "iterative_alternatives", - "multi_experts"] + "multi_experts", + "multi_llm"] ) def test_generate_candidates_length(default_optimizer, gen_tech, capsys): opt = default_optimizer @@ -65,6 +102,55 @@ def test_generate_candidates_length(default_optimizer, gen_tech, capsys): assert isinstance(cands, list) assert len(cands) == 3 
+def test_multi_llm_initialization(): + """Test OptoPrimeMulti initialization with multi-LLM parameters""" + param = ParameterNode(name='test', value=1) + profiles = ['cheap', 'premium', 'default'] + weights = [0.5, 1.5, 1.0] + + opt = OptoPrimeMulti([param], + llm_profiles=profiles, + llm_weights=weights, + generation_technique='multi_llm') + + assert opt.llm_profiles == profiles + assert opt.llm_weights == weights + assert opt._llm_instances == {} # Should start empty + +def test_get_llm_for_profile(multi_llm_optimizer, monkeypatch): + """Test LLM profile retrieval and caching""" + opt = multi_llm_optimizer + + # Mock LLMFactory + monkeypatch.setattr('opto.utils.llm.LLMFactory', MockLLMFactory) + + # First call should create and cache + llm1 = opt._get_llm_for_profile('cheap') + assert 'cheap' in opt._llm_instances + + # Second call should return cached instance + llm2 = opt._get_llm_for_profile('cheap') + assert llm1 is llm2 + + # None profile should return default LLM + default_llm = opt._get_llm_for_profile(None) + assert default_llm is opt.llm + +def test_get_llms_for_generation(multi_llm_optimizer, monkeypatch): + """Test LLM distribution for generation""" + opt = multi_llm_optimizer + # Patch the import location where it's actually used + monkeypatch.setattr('opto.optimizers.optoprimemulti.LLMFactory', MockLLMFactory) + + llms = opt._get_llms_for_generation(5) + assert len(llms) == 5 + + # Should cycle through profiles: cheap, premium, default, cheap, premium + expected_profiles = ['cheap', 'premium', 'default', 'cheap', 'premium'] + for i, llm in enumerate(llms): + expected_profile = expected_profiles[i] + assert expected_profile in opt._llm_instances + @pytest.mark.parametrize("sel_tech,method_name", [ ("moa", "_select_moa"), ("majority", "_select_majority"), @@ -85,6 +171,25 @@ def test_select_candidate_calls_correct_method(default_optimizer, sel_tech, meth result = opt.select_candidate(cands, selection_technique=sel_tech) assert result == "c" +def test_multi_llm_generation_fallback(multi_llm_optimizer, monkeypatch): + """Test that multi_llm generation falls back gracefully on error""" + opt = multi_llm_optimizer + + # Mock LLMFactory to raise exception + def failing_get_llm(profile): + raise Exception("LLM creation failed") + + monkeypatch.setattr(MockLLMFactory, 'get_llm', failing_get_llm) + monkeypatch.setattr('opto.utils.llm.LLMFactory', MockLLMFactory) + + # Should fall back to temperature_variation + responses = [["fallback1"], ["fallback2"], ["fallback3"]] + opt.llm = DummyLLM(responses=responses) + + cands = opt.generate_candidates(None, "sys", "usr", num_responses=3, + generation_technique="multi_llm", verbose=True) + assert len(cands) == 3 + def test_integration_step_updates(default_optimizer, parameter_node): opt = default_optimizer # Dummy parameter_node initial value @@ -105,6 +210,83 @@ def test_default_model_name(default_optimizer): assert 'gpt-4.1-nano' in model_name +def test_multi_llm_step_integration(multi_llm_optimizer, parameter_node, monkeypatch): + """Test full integration of multi-LLM optimization step""" + opt = multi_llm_optimizer + monkeypatch.setattr('opto.utils.llm.LLMFactory', MockLLMFactory) + + parameter_node._data = 0 + + # Mock multiple LLM responses for multi_llm generation + suggestion = {"x": 42} + response_str = json.dumps({"reasoning": "ok", "answer": "", "suggestion": suggestion}) + + # Each profile should return a response + cheap_llm = DummyLLM(responses=[[response_str]]) + premium_llm = DummyLLM(responses=[[response_str]]) + default_llm 
= DummyLLM(responses=[[response_str]]) + + opt._llm_instances = { + 'cheap': cheap_llm, + 'premium': premium_llm, + 'default': default_llm + } + + # Override _parallel_call_llm to return mock responses + def mock_parallel_call(arg_dicts): + return [response_str] * len(arg_dicts) + + opt._parallel_call_llm = mock_parallel_call + + # Run optimization step + update = opt._step(verbose=False, generation_technique='multi_llm') + assert isinstance(update, dict) + +def test_llm_weights_handling(): + """Test that LLM weights are properly handled""" + param = ParameterNode(name='test', value=1) + + # Test with explicit weights + profiles = ['cheap', 'premium'] + weights = [0.3, 0.7] + opt1 = OptoPrimeMulti([param], llm_profiles=profiles, llm_weights=weights) + assert opt1.llm_weights == weights + + # Test with automatic weights (should default to 1.0 for each profile) + opt2 = OptoPrimeMulti([param], llm_profiles=profiles) + assert opt2.llm_weights == [1.0, 1.0] + + # Test without profiles (should be None) + opt3 = OptoPrimeMulti([param]) + assert opt3.llm_weights is None + +def test_multi_llm_logging(multi_llm_optimizer, monkeypatch): + """Test that multi-LLM usage is properly logged""" + opt = multi_llm_optimizer + opt.log = [] # Enable logging + + # Manually set LLM instances to avoid import issues + opt._llm_instances = { + 'cheap': DummyLLM(responses=[["response1"]]), + 'premium': DummyLLM(responses=[["response2"]]), + 'default': DummyLLM(responses=[["response3"]]) + } + + # Override _parallel_call_llm to return mock responses + def mock_parallel_call(arg_dicts): + return ["response1", "response2", "response3"] + + opt._parallel_call_llm = mock_parallel_call + + cands = opt.generate_candidates(None, "sys", "usr", num_responses=3, + generation_technique="multi_llm") + + # Check that logging includes llm_profiles + assert len(opt.log) > 0 + log_entry = opt.log[-1] + assert 'llm_profiles' in log_entry + assert log_entry['llm_profiles'] == ['cheap', 'premium', 'default'] + def user_code(output): if output < 0: return "Success." @@ -115,7 +297,8 @@ def user_code(output): "temperature_variation", "self_refinement", "iterative_alternatives", - "multi_experts" + "multi_experts", + "multi_llm" ]) @pytest.mark.parametrize("sel_tech", [ "moa", @@ -150,3 +333,25 @@ def my_fun(x): print(f"Function updated: old value: {str(old_func_value)}, new value: {str(new_func_value)}") +def test_backwards_compatibility(): + """Test that existing OptoPrimeMulti usage continues to work without changes""" + param = ParameterNode(name='test', value=1) + + # Old-style initialization should work exactly as before + opt = OptoPrimeMulti([param], + num_responses=3, + generation_technique="temperature_variation", + selection_technique="best_of_n") + + # New attributes should have sensible defaults + assert opt.llm_profiles is None + assert opt.llm_weights is None + assert opt._llm_instances == {} + + # Should fall back to single LLM behavior + llms = opt._get_llms_for_generation(3) + assert len(llms) == 3 + assert all(llm is opt.llm for llm in llms) + + # Profile retrieval should return default LLM for None + assert opt._get_llm_for_profile(None) is opt.llm \ No newline at end of file
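
Usage sketch (Python): the snippet below only restates, in one self-contained place, the API this patch adds in opto/utils/llm.py and opto/optimizers/optoprimemulti.py. It assumes the built-in profiles ("default", "premium", "cheap", "fast", "reasoning") and a LiteLLM backend with credentials configured; the "claude_sonnet" profile, the parameter node, and the weights are illustrative placeholders rather than library defaults.

from opto.trace.nodes import ParameterNode
from opto.optimizers.optoprimemulti import OptoPrimeMulti
from opto.utils.llm import LLM, LLMFactory

# Register an extra profile; the name and model here are examples, only the
# built-in profiles ship with this patch.
LLMFactory.register_profile("claude_sonnet", "LiteLLM",
                            model="claude-3-5-sonnet-latest", temperature=0.3)
print(LLMFactory.list_profiles())

# A profile can back a single LLM handle...
premium_llm = LLM(profile="premium")

# ...or drive OptoPrimeMulti, which spreads candidate generation across the
# listed profiles when generation_technique="multi_llm" and falls back to the
# optimizer's default LLM if a profile cannot be constructed.
param = ParameterNode(name="x", value=1)  # placeholder trainable node, mirroring the tests
optimizer = OptoPrimeMulti(
    [param],
    llm_profiles=["cheap", "premium", "claude_sonnet"],
    llm_weights=[1.0, 1.0, 1.0],  # optional; defaults to 1.0 per profile
    generation_technique="multi_llm",
)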
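
The candidate-to-profile assignment in _get_llms_for_generation is a plain index-modulo round-robin, so the schedule can be previewed without any backend; a minimal illustration matching the expectation in test_get_llms_for_generation:

# Response i is served by profile i % len(llm_profiles).
profiles = ["cheap", "premium", "default"]
schedule = [profiles[i % len(profiles)] for i in range(5)]
assert schedule == ["cheap", "premium", "default", "cheap", "premium"]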