From a78d2f84862b8809091299f20ea6a613abc676ec Mon Sep 17 00:00:00 2001 From: windweller Date: Wed, 5 Feb 2025 12:30:01 -0800 Subject: [PATCH 01/14] add LiteLLM backend --- opto/utils/llm.py | 38 ++++++++++++++++++++++++++++++++------ setup.py | 1 + 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/opto/utils/llm.py b/opto/utils/llm.py index a67ccc52..5da75e68 100644 --- a/opto/utils/llm.py +++ b/opto/utils/llm.py @@ -3,6 +3,8 @@ import time import json import autogen # We import autogen here to avoid the need of installing autogen +import litellm + class AbstractModel: @@ -53,12 +55,7 @@ def __setstate__(self, state): class AutoGenLLM(AbstractModel): """This is the main class Trace uses to interact with the model. It is a wrapper around autogen's OpenAIWrapper. For using models not supported by autogen, subclass AutoGenLLM and override the `_factory` and `create` method. Users can pass instances of this class to optimizers' llm argument.""" - def __init__( - self, - config_list: List = None, - filter_dict: Dict = None, - reset_freq: Union[int, None] = None, - ) -> None: + def __init__(self, config_list: List = None, filter_dict: Dict = None, reset_freq: Union[int, None] = None) -> None: if config_list is None: try: config_list = autogen.config_list_from_json("OAI_CONFIG_LIST") @@ -143,3 +140,32 @@ def auto_construct_oai_config_list_from_env() -> List: } ) return config_list + + +class LiteLLM(AbstractModel): + """ + This is an LLM backend supported by LiteLLM library. + + https://docs.litellm.ai/docs/completion/input + """ + + def __init__(self, model: str = "gpt-4o", reset_freq: Union[int, None] = None, + cache=True) -> None: + self.model_name = model + self.cache = cache + factory = litellm.completion + super().__init__(factory, reset_freq) + + @property + def model(self): + return lambda **kwargs: self.create(**kwargs) + + # This is main API. We use the API of autogen's OpenAIWrapper + def create(self, **config: Any) -> litellm.types.utils.ModelResponse: + """ + response = litellm.completion( + model=self.model, + messages=[{"content": message, "role": "user"}] + ) + """ + return self._model.completion(model=self.model_name, **config) diff --git a/setup.py b/setup.py index 37837b96..86fb669d 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ "graphviz>=0.20.1", "scikit-learn", "xgboost", + "litellm" ] setuptools.setup( From de271e850132fd7add571db1c6435ad091a8b734 Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 10:30:49 -0800 Subject: [PATCH 02/14] Update litellm backend. --- opto/utils/llm.py | 32 +++++++++++++++++++------------- tests/unit_tests/test_llm.py | 3 ++- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/opto/utils/llm.py b/opto/utils/llm.py index 5da75e68..7cf2a72e 100644 --- a/opto/utils/llm.py +++ b/opto/utils/llm.py @@ -26,18 +26,16 @@ def __init__(self, factory: Callable, reset_freq: Union[int, None] = None) -> No self.reset_freq = reset_freq self._init_time = time.time() + # Overwrite this `model` property when subclassing. @property def model(self): - # Overwrite this when subclassing + """ When self.model is called, text responses should always be available at ['choices'][0].['message']['content'] """ return self._model # This is the main API def __call__(self, *args, **kwargs) -> Any: - """The call function handles refreshing the model if needed.""" - if ( - self.reset_freq is not None - and time.time() - self._init_time > self.reset_freq - ): + """ The call function handles refreshing the model if needed. 
""" + if self.reset_freq is not None and time.time() - self._init_time > self.reset_freq: self._model = self.factory() self._init_time = time.time() return self.model(*args, **kwargs) @@ -53,7 +51,7 @@ def __setstate__(self, state): class AutoGenLLM(AbstractModel): - """This is the main class Trace uses to interact with the model. It is a wrapper around autogen's OpenAIWrapper. For using models not supported by autogen, subclass AutoGenLLM and override the `_factory` and `create` method. Users can pass instances of this class to optimizers' llm argument.""" + """ This is the main class Trace uses to interact with the model. It is a wrapper around autogen's OpenAIWrapper. For using models not supported by autogen, subclass AutoGenLLM and override the `_factory` and `create` method. Users can pass instances of this class to optimizers' llm argument. """ def __init__(self, config_list: List = None, filter_dict: Dict = None, reset_freq: Union[int, None] = None) -> None: if config_list is None: @@ -153,19 +151,27 @@ def __init__(self, model: str = "gpt-4o", reset_freq: Union[int, None] = None, cache=True) -> None: self.model_name = model self.cache = cache - factory = litellm.completion + factory = lambda : self._factory(self.model_name) # an LLM instance uses a fixed model super().__init__(factory, reset_freq) + @classmethod + def _factory(cls, model_name : str): + import os + if model_name.startswith('azure/'): # azure model + azure_token_provider_scope = os.environ.get('AZURE_TOKEN_PROVIDER_SCOPE', None) + if azure_token_provider_scope is not None: + from azure.identity import DefaultAzureCredential, get_bearer_token_provider + credential = get_bearer_token_provider(DefaultAzureCredential(), azure_token_provider_scope) + return lambda *args, **kwargs: litellm.completion(model_name, *args, + azure_ad_token_provider=credential, **kwargs) + return lambda *args, **kwargs: litellm.completion(model_name, *args, **kwargs) + @property def model(self): - return lambda **kwargs: self.create(**kwargs) - - # This is main API. We use the API of autogen's OpenAIWrapper - def create(self, **config: Any) -> litellm.types.utils.ModelResponse: """ response = litellm.completion( model=self.model, messages=[{"content": message, "role": "user"}] ) """ - return self._model.completion(model=self.model_name, **config) + return lambda *args, **kwargs: self._model(*args, **kwargs) diff --git a/tests/unit_tests/test_llm.py b/tests/unit_tests/test_llm.py index 31f33ee3..4cd5b4c5 100644 --- a/tests/unit_tests/test_llm.py +++ b/tests/unit_tests/test_llm.py @@ -1,8 +1,9 @@ -from opto.utils.llm import AutoGenLLM +from opto.utils.llm import AutoGenLLM, LiteLLM from opto.optimizers.utils import print_color try: llm = AutoGenLLM() + # llm = LiteLLM() system_prompt = 'You are a helpful assistant.' user_prompt = "Hello world." From 9afc50c88cfb92ef599233d6c44ee9848d766b95 Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 11:35:24 -0800 Subject: [PATCH 03/14] Add DEFAULT_LITELLM_MODEL environment variable. 
--- opto/utils/llm.py | 13 ++++++++++--- tests/unit_tests/test_llm.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/opto/utils/llm.py b/opto/utils/llm.py index 7cf2a72e..f273238b 100644 --- a/opto/utils/llm.py +++ b/opto/utils/llm.py @@ -4,7 +4,7 @@ import json import autogen # We import autogen here to avoid the need of installing autogen import litellm - +import os class AbstractModel: @@ -145,10 +145,18 @@ class LiteLLM(AbstractModel): This is an LLM backend supported by LiteLLM library. https://docs.litellm.ai/docs/completion/input + + To use this, set the credentials through the environment variable as + instructed in the LiteLLM documentation. For convenience, you can set the + default model name through the environment variable DEFAULT_LITELLM_MODEL. + When using Azure models via token provider, you can set the Azure token + provider scope through the environment variable AZURE_TOKEN_PROVIDER_SCOPE. """ - def __init__(self, model: str = "gpt-4o", reset_freq: Union[int, None] = None, + def __init__(self, model: Union[str, None] = None, reset_freq: Union[int, None] = None, cache=True) -> None: + if model is None: + model = os.environ.get('DEFAULT_LITELLM_MODEL', 'gpt-4o') self.model_name = model self.cache = cache factory = lambda : self._factory(self.model_name) # an LLM instance uses a fixed model @@ -156,7 +164,6 @@ def __init__(self, model: str = "gpt-4o", reset_freq: Union[int, None] = None, @classmethod def _factory(cls, model_name : str): - import os if model_name.startswith('azure/'): # azure model azure_token_provider_scope = os.environ.get('AZURE_TOKEN_PROVIDER_SCOPE', None) if azure_token_provider_scope is not None: diff --git a/tests/unit_tests/test_llm.py b/tests/unit_tests/test_llm.py index 4cd5b4c5..a0992a76 100644 --- a/tests/unit_tests/test_llm.py +++ b/tests/unit_tests/test_llm.py @@ -2,8 +2,8 @@ from opto.optimizers.utils import print_color try: - llm = AutoGenLLM() - # llm = LiteLLM() + # llm = AutoGenLLM() + llm = LiteLLM() system_prompt = 'You are a helpful assistant.' user_prompt = "Hello world." From 5c53b3f25ac995d485d086f20797433aaad246f7 Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 11:56:26 -0800 Subject: [PATCH 04/14] Remove default requirement on AutoGenLLM. Default uses the LLM class, which is set to LiteLLM. 
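With this change, optimizers construct a default `LLM()` backend (an alias for `LiteLLM`) when no `llm` argument is given, so an AutoGen config file is no longer required. A minimal sketch of the intended default path, assuming LiteLLM credentials (and optionally `DEFAULT_LITELLM_MODEL`) are already set in the environment:

```python
from opto.trace import node
from opto.optimizers import OptoPrime
from opto.utils.llm import LLM  # alias for LiteLLM after this patch

x = node(-1.0, trainable=True)

# No config_list needed; OptoPrime falls back to LLM() internally.
optimizer = OptoPrime([x])

# A specific backend/model can still be passed explicitly.
optimizer = OptoPrime([x], llm=LLM(model="gpt-4o"))
```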
--- opto/optimizers/optoprime.py | 8 +++----- opto/optimizers/textgrad.py | 7 +++---- opto/trace/operators.py | 5 ++--- opto/utils/llm.py | 12 ++++++++++-- setup.py | 4 +++- tests/unit_tests/test_copy.py | 5 ++--- tests/unit_tests/test_llm.py | 6 +++--- tests/unit_tests/test_optimizer.py | 11 +++++------ 8 files changed, 31 insertions(+), 27 deletions(-) diff --git a/opto/optimizers/optoprime.py b/opto/optimizers/optoprime.py index 26dfc0fb..6ee70fc5 100644 --- a/opto/optimizers/optoprime.py +++ b/opto/optimizers/optoprime.py @@ -1,7 +1,6 @@ from typing import Any, List, Dict, Union, Tuple from dataclasses import dataclass, asdict from textwrap import dedent, indent -import autogen import warnings import json import re @@ -11,7 +10,7 @@ from opto.trace.propagators.propagators import Propagator from opto.optimizers.optimizer import Optimizer from opto.optimizers.buffers import FIFOBuffer -from opto.utils.llm import AutoGenLLM +from opto.utils.llm import AbstractModel, LLM def get_fun_name(node: MessageNode): @@ -250,7 +249,7 @@ class OptoPrime(Optimizer): def __init__( self, parameters: List[ParameterNode], - llm: AutoGenLLM = None, + llm: AbstractModel = None, *args, propagator: Propagator = None, objective: Union[None, str] = None, @@ -260,12 +259,11 @@ def __init__( max_tokens=4096, log=True, prompt_symbols=None, - filter_dict: Dict = None, # autogen filter_dict **kwargs, ): super().__init__(parameters, *args, propagator=propagator, **kwargs) self.ignore_extraction_error = ignore_extraction_error - self.llm = llm or AutoGenLLM() + self.llm = llm or LLM() self.objective = objective or self.default_objective self.example_problem = ProblemInstance.problem_template.format( instruction=self.default_objective, diff --git a/opto/optimizers/textgrad.py b/opto/optimizers/textgrad.py index e63a026d..abc471f7 100644 --- a/opto/optimizers/textgrad.py +++ b/opto/optimizers/textgrad.py @@ -1,12 +1,11 @@ import json from dataclasses import dataclass -import autogen from typing import Any, List, Dict, Union, Tuple, Optional from opto.optimizers.optimizer import Optimizer from opto.trace.nodes import ParameterNode, Node, MessageNode from opto.trace.propagators import TraceGraph, GraphPropagator, Propagator from opto.trace.utils import escape_json_nested_quotes, remove_non_ascii -from opto.utils.llm import AutoGenLLM +from opto.utils.llm import LLM, AbstractModel from copy import copy import re @@ -309,7 +308,7 @@ class TextGrad(Optimizer): def __init__( self, parameters: List[ParameterNode], - llm: AutoGenLLM = None, + llm: AbstractModel = None, *args, propagator: Propagator = None, objective: Union[None, str] = None, @@ -318,7 +317,7 @@ def __init__( **kwargs, ): super().__init__(parameters, *args, **kwargs) - self.llm = llm or AutoGenLLM() + self.llm = llm or LLM() self.print_limit = 100 self.max_tokens = max_tokens self.new_variable_tags = ["", ""] diff --git a/opto/trace/operators.py b/opto/trace/operators.py index fa7aee00..45a2f715 100644 --- a/opto/trace/operators.py +++ b/opto/trace/operators.py @@ -594,8 +594,7 @@ def call_llm(system_prompt, *user_prompts, **kwargs): messages = [{"role": "system", "content": system_prompt}] for user_prompt in user_prompts: messages.append({"role": "user", "content": user_prompt}) - from opto.utils.llm import AutoGenLLM - - llm = AutoGenLLM() + from opto.utils.llm import LLM + llm = LLM() response = llm(messages=messages, **kwargs) return response.choices[0].message.content diff --git a/opto/utils/llm.py b/opto/utils/llm.py index f273238b..af1834cf 100644 --- 
a/opto/utils/llm.py +++ b/opto/utils/llm.py @@ -2,10 +2,13 @@ import os import time import json -import autogen # We import autogen here to avoid the need of installing autogen import litellm import os +try: + import autogen # We import autogen here to avoid the need of installing autogen +except ImportError: + pass class AbstractModel: """ @@ -77,7 +80,7 @@ def model(self): return lambda *args, **kwargs: self.create(*args, **kwargs) # This is main API. We use the API of autogen's OpenAIWrapper - def create(self, **config: Any) -> autogen.ModelClient.ModelClientResponseProtocol: + def create(self, **config: Any): """Make a completion for a given config using available clients. Besides the kwargs allowed in openai's [or other] client, we allow the following additional kwargs. The config in each client will be overridden by the config. @@ -182,3 +185,8 @@ def model(self): ) """ return lambda *args, **kwargs: self._model(*args, **kwargs) + + + +# Set Default LLM class +LLM = LiteLLM # synonym \ No newline at end of file diff --git a/setup.py b/setup.py index 86fb669d..2083c601 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,6 @@ install_requires = [ - "autogen-agentchat==0.2.40", "graphviz>=0.20.1", "scikit-learn", "xgboost", @@ -27,5 +26,8 @@ long_description=open('README.md', encoding="utf8").read(), packages=setuptools.find_packages(include=["opto*"]), install_requires=install_requires, + extras_require={ + 'autogen': ["autogen-agentchat==0.2.40"], + }, python_requires=">=3.9", ) diff --git a/tests/unit_tests/test_copy.py b/tests/unit_tests/test_copy.py index d9b7dcc2..ad78ffd1 100644 --- a/tests/unit_tests/test_copy.py +++ b/tests/unit_tests/test_copy.py @@ -1,8 +1,7 @@ from opto import trace from opto.optimizers import OptoPrime -from opto.utils.llm import AutoGenLLM import copy - +from opto.utils.llm import LLM x = trace.node('x') copy.deepcopy(x) @@ -24,7 +23,7 @@ def fun(x): optimizer = OptoPrime([x]) optimizer2 = copy.deepcopy(optimizer) - llm = AutoGenLLM() + llm = LLM() copy.deepcopy(llm) except FileNotFoundError as e: print(f'Error: {e}') diff --git a/tests/unit_tests/test_llm.py b/tests/unit_tests/test_llm.py index a0992a76..4460408f 100644 --- a/tests/unit_tests/test_llm.py +++ b/tests/unit_tests/test_llm.py @@ -1,9 +1,8 @@ -from opto.utils.llm import AutoGenLLM, LiteLLM +from opto.utils.llm import LLM from opto.optimizers.utils import print_color try: - # llm = AutoGenLLM() - llm = LiteLLM() + llm = LLM() system_prompt = 'You are a helpful assistant.' user_prompt = "Hello world." @@ -21,6 +20,7 @@ print_color(f'System: {system_prompt}', 'red') print_color(f'User: {user_prompt}', 'blue') print_color(f'LLM: {response}', 'green') + except FileNotFoundError as e: print_color(f'Error: {e}', 'red') print_color('Omit the test.', 'yellow') \ No newline at end of file diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index 5e67926a..50a839ca 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -1,5 +1,4 @@ import os -import autogen from opto.trace import bundle, node, GRAPH from opto.optimizers import OptoPrime @@ -35,10 +34,10 @@ def user(x): else: return "Success." 
-if os.path.exists("OAI_CONFIG_LIST"): +if os.path.exists("OAI_CONFIG_LIST") or os.environ.get("DEFAULT_LITELLM_MODEL"): # One-step optimization example x = node(-1.0, trainable=True) - optimizer = OptoPrime([x], config_list=autogen.config_list_from_json("OAI_CONFIG_LIST")) + optimizer = OptoPrime([x]) output = foobar(x) feedback = user(output.data) optimizer.zero_feedback() @@ -125,8 +124,8 @@ def foobar_text(x): GRAPH.clear() x = node("negative point one", trainable=True) -if os.path.exists("OAI_CONFIG_LIST"): - optimizer = OptoPrime([x], config_list=autogen.config_list_from_json("OAI_CONFIG_LIST")) +if os.path.exists("OAI_CONFIG_LIST") or os.environ.get("DEFAULT_LITELLM_MODEL"): + optimizer = OptoPrime([x]) output = foobar_text(x) feedback = user(output.data) optimizer.zero_feedback() @@ -153,7 +152,7 @@ def my_fun(x): x = node(-1, trainable=False) - optimizer = OptoPrime([my_fun.parameter], config_list=autogen.config_list_from_json("OAI_CONFIG_LIST")) + optimizer = OptoPrime([my_fun.parameter]) output = my_fun(x) feedback = user(output.data) optimizer.zero_feedback() From 676560a50afe98f0a0e73e0a8cc6bff26818d923 Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 11:56:55 -0800 Subject: [PATCH 05/14] Update version number. --- opto/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opto/version.py b/opto/version.py index df98922f..2964c91c 100644 --- a/opto/version.py +++ b/opto/version.py @@ -1 +1 @@ -__version__ = "0.1.3.4" \ No newline at end of file +__version__ = "0.1.3.5" \ No newline at end of file From fa136cb091e2cc6e3741ce1c60621b8bf2430e53 Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 12:30:31 -0800 Subject: [PATCH 06/14] Add autogen as an optional dependency --- pyproject.toml | 3 +++ setup.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3f7a3bd7..457d5acb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,9 @@ classifiers = [ "Programming Language :: Python :: 3.9", ] +[project.optional-dependencies] +autogen = ["autogen-agentchat==0.2.40"] + [project.urls] Homepage = "https://microsoft.github.io/Trace/" diff --git a/setup.py b/setup.py index 2083c601..2ae20d96 100644 --- a/setup.py +++ b/setup.py @@ -26,8 +26,5 @@ long_description=open('README.md', encoding="utf8").read(), packages=setuptools.find_packages(include=["opto*"]), install_requires=install_requires, - extras_require={ - 'autogen': ["autogen-agentchat==0.2.40"], - }, python_requires=">=3.9", ) From 142d95b761970ddc0ea11331ac660106ced7000b Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 12:34:46 -0800 Subject: [PATCH 07/14] Make test_llm.py optional. --- tests/unit_tests/test_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/test_llm.py b/tests/unit_tests/test_llm.py index 4460408f..317f5461 100644 --- a/tests/unit_tests/test_llm.py +++ b/tests/unit_tests/test_llm.py @@ -21,6 +21,6 @@ print_color(f'User: {user_prompt}', 'blue') print_color(f'LLM: {response}', 'green') -except FileNotFoundError as e: +except Exception as e: print_color(f'Error: {e}', 'red') print_color('Omit the test.', 'yellow') \ No newline at end of file From 1f4c09501c1095aa431771182a95bc463bdd8978 Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 12:37:07 -0800 Subject: [PATCH 08/14] Make llm related testing optional in CI. 
--- tests/unit_tests/test_llm.py | 6 +----- tests/unit_tests/test_optimizer.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/unit_tests/test_llm.py b/tests/unit_tests/test_llm.py index 317f5461..b4ad90cc 100644 --- a/tests/unit_tests/test_llm.py +++ b/tests/unit_tests/test_llm.py @@ -1,7 +1,7 @@ from opto.utils.llm import LLM from opto.optimizers.utils import print_color -try: +if os.path.exists("OAI_CONFIG_LIST") or os.environ.get("DEFAULT_LITELLM_MODEL") or os.environ.get("OPENAI_API_KEY"): llm = LLM() system_prompt = 'You are a helpful assistant.' user_prompt = "Hello world." @@ -20,7 +20,3 @@ print_color(f'System: {system_prompt}', 'red') print_color(f'User: {user_prompt}', 'blue') print_color(f'LLM: {response}', 'green') - -except Exception as e: - print_color(f'Error: {e}', 'red') - print_color('Omit the test.', 'yellow') \ No newline at end of file diff --git a/tests/unit_tests/test_optimizer.py b/tests/unit_tests/test_optimizer.py index 50a839ca..d77d38be 100644 --- a/tests/unit_tests/test_optimizer.py +++ b/tests/unit_tests/test_optimizer.py @@ -34,7 +34,7 @@ def user(x): else: return "Success." -if os.path.exists("OAI_CONFIG_LIST") or os.environ.get("DEFAULT_LITELLM_MODEL"): +if os.path.exists("OAI_CONFIG_LIST") or os.environ.get("DEFAULT_LITELLM_MODEL") or os.environ.get("OPENAI_API_KEY"): # One-step optimization example x = node(-1.0, trainable=True) optimizer = OptoPrime([x]) @@ -124,7 +124,7 @@ def foobar_text(x): GRAPH.clear() x = node("negative point one", trainable=True) -if os.path.exists("OAI_CONFIG_LIST") or os.environ.get("DEFAULT_LITELLM_MODEL"): +if os.path.exists("OAI_CONFIG_LIST") or os.environ.get("DEFAULT_LITELLM_MODEL") or os.environ.get("OPENAI_API_KEY"): optimizer = OptoPrime([x]) output = foobar_text(x) feedback = user(output.data) From 5ff7e1bcb36bf461a17a5e44558b0e4f6ff8cf8e Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 12:40:07 -0800 Subject: [PATCH 09/14] Fix missing import --- tests/unit_tests/test_llm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit_tests/test_llm.py b/tests/unit_tests/test_llm.py index b4ad90cc..d6606020 100644 --- a/tests/unit_tests/test_llm.py +++ b/tests/unit_tests/test_llm.py @@ -1,5 +1,6 @@ from opto.utils.llm import LLM from opto.optimizers.utils import print_color +import os if os.path.exists("OAI_CONFIG_LIST") or os.environ.get("DEFAULT_LITELLM_MODEL") or os.environ.get("OPENAI_API_KEY"): llm = LLM() From 749d8fdf1e3fda318ea7d2b5547284f4a7396f3e Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 12:47:17 -0800 Subject: [PATCH 10/14] Update README --- README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 8535a8d4..f676fff3 100644 --- a/README.md +++ b/README.md @@ -31,14 +31,13 @@ Or for development, clone the repo and run the following. pip install -e . -The library requires Python >= 3.9. The installation script will git -clone [AutoGen](https://github.com/microsoft/autogen). You may require [Git Large File Storage](https://git-lfs.com/) if -git is unable to clone the repository otherwise. +The library requires Python >= 3.9. By default (starting with v0.1.3.5), we use [LiteLLM](https://github.com/BerriAI/litellm) as the backend of LLMs. For backward compatibility, we provide backend-support with [AutoGen](https://github.com/microsoft/autogen); when installing, users can add `[autogen]` tag to install a compatible AutoGen version (e.g., `pip install trace-opt[autogen]`). 
You may require [Git Large File Storage](https://git-lfs.com/) if +git is unable to clone the repository. ## Updates - **2025.2.7** Trace was featured in the [G-Research NeurIPS highlight](https://www.gresearch.com/news/neurips-paper-reviews-2024-8/) by the Science Director Hugh Salimbeni. - **2024.12.10** Trace was demoed in person at NeurIPS 2024 Expo. -- **2024.11.05** Ching-An Cheng gave a talk at UW Robotics Colloquium on Trace: [video](https://www.youtube.com/watch?v=T2g1Vo3u_9g). +- **2024.11.05** Ching-An Cheng gave a talk at UW Robotics Colloquium on Trace: [video](https://www.youtube.com/watch?v=T2g1Vo3u_9g). - **2024.10.21** New [paper](https://arxiv.org/abs/2410.15625) by Nvidia, Stanford, Visa, & Intel applies Trace to optimize for mapper code of parallel programming (for scientific computing and matrix multiplication). Trace (OptoPrime) learns code achieving 1.3X speed up under 10 minutes, compared to the code optimized by a system engineer expert. @@ -215,16 +214,16 @@ def train(): agent = train() ``` -Defining and training an agent through Trace will give you more flexibility and control over what the agent learns. +Defining and training an agent through Trace will give you more flexibility and control over what the agent learns. ## Tutorials | **Level** | **Tutorial** | **Run in Colab** | **Description** | -| --- |-------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| --- |-------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Beginner | [Getting Started](https://microsoft.github.io/Trace/quickstart/quick_start.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/microsoft/Trace/blob/website/docs/quickstart/quick_start.ipynb) | Introduces basic primitives like `node` and `bundle`. Showcases a code optimization pipeline. | -| Beginner | [Adaptive AI Agent](https://microsoft.github.io/Trace/quickstart/quick_start_2.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/microsoft/Trace/blob/website/docs/quickstart/quick_start_2.ipynb) | Introduce primitive `model` that allows anyone to build self-improving agents that react to environment feedback. Shows how an LLM agent learns to place a shot in a Battleship game. -| Intermediate | [Multi-Agent Collaboration](https://microsoft.github.io/Trace/quickstart/virtualhome.html) | N/A | Demonstrates how Trace can be used for multi-agent collaboration environment in Virtualhome. 
-| Intermediate | [NLP Prompt Optimization](https://microsoft.github.io/Trace/examples/nlp/bigbench_hard.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/microsoft/Trace/blob/website/docs/examples/nlp/bigbench_hard.ipynb) | Shows how Trace can optimizes prompt and code together jointly for BigBench-Hard 23 tasks. +| Beginner | [Adaptive AI Agent](https://microsoft.github.io/Trace/quickstart/quick_start_2.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/microsoft/Trace/blob/website/docs/quickstart/quick_start_2.ipynb) | Introduce primitive `model` that allows anyone to build self-improving agents that react to environment feedback. Shows how an LLM agent learns to place a shot in a Battleship game. +| Intermediate | [Multi-Agent Collaboration](https://microsoft.github.io/Trace/quickstart/virtualhome.html) | N/A | Demonstrates how Trace can be used for multi-agent collaboration environment in Virtualhome. +| Intermediate | [NLP Prompt Optimization](https://microsoft.github.io/Trace/examples/nlp/bigbench_hard.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/microsoft/Trace/blob/website/docs/examples/nlp/bigbench_hard.ipynb) | Shows how Trace can optimizes prompt and code together jointly for BigBench-Hard 23 tasks. | Advanced | [Robotic Arm Control](https://microsoft.github.io/Trace/examples/robotics/metaworld.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/microsoft/Trace/blob/website/docs/examples/robotics/metaworld.ipynb) | Trace can optimize code to control a robotic arm after observing a full trajectory of interactions. | @@ -276,7 +275,7 @@ with TraceGraph coming soon). ## LLM API Setup -Currently we rely on AutoGen for LLM caching and API-Key management. +Currently we rely on AutoGen for LLM caching and API-Key management. AutoGen relies on `OAI_CONFIG_LIST`, which is a file you put in your working directory. It has the format of: ```json lines From e55caea539c5ea273effd427c2a92b18e38ae626 Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 12:47:27 -0800 Subject: [PATCH 11/14] Update README --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f676fff3..9191e517 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,6 @@ Now, after declaring what is trainable and what isn't, and use `node` and `bundl can use the optimizer to optimize the computation graph. ```python -import autogen from opto.optimizers import OptoPrime @@ -120,8 +119,7 @@ test_input = [1, 2, 3, 4] epoch = 2 -optimizer = OptoPrime(strange_sort_list.parameters(), - config_list=autogen.config_list_from_json("OAI_CONFIG_LIST")) +optimizer = OptoPrime(strange_sort_list.parameters()) for i in range(epoch): print(f"Training Epoch {i}") @@ -275,8 +273,9 @@ with TraceGraph coming soon). ## LLM API Setup -Currently we rely on AutoGen for LLM caching and API-Key management. -AutoGen relies on `OAI_CONFIG_LIST`, which is a file you put in your working directory. It has the format of: +Currently we rely on LiteLLM or AutoGen for LLM caching and API-Key management. +By default, LiteLLM is used. Please the documentation there to set the right environment variables for keys and end-point urls. 
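For example, a minimal OpenAI-backed setup only needs the following (illustrative values; the exact variables depend on your provider, per the LiteLLM documentation):

```python
import os

os.environ["OPENAI_API_KEY"] = "sk-..."          # provider credential read by LiteLLM
os.environ["DEFAULT_LITELLM_MODEL"] = "gpt-4o"   # optional: default model used by LLM()
# For Azure models accessed via a token provider, additionally set
# AZURE_TOKEN_PROVIDER_SCOPE and use an "azure/..." model name.
```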
+On the other hand, AutoGen relies on `OAI_CONFIG_LIST`, which is a file you put in your working directory. It has the format of: ```json lines [ From 830cc45d1c6f4e1f2849fb9b07ca1e8be814d31a Mon Sep 17 00:00:00 2001 From: chinganc Date: Wed, 12 Feb 2025 12:53:32 -0800 Subject: [PATCH 12/14] Remove legacy code in `create_feedback` of `ExceptionNode` --- opto/trace/nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opto/trace/nodes.py b/opto/trace/nodes.py index fbadeeb6..7c3984b6 100644 --- a/opto/trace/nodes.py +++ b/opto/trace/nodes.py @@ -2181,7 +2181,7 @@ def __init__( def create_feedback(self, style="simple"): assert style in ("simple", "full") feedback = self._data - if style in ("line", "full"): + if style == "full": if type(self.info) is dict and self.info.get("error_comment") is not None: feedback = self.info["error_comment"] return feedback From 9cb32a8106617f6c12b3ba9bd23ddc04e3508f25 Mon Sep 17 00:00:00 2001 From: windweller Date: Mon, 17 Feb 2025 12:13:21 -0800 Subject: [PATCH 13/14] update all notebooks with the new LLM API, also fixed a bug in battleship notebook --- docs/examples/game/battleship.ipynb | 2 +- docs/quickstart/quick_start.ipynb | 5 +---- docs/quickstart/quick_start_2.ipynb | 24 ++++++++++++++++------ docs/tutorials/optimization_tutorial.ipynb | 12 +++++------ 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/docs/examples/game/battleship.ipynb b/docs/examples/game/battleship.ipynb index 0fe3ee75..08a1f043 100644 --- a/docs/examples/game/battleship.ipynb +++ b/docs/examples/game/battleship.ipynb @@ -923,7 +923,7 @@ } ], "source": [ - "optimizer = OptoPrime(policy.parameters(), memory_size=0, config_list=config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime(policy.parameters(), memory_size=0)\n", "\n", "\n", "feedback = \"\"\n", diff --git a/docs/quickstart/quick_start.ipynb b/docs/quickstart/quick_start.ipynb index fe64972a..5a93a08c 100644 --- a/docs/quickstart/quick_start.ipynb +++ b/docs/quickstart/quick_start.ipynb @@ -381,8 +381,7 @@ "\n", "epoch = 2\n", "\n", - "optimizer = OptoPrime(strange_sort_list.parameters(),\n", - " config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime(strange_sort_list.parameters())\n", "\n", "for i in range(epoch):\n", " print(f\"Training Epoch {i}\")\n", @@ -933,10 +932,8 @@ "epoch = 2\n", "\n", "code_optimizer = OptoPrime(strange_sort_list.parameters(),\n", - " config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"),\n", " ignore_extraction_error=True)\n", "verifier_optimizer = OptoPrime(verifier.parameters(),\n", - " config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"),\n", " ignore_extraction_error=True)\n", "\n", "for i in range(epoch):\n", diff --git a/docs/quickstart/quick_start_2.ipynb b/docs/quickstart/quick_start_2.ipynb index a7dc9f91..b668a2c4 100644 --- a/docs/quickstart/quick_start_2.ipynb +++ b/docs/quickstart/quick_start_2.ipynb @@ -127,14 +127,26 @@ "metadata": {}, "outputs": [], "source": [ - "import sys\n", "import os\n", + "import urllib.request\n", + "import importlib.util\n", "\n", - "# Get the absolute path of the examples folder\n", - "examples_path = os.path.abspath(os.path.join('..', '..', 'examples'))\n", + "# Define the raw URL for downloading\n", + "raw_url = \"https://raw.githubusercontent.com/microsoft/Trace/main/examples/battleship.py\"\n", "\n", - "# Add the examples folder to the Python path\n", - "sys.path.append(examples_path)" + "# Define the local file path\n", + "local_file = 
\"battleship.py\"\n", + "\n", + "# Download the file\n", + "urllib.request.urlretrieve(raw_url, local_file)\n", + "print(f\"Downloaded {local_file}\")\n", + "\n", + "# Load the module dynamically\n", + "spec = importlib.util.spec_from_file_location(\"battleship\", local_file)\n", + "battleship = importlib.util.module_from_spec(spec)\n", + "spec.loader.exec_module(battleship)\n", + "\n", + "print(\"battleship module loaded successfully!\")" ] }, { @@ -2189,7 +2201,7 @@ "\n", "agent = Agent()\n", "obs = node(board.get_shots(), trainable=False)\n", - "optimizer = OptoPrime(agent.parameters(), config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime(agent.parameters())\n", "\n", "feedback, terminal, cum_reward = \"\", False, 0\n", "\n", diff --git a/docs/tutorials/optimization_tutorial.ipynb b/docs/tutorials/optimization_tutorial.ipynb index be2ac14e..78511199 100644 --- a/docs/tutorials/optimization_tutorial.ipynb +++ b/docs/tutorials/optimization_tutorial.ipynb @@ -177,7 +177,7 @@ "\n", "# One-step optimization example\n", "x = node(-1.0, trainable=True)\n", - "optimizer = OptoPrime([x], config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime([x])\n", "output = foobar(x)\n", "feedback = user(output.data)\n", "optimizer.zero_feedback()\n", @@ -415,7 +415,7 @@ "# A small example of how to use the optimizer in a loop\n", "GRAPH.clear()\n", "x = node(-1.0, trainable=True)\n", - "optimizer = OptoPrime([x], config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime([x])\n", "\n", "history = [x.data]\n", "feedback = \"\"\n", @@ -466,7 +466,7 @@ "# A small example of how to include constraints on parameters\n", "GRAPH.clear()\n", "x = node(-1.0, trainable=True, constraint=\"The value should be greater than 2.0\")\n", - "optimizer = OptoPrime([x], config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime([x])\n", "\n", "history = [x.data]\n", "feedback = \"\"\n", @@ -600,7 +600,7 @@ "source": [ "GRAPH.clear()\n", "x = node(\"negative point one\", trainable=True)\n", - "optimizer = OptoPrime([x], config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime([x])\n", "\n", "history = [x.data]\n", "feedback = \"\"\n", @@ -675,7 +675,7 @@ "\n", "\n", "x = node(-1, trainable=False)\n", - "optimizer = OptoPrime([my_fun.parameter], config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime([my_fun.parameter])\n", "\n", "feedback = \"\"\n", "while feedback != \"Success.\":\n", @@ -917,7 +917,7 @@ ], "source": [ "x = node(0.005, trainable=True)\n", - "optimizer = OptoPrime([x], config_list=autogen.config_list_from_json(\"OAI_CONFIG_LIST\"))\n", + "optimizer = OptoPrime([x])\n", "\n", "history = []\n", "bestScore = None\n", From 07cf80f84a0c982bfaad9d4dfd7ebbe91107ba6d Mon Sep 17 00:00:00 2001 From: windweller Date: Mon, 17 Feb 2025 18:20:35 -0800 Subject: [PATCH 14/14] add a CustomLLM class based on OpenAI's server endpoint --- opto/utils/llm.py | 46 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/opto/utils/llm.py b/opto/utils/llm.py index af1834cf..c0bf0cd3 100644 --- a/opto/utils/llm.py +++ b/opto/utils/llm.py @@ -4,12 +4,14 @@ import json import litellm import os +import openai try: import autogen # We import autogen here to avoid the need of installing autogen except ImportError: pass + class AbstractModel: """ A minimal abstraction of a 
model api that refreshes the model every @@ -56,7 +58,7 @@ def __setstate__(self, state): class AutoGenLLM(AbstractModel): """ This is the main class Trace uses to interact with the model. It is a wrapper around autogen's OpenAIWrapper. For using models not supported by autogen, subclass AutoGenLLM and override the `_factory` and `create` method. Users can pass instances of this class to optimizers' llm argument. """ - def __init__(self, config_list: List = None, filter_dict: Dict = None, reset_freq: Union[int, None] = None) -> None: + def __init__(self, config_list: List = None, filter_dict: Dict = None, reset_freq: Union[int, None] = None) -> None: if config_list is None: try: config_list = autogen.config_list_from_json("OAI_CONFIG_LIST") @@ -162,18 +164,18 @@ def __init__(self, model: Union[str, None] = None, reset_freq: Union[int, None] model = os.environ.get('DEFAULT_LITELLM_MODEL', 'gpt-4o') self.model_name = model self.cache = cache - factory = lambda : self._factory(self.model_name) # an LLM instance uses a fixed model + factory = lambda: self._factory(self.model_name) # an LLM instance uses a fixed model super().__init__(factory, reset_freq) @classmethod - def _factory(cls, model_name : str): + def _factory(cls, model_name: str): if model_name.startswith('azure/'): # azure model azure_token_provider_scope = os.environ.get('AZURE_TOKEN_PROVIDER_SCOPE', None) if azure_token_provider_scope is not None: from azure.identity import DefaultAzureCredential, get_bearer_token_provider credential = get_bearer_token_provider(DefaultAzureCredential(), azure_token_provider_scope) return lambda *args, **kwargs: litellm.completion(model_name, *args, - azure_ad_token_provider=credential, **kwargs) + azure_ad_token_provider=credential, **kwargs) return lambda *args, **kwargs: litellm.completion(model_name, *args, **kwargs) @property @@ -187,6 +189,40 @@ def model(self): return lambda *args, **kwargs: self._model(*args, **kwargs) +class CustomLLM(AbstractModel): + """ + This is for Custom server's API endpoints that are OpenAI Compatible. + Such server includes LiteLLM proxy server. + """ + + def __init__(self, model: Union[str, None] = None, reset_freq: Union[int, None] = None, + cache=True) -> None: + if model is None: + model = os.environ.get('DEFAULT_LITELLM_CUSTOM_MODEL', 'gpt-4o') + base_url = os.environ.get('DEFAULT_LITELLM_CUSTOM_URL', 'http://xx.xx.xxx.xx:4000') + server_api_key = os.environ.get('DEFAULT_LITELLM_CUSTOM_API', + 'sk-Xhg...') # we assume the server has an API key + # the server API is set through `master_key` in `config.yaml` for LiteLLM proxy server + + self.model_name = model + self.cache = cache + factory = lambda: self._factory(base_url, server_api_key) # an LLM instance uses a fixed model + super().__init__(factory, reset_freq) + + @classmethod + def _factory(cls, base_url: str, server_api_key: str) -> openai.OpenAI: + return openai.OpenAI(base_url=base_url, api_key=server_api_key) + + @property + def model(self): + return lambda *args, **kwargs: self.create(*args, **kwargs) + # return lambda *args, **kwargs: self._model.chat.completions.create(*args, **kwargs) + + def create(self, **config: Any): + if 'model' not in config: + config['model'] = self.model_name + return self._model.chat.completions.create(**config) + # Set Default LLM class -LLM = LiteLLM # synonym \ No newline at end of file +LLM = LiteLLM # synonym
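A minimal usage sketch of the new `CustomLLM` backend (endpoint, key, and model values below are illustrative; in practice they are read from the `DEFAULT_LITELLM_CUSTOM_URL`, `DEFAULT_LITELLM_CUSTOM_API`, and `DEFAULT_LITELLM_CUSTOM_MODEL` environment variables shown above):

```python
import os
from opto.utils.llm import CustomLLM

# Point at an OpenAI-compatible server, e.g. a LiteLLM proxy (illustrative values).
os.environ["DEFAULT_LITELLM_CUSTOM_URL"] = "http://localhost:4000"
os.environ["DEFAULT_LITELLM_CUSTOM_API"] = "sk-..."   # the proxy's master/API key
os.environ["DEFAULT_LITELLM_CUSTOM_MODEL"] = "gpt-4o"

llm = CustomLLM()
response = llm(messages=[{"role": "user", "content": "Hello world."}])
print(response.choices[0].message.content)
```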