diff --git a/ae/core/agents/browser_nav_agent.py b/ae/core/agents/browser_nav_agent.py index 58dbb76..bf6bb38 100644 --- a/ae/core/agents/browser_nav_agent.py +++ b/ae/core/agents/browser_nav_agent.py @@ -1,26 +1,28 @@ from datetime import datetime from string import Template +import os import autogen # type: ignore +import agentops from ae.core.memory.static_ltm import get_user_ltm from ae.core.prompts import LLM_PROMPTS from ae.core.skills.click_using_selector import click as click_element - -# from ae.core.skills.enter_text_and_click import enter_text_and_click from ae.core.skills.enter_text_using_selector import bulk_enter_text from ae.core.skills.enter_text_using_selector import entertext from ae.core.skills.get_dom_with_content_type import get_dom_with_content_type from ae.core.skills.get_url import geturl from ae.core.skills.open_url import openurl from ae.core.skills.pdf_text_extractor import extract_text_from_pdf - -#from ae.core.skills.pdf_text_extractor import extract_text_from_pdf from ae.core.skills.press_key_combination import press_key_combination +# Initialize AgentOps +agentops.init(os.getenv("AGENTOPS_API_KEY")) +@agentops.track_agent(name='BrowserNavAgent') class BrowserNavAgent: - def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent): # type: ignore + @agentops.record_function('init_browser_nav_agent') + def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent): """ Initialize the BrowserNavAgent and store the AssistantAgent instance as an instance attribute for external access. @@ -33,7 +35,7 @@ def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent): # user_ltm = self.__get_ltm() system_message = LLM_PROMPTS["BROWSER_AGENT_PROMPT"] system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}" - if user_ltm: #add the user LTM to the system prompt if it exists + if user_ltm: user_ltm = "\n" + user_ltm system_message = Template(system_message).substitute(basic_user_information=user_ltm) @@ -50,75 +52,51 @@ def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent): # ) self.__register_skills() - + @agentops.record_function('get_ltm') def __get_ltm(self): """ - Get the the long term memory of the user. + Get the long term memory of the user. returns: str | None - The user LTM or None if not found. """ return get_user_ltm() - + @agentops.record_function('register_skills') def __register_skills(self): """ Register all the skills that the agent can perform. """ - - # Register openurl skill for LLM by assistant agent - self.agent.register_for_llm(description=LLM_PROMPTS["OPEN_URL_PROMPT"])(openurl) - # Register openurl skill for execution by user_proxy_agent - self.browser_nav_executor.register_for_execution()(openurl) - - # Register enter_text_and_click skill for LLM by assistant agent - # self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_AND_CLICK_PROMPT"])(enter_text_and_click) - # Register enter_text_and_click skill for execution by user_proxy_agent - # self.browser_nav_executor.register_for_execution()(enter_text_and_click) - - # Register get_dom_with_content_type skill for LLM by assistant agent - self.agent.register_for_llm(description=LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"])(get_dom_with_content_type) - # Register get_dom_with_content_type skill for execution by user_proxy_agent - self.browser_nav_executor.register_for_execution()(get_dom_with_content_type) - - # Register click_element skill for LLM by assistant agent - self.agent.register_for_llm(description=LLM_PROMPTS["CLICK_PROMPT"])(click_element) - # Register click_element skill for execution by user_proxy_agent - self.browser_nav_executor.register_for_execution()(click_element) - - # Register geturl skill for LLM by assistant agent - self.agent.register_for_llm(description=LLM_PROMPTS["GET_URL_PROMPT"])(geturl) - # Register geturl skill for execution by user_proxy_agent - self.browser_nav_executor.register_for_execution()(geturl) - - # Register bulk_enter_text skill for LLM by assistant agent - self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])(bulk_enter_text) - # Register bulk_enter_text skill for execution by user_proxy_agent - self.browser_nav_executor.register_for_execution()(bulk_enter_text) - - # Register entertext skill for LLM by assistant agent - self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_PROMPT"])(entertext) - # Register entertext skill for execution by user_proxy_agent - self.browser_nav_executor.register_for_execution()(entertext) - - # Register entertext skill for LLM by assistant agent - self.agent.register_for_llm(description=LLM_PROMPTS["PRESS_KEY_COMBINATION_PROMPT"])(press_key_combination) - # Register entertext skill for execution by user_proxy_agent - self.browser_nav_executor.register_for_execution()(press_key_combination) - - self.agent.register_for_llm(description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"])(extract_text_from_pdf) - self.browser_nav_executor.register_for_execution()(extract_text_from_pdf) - - ''' - # Register reply function for printing messages - self.browser_nav_executor.register_reply( # type: ignore - [autogen.Agent, None], - reply_func=print_message_from_user_proxy, - config={"callback": None}, - ) - self.agent.register_reply( # type: ignore - [autogen.Agent, None], - reply_func=print_message_from_browser_agent, - config={"callback": None}, - ) - ''' - # print(f">>> Function map: {self.browser_nav_executor.function_map}") # type: ignore - # print(">>> Registered skills for BrowserNavAgent and BrowserNavExecutorAgent") + skills = [ + (openurl, LLM_PROMPTS["OPEN_URL_PROMPT"]), + (get_dom_with_content_type, LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"]), + (click_element, LLM_PROMPTS["CLICK_PROMPT"]), + (geturl, LLM_PROMPTS["GET_URL_PROMPT"]), + (bulk_enter_text, LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"]), + (entertext, LLM_PROMPTS["ENTER_TEXT_PROMPT"]), + (press_key_combination, LLM_PROMPTS["PRESS_KEY_COMBINATION_PROMPT"]), + (extract_text_from_pdf, LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"]), + ] + + for skill, prompt in skills: + self.__register_skill(skill, prompt) + + @agentops.record_function('register_skill') + def __register_skill(self, skill, prompt): + """ + Register a single skill for both the agent and the executor. + """ + self.agent.register_for_llm(description=prompt)(skill) + self.browser_nav_executor.register_for_execution()(skill) + +# Example usage +if __name__ == "__main__": + # This is just a placeholder. You'd typically create this with actual config and executor. + config_list = [{}] + browser_nav_executor = autogen.UserProxyAgent(name="executor") + + browser_nav_agent = BrowserNavAgent(config_list, browser_nav_executor) + + # Simulate some actions + browser_nav_agent.agent.generate_reply("Open https://www.example.com") + + # End the AgentOps session + agentops.end_session('Success') \ No newline at end of file diff --git a/ae/core/agents/high_level_planner_agent.py b/ae/core/agents/high_level_planner_agent.py index 2d440b9..e9bb7b0 100644 --- a/ae/core/agents/high_level_planner_agent.py +++ b/ae/core/agents/high_level_planner_agent.py @@ -1,17 +1,23 @@ from datetime import datetime from string import Template +import os import autogen # type: ignore from autogen import ConversableAgent # type: ignore +import agentops from ae.core.memory.static_ltm import get_user_ltm from ae.core.post_process_responses import final_reply_callback_planner_agent as print_message_as_planner # type: ignore from ae.core.prompts import LLM_PROMPTS from ae.core.skills.get_user_input import get_user_input +# Initialize AgentOps +agentops.init(os.getenv("AGENTOPS_API_KEY")) +@agentops.track_agent(name='PlannerAgent') class PlannerAgent: - def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: ignore + @agentops.record_function('init_planner_agent') + def __init__(self, config_list, user_proxy_agent: ConversableAgent): """ Initialize the PlannerAgent and store the AssistantAgent instance as an instance attribute for external access. @@ -20,14 +26,14 @@ def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: igno - config_list: A list of configuration parameters required for AssistantAgent. - user_proxy_agent: An instance of the UserProxyAgent class. """ - user_ltm = self.__get_ltm() system_message = LLM_PROMPTS["PLANNER_AGENT_PROMPT"] - if user_ltm: #add the user LTM to the system prompt if it exists + if user_ltm: # add the user LTM to the system prompt if it exists user_ltm = "\n" + user_ltm system_message = Template(system_message).substitute(basic_user_information=user_ltm) system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}" + self.agent = autogen.AssistantAgent( name="planner_agent", system_message=system_message, @@ -36,26 +42,53 @@ def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: igno "cache_seed": None, "temperature": 0.0, "top_p": 0.001, - "seed":12345 + "seed": 12345 }, ) + self.__register_skills(user_proxy_agent) + self.__register_reply() + + @agentops.record_function('get_ltm') + def __get_ltm(self): + """ + Get the long term memory of the user. + returns: str | None - The user LTM or None if not found. + """ + return get_user_ltm() + + @agentops.record_function('register_skills') + def __register_skills(self, user_proxy_agent: ConversableAgent): + """ + Register all the skills that the agent can perform. + """ # Register get_user_input skill for LLM by assistant agent self.agent.register_for_llm(description=LLM_PROMPTS["GET_USER_INPUT_PROMPT"])(get_user_input) # Register get_user_input skill for execution by user_proxy_agent user_proxy_agent.register_for_execution()(get_user_input) - self.agent.register_reply( # type: ignore + @agentops.record_function('register_reply') + def __register_reply(self): + """ + Register the reply function for the agent. + """ + self.agent.register_reply( # type: ignore [autogen.AssistantAgent, None], reply_func=print_message_as_planner, config={"callback": None}, ignore_async_in_sync_chat=True ) - def __get_ltm(self): - """ - Get the the long term memory of the user. - returns: str | None - The user LTM or None if not found. - """ - return get_user_ltm() - +# Example usage +if __name__ == "__main__": + # This is just a placeholder. You'd typically create this with actual config and user_proxy_agent. + config_list = [{}] + user_proxy_agent = ConversableAgent(name="user_proxy") + + planner_agent = PlannerAgent(config_list, user_proxy_agent) + + # Simulate some actions + planner_agent.agent.generate_reply("Plan a task") + + # End the AgentOps session + agentops.end_session('Success') \ No newline at end of file diff --git a/ae/core/autogen_wrapper.py b/ae/core/autogen_wrapper.py index 9b53864..be41571 100644 --- a/ae/core/autogen_wrapper.py +++ b/ae/core/autogen_wrapper.py @@ -10,8 +10,8 @@ import autogen # type: ignore import nest_asyncio # type: ignore import openai +import agentops # Add this import -#from autogen import Cache from dotenv import load_dotenv from ae.config import SOURCE_LOG_FOLDER_PATH @@ -48,7 +48,11 @@ def __init__(self, max_chat_round: int = 1000): self.config_list: list[dict[str, str]] | None = None self.chat_logs_dir: str = SOURCE_LOG_FOLDER_PATH + # Initialize AgentOps + agentops.init(os.getenv("AGENTOPS_API_KEY")) + @classmethod + @agentops.record_function('create_autogen_wrapper') async def create(cls, agents_needed: list[str] | None = None, max_chat_round: int = 1000): """ Create an instance of AutogenWrapper. @@ -162,7 +166,6 @@ def reflection_message(recipient, messages, sender, config): # type: ignore return self - def get_chat_logs_dir(self) -> str|None: """ Get the directory for saving chat logs. @@ -183,14 +186,14 @@ def set_chat_logs_dir(self, chat_logs_dir: str): """ self.chat_logs_dir = chat_logs_dir - + @agentops.record_function('save_chat_log') def __save_chat_log(self, chat_log: list[dict[str, Any]]): chat_logs_file = os.path.join(self.get_chat_logs_dir() or "", f"nested_chat_log_{str(time_ns())}.json") # Save the chat log to a file with open(chat_logs_file, "w") as file: json.dump(chat_log, file, indent=4) - + @agentops.record_function('initialize_agents') async def __initialize_agents(self, agents_needed: list[str]): """ Instantiate all agents with their appropriate prompts/skills. @@ -223,7 +226,7 @@ async def __initialize_agents(self, agents_needed: list[str]): raise ValueError(f"Unknown agent type: {agent_needed}") return agents_map - + @agentops.record_function('create_user_delegate_agent') async def __create_user_delegate_agent(self) -> autogen.ConversableAgent: """ Create a ConversableAgent instance. @@ -267,6 +270,7 @@ def is_planner_termination_message(x: dict[str, str])->bool: # type: ignore ) return task_delegate_agent + @agentops.record_function('create_browser_nav_executor_agent') def __create_browser_nav_executor_agent(self): """ Create a UserProxyAgent instance for executing browser control. @@ -297,6 +301,7 @@ def is_browser_executor_termination_message(x: dict[str, str])->bool: # type: ig print(">>> Created browser_nav_executor_agent:", browser_nav_executor_agent) return browser_nav_executor_agent + @agentops.record_function('create_browser_nav_agent') def __create_browser_nav_agent(self, user_proxy_agent: UserProxyAgent_SequentialFunctionExecution) -> autogen.ConversableAgent: """ Create a BrowserNavAgent instance. @@ -312,6 +317,7 @@ def __create_browser_nav_agent(self, user_proxy_agent: UserProxyAgent_Sequential #print(">>> browser agent tools:", json.dumps(browser_nav_agent.agent.llm_config.get("tools"), indent=2)) return browser_nav_agent.agent + @agentops.record_function('create_planner_agent') def __create_planner_agent(self, assistant_agent: autogen.ConversableAgent): """ Create a Planner Agent instance. This is mainly used for exploration at this point @@ -323,6 +329,7 @@ def __create_planner_agent(self, assistant_agent: autogen.ConversableAgent): planner_agent = PlannerAgent(self.config_list, assistant_agent) # type: ignore return planner_agent.agent + @agentops.record_function('process_command') async def process_command(self, command: str, current_url: str | None = None) -> autogen.ChatResult | None: """ Process a command by sending it to one or more agents. @@ -341,15 +348,13 @@ async def process_command(self, command: str, current_url: str | None = None) -> prompt = Template(LLM_PROMPTS["COMMAND_EXECUTION_PROMPT"]).substitute(command=command, current_url_prompt_segment=current_url_prompt_segment) logger.info(f"Prompt for command: {prompt}") - #with Cache.disk() as cache: try: if self.agents_map is None: raise ValueError("Agents map is not initialized.") - result=await self.agents_map["user"].a_initiate_chat( # type: ignore + result = await self.agents_map["user"].a_initiate_chat( # type: ignore self.agents_map["planner_agent"], # self.manager # type: ignore max_turns=self.number_of_rounds, - #clear_history=True, message=prompt, silent=False, cache=None, @@ -362,4 +367,16 @@ async def process_command(self, command: str, current_url: str | None = None) -> except openai.BadRequestError as bre: logger.error(f"Unable to process command: \"{command}\". {bre}") traceback.print_exc() + finally: + agentops.end_session('Success') # Or 'Fail' based on the outcome + + # Additional methods or code that might be part of the AutogenWrapper class can be added here + +# Example usage of the AutogenWrapper class +async def main(): + wrapper = await AutogenWrapper.create() + result = await wrapper.process_command("Your command here") + print(result) +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/ae/core/post_process_responses.py b/ae/core/post_process_responses.py index 1a7e492..9ee0fae 100644 --- a/ae/core/post_process_responses.py +++ b/ae/core/post_process_responses.py @@ -1,13 +1,20 @@ import asyncio from typing import Any +import os import autogen # type: ignore +import agentops from ae.core.playwright_manager import PlaywrightManager from ae.utils.logger import logger from ae.utils.ui_messagetype import MessageType +# Initialize AgentOps +agentops.init(os.getenv("AGENTOPS_API_KEY")) +last_agent_response = "" + +@agentops.record_function('final_reply_callback_user_proxy') def final_reply_callback_user_proxy(recipient: autogen.ConversableAgent, messages: list[dict[str, Any]], sender: autogen.Agent, config: dict[str, Any]): """ Callback function that is called each time the user proxy agent receives a message. @@ -36,8 +43,25 @@ def final_reply_callback_user_proxy(recipient: autogen.ConversableAgent, message return False, None -def final_reply_callback_planner_agent(message:str, message_type:MessageType = MessageType.STEP): # type: ignore - browser_manager = PlaywrightManager(browser_type='chromium', headless=False) - loop = asyncio.get_event_loop() - loop.run_until_complete(browser_manager.notify_user(message, message_type=message_type)) - return False, None # required to ensure the agent communication flow continues +@agentops.record_function('final_reply_callback_planner_agent') +def final_reply_callback_planner_agent(message: str, message_type: MessageType = MessageType.STEP): + browser_manager = PlaywrightManager(browser_type='chromium', headless=False) + loop = asyncio.get_event_loop() + loop.run_until_complete(browser_manager.notify_user(message, message_type=message_type)) + return False, None # required to ensure the agent communication flow continues + +# Example usage (you may want to remove or modify this based on your needs) +async def main(): + # Simulate a message for testing + test_message = "Test message with ##TERMINATE##" + result = final_reply_callback_user_proxy(None, [{"content": test_message}], None, {}) + print(f"User proxy result: {result}") + + # Simulate a planner agent message + await final_reply_callback_planner_agent("Test planner message", MessageType.STEP) + + # End the AgentOps session + agentops.end_session('Success') + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file