32 changes: 24 additions & 8 deletions cli/olaf/src/olaf/cli/run_cli.py
@@ -23,7 +23,9 @@
PACKAGE_DATASETS_DIR = PACKAGE_ROOT / "datasets"
PACKAGE_AUTO_METRICS_DIR = PACKAGE_ROOT / "auto_metrics"

# Define static in-container paths for primary and reference datasets
SANDBOX_DATA_PATH = "/workspace/dataset.h5ad"
SANDBOX_REF_DATA_PATH = "/workspace/reference.h5ad"


def _prompt_for_file(
@@ -103,6 +105,7 @@ def __init__(self):
        self.llm_client: object | None = None
        self.initial_history: List[dict] | None = None
        self.dataset_path: Path | None = None
        self.reference_dataset_path: Path | None = None
        self.resources: List[Tuple[Path, str]] = []
        self.sandbox_details: dict = {}

@@ -111,7 +114,8 @@ def main_run_callback(
    ctx: typer.Context,
    blueprint: Path = typer.Option(None, "--blueprint", "-bp", help="Path to the agent system JSON blueprint.", readable=True),
    driver_agent: str = typer.Option(None, "--driver-agent", "-d", help="Name of the agent to start with."),
    dataset: Path = typer.Option(None, "--dataset", "-ds", help="Path to the dataset file (.h5ad).", readable=True),
    dataset: Path = typer.Option(None, "--dataset", "-ds", help="Path to the primary dataset file (.h5ad).", readable=True),
    reference_dataset: Path = typer.Option(None, "--reference-dataset", "-ref", help="Path to an optional reference dataset file (.h5ad).", readable=True),
    resources_dir: Path = typer.Option(None, "--resources", help="Path to a directory of resource files to mount.", exists=True, file_okay=False),
    llm_backend: str = typer.Option(None, "--llm", help="LLM backend to use: 'chatgpt' or 'ollama'."),
    ollama_host: str = typer.Option("http://localhost:11434", "--ollama-host", help="Base URL for Ollama backend."),
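
Note: with the new flag, the reference dataset can also be supplied non-interactively, e.g. --dataset data.h5ad --reference-dataset ref.h5ad. A minimal sketch of exercising it in a test, assuming the Typer app object is exported from this module as app (the export name is not shown in this diff, and the paths are placeholders):

from typer.testing import CliRunner
from olaf.cli.run_cli import app  # hypothetical export name

runner = CliRunner()
# Supplying both flags should skip the interactive dataset prompts entirely.
result = runner.invoke(app, ["--dataset", "data.h5ad", "--reference-dataset", "ref.h5ad"])
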
@@ -135,9 +139,14 @@ def main_run_callback(
    app_context.roster_instructions = app_context.agent_system.get_instructions()

    if dataset is None:
        dataset = _prompt_for_file(console, get_datasets_dir(), PACKAGE_DATASETS_DIR, ".h5ad", "Dataset")
        dataset = _prompt_for_file(console, get_datasets_dir(), PACKAGE_DATASETS_DIR, ".h5ad", "Primary Dataset")
    app_context.dataset_path = dataset

    if reference_dataset is None:
        if Prompt.ask("Do you want to add a reference dataset?", choices=["y", "n"], default="n").lower() == 'y':
            reference_dataset = _prompt_for_file(console, get_datasets_dir(), PACKAGE_DATASETS_DIR, ".h5ad", "Reference Dataset")
    app_context.reference_dataset_path = reference_dataset

    if sandbox is None:
        sandbox = Prompt.ask("Choose a sandbox backend", choices=["docker", "singularity"], default="docker")

@@ -148,12 +157,10 @@ def main_run_callback(
if sandbox == "docker":
manager_class, handle, copy_cmd, exec_endpoint, status_endpoint = init_docker(script_dir, subprocess, console, force_refresh=force_refresh)
elif sandbox == "singularity":
# This now correctly maps to the 'singularity-exec' implementation
manager_class, handle, copy_cmd, exec_endpoint, status_endpoint = init_singularity_exec(script_dir, SANDBOX_DATA_PATH, subprocess, console, force_refresh=force_refresh)
else:
raise typer.BadParameter(f"Unknown sandbox type '{sandbox}'. Supported: 'docker', 'singularity'.")
app_context.sandbox_manager = manager_class()
# This check now correctly identifies the exec-style singularity backend
app_context.sandbox_details = {"handle": handle, "copy_cmd": copy_cmd, "is_exec_mode": sandbox == "singularity"}

if llm_backend is None:
@@ -172,8 +179,14 @@ def main_run_callback(
raise typer.BadParameter(f"Unknown LLM backend '{llm_backend}'.")

app_context.resources = collect_resources(console, resources_dir) if resources_dir else []

app_context.analysis_context = textwrap.dedent(f"Dataset path: **{SANDBOX_DATA_PATH}**\n...")
if app_context.reference_dataset_path:
app_context.resources.append((app_context.reference_dataset_path, SANDBOX_REF_DATA_PATH))

# Build the analysis context string, including the reference dataset if it exists
analysis_context_str = f"Primary dataset path: **{SANDBOX_DATA_PATH}**\n"
if app_context.reference_dataset_path:
analysis_context_str += f"Reference dataset path: **{SANDBOX_REF_DATA_PATH}**\n"
app_context.analysis_context = textwrap.dedent(analysis_context_str)

driver = app_context.agent_system.get_agent(driver_agent)
system_prompt = (app_context.roster_instructions + "\n\n" + driver.get_full_prompt(app_context.agent_system.global_policy) + "\n\n" + app_context.analysis_context)
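
Note: since analysis_context_str is now assembled from unindented f-strings, textwrap.dedent is a no-op here; it is harmless, but a direct assignment would read slightly cleaner. A behavior-equivalent sketch:

# dedent() only strips common leading whitespace, and these lines have none,
# so the assignment could simply be:
app_context.analysis_context = analysis_context_str
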
@@ -189,15 +202,18 @@ def _setup_and_run_session(context: AppContext, history: list, is_auto: bool, ma
    details = context.sandbox_details
    dataset_path = cast(Path, context.dataset_path)
    if details["is_exec_mode"] and hasattr(sandbox_manager, "set_data"):
        sandbox_manager.set_data(dataset_path, context.resources)

        # Pass all resources, including the reference dataset, for bind mounting
        all_resources = [(dataset_path, SANDBOX_DATA_PATH)] + context.resources
        sandbox_manager.set_data(all_resources)
    if not sandbox_manager.start_container():
        console.print("[bold red]Failed to start sandbox container.[/bold red]")
        raise typer.Exit(1)

    try:
        if not details["is_exec_mode"]:
            # Copy primary dataset
            details["copy_cmd"](str(dataset_path), f"{details['handle']}:{SANDBOX_DATA_PATH}")
            # Copy all other resources, including the reference dataset
            for hp, cp in context.resources:
                details["copy_cmd"](str(hp), f"{details['handle']}:{cp}")

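Note: set_data's contract changes here from (dataset_path, resources) to a single flat list of (host_path, container_path) pairs. The exec-mode manager's side of that contract is not shown in this diff; a minimal sketch of what it presumably looks like, with every name other than set_data assumed:

from pathlib import Path

class SingularityExecManager:  # hypothetical class name
    def set_data(self, resources: list[tuple[Path, str]]) -> None:
        # Record every (host_path, container_path) pair; exec mode bind-mounts
        # these at container start instead of copying files in afterwards.
        self._binds = [(str(host), container) for host, container in resources]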