Commit 907144e

Author: Andrei Bratu
Message: add tests for new eval run functionality
Parent: b98a342

File tree: 4 files changed (+485, -4 lines)


src/humanloop/evals/run.py

Lines changed: 3 additions & 3 deletions

@@ -454,13 +454,13 @@ def _resolve_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> tuple
             "You are trying to create a local Evaluation while requesting a specific File version by version ID or environment."
         )
 
-    if callable:
+    if version:
         # User responsibility to provide adequate file.version for upserting the file
         print_info(
             "Upserting a new File version based on `file.version`. Will use provided callable for generating Logs."
         )
         try:
-            return (_upsert_file(file_config=file_config, client=client), callable)
+            return (_upsert_file(file_config=file_config, client=client), callable or None)
         except Exception as e:
             raise HumanloopRuntimeError(f"Error upserting the File. Please ensure `file.version` is valid: {e}") from e
 
@@ -652,7 +652,7 @@ def _get_file_callable(file_config: FileEvalConfig) -> Optional[Callable]:
     function_ = typing.cast(Optional[Callable], file_config.pop("callable", None))
     if function_ is None:
         if type_ == "flow":
-            raise ValueError("You must provide a `callable` for your Flow `file` to run a local eval.")
+            raise HumanloopRuntimeError("You must provide a `callable` for your Flow `file` to run a local eval.")
         else:
            print_info(
                f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop.\n\n"

tests/integration/conftest.py

Lines changed: 137 additions & 1 deletion

@@ -1,8 +1,19 @@
 from contextlib import contextmanager, redirect_stdout
-from typing import ContextManager
+from dataclasses import dataclass
+import os
+from typing import Any, ContextManager, Generator
 import io
 from typing import TextIO
+import uuid
 import pytest
+import dotenv
+from humanloop.client import Humanloop
+
+
+@dataclass
+class TestIdentifiers:
+    file_id: str
+    file_path: str
 
 
 @pytest.fixture()
@@ -14,3 +25,128 @@ def _context_manager():
             yield f
 
     return _context_manager  # type: ignore [return-value]
+
+
+@pytest.fixture(scope="session")
+def openai_key() -> str:
+    dotenv.load_dotenv()
+    if not os.getenv("OPENAI_API_KEY"):
+        pytest.fail("OPENAI_API_KEY is not set for integration tests")
+    return os.getenv("OPENAI_API_KEY")  # type: ignore [return-value]
+
+
+@pytest.fixture(scope="session")
+def humanloop_test_client() -> Humanloop:
+    dotenv.load_dotenv()
+    if not os.getenv("HUMANLOOP_API_KEY"):
+        pytest.fail("HUMANLOOP_API_KEY is not set for integration tests")
+    return Humanloop(api_key=os.getenv("HUMANLOOP_API_KEY"))  # type: ignore [return-value]
+
+
+@pytest.fixture(scope="function")
+def sdk_test_dir(humanloop_test_client: Humanloop) -> Generator[str, None, None]:
+    path = f"SDK_INTEGRATION_TEST_{uuid.uuid4()}"
+    try:
+        response = humanloop_test_client.directories.create(path=path)
+        yield response.path
+        humanloop_test_client.directories.delete(id=response.id)
+    except Exception as e:
+        pytest.fail(f"Failed to create directory {path}: {e}")
+
+
+@pytest.fixture(scope="function")
+def test_prompt_config() -> dict[str, Any]:
+    return {
+        "provider": "openai",
+        "model": "gpt-4o-mini",
+        "temperature": 0.5,
+        "template": [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant. You must answer the user's question truthfully and at the level of a 5th grader.",
+            },
+            {
+                "role": "user",
+                "content": "{{question}}",
+            },
+        ],
+    }
+
+
+@pytest.fixture(scope="function")
+def eval_dataset(humanloop_test_client: Humanloop, sdk_test_dir: str) -> Generator[TestIdentifiers, None, None]:
+    dataset_path = f"{sdk_test_dir}/eval_dataset"
+    try:
+        response = humanloop_test_client.datasets.upsert(
+            path=dataset_path,
+            datapoints=[
+                {
+                    "inputs": {
+                        "question": "What is the capital of the France?",
+                    },
+                },
+                {
+                    "inputs": {
+                        "question": "What is the capital of the Germany?",
+                    },
+                },
+                {
+                    "inputs": {
+                        "question": "What is 2+2?",
+                    },
+                },
+            ],
+        )
+        yield TestIdentifiers(file_id=response.id, file_path=response.path)
+        humanloop_test_client.datasets.delete(id=response.id)
+    except Exception as e:
+        pytest.fail(f"Failed to create dataset {dataset_path}: {e}")
+
+
+@pytest.fixture(scope="function")
+def eval_prompt(
+    humanloop_test_client: Humanloop, sdk_test_dir: str, openai_key: str, test_prompt_config: dict[str, Any]
+) -> Generator[TestIdentifiers, None, None]:
+    prompt_path = f"{sdk_test_dir}/eval_prompt"
+    try:
+        response = humanloop_test_client.prompts.upsert(
+            path=prompt_path,
+            **test_prompt_config,
+        )
+        yield TestIdentifiers(file_id=response.id, file_path=response.path)
+        humanloop_test_client.prompts.delete(id=response.id)
+    except Exception as e:
+        pytest.fail(f"Failed to create prompt {prompt_path}: {e}")
+
+
+@pytest.fixture(scope="function")
+def output_not_null_evaluator(
+    humanloop_test_client: Humanloop, sdk_test_dir: str
+) -> Generator[TestIdentifiers, None, None]:
+    evaluator_path = f"{sdk_test_dir}/output_not_null_evaluator"
+    try:
+        response = humanloop_test_client.evaluators.upsert(
+            path=evaluator_path,
+            spec={
+                "arguments_type": "target_required",
+                "return_type": "boolean",
+                "code": """
+def output_not_null(log: dict) -> bool:
+    return log["output"] is not None
+""",
+                "evaluator_type": "python",
+            },
+        )
+        yield TestIdentifiers(file_id=response.id, file_path=response.path)
+        humanloop_test_client.evaluators.delete(id=response.id)
+    except Exception as e:
+        pytest.fail(f"Failed to create evaluator {evaluator_path}: {e}")
+
+
+@pytest.fixture(scope="function")
+def id_for_staging_environment(humanloop_test_client: Humanloop, eval_prompt: TestIdentifiers) -> str:
+    response = humanloop_test_client.prompts.list_environments(id=eval_prompt.file_id)
+    for environment in response:
+        if environment.name == "staging":
+            return environment.id
+    pytest.fail("Staging environment not found")

tests/integration/test_decorators.py

Whitespace-only changes.
