Commit 907144e

Author: Andrei Bratu
Message: add tests for new eval run functionality
Parent: b98a342

File tree: 4 files changed (+485, -4 lines)


src/humanloop/evals/run.py

Lines changed: 3 additions & 3 deletions

@@ -454,13 +454,13 @@ def _resolve_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> tuple
             "You are trying to create a local Evaluation while requesting a specific File version by version ID or environment."
         )
 
-    if callable:
+    if version:
         # User responsibility to provide adequate file.version for upserting the file
         print_info(
             "Upserting a new File version based on `file.version`. Will use provided callable for generating Logs."
         )
         try:
-            return (_upsert_file(file_config=file_config, client=client), callable)
+            return (_upsert_file(file_config=file_config, client=client), callable or None)
         except Exception as e:
             raise HumanloopRuntimeError(f"Error upserting the File. Please ensure `file.version` is valid: {e}") from e
 
@@ -652,7 +652,7 @@ def _get_file_callable(file_config: FileEvalConfig) -> Optional[Callable]:
     function_ = typing.cast(Optional[Callable], file_config.pop("callable", None))
     if function_ is None:
         if type_ == "flow":
-            raise ValueError("You must provide a `callable` for your Flow `file` to run a local eval.")
+            raise HumanloopRuntimeError("You must provide a `callable` for your Flow `file` to run a local eval.")
         else:
            print_info(
                f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop.\n\n"

tests/integration/conftest.py

Lines changed: 137 additions & 1 deletion

@@ -1,8 +1,19 @@
 from contextlib import contextmanager, redirect_stdout
-from typing import ContextManager
+from dataclasses import dataclass
+import os
+from typing import Any, ContextManager, Generator
 import io
 from typing import TextIO
+import uuid
 import pytest
+import dotenv
+from humanloop.client import Humanloop
+
+
+@dataclass
+class TestIdentifiers:
+    file_id: str
+    file_path: str
 
 
 @pytest.fixture()
@@ -14,3 +25,128 @@ def _context_manager():
             yield f
 
     return _context_manager  # type: ignore [return-value]
+
+
+@pytest.fixture(scope="session")
+def openai_key() -> str:
+    dotenv.load_dotenv()
+    if not os.getenv("OPENAI_API_KEY"):
+        pytest.fail("OPENAI_API_KEY is not set for integration tests")
+    return os.getenv("OPENAI_API_KEY")  # type: ignore [return-value]
+
+
+@pytest.fixture(scope="session")
+def humanloop_test_client() -> Humanloop:
+    dotenv.load_dotenv()
+    if not os.getenv("HUMANLOOP_API_KEY"):
+        pytest.fail("HUMANLOOP_API_KEY is not set for integration tests")
+    return Humanloop(api_key=os.getenv("HUMANLOOP_API_KEY"))  # type: ignore [return-value]
+
+
+@pytest.fixture(scope="function")
+def sdk_test_dir(humanloop_test_client: Humanloop) -> Generator[str, None, None]:
+    path = f"SDK_INTEGRATION_TEST_{uuid.uuid4()}"
+    try:
+        response = humanloop_test_client.directories.create(path=path)
+        yield response.path
+        humanloop_test_client.directories.delete(id=response.id)
+    except Exception as e:
+        pytest.fail(f"Failed to create directory {path}: {e}")
+
+
+@pytest.fixture(scope="function")
+def test_prompt_config() -> dict[str, Any]:
+    return {
+        "provider": "openai",
+        "model": "gpt-4o-mini",
+        "temperature": 0.5,
+        "template": [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant. You must answer the user's question truthfully and at the level of a 5th grader.",
+            },
+            {
+                "role": "user",
+                "content": "{{question}}",
+            },
+        ],
+    }
+
+
+@pytest.fixture(scope="function")
+def eval_dataset(humanloop_test_client: Humanloop, sdk_test_dir: str) -> Generator[TestIdentifiers, None, None]:
+    dataset_path = f"{sdk_test_dir}/eval_dataset"
+    try:
+        response = humanloop_test_client.datasets.upsert(
+            path=dataset_path,
+            datapoints=[
+                {
+                    "inputs": {
+                        "question": "What is the capital of the France?",
+                    },
+                },
+                {
+                    "inputs": {
+                        "question": "What is the capital of the Germany?",
+                    },
+                },
+                {
+                    "inputs": {
+                        "question": "What is 2+2?",
+                    },
+                },
+            ],
+        )
+        yield TestIdentifiers(file_id=response.id, file_path=response.path)
+        humanloop_test_client.datasets.delete(id=response.id)
+    except Exception as e:
+        pytest.fail(f"Failed to create dataset {dataset_path}: {e}")
+
+
+@pytest.fixture(scope="function")
+def eval_prompt(
+    humanloop_test_client: Humanloop, sdk_test_dir: str, openai_key: str, test_prompt_config: dict[str, Any]
+) -> Generator[TestIdentifiers, None, None]:
+    prompt_path = f"{sdk_test_dir}/eval_prompt"
+    try:
+        response = humanloop_test_client.prompts.upsert(
+            path=prompt_path,
+            **test_prompt_config,
+        )
+        yield TestIdentifiers(file_id=response.id, file_path=response.path)
+        humanloop_test_client.prompts.delete(id=response.id)
+    except Exception as e:
+        pytest.fail(f"Failed to create prompt {prompt_path}: {e}")
+
+
+@pytest.fixture(scope="function")
+def output_not_null_evaluator(
+    humanloop_test_client: Humanloop, sdk_test_dir: str
+) -> Generator[TestIdentifiers, None, None]:
+    evaluator_path = f"{sdk_test_dir}/output_not_null_evaluator"
+    try:
+        response = humanloop_test_client.evaluators.upsert(
+            path=evaluator_path,
+            spec={
+                "arguments_type": "target_required",
+                "return_type": "boolean",
+                "code": """
+def output_not_null(log: dict) -> bool:
+    return log["output"] is not None
+""",
+                "evaluator_type": "python",
+            },
+        )
+        yield TestIdentifiers(file_id=response.id, file_path=response.path)
+        humanloop_test_client.evaluators.delete(id=response.id)
+    except Exception as e:
+        pytest.fail(f"Failed to create evaluator {evaluator_path}: {e}")
+
+
+@pytest.fixture(scope="function")
+def id_for_staging_environment(humanloop_test_client: Humanloop, eval_prompt: TestIdentifiers) -> str:
+    response = humanloop_test_client.prompts.list_environments(id=eval_prompt.file_id)
+    for environment in response:
+        if environment.name == "staging":
+            return environment.id
+    pytest.fail("Staging environment not found")

tests/integration/test_decorators.py

Whitespace-only changes.
