
Commit 287f87b

Author: Andrei Bratu (committed)

fix eval utility regressions

1 parent 2cb64f7 commit 287f87b

File tree

20 files changed: +3131 additions, -738 deletions

.gitignore

Lines changed: 2 additions & 0 deletions

@@ -5,3 +5,5 @@ poetry.toml
 .ruff_cache/
 .vscode
 .env
+tests/assets/*.jsonl
+tests/assets/*.parquet

poetry.lock

Lines changed: 1617 additions & 2 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 6 additions & 0 deletions
@@ -27,6 +27,12 @@ packages = [
     { include = "humanloop", from = "src"}
 ]
 
+[tool.poetry.group.dev.dependencies]
+python-dotenv = "^1.0.1"
+
+chromadb = "^0.6.3"
+pandas = "^2.2.3"
+pyarrow = "^19.0.0"
 [project.urls]
 Repository = 'https://github.com/humanloop/humanloop-python'

src/humanloop/client.py

Lines changed: 4 additions & 28 deletions
@@ -1,4 +1,3 @@
-from contextvars import ContextVar
 import os
 import typing
 from typing import List, Optional, Sequence
@@ -11,7 +10,6 @@
 
 from humanloop.core.client_wrapper import SyncClientWrapper
 from humanloop.utilities.types import DecoratorPromptKernelRequestParams
-from humanloop.eval_utils.context import EVALUATION_CONTEXT_VARIABLE_NAME, EvaluationContext
 
 from humanloop.eval_utils import log_with_evaluation_context, run_eval
 from humanloop.eval_utils.types import Dataset, Evaluator, EvaluatorCheck, File
@@ -38,10 +36,8 @@ def __init__(
         self,
         *,
         client_wrapper: SyncClientWrapper,
-        evaluation_context_variable: ContextVar[Optional[EvaluationContext]],
     ):
         super().__init__(client_wrapper=client_wrapper)
-        self._evaluation_context_variable = evaluation_context_variable
 
     def run(
         self,
@@ -70,7 +66,6 @@ def run(
             dataset=dataset,
             evaluators=evaluators,
             workers=workers,
-            evaluation_context_variable=self._evaluation_context_variable,
         )
 
 
@@ -118,31 +113,14 @@ def __init__(
             httpx_client=httpx_client,
         )
 
-        self.evaluation_context_variable: ContextVar[Optional[EvaluationContext]] = ContextVar(
-            EVALUATION_CONTEXT_VARIABLE_NAME
-        )
-
-        eval_client = ExtendedEvalsClient(
-            client_wrapper=self._client_wrapper,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
+        eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper)
         eval_client.client = self
         self.evaluations = eval_client
         self.prompts = ExtendedPromptsClient(client_wrapper=self._client_wrapper)
 
         # Overload the .log method of the clients to be aware of Evaluation Context
-        # TODO: Overload the log for Evaluators and Tools once run_id is added
-        # to them.
-        self.prompts = log_with_evaluation_context(
-            client=self.prompts,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
-        # self.evaluators = log_with_evaluation_context(client=self.evaluators)
-        # self.tools = log_with_evaluation_context(client=self.tools)
-        self.flows = log_with_evaluation_context(
-            client=self.flows,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
+        self.prompts = log_with_evaluation_context(client=self.prompts)
+        self.flows = log_with_evaluation_context(client=self.flows)
 
         if opentelemetry_tracer_provider is not None:
             self._tracer_provider = opentelemetry_tracer_provider
@@ -157,9 +135,7 @@ def __init__(
         instrument_provider(provider=self._tracer_provider)
         self._tracer_provider.add_span_processor(
             HumanloopSpanProcessor(
-                exporter=HumanloopSpanExporter(
-                    client=self,
-                )
+                exporter=HumanloopSpanExporter(client=self),
            ),
        )

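Note on the change above: the client no longer constructs a ContextVar and threads it through ExtendedEvalsClient, run_eval, and the log overloads; the variable now lives in humanloop.eval_utils.context (next file). A minimal sketch of the shape the simplified log overload could take, assuming it wraps the client's .log method; the wrapper internals here are an illustration, not the library's actual implementation:

from humanloop.eval_utils.context import evaluation_context_set, get_evaluation_context

def log_with_evaluation_context(client):
    # Sketch: wrap the client's .log so it consults the module-level
    # EvaluationContext instead of a ContextVar passed through constructors.
    original_log = client.log

    def log(*args, **kwargs):
        if evaluation_context_set():
            context = get_evaluation_context()
            # ...attach per-datapoint evaluation state from `context`
            # to the log call (assumed behavior)...
        return original_log(*args, **kwargs)

    client.log = log
    return client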
src/humanloop/eval_utils/context.py

Lines changed: 45 additions & 2 deletions
@@ -1,7 +1,10 @@
-from typing import Callable, TypedDict
+from contextvars import ContextVar
+from dataclasses import dataclass
+from typing import Any, Callable
 
 
-class EvaluationContext(TypedDict):
+@dataclass
+class EvaluationContext:
     """Context Log to Humanloop.
 
     Per datapoint state that is set when an Evaluation is ran.
@@ -24,3 +27,43 @@ class EvaluationContext(TypedDict):
 
 
 EVALUATION_CONTEXT_VARIABLE_NAME = "__EVALUATION_CONTEXT"
+
+_EVALUATION_CONTEXT_VAR: ContextVar[EvaluationContext] = ContextVar(EVALUATION_CONTEXT_VARIABLE_NAME)
+
+_UnsafeEvaluationContextRead = RuntimeError("EvaluationContext not set in the current thread.")
+
+
+def set_evaluation_context(context: EvaluationContext):
+    _EVALUATION_CONTEXT_VAR.set(context)
+
+
+def get_evaluation_context() -> EvaluationContext:
+    try:
+        return _EVALUATION_CONTEXT_VAR.get()
+    except LookupError:
+        raise _UnsafeEvaluationContextRead
+
+
+def evaluation_context_set() -> bool:
+    try:
+        _EVALUATION_CONTEXT_VAR.get()
+        return True
+    except LookupError:
+        return False
+
+
+def log_belongs_to_evaluated_file(log_args: dict[str, Any]) -> bool:
+    try:
+        evaluation_context: EvaluationContext = _EVALUATION_CONTEXT_VAR.get()
+        return evaluation_context.file_id == log_args.get("id") or evaluation_context.path == log_args.get("path")
+    except LookupError:
+        # Not in an evaluation context
+        return False
+
+
+def is_evaluated_file(file_path) -> bool:
+    try:
+        evaluation_context = _EVALUATION_CONTEXT_VAR.get()
+        return evaluation_context.path == file_path
+    except LookupError:
+        raise _UnsafeEvaluationContextRead