
Commit 287f87b

Author: Andrei Bratu (committed)

fix eval utility regressions

1 parent 2cb64f7 commit 287f87b

File tree

20 files changed: +3131 additions, -738 deletions

.gitignore

Lines changed: 2 additions & 0 deletions

@@ -5,3 +5,5 @@ poetry.toml
 .ruff_cache/
 .vscode
 .env
+tests/assets/*.jsonl
+tests/assets/*.parquet

poetry.lock

Lines changed: 1617 additions & 2 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 6 additions & 0 deletions
@@ -27,6 +27,12 @@ packages = [
     { include = "humanloop", from = "src"}
 ]
 
+[tool.poetry.group.dev.dependencies]
+python-dotenv = "^1.0.1"
+
+chromadb = "^0.6.3"
+pandas = "^2.2.3"
+pyarrow = "^19.0.0"
 [project.urls]
 Repository = 'https://github.com/humanloop/humanloop-python'

src/humanloop/client.py

Lines changed: 4 additions & 28 deletions
@@ -1,4 +1,3 @@
-from contextvars import ContextVar
 import os
 import typing
 from typing import List, Optional, Sequence
@@ -11,7 +10,6 @@
 
 from humanloop.core.client_wrapper import SyncClientWrapper
 from humanloop.utilities.types import DecoratorPromptKernelRequestParams
-from humanloop.eval_utils.context import EVALUATION_CONTEXT_VARIABLE_NAME, EvaluationContext
 
 from humanloop.eval_utils import log_with_evaluation_context, run_eval
 from humanloop.eval_utils.types import Dataset, Evaluator, EvaluatorCheck, File
@@ -38,10 +36,8 @@ def __init__(
         self,
         *,
         client_wrapper: SyncClientWrapper,
-        evaluation_context_variable: ContextVar[Optional[EvaluationContext]],
     ):
         super().__init__(client_wrapper=client_wrapper)
-        self._evaluation_context_variable = evaluation_context_variable
 
     def run(
         self,
@@ -70,7 +66,6 @@ def run(
             dataset=dataset,
             evaluators=evaluators,
             workers=workers,
-            evaluation_context_variable=self._evaluation_context_variable,
         )
 
 
@@ -118,31 +113,14 @@ def __init__(
             httpx_client=httpx_client,
         )
 
-        self.evaluation_context_variable: ContextVar[Optional[EvaluationContext]] = ContextVar(
-            EVALUATION_CONTEXT_VARIABLE_NAME
-        )
-
-        eval_client = ExtendedEvalsClient(
-            client_wrapper=self._client_wrapper,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
+        eval_client = ExtendedEvalsClient(client_wrapper=self._client_wrapper)
         eval_client.client = self
         self.evaluations = eval_client
         self.prompts = ExtendedPromptsClient(client_wrapper=self._client_wrapper)
 
         # Overload the .log method of the clients to be aware of Evaluation Context
-        # TODO: Overload the log for Evaluators and Tools once run_id is added
-        # to them.
-        self.prompts = log_with_evaluation_context(
-            client=self.prompts,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
-        # self.evaluators = log_with_evaluation_context(client=self.evaluators)
-        # self.tools = log_with_evaluation_context(client=self.tools)
-        self.flows = log_with_evaluation_context(
-            client=self.flows,
-            evaluation_context_variable=self.evaluation_context_variable,
-        )
+        self.prompts = log_with_evaluation_context(client=self.prompts)
+        self.flows = log_with_evaluation_context(client=self.flows)
 
         if opentelemetry_tracer_provider is not None:
             self._tracer_provider = opentelemetry_tracer_provider
@@ -157,9 +135,7 @@ def __init__(
         instrument_provider(provider=self._tracer_provider)
         self._tracer_provider.add_span_processor(
             HumanloopSpanProcessor(
-                exporter=HumanloopSpanExporter(
-                    client=self,
-                )
+                exporter=HumanloopSpanExporter(client=self),
            ),
        )

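Note on the change above: the client no longer constructs a ContextVar and threads it through ExtendedEvalsClient, run_eval, and the log overloads; the variable now lives in humanloop.eval_utils.context (next file). A minimal sketch of the shape the simplified log overload could take, assuming it wraps the client's .log method; the wrapper internals here are an illustration, not the library's actual implementation:

from humanloop.eval_utils.context import evaluation_context_set, get_evaluation_context

def log_with_evaluation_context(client):
    # Sketch: wrap the client's .log so it consults the module-level
    # EvaluationContext instead of a ContextVar passed through constructors.
    original_log = client.log

    def log(*args, **kwargs):
        if evaluation_context_set():
            context = get_evaluation_context()
            # ...attach per-datapoint evaluation state from `context`
            # to the log call (assumed behavior)...
        return original_log(*args, **kwargs)

    client.log = log
    return client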
src/humanloop/eval_utils/context.py

Lines changed: 45 additions & 2 deletions
@@ -1,7 +1,10 @@
-from typing import Callable, TypedDict
+from contextvars import ContextVar
+from dataclasses import dataclass
+from typing import Any, Callable
 
 
-class EvaluationContext(TypedDict):
+@dataclass
+class EvaluationContext:
     """Context Log to Humanloop.
 
     Per datapoint state that is set when an Evaluation is ran.
@@ -24,3 +27,43 @@ class EvaluationContext(TypedDict):
 
 
 EVALUATION_CONTEXT_VARIABLE_NAME = "__EVALUATION_CONTEXT"
+
+_EVALUATION_CONTEXT_VAR: ContextVar[EvaluationContext] = ContextVar(EVALUATION_CONTEXT_VARIABLE_NAME)
+
+_UnsafeEvaluationContextRead = RuntimeError("EvaluationContext not set in the current thread.")
+
+
+def set_evaluation_context(context: EvaluationContext):
+    _EVALUATION_CONTEXT_VAR.set(context)
+
+
+def get_evaluation_context() -> EvaluationContext:
+    try:
+        return _EVALUATION_CONTEXT_VAR.get()
+    except LookupError:
+        raise _UnsafeEvaluationContextRead
+
+
+def evaluation_context_set() -> bool:
+    try:
+        _EVALUATION_CONTEXT_VAR.get()
+        return True
+    except LookupError:
+        return False
+
+
+def log_belongs_to_evaluated_file(log_args: dict[str, Any]) -> bool:
+    try:
+        evaluation_context: EvaluationContext = _EVALUATION_CONTEXT_VAR.get()
+        return evaluation_context.file_id == log_args.get("id") or evaluation_context.path == log_args.get("path")
+    except LookupError:
+        # Not in an evaluation context
+        return False
+
+
+def is_evaluated_file(file_path) -> bool:
+    try:
+        evaluation_context = _EVALUATION_CONTEXT_VAR.get()
+        return evaluation_context.path == file_path
+    except LookupError:
+        raise _UnsafeEvaluationContextRead