You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Use `upsert` to get existing dataset ID if no datapoints provided, given we can't `get` on path.
325
+
dataset["action"] = "add"
326
+
hl_dataset = client.datasets.upsert(
327
+
**dataset,
328
+
)
329
+
hl_dataset = client.datasets.get(
330
+
id=hl_dataset.id,
331
+
version_id=hl_dataset.version_id,
332
+
include_datapoints=True,
333
+
)
321
334
322
335
# Upsert the local Evaluators; other Evaluators are just referenced by `path` or `id`
323
336
local_evaluators: List[Evaluator] = []
@@ -329,7 +342,9 @@ def run_eval(
329
342
# TODO: support the case where `file` logs generated on Humanloop but Evaluator logs generated locally
330
343
if function_ is None:
331
344
raise ValueError(
332
-
f"Local Evaluators are only supported when generating Logs locally using your {type_}'s `callable`. Please provide a `callable` for your file in order to run Evaluators locally."
345
+
"Local Evaluators are only supported when generating Logs locally using your "
346
+
f"{type_}'s `callable`. Please provide a `callable` for your file in order "
Copy file name to clipboard. Expand all lines: src/humanloop/eval_utils/types.py
+6 -30. Lines changed: 6 additions & 30 deletions
Original file line number
Diff line number
Diff line change
@@ -46,23 +46,13 @@ class File(Identifiers):
46
46
"""The function being evaluated.
47
47
It will be called using your Dataset `inputs` as follows: `output = callable(**datapoint.inputs)`.
48
48
If `messages` are defined in your Dataset, then `output = callable(**datapoint.inputs, messages=datapoint.messages)`.
49
-
It should return a single string output. If not, you must provide a `custom_logger`.
49
+
It should return a string or json serializable output.
50
50
"""
51
-
custom_logger: NotRequired[Callable]
52
-
"""function that logs the output of your function to Humanloop, replacing the default logging.
53
-
If provided, it will be called as follows:
54
-
```
55
-
output = callable(**datapoint.inputs).
56
-
log = custom_logger(client, output)
57
-
```
58
-
Inside the custom_logger, you can use the Humanloop `client` to log the output of your function.
59
-
If not provided your pipeline must return a single string.
60
-
"""
61
-
is_decorated: NotRequired[Literal[True]]
51
+
is_decorated: NotRequired[bool]
62
52
63
53
64
54
class Dataset(Identifiers):
65
-
datapoints: Sequence[DatapointDict]
55
+
datapoints: NotRequired[Sequence[DatapointDict]]
66
56
"""The datapoints to map your function over to produce the outputs required by the evaluation."""
67
57
action: NotRequired[UpdateDatasetAction]
68
58
"""How to update the Dataset given the provided Datapoints;
@@ -72,26 +62,12 @@ class Dataset(Identifiers):
72
62
class Evaluator(Identifiers):
73
63
"""The Evaluator to provide judgments for this Evaluation."""
74
64
75
-
custom_logger: NotRequired[Callable]
76
-
77
-
"""The type of arguments the Evaluator expects - only required for local Evaluators."""
78
65
args_type: NotRequired[EvaluatorArgumentsType]
79
-
80
-
"""The type of return value the Evaluator produces - only required for local Evaluators."""
66
+
"""The type of arguments the Evaluator expects - only required for local Evaluators."""
81
67
return_type: NotRequired[EvaluatorReturnTypeEnum]
82
-
83
-
"""The function to run on the logs to produce the judgment - only required for local Evaluators."""
68
+
"""The type of return value the Evaluator produces - only required for local Evaluators."""
84
69
callable: NotRequired[Callable]
85
-
86
-
"""optional function that logs the output judgment from your Evaluator to Humanloop, if provided, it will be called as follows:
87
-
custom_logger: NotRequired[Callable]
88
-
```
89
-
judgment = callable(log_dict)
90
-
log = custom_logger(client, judgment)
91
-
```
92
-
Inside the custom_logger, you can use the Humanloop `client` to log the judgment to Humanloop.
93
-
If not provided your function must return a single string and by default the code will be used to inform the version of the external Evaluator on Humanloop.
94
-
"""
70
+
"""The function to run on the logs to produce the judgment - only required for local Evaluators."""
95
71
threshold: NotRequired[float]
96
72
"""The threshold to check the Evaluator against. If the aggregate value of the Evaluator is below this threshold, the check will fail."""
0 commit comments