Commit 3affc28

Fix runs stats logging (#29)
* use run_stats[1] to account for new run stats sorting (with latest first); formatting
* use run-level status
1 parent 7dd0d92 commit 3affc28

File tree

1 file changed: +13 -6 lines changed


src/humanloop/eval_utils.py

Lines changed: 13 additions & 6 deletions
@@ -233,16 +233,16 @@ def _run_eval(
         raise NotImplementedError(f"Unsupported File type: {type_}")
 
     # Upsert the Dataset
-    action = dataset.get("action", "set")  # set is the server default - None not allowed.
+    action = dataset.get(
+        "action", "set"
+    )  # set is the server default - None not allowed.
     if "datapoints" not in dataset:
         dataset["datapoints"] = []
         # Use `upsert` to get existing dataset ID if no datapoints provided, given we can't `get` on path.
         action = "add"
     hl_dataset = client.datasets.upsert(**dataset, action=action)
     hl_dataset = client.datasets.get(
-        id=hl_dataset.id,
-        version_id=hl_dataset.version_id,
-        include_datapoints=True
+        id=hl_dataset.id, version_id=hl_dataset.version_id, include_datapoints=True
     )
 
     # Upsert the local Evaluators; other Evaluators are just referenced by `path` or `id`
@@ -422,7 +422,11 @@ def process_datapoint(datapoint: Datapoint):
     while not complete:
         stats = client.evaluations.get_stats(id=evaluation.id)
         logger.info(f"\r{stats.progress}")
-        complete = stats.status == "completed"
+        run_stats = next(
+            (run_stats for run_stats in stats.run_stats if run_stats.run_id == run_id),
+            None,
+        )
+        complete = run_stats is not None and run_stats.status == "completed"
         if not complete:
             time.sleep(5)
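This hunk is the "use run-level status" half of the fix: completion is now judged by the status of the specific Run being polled rather than by the Evaluation as a whole, so the loop tracks exactly the Run this invocation started, independent of any other Runs on the Evaluation. A minimal sketch of the lookup pattern in isolation (the `RunStats` dataclass and the sample data below are illustrative stand-ins, not the Humanloop SDK types):

from dataclasses import dataclass

@dataclass
class RunStats:  # stand-in for the SDK's per-run stats object
    run_id: str
    status: str

# Stats for every Run in the Evaluation, sorted latest first
all_run_stats = [RunStats("run_2", "running"), RunStats("run_1", "completed")]

run_id = "run_1"
# next(..., None) yields the first matching entry, or None if the Run is absent
run_stats = next((rs for rs in all_run_stats if rs.run_id == run_id), None)
complete = run_stats is not None and run_stats.status == "completed"
print(complete)  # True

The `is not None` guard matters: if the Run has no stats entry yet, `next` falls back to the `None` default and the loop simply keeps polling instead of raising an `AttributeError`.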

@@ -615,7 +619,8 @@ def check_evaluation_improvement(
         return True, 0, 0
 
     previous_evaluator_stats_by_path = get_evaluator_stats_by_path(
-        stat=stats.run_stats[-2], evaluation=evaluation
+        stat=stats.run_stats[1],  # Latest Run is at index 0; previous Run is at index 1
+        evaluation=evaluation,
     )
     if (
         evaluator_path in latest_evaluator_stats_by_path
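This is the `run_stats[1]` half of the fix: run stats now come back sorted latest-first, so the previous Run moves from index `-2` (second-from-last under the old oldest-first order) to index `1`. A toy illustration of why the two indices select the same Run (the run IDs are made up):

# Hypothetical Runs in creation order, oldest first (the old sorting)
runs_oldest_first = ["run_1", "run_2", "run_3"]

# The new sorting returns the same Runs latest first
runs_latest_first = list(reversed(runs_oldest_first))

assert runs_oldest_first[-2] == "run_2"  # previous Run under the old order
assert runs_latest_first[0] == "run_3"   # latest Run under the new order
assert runs_latest_first[1] == "run_2"   # previous Run under the new order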
@@ -625,6 +630,8 @@
         previous_evaluator_stat = previous_evaluator_stats_by_path[evaluator_path]
         latest_score = get_score_from_evaluator_stat(stat=latest_evaluator_stat)
         previous_score = get_score_from_evaluator_stat(stat=previous_evaluator_stat)
+        if latest_score is None or previous_score is None:
+            raise ValueError(f"Could not find score for Evaluator {evaluator_path}.")
         diff = round(latest_score - previous_score, 2)
         if diff >= 0:
             logger.info(
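The new guard makes a missing score fail loudly and early: if either Run has no score for the Evaluator (so `get_score_from_evaluator_stat` presumably returns `None`), the subtraction on the next line would otherwise raise an opaque `TypeError`. A sketch of the behaviour in isolation (the path and score values are illustrative):

evaluator_path = "evaluators/exact-match"  # hypothetical Evaluator path
latest_score = 0.85
previous_score = None  # e.g. the previous Run produced no score for this Evaluator

if latest_score is None or previous_score is None:
    raise ValueError(f"Could not find score for Evaluator {evaluator_path}.")
diff = round(latest_score - previous_score, 2)  # only reached when both scores exist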
