@@ -233,16 +233,16 @@ def _run_eval(
233233 raise NotImplementedError (f"Unsupported File type: { type_ } " )
234234
235235 # Upsert the Dataset
236- action = dataset .get ("action" , "set" ) # set is the server default - None not allowed.
236+ action = dataset .get (
237+ "action" , "set"
238+ ) # set is the server default - None not allowed.
237239 if "datapoints" not in dataset :
238240 dataset ["datapoints" ] = []
239241 # Use `upsert` to get existing dataset ID if no datapoints provided, given we can't `get` on path.
240242 action = "add"
241243 hl_dataset = client .datasets .upsert (** dataset , action = action )
242244 hl_dataset = client .datasets .get (
243- id = hl_dataset .id ,
244- version_id = hl_dataset .version_id ,
245- include_datapoints = True
245+ id = hl_dataset .id , version_id = hl_dataset .version_id , include_datapoints = True
246246 )
247247
248248 # Upsert the local Evaluators; other Evaluators are just referenced by `path` or `id`
@@ -422,7 +422,11 @@ def process_datapoint(datapoint: Datapoint):
422422 while not complete :
423423 stats = client .evaluations .get_stats (id = evaluation .id )
424424 logger .info (f"\r { stats .progress } " )
425- complete = stats .status == "completed"
425+ run_stats = next (
426+ (run_stats for run_stats in stats .run_stats if run_stats .run_id == run_id ),
427+ None ,
428+ )
429+ complete = run_stats is not None and run_stats .status == "completed"
426430 if not complete :
427431 time .sleep (5 )
428432
@@ -615,7 +619,8 @@ def check_evaluation_improvement(
615619 return True , 0 , 0
616620
617621 previous_evaluator_stats_by_path = get_evaluator_stats_by_path (
618- stat = stats .run_stats [- 2 ], evaluation = evaluation
622+ stat = stats .run_stats [1 ], # Latest Run is at index 0; previous Run is at index 1
623+ evaluation = evaluation ,
619624 )
620625 if (
621626 evaluator_path in latest_evaluator_stats_by_path
@@ -625,6 +630,8 @@ def check_evaluation_improvement(
625630 previous_evaluator_stat = previous_evaluator_stats_by_path [evaluator_path ]
626631 latest_score = get_score_from_evaluator_stat (stat = latest_evaluator_stat )
627632 previous_score = get_score_from_evaluator_stat (stat = previous_evaluator_stat )
633+ if latest_score is None or previous_score is None :
634+ raise ValueError (f"Could not find score for Evaluator { evaluator_path } ." )
628635 diff = round (latest_score - previous_score , 2 )
629636 if diff >= 0 :
630637 logger .info (
0 commit comments