@@ -56,7 +56,7 @@ client.prompts.log(
     messages=[{"role": "user", "content": "What really happened at Roswell?"}],
     inputs={"person": "Trump"},
     created_at=datetime.datetime.fromisoformat(
-        "2024-07-18 23:29:35.178000+00:00",
+        "2024-07-19 00:29:35.178000+00:00",
     ),
     provider_latency=6.5931549072265625,
     output_message={
@@ -5164,6 +5164,14 @@ client.evaluators.log(
 <dl>
 <dd>

+**marked_completed:** `typing.Optional[bool]` — Whether the Log has been manually marked as completed by a user.
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
 **spec:** `typing.Optional[CreateEvaluatorLogRequestSpecParams]`

 </dd>
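
For context on the new `marked_completed` flag documented above, a minimal sketch of logging an Evaluator judgment with it set; the `parent_id` and `judgment` argument names are assumptions not shown in this diff.

```python
from humanloop import Humanloop

client = Humanloop(
    api_key="YOUR_API_KEY",
)
# Log an external/human Evaluator judgment and mark it as completed.
# `parent_id` and `judgment` are assumed parameter names, for illustration only.
client.evaluators.log(
    parent_id="log_id",
    judgment=True,
    marked_completed=True,
)
```
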
@@ -5362,7 +5370,7 @@ client.evaluators.upsert(
 <dl>
 <dd>

-**spec:** `SrcExternalAppModelsV5EvaluatorsEvaluatorRequestSpecParams`
+**spec:** `EvaluatorRequestSpecParams`

 </dd>
 </dl>
@@ -6225,10 +6233,10 @@ client.flows.log(
     output="The patient is likely experiencing a myocardial infarction. Immediate medical attention is required.",
     trace_status="incomplete",
     start_time=datetime.datetime.fromisoformat(
-        "2024-07-08 21:40:35+00:00",
+        "2024-07-08 22:40:35+00:00",
     ),
     end_time=datetime.datetime.fromisoformat(
-        "2024-07-08 21:40:39+00:00",
+        "2024-07-08 22:40:39+00:00",
     ),
 )

@@ -8075,9 +8083,7 @@ client.files.list()
 <dl>
 <dd>

-List all Evaluations for the specified `file_id`.
-
-Retrieve a list of Evaluations that evaluate versions of the specified File.
+Retrieve a list of Evaluations for the specified File.
 </dd>
 </dl>
 </dd>
@@ -8171,9 +8177,8 @@ for page in response.iter_pages():

 Create an Evaluation.

-Create an Evaluation by specifying the File to evaluate, and a name
+Create a new Evaluation by specifying the File to evaluate, and a name
 for the Evaluation.
-
 You can then add Runs to this Evaluation using the `POST /evaluations/{id}/runs` endpoint.
 </dd>
 </dl>
@@ -8195,7 +8200,7 @@ client = Humanloop(
     api_key="YOUR_API_KEY",
 )
 client.evaluations.create(
-    evaluators=[{}],
+    evaluators=[{"version_id": "version_id"}],
 )

 ```
@@ -8212,7 +8217,7 @@ client.evaluations.create(
 <dl>
 <dd>

-**evaluators:** `typing.Sequence[EvaluationsRequestParams]` — The Evaluators used to evaluate.
+**evaluators:** `typing.Sequence[CreateEvaluationRequestEvaluatorsItemParams]` — The Evaluators used to evaluate.

 </dd>
 </dl>
@@ -8262,8 +8267,7 @@ client.evaluations.create(

 Add Evaluators to an Evaluation.

-Add new Evaluators to an Evaluation. The Evaluators will be run on the Logs
-generated for the Evaluation.
+The Evaluators will be run on the Logs generated for the Evaluation.
 </dd>
 </dl>
 </dd>
@@ -8285,7 +8289,7 @@ client = Humanloop(
 )
 client.evaluations.add_evaluators(
     id="id",
-    evaluators=[{}],
+    evaluators=[{"version_id": "version_id"}],
 )

 ```
@@ -8310,7 +8314,7 @@ client.evaluations.add_evaluators(
 <dl>
 <dd>

-**evaluators:** `typing.Sequence[EvaluationsRequestParams]` — The Evaluators to add to this Evaluation.
+**evaluators:** `typing.Sequence[AddEvaluatorsRequestEvaluatorsItemParams]` — The Evaluators to add to this Evaluation.

 </dd>
 </dl>
@@ -8344,8 +8348,7 @@ client.evaluations.add_evaluators(

 Remove an Evaluator from an Evaluation.

-Remove an Evaluator from an Evaluation. The Evaluator will no longer be run on the Logs
-generated for the Evaluation.
+The Evaluator will no longer be run on the Logs in the Evaluation.
 </dd>
 </dl>
 </dd>
@@ -8425,6 +8428,12 @@ client.evaluations.remove_evaluator(
 <dd>

 Get an Evaluation.
+
+This includes the Evaluators associated with the Evaluation and metadata about the Evaluation,
+such as its name.
+
+To get the Runs associated with the Evaluation, use the `GET /evaluations/{id}/runs` endpoint.
+To retrieve stats for the Evaluation, use the `GET /evaluations/{id}/stats` endpoint.
 </dd>
 </dl>
 </dd>
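
The expanded description above points to separate endpoints for Runs and stats; a minimal sketch of the corresponding SDK calls (`get`, `list_runs_for_evaluation`, `get_stats`, each of which appears elsewhere in this diff), assuming `id` is the only required argument.

```python
from humanloop import Humanloop

client = Humanloop(
    api_key="YOUR_API_KEY",
)
# The Evaluation itself: its Evaluators and metadata such as its name.
evaluation = client.evaluations.get(id="id")
# Runs and aggregate stats live behind separate endpoints.
runs = client.evaluations.list_runs_for_evaluation(id="id")
stats = client.evaluations.get_stats(id="id")
```
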
@@ -8496,8 +8505,7 @@ client.evaluations.get(

 Delete an Evaluation.

-Remove an Evaluation from Humanloop. The Logs and Versions used in the Evaluation
-will not be deleted.
+The Runs and Evaluators in the Evaluation will not be deleted.
 </dd>
 </dl>
 </dd>
@@ -8639,20 +8647,15 @@ client.evaluations.list_runs_for_evaluation(

 Create an Evaluation Run.

-Create a new Evaluation Run. Optionally specify the Dataset and version to be
-evaluated.
+Optionally specify the Dataset and version to be evaluated.

 Humanloop will automatically start generating Logs and running Evaluators where
 `orchestrated=true`. If you are generating Logs yourself, you can set `orchestrated=false`
 and then generate and submit the required Logs via the API.

-The `logs` parameter controls which Logs are associated with the Run. Defaults to `dynamic`
-if `dataset` and `version` are provided. This means that Logs will automatically be retrieved
-if they're associated with the specified Version and has `source_datapoint_id` referencing
-a datapoint in the specified Dataset.
-If `logs` is set to `fixed`, no existing Logs will be automatically associated with the Run.
-You can then add Logs to the Run using the `POST /evaluations/{id}/runs/{run_id}/logs` endpoint,
-or by adding `run_id` to your `POST /prompts/logs` requests.
+If `dataset` and `version` are provided, you can set `use_existing_logs=True` to reuse existing Logs,
+avoiding generating new Logs unnecessarily. Logs that are associated with the specified Version and have `source_datapoint_id`
+referencing a datapoint in the specified Dataset will be associated with the Run.

 To keep updated on the progress of the Run, you can poll the Run using
 the `GET /evaluations/{id}/runs` endpoint and check its status.
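
A minimal sketch of the `use_existing_logs` behaviour described above; the payload shapes passed for `dataset` and `version` are assumptions (the diff only names the types `CreateRunRequestDatasetParams` and `CreateRunRequestVersionParams`).

```python
from humanloop import Humanloop

client = Humanloop(
    api_key="YOUR_API_KEY",
)
# Reuse Logs already associated with the given Version and Dataset instead of
# generating new ones. The dict shapes below are assumed, for illustration only.
client.evaluations.create_run(
    id="id",
    dataset={"version_id": "version_id"},
    version={"version_id": "version_id"},
    use_existing_logs=True,  # only valid when both dataset and version are provided
)
```
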
@@ -8701,15 +8704,15 @@ client.evaluations.create_run(
 <dl>
 <dd>

-**dataset:** `typing.Optional[EvaluationsDatasetRequestParams]` — Dataset to use in this Run.
+**dataset:** `typing.Optional[CreateRunRequestDatasetParams]` — Dataset to use in this Run.

 </dd>
 </dl>

 <dl>
 <dd>

-**version:** `typing.Optional[VersionSpecificationParams]` — Version to use in this Run.
+**version:** `typing.Optional[CreateRunRequestVersionParams]` — Version to use in this Run.

 </dd>
 </dl>
@@ -8725,7 +8728,7 @@ client.evaluations.create_run(
 <dl>
 <dd>

-**logs:** `typing.Optional[LogsAssociationType]` — How the Logs are associated with the Run. If `dynamic`, the latest relevant Logs will be inferred from the Dataset and Version. If `fixed`, the Logs will be explicitly associated. You can provide a list of Log IDs to associate with the Run, or add them to the Run later. Defaults to `dynamic` if `dataset` and `version` are provided; otherwise, defaults to `fixed`.
+**use_existing_logs:** `typing.Optional[bool]` — If `True`, the Run will be initialized with existing Logs associated with the Dataset and Version. If `False`, the Run will be initialized with no Logs. Can only be set to `True` when both `dataset` and `version` are provided.

 </dd>
 </dl>
@@ -8757,7 +8760,10 @@ client.evaluations.create_run(
 <dl>
 <dd>

-Add an existing Run to an Evaluation.
+Add an existing Run to the specified Evaluation.
+
+This is useful if you want to compare the Runs in this Evaluation with an existing Run
+from another Evaluation.
 </dd>
 </dl>
 </dd>
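
A minimal sketch of reusing a Run across Evaluations as described above, assuming the same `id`/`run_id` arguments used by the other Run endpoints in this diff.

```python
from humanloop import Humanloop

client = Humanloop(
    api_key="YOUR_API_KEY",
)
# Attach a Run that already exists in another Evaluation so its results can be
# compared against the Runs in this Evaluation.
client.evaluations.add_existing_run(
    id="id",
    run_id="run_id",
)
```
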
@@ -8824,7 +8830,7 @@ client.evaluations.add_existing_run(
 </dl>
 </details>

-<details><summary><code>client.evaluations.<a href="src/humanloop/evaluations/client.py">remove_run_from_evaluation</a>(...)</code></summary>
+<details><summary><code>client.evaluations.<a href="src/humanloop/evaluations/client.py">remove_run</a>(...)</code></summary>
 <dl>
 <dd>

@@ -8838,7 +8844,7 @@ client.evaluations.add_existing_run(

 Remove a Run from an Evaluation.

-Remove a Run from an Evaluation. The Logs and Versions used in the Run will not be deleted.
+The Logs and Versions used in the Run will not be deleted.
 If this Run is used in any other Evaluations, it will still be available in those Evaluations.
 </dd>
 </dl>
@@ -8859,7 +8865,7 @@ from humanloop import Humanloop
 client = Humanloop(
     api_key="YOUR_API_KEY",
 )
-client.evaluations.remove_run_from_evaluation(
+client.evaluations.remove_run(
     id="id",
     run_id="run_id",
 )
@@ -8920,7 +8926,8 @@ client.evaluations.remove_run_from_evaluation(

 Update an Evaluation Run.

-Update the Dataset and version to be evaluated for an existing Run.
+Specify `control=true` to use this Run as the control Run for the Evaluation.
+You can cancel a running/pending Run, or mark a Run that uses external or human Evaluators as completed.
 </dd>
 </dl>
 </dd>
@@ -8943,7 +8950,6 @@ client = Humanloop(
 client.evaluations.update_evaluation_run(
     id="id",
     run_id="run_id",
-    control=True,
 )

 ```
@@ -8976,7 +8982,15 @@ client.evaluations.update_evaluation_run(
 <dl>
 <dd>

-**control:** `bool` — If `True`, this Run will be used as the control in the Evaluation. Stats for other Runs will be compared to this Run. This will replace any existing control Run.
+**control:** `typing.Optional[bool]` — If `True`, this Run will be used as the control in the Evaluation. Stats for other Runs will be compared to this Run. This will replace any existing control Run.
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**status:** `typing.Optional[EvaluationStatus]` — Used to set the Run to `cancelled` or `completed`. Can only be used if the Run is currently `pending` or `running`.

 </dd>
 </dl>
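
A minimal sketch combining the `control` and `status` parameters documented above; passing the status as the string `"completed"` is an assumption about how `EvaluationStatus` is supplied.

```python
from humanloop import Humanloop

client = Humanloop(
    api_key="YOUR_API_KEY",
)
# Use this Run as the control that other Runs' stats are compared against.
client.evaluations.update_evaluation_run(
    id="id",
    run_id="run_id",
    control=True,
)
# Mark a pending/running Run that uses external or human Evaluators as completed.
# The string literal is an assumed representation of EvaluationStatus.
client.evaluations.update_evaluation_run(
    id="id",
    run_id="run_id",
    status="completed",
)
```
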
@@ -9008,11 +9022,7 @@ client.evaluations.update_evaluation_run(
 <dl>
 <dd>

-Add Logs to an Evaluation Run.
-
-This is supported only for Runs that have a fixed set of Logs.
-(Runs can either have a fixed set of Logs, or can be set to dynamically retrieve the latest Logs
-if a Dataset and Version are provided.)
+Add the specified Logs to a Run.
 </dd>
 </dl>
 </dd>
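
A minimal sketch of adding Logs to a Run as described above; `log_ids` is an assumed parameter name for the list of Log IDs.

```python
from humanloop import Humanloop

client = Humanloop(
    api_key="YOUR_API_KEY",
)
# Associate existing Logs with the Run. `log_ids` is assumed, for illustration only.
client.evaluations.add_logs_to_run(
    id="id",
    run_id="run_id",
    log_ids=["log_id"],
)
```
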
@@ -9102,9 +9112,7 @@ client.evaluations.add_logs_to_run(

 Get Evaluation Stats.

-Retrieve aggregate stats for the specified Evaluation.
-
-This includes the number of generated Logs for each Run and the
+Retrieve aggregate stats for the specified Evaluation. This includes the number of generated Logs for each Run and the
 corresponding Evaluator statistics (such as the mean and percentiles).
 </dd>
 </dl>
@@ -9176,6 +9184,8 @@ client.evaluations.get_stats(
 <dd>

 Get the Logs associated to a specific Evaluation.
+
+This returns the Logs associated with all Runs within the Evaluation.
 </dd>
 </dl>
 </dd>
@@ -9394,6 +9404,14 @@ for page in response.iter_pages():
 <dl>
 <dd>

+**sample_n:** `typing.Optional[int]` — If provided, only a random sample of approximately N Logs will be returned.
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
 **request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.

 </dd>
 </dl>