
Commit 403110e

Release 0.8.9a1
1 parent f5edac2 commit 403110e


57 files changed, +802 −494 lines

README.md

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ client.prompts.log(
     messages=[{"role": "user", "content": "What really happened at Roswell?"}],
     inputs={"person": "Trump"},
     created_at=datetime.datetime.fromisoformat(
-        "2024-07-18 23:29:35.178000+00:00",
+        "2024-07-19 00:29:35.178000+00:00",
     ),
     provider_latency=6.5931549072265625,
     output_message={
@@ -88,7 +88,7 @@ async def main() -> None:
         ],
         inputs={"person": "Trump"},
         created_at=datetime.datetime.fromisoformat(
-            "2024-07-18 23:29:35.178000+00:00",
+            "2024-07-19 00:29:35.178000+00:00",
         ),
         provider_latency=6.5931549072265625,
         output_message={

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "humanloop"
-version = "0.8.9"
+version = "0.8.9a1"
 description = ""
 readme = "README.md"
 authors = []

reference.md

Lines changed: 64 additions & 46 deletions
@@ -56,7 +56,7 @@ client.prompts.log(
     messages=[{"role": "user", "content": "What really happened at Roswell?"}],
     inputs={"person": "Trump"},
     created_at=datetime.datetime.fromisoformat(
-        "2024-07-18 23:29:35.178000+00:00",
+        "2024-07-19 00:29:35.178000+00:00",
     ),
     provider_latency=6.5931549072265625,
     output_message={
@@ -5164,6 +5164,14 @@ client.evaluators.log(
 <dl>
 <dd>
 
+**marked_completed:** `typing.Optional[bool]` — Whether the Log has been manually marked as completed by a user.
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
 **spec:** `typing.Optional[CreateEvaluatorLogRequestSpecParams]`
 
 </dd>
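
The new `marked_completed` field slots into the existing `client.evaluators.log(...)` call. A minimal sketch of how it might be passed — `marked_completed` comes from this diff, but the surrounding fields (`parent_id`, `judgment`) are assumptions for illustration, not confirmed by this commit:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Hypothetical human-Evaluator log: record a judgment against a parent Log
# and mark it as manually completed (new in 0.8.9a1, per the diff above).
client.evaluators.log(
    parent_id="log_id",     # assumed field name
    judgment=True,          # assumed field name
    marked_completed=True,  # new optional flag from this release
)
```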
@@ -5362,7 +5370,7 @@ client.evaluators.upsert(
 <dl>
 <dd>
 
-**spec:** `SrcExternalAppModelsV5EvaluatorsEvaluatorRequestSpecParams`
+**spec:** `EvaluatorRequestSpecParams`
 
 </dd>
 </dl>
@@ -6225,10 +6233,10 @@ client.flows.log(
     output="The patient is likely experiencing a myocardial infarction. Immediate medical attention is required.",
     trace_status="incomplete",
     start_time=datetime.datetime.fromisoformat(
-        "2024-07-08 21:40:35+00:00",
+        "2024-07-08 22:40:35+00:00",
     ),
     end_time=datetime.datetime.fromisoformat(
-        "2024-07-08 21:40:39+00:00",
+        "2024-07-08 22:40:39+00:00",
     ),
 )

@@ -8075,9 +8083,7 @@ client.files.list()
 <dl>
 <dd>
 
-List all Evaluations for the specified `file_id`.
-
-Retrieve a list of Evaluations that evaluate versions of the specified File.
+Retrieve a list of Evaluations for the specified File.
 </dd>
 </dl>
 </dd>
@@ -8171,9 +8177,8 @@ for page in response.iter_pages():
 
 Create an Evaluation.
 
-Create an Evaluation by specifying the File to evaluate, and a name
+Create a new Evaluation by specifying the File to evaluate, and a name
 for the Evaluation.
-
 You can then add Runs to this Evaluation using the `POST /evaluations/{id}/runs` endpoint.
 </dd>
 </dl>
@@ -8195,7 +8200,7 @@ client = Humanloop(
     api_key="YOUR_API_KEY",
 )
 client.evaluations.create(
-    evaluators=[{}],
+    evaluators=[{"version_id": "version_id"}],
 )
 
 ```
@@ -8212,7 +8217,7 @@ client.evaluations.create(
 <dl>
 <dd>
 
-**evaluators:** `typing.Sequence[EvaluationsRequestParams]` — The Evaluators used to evaluate.
+**evaluators:** `typing.Sequence[CreateEvaluationRequestEvaluatorsItemParams]` — The Evaluators used to evaluate.
 
 </dd>
 </dl>
@@ -8262,8 +8267,7 @@ client.evaluations.create(
 
 Add Evaluators to an Evaluation.
 
-Add new Evaluators to an Evaluation. The Evaluators will be run on the Logs
-generated for the Evaluation.
+The Evaluators will be run on the Logs generated for the Evaluation.
 </dd>
 </dl>
 </dd>
@@ -8285,7 +8289,7 @@ client = Humanloop(
 )
 client.evaluations.add_evaluators(
     id="id",
-    evaluators=[{}],
+    evaluators=[{"version_id": "version_id"}],
 )
 
 ```
@@ -8310,7 +8314,7 @@ client.evaluations.add_evaluators(
 <dl>
 <dd>
 
-**evaluators:** `typing.Sequence[EvaluationsRequestParams]` — The Evaluators to add to this Evaluation.
+**evaluators:** `typing.Sequence[AddEvaluatorsRequestEvaluatorsItemParams]` — The Evaluators to add to this Evaluation.
 
 </dd>
 </dl>
@@ -8344,8 +8348,7 @@ client.evaluations.add_evaluators(
 
 Remove an Evaluator from an Evaluation.
 
-Remove an Evaluator from an Evaluation. The Evaluator will no longer be run on the Logs
-generated for the Evaluation.
+The Evaluator will no longer be run on the Logs in the Evaluation.
 </dd>
 </dl>
 </dd>
@@ -8425,6 +8428,12 @@ client.evaluations.remove_evaluator(
 <dd>
 
 Get an Evaluation.
+
+This includes the Evaluators associated with the Evaluation and metadata about the Evaluation,
+such as its name.
+
+To get the Runs associated with the Evaluation, use the `GET /evaluations/{id}/runs` endpoint.
+To retrieve stats for the Evaluation, use the `GET /evaluations/{id}/stats` endpoint.
 </dd>
 </dl>
 </dd>
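
Reflecting the split described in the new docstring, a hedged sketch of fetching an Evaluation and then its Runs and stats through their own endpoints (the method names appear elsewhere in this reference; the exact return shapes are assumed):

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# The Evaluation itself now carries its Evaluators and metadata only...
evaluation = client.evaluations.get(id="id")

# ...while Runs and aggregate stats live behind separate endpoints.
runs = client.evaluations.list_runs_for_evaluation(id="id")
stats = client.evaluations.get_stats(id="id")
```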
@@ -8496,8 +8505,7 @@ client.evaluations.get(
 
 Delete an Evaluation.
 
-Remove an Evaluation from Humanloop. The Logs and Versions used in the Evaluation
-will not be deleted.
+The Runs and Evaluators in the Evaluation will not be deleted.
 </dd>
 </dl>
 </dd>
@@ -8639,20 +8647,15 @@ client.evaluations.list_runs_for_evaluation(
 
 Create an Evaluation Run.
 
-Create a new Evaluation Run. Optionally specify the Dataset and version to be
-evaluated.
+Optionally specify the Dataset and version to be evaluated.
 
 Humanloop will automatically start generating Logs and running Evaluators where
 `orchestrated=true`. If you are generating Logs yourself, you can set `orchestrated=false`
 and then generate and submit the required Logs via the API.
 
-The `logs` parameter controls which Logs are associated with the Run. Defaults to `dynamic`
-if `dataset` and `version` are provided. This means that Logs will automatically be retrieved
-if they're associated with the specified Version and has `source_datapoint_id` referencing
-a datapoint in the specified Dataset.
-If `logs` is set to `fixed`, no existing Logs will be automatically associated with the Run.
-You can then add Logs to the Run using the `POST /evaluations/{id}/runs/{run_id}/logs` endpoint,
-or by adding `run_id` to your `POST /prompts/logs` requests.
+If `dataset` and `version` are provided, you can set `use_existing_logs=True` to reuse existing Logs,
+avoiding generating new Logs unnecessarily. Logs that are associated with the specified Version and have `source_datapoint_id`
+referencing a datapoint in the specified Dataset will be associated with the Run.
 
 To keep updated on the progress of the Run, you can poll the Run using
 the `GET /evaluations/{id}/runs` endpoint and check its status.
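
The polling the docstring describes might look like the sketch below; the `status` attribute and the `runs` container on the response are assumptions, and the terminal status values follow the `EvaluationStatus` description later in this diff:

```python
import time

from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Poll until no Run in the Evaluation is still pending or running.
while True:
    response = client.evaluations.list_runs_for_evaluation(id="id")
    if all(run.status in ("completed", "cancelled") for run in response.runs):
        break
    time.sleep(5)  # back off between polls
```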
@@ -8701,15 +8704,15 @@ client.evaluations.create_run(
 <dl>
 <dd>
 
-**dataset:** `typing.Optional[EvaluationsDatasetRequestParams]` — Dataset to use in this Run.
+**dataset:** `typing.Optional[CreateRunRequestDatasetParams]` — Dataset to use in this Run.
 
 </dd>
 </dl>
 
 <dl>
 <dd>
 
-**version:** `typing.Optional[VersionSpecificationParams]` — Version to use in this Run.
+**version:** `typing.Optional[CreateRunRequestVersionParams]` — Version to use in this Run.
 
 </dd>
 </dl>
@@ -8725,7 +8728,7 @@ client.evaluations.create_run(
 <dl>
 <dd>
 
-**logs:** `typing.Optional[LogsAssociationType]` — How the Logs are associated with the Run. If `dynamic`, the latest relevant Logs will be inferred from the Dataset and Version. If `fixed`, the Logs will be explicitly associated. You can provide a list of Log IDs to associate with the Run, or add them to the Run later. Defaults to `dynamic` if `dataset` and `version` are provided; otherwise, defaults to `fixed`.
+**use_existing_logs:** `typing.Optional[bool]` — If `True`, the Run will be initialized with existing Logs associated with the Dataset and Version. If `False`, the Run will be initialized with no Logs. Can only be set to `True` when both `dataset` and `version` are provided.
 
 </dd>
 </dl>
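
A sketch of the replacement parameter in use — `use_existing_logs` comes from this diff, while the payload shapes for `dataset` and `version` are assumed, not confirmed by this commit:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Reuse Logs already associated with this Version/Dataset pair rather than
# generating new ones; valid only when both dataset and version are given.
client.evaluations.create_run(
    id="id",
    dataset={"version_id": "dataset_version_id"},  # assumed payload shape
    version={"version_id": "version_id"},          # assumed payload shape
    use_existing_logs=True,
)
```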
@@ -8757,7 +8760,10 @@ client.evaluations.create_run(
 <dl>
 <dd>
 
-Add an existing Run to an Evaluation.
+Add an existing Run to the specified Evaluation.
+
+This is useful if you want to compare the Runs in this Evaluation with an existing Run
+that exists within another Evaluation.
 </dd>
 </dl>
 </dd>
@@ -8824,7 +8830,7 @@ client.evaluations.add_existing_run(
 </dl>
 </details>
 
-<details><summary><code>client.evaluations.<a href="src/humanloop/evaluations/client.py">remove_run_from_evaluation</a>(...)</code></summary>
+<details><summary><code>client.evaluations.<a href="src/humanloop/evaluations/client.py">remove_run</a>(...)</code></summary>
 <dl>
 <dd>
@@ -8838,7 +8844,7 @@ client.evaluations.add_existing_run(
 
 Remove a Run from an Evaluation.
 
-Remove a Run from an Evaluation. The Logs and Versions used in the Run will not be deleted.
+The Logs and Versions used in the Run will not be deleted.
 If this Run is used in any other Evaluations, it will still be available in those Evaluations.
 </dd>
 </dl>
@@ -8859,7 +8865,7 @@ from humanloop import Humanloop
 client = Humanloop(
     api_key="YOUR_API_KEY",
 )
-client.evaluations.remove_run_from_evaluation(
+client.evaluations.remove_run(
     id="id",
     run_id="run_id",
 )
@@ -8920,7 +8926,8 @@ client.evaluations.remove_run_from_evaluation(
 
 Update an Evaluation Run.
 
-Update the Dataset and version to be evaluated for an existing Run.
+Specify `control=true` to use this Run as the control Run for the Evaluation.
+You can cancel a running/pending Run, or mark a Run that uses external or human Evaluators as completed.
 </dd>
 </dl>
 </dd>
@@ -8943,7 +8950,6 @@ client = Humanloop(
 client.evaluations.update_evaluation_run(
     id="id",
     run_id="run_id",
-    control=True,
 )
 
 ```
@@ -8976,7 +8982,15 @@ client.evaluations.update_evaluation_run(
 <dl>
 <dd>
 
-**control:** `bool` — If `True`, this Run will be used as the control in the Evaluation. Stats for other Runs will be compared to this Run. This will replace any existing control Run.
+**control:** `typing.Optional[bool]` — If `True`, this Run will be used as the control in the Evaluation. Stats for other Runs will be compared to this Run. This will replace any existing control Run.
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
+**status:** `typing.Optional[EvaluationStatus]` — Used to set the Run to `cancelled` or `completed`. Can only be used if the Run is currently `pending` or `running`.
 
 </dd>
 </dl>
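
Given the two parameters documented above, updating a Run might look like the following sketch; the literal status string follows the `EvaluationStatus` description in this diff:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Promote this Run to be the control for the Evaluation.
client.evaluations.update_evaluation_run(
    id="id",
    run_id="run_id",
    control=True,
)

# Or cancel a Run that is still pending or running.
client.evaluations.update_evaluation_run(
    id="id",
    run_id="run_id",
    status="cancelled",
)
```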
@@ -9008,11 +9022,7 @@ client.evaluations.update_evaluation_run(
 <dl>
 <dd>
 
-Add Logs to an Evaluation Run.
-
-This is supported only for Runs that have a fixed set of Logs.
-(Runs can either have a fixed set of Logs, or can be set to dynamically retrieve the latest Logs
-if a Dataset and Version are provided.)
+Add the specified Logs to a Run.
 </dd>
 </dl>
 </dd>
@@ -9102,9 +9112,7 @@ client.evaluations.add_logs_to_run(
 
 Get Evaluation Stats.
 
-Retrieve aggregate stats for the specified Evaluation.
-
-This includes the number of generated Logs for each Run and the
+Retrieve aggregate stats for the specified Evaluation. This includes the number of generated Logs for each Run and the
 corresponding Evaluator statistics (such as the mean and percentiles).
 </dd>
 </dl>
@@ -9176,6 +9184,8 @@ client.evaluations.get_stats(
 <dd>
 
 Get the Logs associated to a specific Evaluation.
+
+This returns the Logs associated to all Runs within with the Evaluation.
 </dd>
 </dl>
 </dd>
@@ -9394,6 +9404,14 @@ for page in response.iter_pages():
 <dl>
 <dd>
 
+**sample_n:** `typing.Optional[int]` — If provided, only a random sample of approximately N Logs will be returned.
+
+</dd>
+</dl>
+
+<dl>
+<dd>
+
 **request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration.
 
 </dd>
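
A sketch of the new `sample_n` query parameter; the method name `get_logs` is an assumption based on this section's title, and the pagination mirrors the `iter_pages` pattern shown in the hunk header above:

```python
from humanloop import Humanloop

client = Humanloop(api_key="YOUR_API_KEY")

# Hypothetical: fetch a random sample of roughly 100 Logs for an Evaluation.
response = client.evaluations.get_logs(id="id", sample_n=100)
for page in response.iter_pages():
    ...  # process each page of sampled Logs (page structure assumed)
```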
