QA pass

Andrei Bratu · Andrei Bratu · commit f538cc7e9035 · 2025-04-25T12:50:02.000+01:00
diff --git a/src/humanloop/evals/run.py b/src/humanloop/evals/run.py
@@ -95,7 +95,7 @@
 
 EvaluatorDict = Union[CodeEvaluatorDict, LLMEvaluatorDict, HumanEvaluatorDict, ExternalEvaluator]
 Version = Union[FlowDict, PromptDict, ToolDict, EvaluatorDict]
-FileType = Literal["flow", "prompt", "tool", "evaluator"]
+FileType = Literal["flow", "prompt", "agent"]
 
 
 # ANSI escape codes for logging colors
@@ -153,7 +153,8 @@ def run_eval(
     evaluators_worker_pool = ThreadPoolExecutor(max_workers=workers)
 
     hl_file, function_ = _get_hl_file(client=client, file_config=file)
-    type_ = hl_file.type
+    # cast is safe, we can only fetch Files allowed by FileType
+    type_ = typing.cast(FileType, hl_file.type)
     try:
         hl_dataset = _upsert_dataset(dataset=dataset, client=client)
     except Exception as e:
@@ -211,11 +212,11 @@ def handle_exit_signal(signum, frame):
     # Generate locally if a file `callable` is provided
     if function_ is None:
         # TODO: trigger run when updated API is available
-        print_info(f"\nRunning '{hl_file.name}' {hl_file.type.capitalize()} over the Dataset '{hl_dataset.name}'")
+        print_info(f"\nRunning '{hl_file.name}' {type_.capitalize()} over the '{hl_dataset.name}' Dataset")
     else:
         # Running the evaluation locally
         print_info(
-            f"\nRunning '{hl_file.name}' {hl_file.type.capitalize()} over the Dataset '{hl_dataset.name}' using {workers} workers...\n"
+            f"\nRunning '{hl_file.name}' {type_.capitalize()} over the '{hl_dataset.name}' Dataset using {workers} workers...\n"
         )
 
     _PROGRESS_BAR = _SimpleProgressBar(len(hl_dataset.datapoints))
@@ -420,58 +421,71 @@ def _safe_get_default_file_version(client: "BaseHumanloop", file_config: FileEva
             raise HumanloopRuntimeError(
                 f"File in Humanloop workspace at {path} is not of type {type}, but {hl_file.type}."
             )
-        return hl_file
-    else:
+        # cast is safe, we can only fetch Files allowed by FileType
+        return typing.cast(EvaluatedFile, hl_file)
+    elif file_id is not None:
         subclient = _get_subclient(client=client, file_config=file_config)
         return subclient.get(id=file_id)
+    else:
+        raise HumanloopRuntimeError("You must provide a path or id in your `file` config.")
 
 
 def _resolve_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> tuple[EvaluatedFile, Optional[Callable]]:
     """Resolve the File to be evaluated. Will return a FileResponse and an optional callable.
 
     If the callable is null, the File will be evaluated on Humanloop. Otherwise, the File will be evaluated locally.
     """
-    hl_file = _safe_get_default_file_version(client=client, file_config=file_config)
     file_id = file_config.get("id")
     path = file_config.get("path")
     version_id = file_config.get("version_id")
     environment = file_config.get("environment")
     callable = _get_file_callable(file_config=file_config)
     version = file_config.get("version")
 
-    if version is not None and path is None and file_id:
+    if callable and path is None and file_id is None:
+        raise HumanloopRuntimeError(
+            "You are trying to create a new version of the File by passing the `version` argument. "
+            "You must pass either the `file.path` or `file.id` argument and provide a proper `file.version` for upserting the File."
+        )
+    hl_file = _safe_get_default_file_version(client=client, file_config=file_config)
+
+    if (version_id or environment) and (callable or version):
         raise HumanloopRuntimeError(
-            "You are trying to create a new version of the File by passing the `version` argument. You must pass either the `file.path` or `file.id` argument."
+            "You are trying to create a local Evaluation while requesting a specific File version by version ID or environment."
         )
 
-    if version:
-        # User wants to upsert a version
-        return (_upsert_file(file_config=file_config, client=client), callable)
+    if callable:
+        # User responsibility to provide adequate file.version for upserting the file
+        print_info(
+            "Upserting a new File version based on `file.version`. Will use provided callable for generating Logs."
+        )
+        try:
+            return (_upsert_file(file_config=file_config, client=client), callable)
+        except Exception as e:
+            raise HumanloopRuntimeError(f"Error upserting the File. Please ensure `file.version` is valid: {e}") from e
 
     if version_id is None and environment is None:
         # Return default version of the File
         return hl_file, callable
 
-    if callable:
-        raise HumanloopRuntimeError(
-            "You cannot request local evaluation while requesting a specific File version by version ID or environment"
-        )
-
     if file_id is None and (version_id or environment):
         raise HumanloopRuntimeError(
             "You must provide the `file.id` when addressing a file by version ID or environment"
         )
+
     # Use version_id or environment to retrieve specific version of the File
     subclient = _get_subclient(client=client, file_config=file_config)
     # Let backend handle case where both or none of version_id and environment are provided
     return subclient.get(
-        version_id=file_config.get("version_id"),
-        environment=file_config.get("environment"),
+        # The earlier `if` checked that file_id is not None
+        id=file_id,  # type: ignore [arg-type]
+        version_id=version_id,
+        environment=environment,
     ), None
 
 
 def _get_hl_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> tuple[EvaluatedFile, Optional[Callable]]:
-    """Check if the config object is valid, and resolve the File to be evaluated
+    """Check if the config object is valid, and resolve the File to be evaluated.
 
     The callable will be null if the evaluation will happen on Humanloop runtime.
     Otherwise, the evaluation will happen locally.
@@ -617,20 +631,19 @@ def _file_or_file_inside_hl_decorator(file_config: FileEvalConfig) -> FileEvalCo
     return file_
 
 
-def _check_file_type(file: FileEvalConfig) -> FileEvalConfig:
+def _check_file_type(file_config: FileEvalConfig) -> FileEvalConfig:
     """Check that the file type is provided, or set it to `flow` if not provided."""
     try:
-        type_ = typing.cast(FileType, file.pop("type"))  # type: ignore [arg-type, misc]
+        type_ = typing.cast(FileType, file_config.pop("type"))  # type: ignore [arg-type, misc]
         print_info(
-            f"Evaluating your {type_} function corresponding to `{file.get('path') or file.get('id')}` on Humanloop\n\n"
+            f"Evaluating your {type_} function corresponding to `{file_config.get('path') or file_config.get('id')}` on Humanloop\n\n"
         )
-        if type_ is None:
-            file["type"] = "flow"
+        file_config["type"] = type_ or "flow"
     except KeyError as _:
         type_ = "flow"
         print_warning("No `file` type specified, defaulting to flow.")
-        file["type"] = type_
-    return file
+        file_config["type"] = type_
+    return file_config
 
 
 def _get_file_callable(file_config: FileEvalConfig) -> Optional[Callable]:
@@ -653,6 +666,7 @@ def _upsert_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> Evalua
     # Get or create the file on Humanloop
     version = file_config.pop("version", {})
     file_dict = {**file_config, **version}
+    del file_dict["type"]
     type_ = file_config.get("type")
     subclient = _get_subclient(client=client, file_config=file_config)
 
@@ -672,7 +686,8 @@ def _upsert_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> Evalua
     else:
         raise NotImplementedError(f"Unsupported File type: {type_}")
 
-    return subclient.upsert(**file_dict)
+    # mypy complains about the polymorphic subclient
+    return subclient.upsert(**file_dict)  # type: ignore [arg-type]
 
 
 def _upsert_dataset(dataset: DatasetEvalConfig, client: "BaseHumanloop"):
diff --git a/src/humanloop/types/version_id_response_version.py b/src/humanloop/types/version_id_response_version.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 import typing
 from .dataset_response import DatasetResponse
-import typing
 
 if typing.TYPE_CHECKING:
     from .prompt_response import PromptResponse
diff --git a/src/humanloop/types/version_reference_response.py b/src/humanloop/types/version_reference_response.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 import typing
-import typing
 
 if typing.TYPE_CHECKING:
     from .version_deployment_response import VersionDeploymentResponse
diff --git a/tests/custom/test_client.py b/tests/custom/test_client.py
@@ -4,4 +4,4 @@
 # Get started with writing tests with pytest at https://docs.pytest.org
 @pytest.mark.skip(reason="Unimplemented")
 def test_client() -> None:
-    assert True == True
+    assert True is True
diff --git a/tests/utils/assets/models/circle.py b/tests/utils/assets/models/circle.py
@@ -2,7 +2,6 @@
 
 # This file was auto-generated by Fern from our API Definition.
 
-import typing_extensions
 import typing_extensions
 from humanloop.core.serialization import FieldMetadata
 
diff --git a/tests/utils/assets/models/object_with_optional_field.py b/tests/utils/assets/models/object_with_optional_field.py
@@ -4,7 +4,6 @@
 
 import typing_extensions
 import typing
-import typing_extensions
 from humanloop.core.serialization import FieldMetadata
 import datetime as dt
 import uuid
diff --git a/tests/utils/assets/models/shape.py b/tests/utils/assets/models/shape.py
@@ -4,7 +4,6 @@
 
 from __future__ import annotations
 import typing_extensions
-import typing_extensions
 import typing
 from humanloop.core.serialization import FieldMetadata
 
diff --git a/tests/utils/assets/models/square.py b/tests/utils/assets/models/square.py
@@ -2,7 +2,6 @@
 
 # This file was auto-generated by Fern from our API Definition.
 
-import typing_extensions
 import typing_extensions
 from humanloop.core.serialization import FieldMetadata
 
diff --git a/tests/utils/test_query_encoding.py b/tests/utils/test_query_encoding.py
@@ -34,4 +34,4 @@ def test_query_encoding_deep_object_arrays() -> None:
 
 def test_encode_query_with_none() -> None:
     encoded = encode_query(None)
-    assert encoded == None
+    assert encoded is None