29 changes: 26 additions & 3 deletions mlflow/genai/datasets/evaluation_dataset.py
@@ -6,6 +6,7 @@
 from mlflow.genai.datasets.databricks_evaluation_dataset_source import (
     DatabricksEvaluationDatasetSource,
 )
+from mlflow.data.evaluation_dataset import EvaluationDataset as LegacyEvaluationDataset

 if TYPE_CHECKING:
     import pandas as pd
@@ -41,6 +42,19 @@ def __init__(self, dataset):
         self._mlflow_dataset = None
         self._df = None

+
+        # Cache name and digest from whichever backing dataset is present so
+        # later lookups can reuse the stored values.
+        if self._mlflow_dataset:
+            self._cached_name = getattr(self._mlflow_dataset, "name", None)
+            self._cached_digest = getattr(self._mlflow_dataset, "digest", None)
+        elif self._databricks_dataset:
+            self._cached_name = getattr(self._databricks_dataset, "name", None)
+            self._cached_digest = getattr(self._databricks_dataset, "digest", None)
+        else:
+            self._cached_name = None
+            self._cached_digest = None
+
     def __eq__(self, other):
         """Check equality with another dataset."""
         if isinstance(other, _EntityEvaluationDataset) and self._mlflow_dataset:
@@ -259,14 +273,23 @@ def to_evaluation_dataset(self, path=None, feature_names=None):
         Converts the dataset to the legacy EvaluationDataset for model evaluation.
         Required for use with mlflow.evaluate().
         """
-        from mlflow.data.evaluation_dataset import EvaluationDataset as LegacyEvaluationDataset
+        # Cache the legacy EvaluationDataset class on the instance so repeated
+        # calls (e.g. in evaluation loops) do not re-execute this import each time.
+        if not hasattr(self, "_legacy_eval_cls"):
+            from mlflow.data.evaluation_dataset import (
+                EvaluationDataset as LegacyEvaluationDataset,
+            )
+            self._legacy_eval_cls = LegacyEvaluationDataset
+        LegacyEvaluationDataset = self._legacy_eval_cls

+        # Use the name and digest cached in __init__ rather than re-reading
+        # them from the backing dataset for every conversion.
         return LegacyEvaluationDataset(
             data=self.to_df(),
             path=path,
             feature_names=feature_names,
-            name=self.name,
-            digest=self.digest,
+            name=self._cached_name,
+            digest=self._cached_digest,
         )

     def _to_mlflow_entity(self):
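For context, a minimal usage sketch of the method this diff touches. It is not part of the PR: the `dataset` variable stands in for an already-retrieved `_EntityEvaluationDataset` wrapper, and exposing `name`/`digest` on the returned legacy object is an assumption based on the constructor arguments shown above.

```python
# Hypothetical usage sketch (not from this PR). `dataset` is assumed to be an
# _EntityEvaluationDataset wrapper obtained elsewhere.
legacy_ds = dataset.to_evaluation_dataset()

# name and digest now come from the values cached in __init__, so converting the
# same wrapper repeatedly no longer re-reads them from the backing
# MLflow/Databricks dataset.
print(legacy_ds.name, legacy_ds.digest)

# Per the method's docstring, this legacy object is what mlflow.evaluate() consumes.
```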