From 397eedea4b6d12a48bf00a528489d000d86eacb3 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Mon, 27 May 2024 00:49:11 -0500 Subject: [PATCH 01/31] Paired `mlflow` data structures to types --- src/types/artifact.jl | 37 ++---- src/types/dataset.jl | 49 +++++++ src/types/enums.jl | 50 +++++++ src/types/experiment.jl | 44 +++--- src/types/model_version.jl | 44 ++++++ src/types/registered_model.jl | 43 ++++++ src/types/run.jl | 243 ++++++++++------------------------ src/types/tag.jl | 14 ++ 8 files changed, 294 insertions(+), 230 deletions(-) create mode 100644 src/types/dataset.jl create mode 100644 src/types/enums.jl create mode 100644 src/types/model_version.jl create mode 100644 src/types/registered_model.jl create mode 100644 src/types/tag.jl diff --git a/src/types/artifact.jl b/src/types/artifact.jl index 1b7b984..85e2070 100644 --- a/src/types/artifact.jl +++ b/src/types/artifact.jl @@ -1,31 +1,14 @@ """ - MLFlowArtifactFileInfo - -Metadata of a single artifact file -- result of [`listartifacts`](@ref). + FileInfo # Fields -- `filepath::String`: File path, including the root artifact directory of a run. -- `filesize::Int64`: Size in bytes. -""" -struct MLFlowArtifactFileInfo - filepath::String - filesize::Int64 -end -Base.show(io::IO, t::MLFlowArtifactFileInfo) = show(io, ShowCase(t, new_lines=true)) -get_path(mlfafi::MLFlowArtifactFileInfo) = mlfafi.filepath -get_size(mlfafi::MLFlowArtifactFileInfo) = mlfafi.filesize - -""" - MLFlowArtifactDirInfo - -Metadata of a single artifact directory -- result of [`listartifacts`](@ref). - -# Fields -- `dirpath::String`: Directory path, including the root artifact directory of a run. -""" -struct MLFlowArtifactDirInfo - dirpath::String +- `path::String`: Path relative to the root artifact directory run. +- `is_dir::Bool`: Whether the path is a directory. +- `file_size::Int64`: Size in bytes. Unset for directories. 
+""" +struct FileInfo + path::String + is_dir::Bool + file_size::Int64 end -Base.show(io::IO, t::MLFlowArtifactDirInfo) = show(io, ShowCase(t, new_lines=true)) -get_path(mlfadi::MLFlowArtifactDirInfo) = mlfadi.dirpath -get_size(mlfadi::MLFlowArtifactDirInfo) = 0 +Base.show(io::IO, t::FileInfo) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/dataset.jl b/src/types/dataset.jl new file mode 100644 index 0000000..7bb028c --- /dev/null +++ b/src/types/dataset.jl @@ -0,0 +1,49 @@ +""" + Dataset + +Represents a reference to data used for training, testing, or evaluation during +the model development process. + +# Fields +- `name::String`: The name of the dataset. +- `digest::String`: The digest of the dataset. +- `source_type::String`: The type of the dataset source. +- `source::String`: Source information for the dataset. +- `schema::String`: The schema of the dataset. This field is optional. +- `profile::String`: The profile of the dataset. This field is optional. + +# Constructors +- `Dataset(name, digest, source_type, source, schema, profile)` +- `Dataset(name, digest, source_type, source; schema=nothing, profile=nothing)` +""" +struct Dataset + name::String + digest::String + source_type::String + source::String + schema::Union{String, Nothing} + profile::Union{String, Nothing} +end +Dataset(name, digest, source_type, source; schema=nothing, profile=nothing) = + Dataset(name, digest, source_type, source, schema, profile) +Base.show(io::IO, t::Dataset) = show(io, ShowCase(t, new_lines=true)) + +""" + DatasetInput + +Represents a dataset and input tags. + +# Fields +- `tags::Array{Tag}`: A list of tags for the dataset input. +- `dataset::Dataset`: The dataset being used as a Run input. 
+ +# Constructors +- `DatasetInput(tags, dataset)` +- `DatasetInput(dataset; tags=[])` +""" +struct DatasetInput + tags::Array{Tag} + dataset::Dataset +end +DatasetInput(dataset; tags=[]) = DatasetInput(tags, dataset) +Base.show(io::IO, t::DatasetInput) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/enums.jl b/src/types/enums.jl new file mode 100644 index 0000000..902219c --- /dev/null +++ b/src/types/enums.jl @@ -0,0 +1,50 @@ +""" + ModelVersionStatus + +# Members +- `PENDING_REGISTRATION`: Request to register a new model version is pending as +server performs background tasks. +- `FAILED_REGISTRATION`: Request to register a new model version has failed. +- `READY`: Model version is ready for use. +""" +@enum ModelVersionStatus begin + PENDING_REGISTRATION + FAILED_REGISTRATION + READY +end + +""" + RunStatus + +Status of a run. + +# Members +- `RUNNING`: Run has been initiated. +- `SCHEDULED`: Run is scheduled to run at a later time. +- `FINISHED`: Run has completed. +- `FAILED`: Run execution failed. +- `KILLED`: Run killed by user. +""" +@enum RunStatus begin + RUNNING + SCHEDULED + FINISHED + FAILED + KILLED +end + +""" + ViewType + +View type for ListExperiments query. + +# Members +- `ACTIVE_ONLY`: Default. Return only active experiments. +- `DELETED_ONLY`: Return only deleted experiments. +- `ALL`: Get all experiments. +""" +@enum ViewType begin + ACTIVE_ONLY + DELETED_ONLY + ALL +end diff --git a/src/types/experiment.jl b/src/types/experiment.jl index 7c4921c..093c11c 100644 --- a/src/types/experiment.jl +++ b/src/types/experiment.jl @@ -1,34 +1,24 @@ """ - MLFlowExperiment - -Represents an MLFlow experiment. + Experiment # Fields -- `name::String`: experiment name. -- `lifecycle_stage::String`: life cycle stage, one of ["active", "deleted"] -- `experiment_id::Integer`: experiment identifier. -- `tags::Any`: list of tags. -- `artifact_location::String`: where are experiment artifacts stored. 
- -# Constructors - -- `MLFlowExperiment(name, lifecycle_stage, experiment_id, tags, artifact_location)` -- `MLFlowExperiment(exp::Dict{String,Any})` - +- `experiment_id::Integer`: Unique identifier for the experiment. +- `name::String`: Human readable name that identifies the experiment. +- `artifact_location::String`: Location where artifacts for the experiment are +stored. +- `lifecycle_stage::String`: Current life cycle stage of the experiment: +“active” or “deleted”. Deleted experiments are not returned by APIs. +- `last_update_time::Int64`: Last update time. +- `creation_time::Int64`: Creation time. +- `tags::Array{Tag}`: Additional metadata key-value pairs. """ -struct MLFlowExperiment +struct Experiment + experiment_id::String name::String - lifecycle_stage::String - experiment_id::Integer - tags::Any artifact_location::String + lifecycle_stage::String + last_update_time::Int64 + creation_time::Int64 + tags::Array{Tag} end -function MLFlowExperiment(exp::Dict{String,Any}) - name = get(exp, "name", missing) - lifecycle_stage = get(exp, "lifecycle_stage", missing) - experiment_id = parse(Int, get(exp, "experiment_id", missing)) - tags = get(exp, "tags", missing) - artifact_location = get(exp, "artifact_location", missing) - MLFlowExperiment(name, lifecycle_stage, experiment_id, tags, artifact_location) -end -Base.show(io::IO, t::MLFlowExperiment) = show(io, ShowCase(t, new_lines=true)) +Base.show(io::IO, t::Experiment) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/model_version.jl b/src/types/model_version.jl new file mode 100644 index 0000000..fcbd146 --- /dev/null +++ b/src/types/model_version.jl @@ -0,0 +1,44 @@ +""" + ModelVersion + +# Fields +- `name::String`: Unique name of the model. +- `version::String`: Model’s version number. +- `creation_timestamp::Int64`: Timestamp recorded when this model_version was +created. +- `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this +model_version was last updated. 
+- `user_id::String`: User that created this model_version. +- `current_stage::String`: Current stage for this model_version. +- `description::String`: Description of this model_version. +- `source::String`: URI indicating the location of the source model artifacts, +used when creating model_version. +- `run_id::String`: MLflow run ID used when creating model_version, if source +was generated by an experiment run stored in MLflow tracking server. +- `status::ModelVersionStatus`: Current status of model_version. +- `status_message::String`: Details on current status, if it is pending or +failed. +- `tags::Array{Tag}`: Additional metadata key-value pairs. +- `run_link::String`: Direct link to the run that generated this version. This +field is set at model version creation time only for model versions whose +source run is from a tracking server that is different from the registry +server. +- `aliases::Array{String}`: Aliases pointing to this model_version. +""" +struct ModelVersion + name::String + version::String + creation_timestamp::Int64 + last_updated_timestamp::Int64 + user_id::String + current_stage::String + description::String + source::String + run_id::String + status::ModelVersionStatus + status_message::String + tags::Array{Tag} + run_link::String + aliases::Array{String} +end +Base.show(io::IO, t::ModelVersion) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/registered_model.jl b/src/types/registered_model.jl new file mode 100644 index 0000000..8420538 --- /dev/null +++ b/src/types/registered_model.jl @@ -0,0 +1,43 @@ +""" + RegisteredModelAlias + +Alias for a registered model. + +# Fields +- `alias::String`: The name of the alias. +- `version::String`: The model version number that the alias points to. +""" +struct RegisteredModelAlias + alias::String + version::String +end +Base.show(io::IO, t::RegisteredModelAlias) = show(io, ShowCase(t, new_lines=true)) + +""" + RegisteredModel + +# Fields +- `name::String`: Unique name for the model. 
+- `creation_timestamp::Int64`: Timestamp recorded when this RegisteredModel was +created. +- `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this +RegisteredModel was last updated. +- `user_id::String`: User that created this RegisteredModel. +- `description::String`: Description of this RegisteredModel. +- `latest_versions::Array{ModelVersion}`: Collection of latest model versions +for each stage. Only contains models with current READY status. +- `tags::Array{Tag}`: Additional metadata key-value pairs. +- `aliases::Array{RegisteredModelAlias}`: Aliases pointing to model versions +associated with this RegisteredModel. +""" +struct RegisteredModel + name::String + creation_timestamp::Int64 + last_updated_timestamp::Int64 + user_id::String + description::String + latest_versions::Array{ModelVersion} + tags::Array{Tag} + aliases::Array{RegisteredModelAlias} +end +Base.show(io::IO, t::RegisteredModel) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/run.jl b/src/types/run.jl index 13920a9..a18331c 100644 --- a/src/types/run.jl +++ b/src/types/run.jl @@ -1,215 +1,106 @@ """ - MLFlowRunStatus + Metric -Represents the status of an MLFlow Run. +Metric associated with a run, represented as a key-value pair. # Fields -- `status::String`: one of RUNNING/SCHEDULED/FINISHED/FAILED/KILLED - -# Constructors - -- `MLFlowRunStatus(status::String)` +- `key::String`: Key identifying this metric. +- `value::Float64`: Value associated with this metric. +- `timestamp::Int64`: The timestamp at which this metric was recorded. +- `step::Int64`: Step at which to log the metric. 
""" -struct MLFlowRunStatus - status::String - function MLFlowRunStatus(status::String) - acceptable_statuses = ["RUNNING", "SCHEDULED", "FINISHED", "FAILED", "KILLED"] - status ∈ acceptable_statuses || error("Invalid status $status - choose one of $acceptable_statuses") - new(status) - end +struct Metric + key::String + value::Float64 + timestamp::Int64 + step::Int64 end -Base.show(io::IO, t::MLFlowRunStatus) = show(io, ShowCase(t, new_lines=true)) +Base.show(io::IO, t::Metric) = show(io, ShowCase(t, new_lines=true)) """ - MLFlowRunInfo + Param -Represents run metadata. +Param associated with a run. # Fields -- `run_id::String`: run identifier. -- `experiment_id::Integer`: experiment identifier. -- `status::MLFlowRunStatus`: run status. -- `run_name::String`: run name. -- `start_time::Union{Int64,Missing}`: when was the run started, UNIX time in milliseconds. -- `end_time::Union{Int64,Missing}`: when did the run end, UNIX time in milliseconds. -- `artifact_uri::String`: where are artifacts from this run stored. -- `lifecycle_stage::String`: one of `active` or `deleted`. - -# Constructors - -- `MLFlowRunInfo(run_id, experiment_id, status, run_name, start_time, end_time, artifact_uri, lifecycle_stage)` -- `MLFlowRunInfo(info::Dict{String,Any})` +- `key::String`: Key identifying this param. +- `value::String`: Value associated with this param. 
""" -struct MLFlowRunInfo - run_id::String - experiment_id::Integer - status::MLFlowRunStatus - run_name::String - start_time::Union{Int64,Missing} - end_time::Union{Int64,Missing} - artifact_uri::String - lifecycle_stage::String -end -function MLFlowRunInfo(info::Dict{String,Any}) - run_id = get(info, "run_id", missing) - experiment_id = get(info, "experiment_id", missing) - status = get(info, "status", missing) - run_name = get(info, "run_name", missing) - start_time = get(info, "start_time", missing) - end_time = get(info, "end_time", missing) - artifact_uri = get(info, "artifact_uri", "") - lifecycle_stage = get(info, "lifecycle_stage", "") - - experiment_id = ismissing(experiment_id) ? experiment_id : parse(Int64, experiment_id) - status = ismissing(status) ? status : MLFlowRunStatus(status) - - # support for mlflow 1.21.0 - if !ismissing(start_time) && !(typeof(start_time) <: Int) - start_time = parse(Int64, start_time) - end - if !ismissing(end_time) && !(typeof(end_time) <: Int) - end_time = parse(Int64, end_time) - end - MLFlowRunInfo(run_id, experiment_id, status, run_name, start_time, end_time, artifact_uri, lifecycle_stage) +struct Param + key::String + value::String end -Base.show(io::IO, t::MLFlowRunInfo) = show(io, ShowCase(t, new_lines=true)) -get_run_id(runinfo::MLFlowRunInfo) = runinfo.run_id +Base.show(io::IO, t::Param) = show(io, ShowCase(t, new_lines=true)) """ - MLFlowRunDataMetric + RunInfo -Represents a metric. +Metadata of a single run. # Fields -- `key::String`: metric identifier. -- `value::Float64`: metric value. -- `step::Int64`: step. -- `timestamp::Int64`: timestamp in UNIX time in milliseconds. - -# Constructors - -- `MLFlowRunDataMetric(d::Dict{String,Any})` - +- `run_id::String`: Unique identifier for the run. +- `run_name::String`: The name of the run. +- `experiment_id::String`: The experiment ID. +- `status::RunStatus`: Current status of the run. +- `start_time::Int64`: Unix timestamp of when the run started in milliseconds. 
+- `end_time::Int64`: Unix timestamp of when the run ended in milliseconds. +- `artifact_uri::String`: URI of the directory where artifacts should be +uploaded. This can be a local path (starting with “/”), or a distributed file +system (DFS) path, like s3://bucket/directory or dbfs:/my/directory. If not +set, the local ./mlruns directory is chosen. +- `lifecycle_stage::String`: Current life cycle stage of the experiment: +"active" or "deleted". """ -struct MLFlowRunDataMetric - key::String - value::Float64 - step::Int64 - timestamp::Int64 -end -function MLFlowRunDataMetric(d::Dict{String,Any}) - key = d["key"] - value = d["value"] - if typeof(d["step"]) <: Int - step = d["step"] - else - step = parse(Int64, d["step"]) - end - if typeof(d["timestamp"]) <: Int - timestamp = d["timestamp"] - else - timestamp = parse(Int64, d["timestamp"]) - end - MLFlowRunDataMetric(key, value, step, timestamp) +struct RunInfo + run_id::String + run_name::String + experiment_id::String + status::RunStatus + start_time::Int64 + end_time::Int64 + artifact_uri::String + lifecycle_stage::String end -Base.show(io::IO, t::MLFlowRunDataMetric) = show(io, ShowCase(t, new_lines=true)) +Base.show(io::IO, t::RunInfo) = show(io, ShowCase(t, new_lines=true)) """ - MLFlowRunDataParam + RunInputs -Represents a parameter. +Run data (metrics, params, and tags). # Fields -- `key::String`: parameter identifier. -- `value::String`: parameter value. - -# Constructors -- `MLFlowRunDataParam(d::Dict{String,String})` - +- `metrics::Array{Metric}`: Run metrics. +- `params::Array{Param}`: Run parameters. +- `tags::Array{Tag}`: Additional metadata key-value pairs. 
""" -struct MLFlowRunDataParam - key::String - value::String -end -function MLFlowRunDataParam(d::Dict{String,String}) - key = d["key"] - value = d["value"] - MLFlowRunDataParam(key, value) +struct RunData + metrics::Array{Metric} + params::Array{Param} + tags::Array{Tag} end -Base.show(io::IO, t::MLFlowRunDataParam) = show(io, ShowCase(t, new_lines=true)) +Base.show(io::IO, t::RunData) = show(io, ShowCase(t, new_lines=true)) """ - MLFlowRunData + RunInputs -Represents run data. +Run inputs. # Fields -- `metrics::Dict{String,MLFlowRunDataMetric}`: run metrics. -- `params::Dict{String,MLFlowRunDataParam}`: run parameters. -- `tags`: list of run tags. - -# Constructors - -- `MLFlowRunData(data::Dict{String,Any})` - +- `dataset_inputs::Array{DatasetInput}`: Dataset inputs to the Run. """ -struct MLFlowRunData - metrics::Dict{String,MLFlowRunDataMetric} - params::Union{Dict{String,MLFlowRunDataParam},Missing} - tags +struct RunInputs + dataset_inputs::Array{DatasetInput} end -function MLFlowRunData(data::Dict{String,Any}) - metrics = Dict{String,MLFlowRunDataMetric}() - if haskey(data, "metrics") - for metric in data["metrics"] - new_metric = MLFlowRunDataMetric(metric) - metrics[new_metric.key] = new_metric - end - end - params = Dict{String,MLFlowRunDataParam}() - if haskey(data, "params") - for param in data["params"] - new_param = MLFlowRunDataParam(param["key"], param["value"]) - params[new_param.key] = new_param - end - end - tags = haskey(data, "tags") ? data["tags"] : missing - MLFlowRunData(metrics, params, tags) -end -Base.show(io::IO, t::MLFlowRunData) = show(io, ShowCase(t, new_lines=true)) -get_params(rundata::MLFlowRunData) = rundata.params +Base.show(io::IO, t::RunInputs) = show(io, ShowCase(t, new_lines=true)) """ - MLFlowRun - -Represents an MLFlow run. - -# Fields -- `info::MLFlowRunInfo`: Run metadata. -- `data::MLFlowRunData`: Run data. 
- -# Constructors - -- `MLFlowRun(rundata::MLFlowRunData)` -- `MLFlowRun(runinfo::MLFlowRunInfo)` -- `MLFlowRun(info::Dict{String,Any})` -- `MLFlowRun(info::Dict{String,Any}, data::Dict{String,Any})` + Run +A single run. """ -struct MLFlowRun - info::Union{MLFlowRunInfo,Missing} - data::Union{MLFlowRunData,Missing} +struct Run + info::RunInfo + data::RunData + inputs::RunInputs end -MLFlowRun(rundata::MLFlowRunData) = - MLFlowRun(missing, rundata) -MLFlowRun(runinfo::MLFlowRunInfo) = - MLFlowRun(runinfo, missing) -MLFlowRun(info::Dict{String,Any}) = - MLFlowRun(MLFlowRunInfo(info), missing) -MLFlowRun(info::Dict{String,Any}, data::Dict{String,Any}) = - MLFlowRun(MLFlowRunInfo(info), MLFlowRunData(data)) -Base.show(io::IO, t::MLFlowRun) = show(io, ShowCase(t, new_lines=true)) -get_info(run::MLFlowRun) = run.info -get_data(run::MLFlowRun) = run.data -get_run_id(run::MLFlowRun) = get_run_id(run.info) -get_params(run::MLFlowRun) = get_params(run.data) +Base.show(io::IO, t::Run) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/tag.jl b/src/types/tag.jl new file mode 100644 index 0000000..d3b166e --- /dev/null +++ b/src/types/tag.jl @@ -0,0 +1,14 @@ +""" + Tag + +Generic tag type for MLFlow entities. + +# Fields +- `key::String`: The tag key. +- `value::String`: The tag value. 
+""" +struct Tag + key::String + value::String +end +Base.show(io::IO, t::Tag) = show(io, ShowCase(t, new_lines=true)) From 63825695bbc8973dd095774660b5e56aa108baa9 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Mon, 27 May 2024 00:49:47 -0500 Subject: [PATCH 02/31] Reimplementing experiment service: `createexperiment` and `getexperiment` --- src/MLFlowClient.jl | 67 +++++--------------------------- src/services/experiment.jl | 78 ++++++++++++++++++++++++++++++++++++++ src/utils.jl | 65 +++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 58 deletions(-) create mode 100644 src/services/experiment.jl diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 18c7ddb..303468d 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -20,68 +20,19 @@ using JSON using ShowCases using FilePathsBase: AbstractPath -include("types/mlflow.jl") -export - MLFlow - +include("types/tag.jl") +include("types/enums.jl") +include("types/dataset.jl") +include("types/artifact.jl") +include("types/model_version.jl") +include("types/registered_model.jl") include("types/experiment.jl") -export - MLFlowExperiment - include("types/run.jl") -export - MLFlowRunStatus, - MLFlowRunInfo, - MLFlowRunDataMetric, - MLFlowRunDataParam, - MLFlowRunData, - MLFlowRun, - get_info, - get_data, - get_run_id, - get_params - -include("types/artifact.jl") -export - MLFlowArtifactFileInfo, - MLFlowArtifactDirInfo, - get_path, - get_size - -include("api.jl") +include("types/mlflow.jl") include("utils.jl") -export - generatefilterfromparams - generatefilterfromattributes - generatefilterfromentity_type - -include("experiments.jl") -export - createexperiment, - getexperiment, - getorcreateexperiment, - deleteexperiment, - restoreexperiment, - searchexperiments - -include("runs.jl") -export - createrun, - getrun, - updaterun, - deleterun, - searchruns - -include("loggers.jl") -export - logbatch, - logparam, - logmetric, - logartifact, - 
listartifacts, - settag +include("api.jl") -include("deprecated.jl") +include("services/experiment.jl") end diff --git a/src/services/experiment.jl b/src/services/experiment.jl new file mode 100644 index 0000000..608cedf --- /dev/null +++ b/src/services/experiment.jl @@ -0,0 +1,78 @@ +""" + createexperiment(instance::MLFlow; name=missing, artifact_location=missing, + tags=[]) + +Create an experiment with a name. Returns the newly created experiment. +Validates that another experiment with the same name does not already exist and +fails if another experiment with the same name already exists. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `name::String`: Experiment name. This field is required. +- `artifact_location::String`: Location where all artifacts for the experiment +are stored. If not provided, the remote server will select an appropriate +default. +- `tags`: A collection of tags to set on the experiment. + +# Returns +An object of type [`Experiment`](@ref). +""" +function createexperiment(instance::MLFlow; name::String=missing, + artifact_location::String=missing, tags::Array{Dict{Any, Any}}=[]) + if ismissing(name) + name = string(UUIDs.uuid4()) + end + + try + result = mlfpost(instance, "experiments/create"; name=name, + artifact_location=artifact_location, tags=tags) + return getexperiment(instance, result["experiment_id"]) + catch e + if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 400 + error_code = JSON.parse(String(e.response.body))["error_code"] + if error_code == MLFLOW_ERROR_CODES.RESOURCE_ALREADY_EXISTS + error("Experiment with name \"$name\" already exists") + end + end + throw(e) + end +end +createexperiment(instance::MLFlow; name::String=missing, + artifact_location::String=missing, tags::Array{Pair{Any, Any}}=[]) = + createexperiment(instance, name=name, artifact_location=artifact_location, + tags=tags |> transform_pair_array_to_dict_array) +createexperiment(instance::MLFlow; name::String=missing, + 
artifact_location::String=missing, tags::Dict{Any, Any}=[]) = + createexperiment(instance, name=name, artifact_location=artifact_location, + tags=tags |> transform_dict_to_dict_array) +createexperiment(instance::MLFlow; name::String=missing, + artifact_location::String=missing, tags::Array{Tag}=[]) = + createexperiment(instance, name=name, artifact_location=artifact_location, + tags=tags |> transform_tag_array_to_dict_array) + +""" + getexperiment(instance::MLFlow, experiment_id::String) + +Get metadata for an experiment. This method works on deleted experiments. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `experiment_id`: ID of the associated experiment. + +# Returns +An object of type [`Experiment`](@ref). +""" +function getexperiment(instance::MLFlow, experiment_id::String) + try + arguments = (:experiment_id => experiment_id,) + result = mlfget(instance, "experiments/get"; arguments...) + return Experiment(result["experiment"]) + catch e + if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 + return missing + end + throw(e) + end +end +getexperiment(instance::MLFlow, experiment_id::Integer) = + getexperiment(instance, experiment_id) diff --git a/src/utils.jl b/src/utils.jl index bc8e225..02f7783 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -68,3 +68,68 @@ const MLFLOW_ERROR_CODES = (; RESOURCE_ALREADY_EXISTS = "RESOURCE_ALREADY_EXISTS", RESOURCE_DOES_NOT_EXIST = "RESOURCE_DOES_NOT_EXIST", ) + +""" + transform_pair_array_to_dict_array(pair_array::Array{Pair{Any, Any}}) + +Transforms an array of `Pair` into an array of `Dict`. 
+ +```@example +# Having an array of pairs +["foo" => "bar", "missy" => "gala"] + +# Will be transformed into an array of dictionaries +[Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] +``` +""" +function transform_pair_array_to_dict_array(pair_array::Array{Pair{Any, Any}}) + dict_array = Dict{String, String}[] + for pair in pair_array + key = string(pair.first) + value = string(pair.second) + push!(dict_array, Dict(key => value)) + end + return dict_array +end + +""" + transform_dict_to_dict_array(dict::Dict{Any, Any}) + +Transforms a dictionary into an array of `Dict`. + +```@example +# Having a dictionary +Dict("foo" => "bar", "missy" => "gala") + +# Will be transformed into an array of dictionaries +[Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] +``` +""" +function transform_dict_to_dict_array(dict::Dict{Any, Any}) + dict_array = Dict{String, String}[] + for (key, value) in dict + push!(dict_array, Dict(string(key) => string(value))) + end + return dict_array +end + +""" + transform_tag_array_to_dict_array(tag_array::Array{Tag}) + +Transforms an array of `Tag` into an array of `Dict`. 
+ +```@example +# Having an array of tags +[Tag("foo", "bar"), Tag("missy", "gala")] + +# Will be transformed into an array of dictionaries +[Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] +``` +""" +function transform_tag_array_to_dict_array(tag_array::Array{Tag}) + dict_array = Dict{String, String}[] + for tag in tag_array + push!(dict_array, Dict(tag.key => tag.value)) + end + return dict_array +end From dd904fa799747c565cb7900c4fc33a846ff02418 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Sun, 15 Sep 2024 07:42:05 -0500 Subject: [PATCH 03/31] Finishing with experiments endpoints (`searchexperiments` not complete) --- src/MLFlowClient.jl | 51 ++++++--- src/api.jl | 35 +++++- src/services/experiment.jl | 213 +++++++++++++++++++++++++++++++---- src/types/enums.jl | 22 ++-- src/types/experiment.jl | 4 + src/types/tag.jl | 1 + src/utils.jl | 72 ++++++------ test/runtests.jl | 9 +- test/services/experiments.jl | 167 +++++++++++++++++++++++++++ 9 files changed, 477 insertions(+), 97 deletions(-) create mode 100644 test/services/experiments.jl diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 303468d..5ba6b8f 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -1,15 +1,3 @@ -""" - MLFlowClient - -[MLFlowClient](https://github.com/JuliaAI.jl) is a [Julia](https://julialang.org/) package for working with [MLFlow](https://mlflow.org/) using the REST [API v2.0](https://www.mlflow.org/docs/latest/rest-api.html). - -`MLFlowClient` allows you to create and manage `MLFlow` experiments, runs, and log metrics and artifacts. If you are not familiar with `MLFlow` and its concepts, please refer to [MLFlow documentation](https://mlflow.org/docs/latest/index.html). - -# Limitations - -- no authentication support. -- when storing artifacts, the assumption is that MLFlow and this library run on the same server. Artifacts are stored using plain filesystem operations. 
Therefore, `/mlruns` or the specified `artifact_location` must be accessible to both the MLFlow server (read), and this library (write). -""" module MLFlowClient using Dates @@ -21,18 +9,55 @@ using ShowCases using FilePathsBase: AbstractPath include("types/tag.jl") +export Tag + include("types/enums.jl") +export + ViewType, + RunStatus, + ModelVersionStatus + include("types/dataset.jl") +export + Dataset, + DatasetInput + include("types/artifact.jl") +export FileInfo + include("types/model_version.jl") +export ModelVersion + include("types/registered_model.jl") +export + RegisteredModel, + RegisteredModelAlias + include("types/experiment.jl") +export Experiment + include("types/run.jl") +export + Run, + Param, + Metric, + RunData, + RunInfo, + RunInputs + include("types/mlflow.jl") +export MLFlow include("utils.jl") include("api.jl") include("services/experiment.jl") - +export + getexperiment, + createexperiment, + deleteexperiment, + updateexperiment, + restoreexperiment, + searchexperiments, + getexperimentbyname end diff --git a/src/api.jl b/src/api.jl index fb2edf8..3f74dc4 100644 --- a/src/api.jl +++ b/src/api.jl @@ -1,11 +1,38 @@ +""" + uri(mlf::MLFlow, endpoint::String; parameters=missing) + +Retrieves an URI based on `mlf`, `endpoint`, and, optionally, `parameters`. + +# Examples +```@example +MLFlowClient.uri(mlf, "experiments/get", Dict(:experiment_id=>10)) +``` +""" +uri(mlf::MLFlow, endpoint::String; + parameters::Dict{Symbol, <:Any}=Dict{Symbol, IntOrString}()) = + URI("$(mlf.apiroot)/$(mlf.apiversion)/mlflow/$(endpoint)"; + query=parameters) + +""" + headers(mlf::MLFlow,custom_headers::AbstractDict) + +Retrieves HTTP headers based on `mlf` and merges with user-provided `custom_headers` + +# Examples +```@example +headers(mlf,Dict("Content-Type"=>"application/json")) +``` +""" +headers(mlf::MLFlow, custom_headers::AbstractDict) = merge(mlf.headers, custom_headers) + """ mlfget(mlf, endpoint; kwargs...) Performs a HTTP GET to a specified endpoint. 
kwargs are turned into GET params. """ function mlfget(mlf, endpoint; kwargs...) - apiuri = uri(mlf, endpoint, kwargs) - apiheaders = headers(mlf, Dict("Content-Type" => "application/json")) + apiuri = uri(mlf, endpoint; parameters=kwargs |> Dict) + apiheaders = headers(mlf, ("Content-Type" => "application/json") |> Dict) try response = HTTP.get(apiuri, apiheaders) @@ -21,7 +48,7 @@ end Performs a HTTP POST to the specified endpoint. kwargs are converted to JSON and become the POST body. """ function mlfpost(mlf, endpoint; kwargs...) - apiuri = uri(mlf, endpoint) + apiuri = uri(mlf, endpoint;) apiheaders = headers(mlf, Dict("Content-Type" => "application/json")) body = JSON.json(kwargs) @@ -31,4 +58,4 @@ function mlfpost(mlf, endpoint; kwargs...) catch e throw(e) end -end \ No newline at end of file +end diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 608cedf..0e43369 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -1,6 +1,7 @@ """ - createexperiment(instance::MLFlow; name=missing, artifact_location=missing, - tags=[]) + createexperiment(instance::MLFlow; name::String="", + artifact_location::String="", + tags::Union{Dict{<:Any}, Array{<:Any}}=[]) Create an experiment with a name. Returns the newly created experiment. Validates that another experiment with the same name does not already exist and @@ -8,28 +9,31 @@ fails if another experiment with the same name already exists. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `name::String`: Experiment name. This field is required. -- `artifact_location::String`: Location where all artifacts for the experiment +- `name`: Experiment name. This field is required. +- `artifact_location`: Location where all artifacts for the experiment are stored. If not provided, the remote server will select an appropriate default. - `tags`: A collection of tags to set on the experiment. # Returns -An object of type [`Experiment`](@ref). 
+The ID of the newly created experiment. """ -function createexperiment(instance::MLFlow; name::String=missing, - artifact_location::String=missing, tags::Array{Dict{Any, Any}}=[]) - if ismissing(name) - name = string(UUIDs.uuid4()) +function createexperiment(instance::MLFlow; name::String="", + artifact_location::String="", + tags::Union{Dict{<:Any}, Array{<:Any}}=[])::String + if name |> isempty + name = UUIDs.uuid4() |> string end + tags = tags |> parsetags + try result = mlfpost(instance, "experiments/create"; name=name, artifact_location=artifact_location, tags=tags) - return getexperiment(instance, result["experiment_id"]) + return result["experiment_id"] catch e if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 400 - error_code = JSON.parse(String(e.response.body))["error_code"] + error_code = (e.response.body |> String |> JSON.parse)["error_code"] if error_code == MLFLOW_ERROR_CODES.RESOURCE_ALREADY_EXISTS error("Experiment with name \"$name\" already exists") end @@ -37,21 +41,10 @@ function createexperiment(instance::MLFlow; name::String=missing, throw(e) end end -createexperiment(instance::MLFlow; name::String=missing, - artifact_location::String=missing, tags::Array{Pair{Any, Any}}=[]) = - createexperiment(instance, name=name, artifact_location=artifact_location, - tags=tags |> transform_pair_array_to_dict_array) -createexperiment(instance::MLFlow; name::String=missing, - artifact_location::String=missing, tags::Dict{Any, Any}=[]) = - createexperiment(instance, name=name, artifact_location=artifact_location, - tags=tags |> transform_dict_to_dict_array) -createexperiment(instance::MLFlow; name::String=missing, - artifact_location::String=missing, tags::Array{Tag}=[]) = - createexperiment(instance, name=name, artifact_location=artifact_location, - tags=tags |> transform_tag_array_to_dict_array) """ getexperiment(instance::MLFlow, experiment_id::String) + getexperiment(instance::MLFlow, experiment_id::Integer) Get metadata for an experiment. 
This method works on deleted experiments. @@ -66,7 +59,7 @@ function getexperiment(instance::MLFlow, experiment_id::String) try arguments = (:experiment_id => experiment_id,) result = mlfget(instance, "experiments/get"; arguments...) - return Experiment(result["experiment"]) + return result["experiment"] |> Experiment catch e if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 return missing @@ -75,4 +68,174 @@ function getexperiment(instance::MLFlow, experiment_id::String) end end getexperiment(instance::MLFlow, experiment_id::Integer) = - getexperiment(instance, experiment_id) + getexperiment(instance, string(experiment_id)) + +""" + getexperimentbyname(instance::MLFlow, experiment_name::String) + +Get metadata for an experiment. + +This endpoint will return deleted experiments, but prefers the active +experiment if an active and deleted experiment share the same name. If multiple +deleted experiments share the same name, the API will return one of them. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `experiment_name`: Name of the associated experiment. + +# Returns +An object of type [`Experiment`](@ref). +""" +function getexperimentbyname(instance::MLFlow, experiment_name::String) + try + arguments = (:experiment_name => experiment_name,) + result = mlfget(instance, "experiments/get-by-name"; arguments...) + return result["experiment"] |> Experiment + catch e + if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 + return missing + end + throw(e) + end +end + +""" + deleteexperiment(mlf::MLFlow, experiment_id::String) + deleteexperiment(mlf::MLFlow, experiment_id::Integer) + deleteexperiment(mlf::MLFlow, experiment::Experiment) + +Mark an experiment and associated metadata, runs, metrics, params, and tags for +deletion. If the experiment uses FileStore, artifacts associated with +experiment are also deleted. + +# Arguments +- `mlf`: [`MLFlow`](@ref) configuration. +- `experiment_id`: ID of the associated experiment. 
+ +# Returns + +`true` if successful. Otherwise, raises exception. +""" +function deleteexperiment(mlf::MLFlow, experiment_id::String) + endpoint = "experiments/delete" + try + mlfpost(mlf, endpoint; experiment_id=experiment_id) + return true + catch e + if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 + # experiment already deleted + return true + end + throw(e) + end +end +deleteexperiment(mlf::MLFlow, experiment_id::Integer) = + deleteexperiment(mlf, string(experiment_id)) +deleteexperiment(mlf::MLFlow, experiment::Experiment) = + deleteexperiment(mlf, experiment.experiment_id) + +""" + restoreexperiment(mlf::MLFlow, experiment_id::String) + restoreexperiment(mlf::MLFlow, experiment_id::Integer) + restoreexperiment(mlf::MLFlow, experiment::Experiment) + +Restore an experiment marked for deletion. This also restores associated +metadata, runs, metrics, params, and tags. If experiment uses FileStore, +underlying artifacts associated with experiment are also restored. + +# Arguments +- `mlf`: [`MLFlow`](@ref) configuration. +- `experiment_id`: ID of the associated experiment. + +# Returns + +`true` if successful. Otherwise, raises exception. 
+""" +function restoreexperiment(mlf::MLFlow, experiment_id::String) + endpoint = "experiments/restore" + try + mlfpost(mlf, endpoint; experiment_id=experiment_id) + return true + catch e + if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 + error_code = JSON.parse(String(e.response.body))["error_code"] + if error_code == MLFLOW_ERROR_CODES.RESOURCE_DOES_NOT_EXIST + error("Experiment with id \"$experiment_id\" does not exist") + end + end + throw(e) + end +end +restoreexperiment(mlf::MLFlow, experiment_id::Integer) = + deleteexperiment(mlf, string(experiment_id)) +restoreexperiment(mlf::MLFlow, experiment::Experiment) = + deleteexperiment(mlf, experiment.experiment_id) + +""" + updateexperiment(mlf::MLFlow, experiment_id::String, new_name::String) + updateexperiment(mlf::MLFlow, experiment_id::Integer, new_name::String) + updateexperiment(mlf::MLFlow, experiment::Experiment, new_name::String) + +Update experiment metadata. + +# Arguments +- `mlf`: [`MLFlow`](@ref) configuration. +- `experiment_id`: ID of the associated experiment. +- `new_name`: If provided, the experiment’s name is changed to the new name. +The new name must be unique. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function updateexperiment(mlf::MLFlow, experiment_id::String, new_name::String) + endpoint = "experiments/update" + try + mlfpost(mlf, endpoint; experiment_id=experiment_id, new_name=new_name) + return true + catch e + throw(e) + end +end +updateexperiment(mlf::MLFlow, experiment_id::Integer, new_name::String) = + updateexperiment(mlf, string(experiment_id), new_name) +updateexperiment(mlf::MLFlow, experiment::Experiment, new_name::String) = + updateexperiment(mlf, experiment.experiment_id, new_name::String) + +""" + searchexperiments(mlf::MLFlow; max_results::Integer=20000, + page_token::String="", filter::String="", order_by::Array{String}=[], + view_type::ViewType=ACTIVE_ONLY) + +# Arguments +- `mlf`: [`MLFlow`](@ref) configuration. 
+- `max_results`: Maximum number of experiments desired. +- `page_token`: Token indicating the page of experiments to fetch. +- `filter`: A filter expression over experiment attributes and tags that allows +returning a subset of experiments. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments). +- `order_by`: List of columns for ordering search results, which can include +experiment name and id with an optional “DESC” or “ASC” annotation, where “ASC” +is the default. +- `view_type`: Qualifier for type of experiments to be returned. If +unspecified, return only active experiments. + +# Returns +- vector of [`MLFlowExperiment`](@ref) experiments that were found in the MLFlow instance +""" +function searchexperiments(mlf::MLFlow; max_results::Integer=20000, + page_token::String="", filter::String="", order_by::Array{String}=String[], + view_type::ViewType=ACTIVE_ONLY) + endpoint = "experiments/search" + parameters = (; max_results, page_token, filter, + :view_type => view_type |> Integer) + + if order_by |> !isempty + parameters = (; order_by, parameters...) + end + + try + result = mlfget(mlf, endpoint; parameters...) + return result["experiments"] |> (x -> [Experiment(y) for y in x]) + catch e + throw(e) + end +end diff --git a/src/types/enums.jl b/src/types/enums.jl index 902219c..6009fcd 100644 --- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -8,9 +8,9 @@ server performs background tasks. - `READY`: Model version is ready for use. """ @enum ModelVersionStatus begin - PENDING_REGISTRATION - FAILED_REGISTRATION - READY + PENDING_REGISTRATION=1 + FAILED_REGISTRATION=2 + READY=3 end """ @@ -26,11 +26,11 @@ Status of a run. - `KILLED`: Run killed by user. """ @enum RunStatus begin - RUNNING - SCHEDULED - FINISHED - FAILED - KILLED + RUNNING=1 + SCHEDULED=2 + FINISHED=3 + FAILED=4 + KILLED=5 end """ @@ -44,7 +44,7 @@ View type for ListExperiments query. - `ALL`: Get all experiments. 
""" @enum ViewType begin - ACTIVE_ONLY - DELETED_ONLY - ALL + ACTIVE_ONLY=1 + DELETED_ONLY=2 + ALL=3 end diff --git a/src/types/experiment.jl b/src/types/experiment.jl index 093c11c..b59dbd4 100644 --- a/src/types/experiment.jl +++ b/src/types/experiment.jl @@ -21,4 +21,8 @@ struct Experiment creation_time::Int64 tags::Array{Tag} end +Experiment(data::Dict{String, Any}) = Experiment(data["experiment_id"], + data["name"], data["artifact_location"], data["lifecycle_stage"], + data["last_update_time"], data["creation_time"], + [Tag(tag) for tag in get(data, "tags", [])]) Base.show(io::IO, t::Experiment) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/tag.jl b/src/types/tag.jl index d3b166e..6b56a8b 100644 --- a/src/types/tag.jl +++ b/src/types/tag.jl @@ -11,4 +11,5 @@ struct Tag key::String value::String end +Tag(data::Dict{String, Any}) = Tag(data["key"], data["value"]) Base.show(io::IO, t::Tag) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/utils.jl b/src/utils.jl index 02f7783..85e8d61 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,30 +1,4 @@ -""" - uri(mlf::MLFlow, endpoint="", query=missing) - -Retrieves an URI based on `mlf`, `endpoint`, and, optionally, `query`. 
- -# Examples -```@example -MLFlowClient.uri(mlf, "experiments/get", Dict(:experiment_id=>10)) -``` -""" -function uri(mlf::MLFlow, endpoint="", query=missing) - u = URI("$(mlf.apiroot)/$(mlf.apiversion)/mlflow/$(endpoint)") - !ismissing(query) && return URI(u; query=query) - u -end - -""" - headers(mlf::MLFlow,custom_headers::AbstractDict) - -Retrieves HTTP headers based on `mlf` and merges with user-provided `custom_headers` - -# Examples -```@example -headers(mlf,Dict("Content-Type"=>"application/json")) -``` -""" -headers(mlf::MLFlow, custom_headers::AbstractDict) = merge(mlf.headers, custom_headers) +IntOrString = Union{Int, String} """ generatefilterfromentity_type(filter_params::AbstractDict{K,V}, entity_type::String) where {K,V} @@ -70,9 +44,10 @@ const MLFLOW_ERROR_CODES = (; ) """ - transform_pair_array_to_dict_array(pair_array::Array{Pair{Any, Any}}) + pairtags_to_dictarray(pair_array::Array{Pair{Any, Any}}) -Transforms an array of `Pair` into an array of `Dict`. +Transforms an array of `Pair` tags into an array of MLFlow compatible `Dict` +format tags. ```@example # Having an array of pairs @@ -82,18 +57,19 @@ Transforms an array of `Pair` into an array of `Dict`. [Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] ``` """ -function transform_pair_array_to_dict_array(pair_array::Array{Pair{Any, Any}}) - dict_array = Dict{String, String}[] +function pairtags_to_dictarray(pair_array::Array{<:Pair})::Array{<:Dict} + dict_array = Dict[] for pair in pair_array key = string(pair.first) value = string(pair.second) - push!(dict_array, Dict(key => value)) + push!(dict_array, Dict("key" => key, "value" => value)) end + return dict_array end """ - transform_dict_to_dict_array(dict::Dict{Any, Any}) + tagsdict_to_dictarray(dict::Dict{Any, Any}) Transforms a dictionary into an array of `Dict`. 
@@ -105,16 +81,18 @@ Dict("foo" => "bar", "missy" => "gala") [Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] ``` """ -function transform_dict_to_dict_array(dict::Dict{Any, Any}) - dict_array = Dict{String, String}[] +function tagsdict_to_dictarray(dict::Dict{<:Any})::Array{<:Dict} + dict_array = Dict[] for (key, value) in dict - push!(dict_array, Dict(string(key) => string(value))) + push!(dict_array, Dict("key" => key |> string, + "value" => value |> string)) end + return dict_array end """ - transform_tag_array_to_dict_array(tag_array::Array{Tag}) + tagarray_to_dictarray(tag_array::Array{Tag}) Transforms an array of `Tag` into an array of `Dict`. @@ -126,10 +104,24 @@ Transforms an array of `Tag` into an array of `Dict`. [Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] ``` """ -function transform_tag_array_to_dict_array(tag_array::Array{Tag}) - dict_array = Dict{String, String}[] +function tagarray_to_dictarray(tag_array::Array{Tag})::Array{<:Dict} + dict_array = Dict[] for tag in tag_array - push!(dict_array, Dict(tag.key => tag.value)) + push!(dict_array, Dict("key" => tag.key , "value" => tag.value)) end + return dict_array end + +function parsetags(tags::Union{Dict{<:Any}, Array{<:Any}})::Array{<:Dict} + parsed_tags = Dict[] + if tags isa Array{Tag} + parsed_tags = tags |> tagarray_to_dictarray + elseif tags isa Array{<:Pair} + parsed_tags = tags |> pairtags_to_dictarray + elseif tags isa Dict{<:Any} + parsed_tags = tags |> tagsdict_to_dictarray + end + + return parsed_tags +end diff --git a/test/runtests.jl b/test/runtests.jl index 3f35682..5515ffd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,8 @@ end include("base.jl") -include("test_functional.jl") -include("test_experiments.jl") -include("test_runs.jl") -include("test_loggers.jl") +include("services/experiments.jl") +# include("test_functional.jl") +# include("test_experiments.jl") +# include("test_runs.jl") +# 
include("test_loggers.jl") diff --git a/test/services/experiments.jl b/test/services/experiments.jl new file mode 100644 index 0000000..10bf98d --- /dev/null +++ b/test/services/experiments.jl @@ -0,0 +1,167 @@ +@testset verbose = true "create experiment" begin + @ensuremlf + + @testset "base" begin + experiment_id = createexperiment(mlf) + @test isa(experiment_id, String) + deleteexperiment(mlf, experiment_id) + end + + @testset "name exists" begin + experiment_id = createexperiment(mlf) + @test_throws ErrorException createexperiment(mlf; name=exp.name) + deleteexperiment(mlf, experiment_id) + end + + @testset "with tags as array of tags" begin + experiment_id = createexperiment(mlf; + tags=[Tag("test_key", "test_value")]) + deleteexperiment(mlf, experiment_id) + end + + @testset "with tags as array of pairs" begin + experiment_id = createexperiment(mlf; + tags=["test_key" => "test_value"]) + deleteexperiment(mlf, experiment_id) + end + + @testset "with tags as array of dicts" begin + experiment_id = createexperiment(mlf; + tags=[Dict("key" => "test_key", "value" => "test_value")]) + deleteexperiment(mlf, experiment_id) + end + + @testset "with tags as dict" begin + experiment_id = createexperiment(mlf; + tags=Dict("test_key" => "test_value")) + deleteexperiment(mlf, experiment_id) + end +end + +@testset verbose = true "get experiment" begin + @ensuremlf + experiment_name = "test_name" + artifact_location="test_location" + tags = [Tag("test_key", "test_value")] + experiment_id = createexperiment(mlf; name=experiment_name, + artifact_location=artifact_location, tags=tags) + + @testset "using string id" begin + experiment = getexperiment(mlf, experiment_id) + @test isa(experiment, Experiment) + @test experiment.experiment_id == experiment_id + @test experiment.name == experiment_name + @test occursin(artifact_location, experiment.artifact_location) + @test (experiment.tags |> first).key == (tags |> first).key + @test (experiment.tags |> first).value == (tags |> 
first).value + end + + @testset "using integer id" begin + experiment = getexperiment(mlf, parse(Int, experiment_id)) + @test isa(experiment, Experiment) + end + + @testset "using name" begin + experiment = getexperimentbyname(mlf, experiment_name) + @test isa(experiment, Experiment) + end + + @testset "not found" begin + @test isa(getexperiment(mlf, 123), Missing) + end + + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "delete experiment" begin + @ensuremlf + experiment_id = createexperiment(mlf) + + @testset "using string id" begin + @test deleteexperiment(mlf, experiment_id) + restoreexperiment(mlf, experiment_id) + end + + @testset "using integer id" begin + @test deleteexperiment(mlf, parse(Int, experiment_id)) + restoreexperiment(mlf, experiment_id) + end + + @testset "using Experiment" begin + experiment = getexperiment(mlf, experiment_id) + @test deleteexperiment(mlf, experiment) + restoreexperiment(mlf, experiment_id) + end + + @testset "delete already deleted" begin + deleteexperiment(mlf, experiment_id) + @test deleteexperiment(mlf, experiment_id) + end +end + +@testset verbose = true "restore experiment" begin + @ensuremlf + experiment_id = createexperiment(mlf) + + @testset "using string id" begin + deleteexperiment(mlf, experiment_id) + @test restoreexperiment(mlf, experiment_id) + end + + @testset "using integer id" begin + deleteexperiment(mlf, experiment_id) + @test restoreexperiment(mlf, parse(Int, experiment_id)) + end + + @testset "using Experiment" begin + experiment = getexperiment(mlf, experiment_id) + deleteexperiment(mlf, experiment_id) + @test restoreexperiment(mlf, experiment) + end +end + +@testset verbose = true "update experiment" begin + @ensuremlf + experiment_name = "test_name" + experiment_id = createexperiment(mlf; name=experiment_name) + + @testset "update name with string id" begin + new_name = "new_name_str" + updateexperiment(mlf, experiment_id, new_name) + experiment = getexperiment(mlf, experiment_id) 
+ @test experiment.name == new_name + end + + @testset "update name with integer id" begin + new_name = "new_name_int" + updateexperiment(mlf, parse(Int, experiment_id), new_name) + experiment = getexperiment(mlf, experiment_id) + @test experiment.name == new_name + end + + @testset "update name with Experiment" begin + new_name = "new_name_exp" + experiment = getexperiment(mlf, experiment_id) + updateexperiment(mlf, experiment, new_name) + experiment = getexperiment(mlf, experiment_id) + @test experiment.name == new_name + end + + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "search experiments" begin + @ensuremlf + + experiment_ids = [ + createexperiment(mlf; name="missy"), + createexperiment(mlf; name="gala"), + createexperiment(mlf; name="bizcochito")] + + @testset "default search" begin + experiments = searchexperiments(mlf) + @test length(experiments) == 4 # four because of the default experiment + end + + experiment_ids .|> (id -> deleteexperiment(mlf, id)) +end From 6a96cf1eb4156fa3e568d84f006c973be9610d2a Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Sun, 15 Sep 2024 16:38:55 -0500 Subject: [PATCH 04/31] Some fixes on experiments. 
Adding the run service and misc additions --- src/MLFlowClient.jl | 5 + src/deprecated.jl | 12 -- src/services/experiment.jl | 89 +++++++-------- src/services/run.jl | 33 ++++++ src/types/dataset.jl | 9 +- src/types/enums.jl | 1 + src/types/run.jl | 15 ++- src/utils.jl | 38 ------- test/runtests.jl | 4 +- .../{experiments.jl => experiment.jl} | 55 +++++---- test/services/run.jl | 14 +++ test/test_experiments.jl | 104 ------------------ 12 files changed, 154 insertions(+), 225 deletions(-) delete mode 100644 src/deprecated.jl create mode 100644 src/services/run.jl rename test/services/{experiments.jl => experiment.jl} (72%) create mode 100644 test/services/run.jl delete mode 100644 test/test_experiments.jl diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 5ba6b8f..f4be2c9 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -60,4 +60,9 @@ export restoreexperiment, searchexperiments, getexperimentbyname + +include("services/run.jl") +export + createrun + end diff --git a/src/deprecated.jl b/src/deprecated.jl deleted file mode 100644 index 85b9d12..0000000 --- a/src/deprecated.jl +++ /dev/null @@ -1,12 +0,0 @@ -""" - listexperiments(mlf::MLFlow) - -Returns a list of MLFlow experiments. - -Deprecated (last MLFlow version: 1.30.1) in favor of [`searchexperiments`](@ref). -""" - -function listexperiments(mlf::MLFlow) - endpoint = "experiments/list" - mlfget(mlf, endpoint) -end diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 0e43369..75a5ff4 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -1,5 +1,5 @@ """ - createexperiment(instance::MLFlow; name::String="", + createexperiment(instance::MLFlow, name::String; artifact_location::String="", tags::Union{Dict{<:Any}, Array{<:Any}}=[]) @@ -18,13 +18,9 @@ default. # Returns The ID of the newly created experiment. 
""" -function createexperiment(instance::MLFlow; name::String="", - artifact_location::String="", +function createexperiment(instance::MLFlow, name::String; + artifact_location::Union{String, Missing}=missing, tags::Union{Dict{<:Any}, Array{<:Any}}=[])::String - if name |> isempty - name = UUIDs.uuid4() |> string - end - tags = tags |> parsetags try @@ -100,26 +96,26 @@ function getexperimentbyname(instance::MLFlow, experiment_name::String) end """ - deleteexperiment(mlf::MLFlow, experiment_id::String) - deleteexperiment(mlf::MLFlow, experiment_id::Integer) - deleteexperiment(mlf::MLFlow, experiment::Experiment) + deleteexperiment(instance::MLFlow, experiment_id::String) + deleteexperiment(instance::MLFlow, experiment_id::Integer) + deleteexperiment(instance::MLFlow, experiment::Experiment) Mark an experiment and associated metadata, runs, metrics, params, and tags for deletion. If the experiment uses FileStore, artifacts associated with experiment are also deleted. # Arguments -- `mlf`: [`MLFlow`](@ref) configuration. +- `instance`: [`MLFlow`](@ref) configuration. - `experiment_id`: ID of the associated experiment. # Returns `true` if successful. Otherwise, raises exception. 
""" -function deleteexperiment(mlf::MLFlow, experiment_id::String) +function deleteexperiment(instance::MLFlow, experiment_id::String) endpoint = "experiments/delete" try - mlfpost(mlf, endpoint; experiment_id=experiment_id) + mlfpost(instance, endpoint; experiment_id=experiment_id) return true catch e if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 @@ -129,32 +125,32 @@ function deleteexperiment(mlf::MLFlow, experiment_id::String) throw(e) end end -deleteexperiment(mlf::MLFlow, experiment_id::Integer) = - deleteexperiment(mlf, string(experiment_id)) -deleteexperiment(mlf::MLFlow, experiment::Experiment) = - deleteexperiment(mlf, experiment.experiment_id) +deleteexperiment(instance::MLFlow, experiment_id::Integer) = + deleteexperiment(instance, string(experiment_id)) +deleteexperiment(instance::MLFlow, experiment::Experiment) = + deleteexperiment(instance, experiment.experiment_id) """ - restoreexperiment(mlf::MLFlow, experiment_id::String) - restoreexperiment(mlf::MLFlow, experiment_id::Integer) - restoreexperiment(mlf::MLFlow, experiment::Experiment) + restoreexperiment(instance::MLFlow, experiment_id::String) + restoreexperiment(instance::MLFlow, experiment_id::Integer) + restoreexperiment(instance::MLFlow, experiment::Experiment) Restore an experiment marked for deletion. This also restores associated metadata, runs, metrics, params, and tags. If experiment uses FileStore, underlying artifacts associated with experiment are also restored. # Arguments -- `mlf`: [`MLFlow`](@ref) configuration. +- `instance`: [`MLFlow`](@ref) configuration. - `experiment_id`: ID of the associated experiment. # Returns `true` if successful. Otherwise, raises exception. 
""" -function restoreexperiment(mlf::MLFlow, experiment_id::String) +function restoreexperiment(instance::MLFlow, experiment_id::String) endpoint = "experiments/restore" try - mlfpost(mlf, endpoint; experiment_id=experiment_id) + mlfpost(instance, endpoint; experiment_id=experiment_id) return true catch e if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 @@ -166,20 +162,22 @@ function restoreexperiment(mlf::MLFlow, experiment_id::String) throw(e) end end -restoreexperiment(mlf::MLFlow, experiment_id::Integer) = - deleteexperiment(mlf, string(experiment_id)) -restoreexperiment(mlf::MLFlow, experiment::Experiment) = - deleteexperiment(mlf, experiment.experiment_id) +restoreexperiment(instance::MLFlow, experiment_id::Integer) = + deleteexperiment(instance, string(experiment_id)) +restoreexperiment(instance::MLFlow, experiment::Experiment) = + deleteexperiment(instance, experiment.experiment_id) """ - updateexperiment(mlf::MLFlow, experiment_id::String, new_name::String) - updateexperiment(mlf::MLFlow, experiment_id::Integer, new_name::String) - updateexperiment(mlf::MLFlow, experiment::Experiment, new_name::String) + updateexperiment(instance::MLFlow, experiment_id::String, new_name::String) + updateexperiment(instance::MLFlow, experiment_id::Integer, + new_name::String) + updateexperiment(instance::MLFlow, experiment::Experiment, + new_name::String) Update experiment metadata. # Arguments -- `mlf`: [`MLFlow`](@ref) configuration. +- `instance`: [`MLFlow`](@ref) configuration. - `experiment_id`: ID of the associated experiment. - `new_name`: If provided, the experiment’s name is changed to the new name. The new name must be unique. @@ -187,27 +185,28 @@ The new name must be unique. # Returns `true` if successful. Otherwise, raises exception. 
""" -function updateexperiment(mlf::MLFlow, experiment_id::String, new_name::String) +function updateexperiment(instance::MLFlow, experiment_id::String, + new_name::String) endpoint = "experiments/update" try - mlfpost(mlf, endpoint; experiment_id=experiment_id, new_name=new_name) + mlfpost(instance, endpoint; experiment_id=experiment_id, new_name=new_name) return true catch e throw(e) end end -updateexperiment(mlf::MLFlow, experiment_id::Integer, new_name::String) = - updateexperiment(mlf, string(experiment_id), new_name) -updateexperiment(mlf::MLFlow, experiment::Experiment, new_name::String) = - updateexperiment(mlf, experiment.experiment_id, new_name::String) +updateexperiment(instance::MLFlow, experiment_id::Integer, new_name::String) = + updateexperiment(instance, string(experiment_id), new_name) +updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String) = + updateexperiment(instance, experiment.experiment_id, new_name::String) """ - searchexperiments(mlf::MLFlow; max_results::Integer=20000, + searchexperiments(instance::MLFlow; max_results::Integer=20000, page_token::String="", filter::String="", order_by::Array{String}=[], view_type::ViewType=ACTIVE_ONLY) # Arguments -- `mlf`: [`MLFlow`](@ref) configuration. +- `instance`: [`MLFlow`](@ref) configuration. - `max_results`: Maximum number of experiments desired. - `page_token`: Token indicating the page of experiments to fetch. - `filter`: A filter expression over experiment attributes and tags that allows @@ -221,9 +220,9 @@ unspecified, return only active experiments. 
# Returns - vector of [`MLFlowExperiment`](@ref) experiments that were found in the MLFlow instance """ -function searchexperiments(mlf::MLFlow; max_results::Integer=20000, +function searchexperiments(instance::MLFlow; max_results::Integer=20000, page_token::String="", filter::String="", order_by::Array{String}=String[], - view_type::ViewType=ACTIVE_ONLY) + view_type::ViewType=ACTIVE_ONLY)::Tuple{Array{Experiment}, Union{String, Nothing}} endpoint = "experiments/search" parameters = (; max_results, page_token, filter, :view_type => view_type |> Integer) @@ -233,8 +232,12 @@ function searchexperiments(mlf::MLFlow; max_results::Integer=20000, end try - result = mlfget(mlf, endpoint; parameters...) - return result["experiments"] |> (x -> [Experiment(y) for y in x]) + result = mlfget(instance, endpoint; parameters...) + + experiments = result["experiments"] |> (x -> [Experiment(y) for y in x]) + next_page_token = get(result, "next_page_token", nothing) + + return experiments, next_page_token catch e throw(e) end diff --git a/src/services/run.jl b/src/services/run.jl new file mode 100644 index 0000000..65cb9b4 --- /dev/null +++ b/src/services/run.jl @@ -0,0 +1,33 @@ +""" + createrun(instance::MLFlow, experiment_id::String; + run_name::Union{String, Missing}=missing, + start_time::Union{Integer, Missing}=missing, + tags::Union{Dict{<:Any}, Array{<:Any}}=[]) + +Create a new run within an experiment. A run is usually a single execution of a +machine learning or data ETL pipeline. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `experiment_id`: ID of the associated experiment. +- `run_name`: Name of the run. +- `start_time`: Unix timestamp in milliseconds of when the run started. +- `tags`: Additional metadata for run. + +# Returns +An instance of type [`Run`](@ref). 
+""" +function createrun(instance::MLFlow, experiment_id::String; + run_name::Union{String, Missing}=missing, + start_time::Union{Integer, Missing}=missing, + tags::Union{Dict{<:Any}, Array{<:Any}}=[]) + tags = tags |> parsetags + + try + result = mlfpost(instance, "runs/create"; experiment_id=experiment_id, + run_name=run_name, start_time=start_time, tags=tags) + return result["run"] |> Run + catch e + throw(e) + end +end diff --git a/src/types/dataset.jl b/src/types/dataset.jl index 7bb028c..37380ed 100644 --- a/src/types/dataset.jl +++ b/src/types/dataset.jl @@ -24,8 +24,9 @@ struct Dataset schema::Union{String, Nothing} profile::Union{String, Nothing} end -Dataset(name, digest, source_type, source; schema=nothing, profile=nothing) = - Dataset(name, digest, source_type, source, schema, profile) +Dataset(data::Dict{String, Any}) = Dataset( + data["name"], data["digest"], data["source_type"], data["source"], + get(data, "schema", nothing), get(data, "profile", nothing)) Base.show(io::IO, t::Dataset) = show(io, ShowCase(t, new_lines=true)) """ @@ -45,5 +46,7 @@ struct DatasetInput tags::Array{Tag} dataset::Dataset end -DatasetInput(dataset; tags=[]) = DatasetInput(tags, dataset) +DatasetInput(data::Dict{String, Any}) = DatasetInput( + [Tag(tag) for tag in get(data, "tags", [])], + Dataset(data["dataset"])) Base.show(io::IO, t::DatasetInput) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/enums.jl b/src/types/enums.jl index 6009fcd..fc2cbd5 100644 --- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -32,6 +32,7 @@ Status of a run. 
FAILED=4 KILLED=5 end +RunStatus(status::String) = Dict(value => key for (key, value) in RunStatus |> Base.Enums.namemap)[status |> Symbol] |> RunStatus """ ViewType diff --git a/src/types/run.jl b/src/types/run.jl index a18331c..49aa4fe 100644 --- a/src/types/run.jl +++ b/src/types/run.jl @@ -57,10 +57,14 @@ struct RunInfo experiment_id::String status::RunStatus start_time::Int64 - end_time::Int64 + end_time::Union{Int64, Nothing} artifact_uri::String lifecycle_stage::String end +RunInfo(data::Dict{String, Any}) = RunInfo(data["run_id"], data["run_name"], + data["experiment_id"], RunStatus(data["status"]), data["start_time"], + get(data, "end_time", nothing), data["artifact_uri"], + data["lifecycle_stage"]) Base.show(io::IO, t::RunInfo) = show(io, ShowCase(t, new_lines=true)) """ @@ -78,6 +82,10 @@ struct RunData params::Array{Param} tags::Array{Tag} end +RunData(data::Dict{String, Any}) = RunData( + [Metric(metric) for metric in get(data, "metrics", [])], + [Param(param) for param in get(data, "params", [])], + [Tag(tag) for tag in get(data, "tags", [])]) Base.show(io::IO, t::RunData) = show(io, ShowCase(t, new_lines=true)) """ @@ -91,6 +99,9 @@ Run inputs. 
struct RunInputs dataset_inputs::Array{DatasetInput} end +RunInputs(data::Dict{String, Any}) = RunInputs( + [DatasetInput(dataset_input) for dataset_input in + get(data, "dataset_inputs", [])]) Base.show(io::IO, t::RunInputs) = show(io, ShowCase(t, new_lines=true)) """ @@ -103,4 +114,6 @@ struct Run data::RunData inputs::RunInputs end +Run(data::Dict{String, Any}) = Run(RunInfo(data["info"]), + RunData(data["data"]), RunInputs(data["inputs"])) Base.show(io::IO, t::Run) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/utils.jl b/src/utils.jl index 85e8d61..25cdacc 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,43 +1,5 @@ IntOrString = Union{Int, String} -""" - generatefilterfromentity_type(filter_params::AbstractDict{K,V}, entity_type::String) where {K,V} - -Generates a `filter` string from `filter_params` dictionary and `entity_type`. - -# Arguments -- `filter_params`: dictionary to use for filter generation. -- `entity_type`: entity type to use for filter generation. - -# Returns -A string that can be passed as `filter` to [`searchruns`](@ref). - -# Examples - -```@example -generatefilterfromentity_type(Dict("paramkey1" => "paramvalue1", "paramkey2" => "paramvalue2"), "param") -``` -""" -function generatefilterfromentity_type(filter_params::AbstractDict{K,V}, entity_type::String) where {K,V} - length(filter_params) > 0 || return "" - # NOTE: may have issues with escaping. - filters = ["$(entity_type).\"$(k)\" = \"$(v)\"" for (k, v) ∈ filter_params] - join(filters, " and ") -end - -""" - generatefilterfromparams(filter_params::AbstractDict{K,V}) where {K,V} - -Generates a `filter` string from `filter_params` dictionary and `param` entity type. -""" -generatefilterfromparams(filter_params::AbstractDict{K,V}) where {K,V} = generatefilterfromentity_type(filter_params, "param") -""" - generatefilterfrommattributes(filter_attributes::AbstractDict{K,V}) where {K,V} - -Generates a `filter` string from `filter_attributes` dictionary and `attribute` entity type. 
-""" -generatefilterfromattributes(filter_attributes::AbstractDict{K,V}) where {K,V} = generatefilterfromentity_type(filter_attributes, "attribute") - const MLFLOW_ERROR_CODES = (; RESOURCE_ALREADY_EXISTS = "RESOURCE_ALREADY_EXISTS", RESOURCE_DOES_NOT_EXIST = "RESOURCE_DOES_NOT_EXIST", diff --git a/test/runtests.jl b/test/runtests.jl index 5515ffd..967e74b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,8 +4,8 @@ end include("base.jl") -include("services/experiments.jl") +include("services/experiment.jl") +include("services/run.jl") # include("test_functional.jl") -# include("test_experiments.jl") # include("test_runs.jl") # include("test_loggers.jl") diff --git a/test/services/experiments.jl b/test/services/experiment.jl similarity index 72% rename from test/services/experiments.jl rename to test/services/experiment.jl index 10bf98d..fc755fa 100644 --- a/test/services/experiments.jl +++ b/test/services/experiment.jl @@ -1,38 +1,39 @@ @testset verbose = true "create experiment" begin @ensuremlf + experiment_name = UUIDs.uuid4() |> string + @testset "base" begin - experiment_id = createexperiment(mlf) + experiment_id = createexperiment(mlf, experiment_name) @test isa(experiment_id, String) - deleteexperiment(mlf, experiment_id) end @testset "name exists" begin - experiment_id = createexperiment(mlf) - @test_throws ErrorException createexperiment(mlf; name=exp.name) - deleteexperiment(mlf, experiment_id) + experiment = getexperimentbyname(mlf, experiment_name) + @test_throws ErrorException createexperiment(mlf, experiment.name) + deleteexperiment(mlf, experiment.experiment_id) end @testset "with tags as array of tags" begin - experiment_id = createexperiment(mlf; + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; tags=[Tag("test_key", "test_value")]) deleteexperiment(mlf, experiment_id) end @testset "with tags as array of pairs" begin - experiment_id = createexperiment(mlf; + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; 
tags=["test_key" => "test_value"]) deleteexperiment(mlf, experiment_id) end @testset "with tags as array of dicts" begin - experiment_id = createexperiment(mlf; + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; tags=[Dict("key" => "test_key", "value" => "test_value")]) deleteexperiment(mlf, experiment_id) end @testset "with tags as dict" begin - experiment_id = createexperiment(mlf; + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; tags=Dict("test_key" => "test_value")) deleteexperiment(mlf, experiment_id) end @@ -40,10 +41,10 @@ end @testset verbose = true "get experiment" begin @ensuremlf - experiment_name = "test_name" + experiment_name = UUIDs.uuid4() |> string artifact_location="test_location" tags = [Tag("test_key", "test_value")] - experiment_id = createexperiment(mlf; name=experiment_name, + experiment_id = createexperiment(mlf, experiment_name; artifact_location=artifact_location, tags=tags) @testset "using string id" begin @@ -75,7 +76,7 @@ end @testset verbose = true "delete experiment" begin @ensuremlf - experiment_id = createexperiment(mlf) + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) @testset "using string id" begin @test deleteexperiment(mlf, experiment_id) @@ -101,7 +102,7 @@ end @testset verbose = true "restore experiment" begin @ensuremlf - experiment_id = createexperiment(mlf) + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) @testset "using string id" begin deleteexperiment(mlf, experiment_id) @@ -122,25 +123,25 @@ end @testset verbose = true "update experiment" begin @ensuremlf - experiment_name = "test_name" - experiment_id = createexperiment(mlf; name=experiment_name) + experiment_name = UUIDs.uuid4() |> string + experiment_id = createexperiment(mlf, experiment_name) @testset "update name with string id" begin - new_name = "new_name_str" + new_name = UUIDs.uuid4() |> string updateexperiment(mlf, experiment_id, new_name) experiment = getexperiment(mlf, experiment_id) @test 
experiment.name == new_name end @testset "update name with integer id" begin - new_name = "new_name_int" + new_name = UUIDs.uuid4() |> string updateexperiment(mlf, parse(Int, experiment_id), new_name) experiment = getexperiment(mlf, experiment_id) @test experiment.name == new_name end @testset "update name with Experiment" begin - new_name = "new_name_exp" + new_name = UUIDs.uuid4() |> string experiment = getexperiment(mlf, experiment_id) updateexperiment(mlf, experiment, new_name) experiment = getexperiment(mlf, experiment_id) @@ -154,13 +155,23 @@ end @ensuremlf experiment_ids = [ - createexperiment(mlf; name="missy"), - createexperiment(mlf; name="gala"), - createexperiment(mlf; name="bizcochito")] + createexperiment(mlf, UUIDs.uuid4() |> string), + createexperiment(mlf, UUIDs.uuid4() |> string), + createexperiment(mlf, UUIDs.uuid4() |> string)] @testset "default search" begin - experiments = searchexperiments(mlf) + experiments, next_page_token = searchexperiments(mlf) + @test length(experiments) == 4 # four because of the default experiment + @test next_page_token |> isnothing + end + + @testset "with pagination" begin + experiments, next_page_token = searchexperiments(mlf; max_results=1) + + @test length(experiments) == 1 + @test next_page_token |> !isnothing + @test next_page_token isa String end experiment_ids .|> (id -> deleteexperiment(mlf, id)) diff --git a/test/services/run.jl b/test/services/run.jl new file mode 100644 index 0000000..85285c8 --- /dev/null +++ b/test/services/run.jl @@ -0,0 +1,14 @@ +@testset verbose = true "create run" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + + @testset "base" begin + run = createrun(mlf, experiment_id) + + @test run isa Run + @test run.info.experiment_id == experiment_id + end + + deleteexperiment(mlf, experiment_id) +end diff --git a/test/test_experiments.jl b/test/test_experiments.jl deleted file mode 100644 index cc697f4..0000000 --- a/test/test_experiments.jl +++ 
/dev/null @@ -1,104 +0,0 @@ -@testset "createexperiment" begin - @ensuremlf - exp = createexperiment(mlf) - - @test isa(exp, MLFlowExperiment) - @test_throws ErrorException createexperiment(mlf; name=exp.name) - - deleteexperiment(mlf, exp) -end - -@testset verbose = true "getexperiment" begin - @ensuremlf - exp = createexperiment(mlf) - experiment = getexperiment(mlf, exp.experiment_id) - - @testset "getexperiment_by_experiment_id" begin - @test isa(experiment, MLFlowExperiment) - @test experiment.experiment_id == exp.experiment_id - end - - @testset "getexperiment_by_experiment_name" begin - experiment_by_name = getexperiment(mlf, exp.name) - @test isa(experiment_by_name, MLFlowExperiment) - @test experiment_by_name.experiment_id == exp.experiment_id - end - - @testset "getexperiment_not_found" begin - @test isa(getexperiment(mlf, 123), Missing) - end - deleteexperiment(mlf, exp) -end - -@testset "getorcreateexperiment" begin - @ensuremlf - expname = "getorcreate" - artifact_location = "test$(expname)" - e = getorcreateexperiment(mlf, expname; artifact_location=artifact_location) - @test isa(e, MLFlowExperiment) - - ee = getorcreateexperiment(mlf, expname) - @test isa(ee, MLFlowExperiment) - @test e === ee - @test occursin(artifact_location, e.artifact_location) - deleteexperiment(mlf, ee) -end - -@testset "deleteexperiment" begin - @ensuremlf - exp = createexperiment(mlf) - deleteexperiment(mlf, exp) - - experiments = searchexperiments(mlf) - @test length(experiments) == 1 # 1 for the default experiment -end - -@testset "restoreexperiment" begin - @ensuremlf - exp = createexperiment(mlf) - deleteexperiment(mlf, exp) - - experiments = searchexperiments(mlf) - @test length(experiments) == 1 # 1 for the default experiment - - restoreexperiment(mlf, exp) - experiments = searchexperiments(mlf) - @test length(experiments) == 2 # the restored experiment and the default one - - deleteexperiment(mlf, exp) -end - -@testset verbose = true "searchexperiments" begin - 
@ensuremlf - n_experiments = 3 - for i in 2:n_experiments - createexperiment(mlf) - end - createexperiment(mlf; name="test") - experiments = searchexperiments(mlf) - - @testset "searchexperiments_get_all" begin - @test length(experiments) == (n_experiments + 1) # Adding one for the default experiment - end - - @testset "searchexperiments_by_filter" begin - experiments_by_filter = searchexperiments(mlf; filter="name=\"test\"") - @test length(experiments_by_filter) == 1 - @test experiments_by_filter[1].name == "test" - end - - @testset "searchexperiments_by_filter_attributes" begin - experiments_by_filter = searchexperiments(mlf; filter_attributes=Dict("name" => "test")) - @test length(experiments_by_filter) == 1 - @test experiments_by_filter[1].name == "test" - end - - @testset "searchexperiments_filter_exception" begin - @test_throws ErrorException searchexperiments(mlf; filter="test", filter_attributes=Dict("test" => "test")) - end - - popfirst!(experiments) # removing the default experiment (it can't be deleted) - for e in experiments - deleteexperiment(mlf, e) - end -end From b51fae69da595b770628522a48d4d784fc85f375 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Sun, 15 Sep 2024 23:16:10 -0500 Subject: [PATCH 05/31] Uploading finished run service --- src/MLFlowClient.jl | 6 ++- src/services/experiment.jl | 11 ++--- src/services/run.jl | 87 +++++++++++++++++++++++++++++++++++++ test/services/experiment.jl | 10 +++-- test/services/run.jl | 82 +++++++++++++++++++++++++++++++++- 5 files changed, 181 insertions(+), 15 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index f4be2c9..e4c97d4 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -63,6 +63,8 @@ export include("services/run.jl") export - createrun - + createrun, + deleterun, + restorerun, + getrun end diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 75a5ff4..05cfbf5 100644 --- a/src/services/experiment.jl +++ 
b/src/services/experiment.jl @@ -49,7 +49,7 @@ Get metadata for an experiment. This method works on deleted experiments. - `experiment_id`: ID of the associated experiment. # Returns -An object of type [`Experiment`](@ref). +An instance of type [`Experiment`](@ref). """ function getexperiment(instance::MLFlow, experiment_id::String) try @@ -57,9 +57,6 @@ function getexperiment(instance::MLFlow, experiment_id::String) result = mlfget(instance, "experiments/get"; arguments...) return result["experiment"] |> Experiment catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 - return missing - end throw(e) end end @@ -80,7 +77,7 @@ deleted experiments share the same name, the API will return one of them. - `experiment_name`: Name of the associated experiment. # Returns -An object of type [`Experiment`](@ref). +An instance of type [`Experiment`](@ref). """ function getexperimentbyname(instance::MLFlow, experiment_name::String) try @@ -163,9 +160,9 @@ function restoreexperiment(instance::MLFlow, experiment_id::String) end end restoreexperiment(instance::MLFlow, experiment_id::Integer) = - deleteexperiment(instance, string(experiment_id)) + restoreexperiment(instance, string(experiment_id)) restoreexperiment(instance::MLFlow, experiment::Experiment) = - deleteexperiment(instance, experiment.experiment_id) + restoreexperiment(instance, experiment.experiment_id) """ updateexperiment(instance::MLFlow, experiment_id::String, new_name::String) diff --git a/src/services/run.jl b/src/services/run.jl index 65cb9b4..1baf38e 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -31,3 +31,90 @@ function createrun(instance::MLFlow, experiment_id::String; throw(e) end end +createrun(instance::MLFlow, experiment_id::Integer; + run_name::Union{String, Missing}=missing, + start_time::Union{Integer, Missing}=missing, + tags::Union{Dict{<:Any}, Array{<:Any}}=[]) = + createrun(instance, string(experiment_id); run_name=run_name, + start_time=start_time, tags=tags) 
+createrun(instance::MLFlow, experiment::Experiment; + run_name::Union{String, Missing}=missing, + start_time::Union{Integer, Missing}=missing, + tags::Union{Dict{<:Any}, Array{<:Any}}=[]) = + createrun(instance, string(experiment.experiment_id); run_name=run_name, + start_time=start_time, tags=tags) + +""" + deleterun(instance::MLFlow, run_id::String) + deleterun(instance::MLFlow, run::Run) + +Mark a run for deletion. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run to delete. + +# Returns + +`true` if successful. Otherwise, raises exception. +""" +function deleterun(instance::MLFlow, run_id::String) + endpoint = "runs/delete" + try + mlfpost(instance, endpoint; run_id=run_id) + return true + catch e + throw(e) + end +end +deleterun(instance::MLFlow, run::Run) = deleterun(instance, run.info.run_id) + +""" + restorerun(instance::MLFlow, run_id::String) + restorerun(instance::MLFlow, run::Run) + +Restore a deleted run. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run to restore. + +# Returns + +`true` if successful. Otherwise, raises exception. +""" +function restorerun(instance::MLFlow, run_id::String) + endpoint = "runs/restore" + try + mlfpost(instance, endpoint; run_id=run_id) + return true + catch e + throw(e) + end +end +restorerun(instance::MLFlow, run::Run) = restorerun(instance, run.info.run_id) + +""" + getrun(instance::MLFlow, run_id::String) + +Get metadata, metrics, params, and tags for a run. In the case where multiple +metrics with the same key are logged for a run, return only the value with the +latest timestamp. If there are multiple values with the latest timestamp, +return the maximum of these values. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run to fetch. + +# Returns +An instance of type [`Run`](@ref). 
+""" +function getrun(instance::MLFlow, run_id::String) + try + arguments = (:run_id => run_id,) + result = mlfget(instance, "runs/get"; arguments...) + return result["run"] |> Run + catch e + throw(e) + end +end diff --git a/test/services/experiment.jl b/test/services/experiment.jl index fc755fa..176772b 100644 --- a/test/services/experiment.jl +++ b/test/services/experiment.jl @@ -67,10 +67,6 @@ end @test isa(experiment, Experiment) end - @testset "not found" begin - @test isa(getexperiment(mlf, 123), Missing) - end - deleteexperiment(mlf, experiment_id) end @@ -119,6 +115,12 @@ end deleteexperiment(mlf, experiment_id) @test restoreexperiment(mlf, experiment) end + + @testset "restore not found" begin + @test_throws ErrorException restoreexperiment(mlf, 123) + end + + deleteexperiment(mlf, experiment_id) end @testset verbose = true "update experiment" begin diff --git a/test/services/run.jl b/test/services/run.jl index 85285c8..20b6a03 100644 --- a/test/services/run.jl +++ b/test/services/run.jl @@ -1,14 +1,92 @@ @testset verbose = true "create run" begin @ensuremlf - experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) @testset "base" begin run = createrun(mlf, experiment_id) - @test run isa Run + @test run.info isa RunInfo + @test run.data isa RunData + @test run.inputs isa RunInputs + @test run.info.experiment_id == experiment_id + end + + @testset "with experiment id as string" begin + run = createrun(mlf, experiment_id) + @test run.info.experiment_id == experiment_id end + @testset "with experiment id as integer" begin + run = createrun(mlf, parse(Int, experiment_id)) + + @test run.info.experiment_id == experiment_id + end + + @testset "with experiment" begin + experiment = getexperiment(mlf, experiment_id) + run = createrun(mlf, experiment) + + @test run.info.experiment_id == experiment_id + end + + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "delete run" begin + @ensuremlf + experiment_id = createexperiment(mlf, 
UUIDs.uuid4() |> string) + run = createrun(mlf, experiment_id) + + @testset "using string id" begin + @test deleterun(mlf, run.info.run_id) + restorerun(mlf, run.info.run_id) + end + + @testset "using Run" begin + @test deleterun(mlf, run) + restorerun(mlf, run.info.run_id) + end + + @testset "delete already deleted" begin + deleterun(mlf, run.info.run_id) + @test deleterun(mlf, run.info.run_id) + end + + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "restore run" begin + @ensuremlf + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment_id) + + @testset "using string id" begin + deleterun(mlf, run.info.run_id) + @test restorerun(mlf, run.info.run_id) + end + + @testset "using Run" begin + deleterun(mlf, run) + @test restorerun(mlf, run) + end + + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "get run" begin + @ensuremlf + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment_id) + + @testset "using id" begin + retrieved_run = getrun(mlf, run.info.run_id) + + @test retrieved_run.info isa RunInfo + @test retrieved_run.data isa RunData + @test retrieved_run.inputs isa RunInputs + @test retrieved_run.info.experiment_id == experiment_id + end + deleteexperiment(mlf, experiment_id) end From 8c14210b57fec9ff1bf909cd9c04a4c3ca9b8a6f Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Sun, 22 Sep 2024 14:09:33 -0500 Subject: [PATCH 06/31] Upload W.I.P improved mlflow logging data handling --- src/MLFlowClient.jl | 12 +++-- src/services/experiment.jl | 17 +++--- src/services/loggers.jl | 44 +++++++++++++++ src/services/run.jl | 14 +++-- src/types/mlflow.jl | 4 +- src/types/run.jl | 7 ++- src/types/tag.jl | 4 +- src/utils.jl | 104 ++++++++++++------------------------ test/runtests.jl | 6 +-- test/services/experiment.jl | 31 +++++------ test/services/loggers.jl | 50 +++++++++++++++++ 11 files changed, 180 
insertions(+), 113 deletions(-) create mode 100644 src/services/loggers.jl create mode 100644 test/services/loggers.jl diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index e4c97d4..3fa839f 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -8,6 +8,9 @@ using JSON using ShowCases using FilePathsBase: AbstractPath +include("types/mlflow.jl") +export MLFlow + include("types/tag.jl") export Tag @@ -45,10 +48,8 @@ export RunInfo, RunInputs -include("types/mlflow.jl") -export MLFlow - include("utils.jl") +export refresh include("api.jl") include("services/experiment.jl") @@ -67,4 +68,9 @@ export deleterun, restorerun, getrun +include("services/loggers.jl") +export + logmetric, + logbatch + end diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 05cfbf5..4708db0 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -20,12 +20,10 @@ The ID of the newly created experiment. """ function createexperiment(instance::MLFlow, name::String; artifact_location::Union{String, Missing}=missing, - tags::Union{Dict{<:Any}, Array{<:Any}}=[])::String - tags = tags |> parsetags - + tags::MLFlowUpsertData{Tag}=Tag[])::String try result = mlfpost(instance, "experiments/create"; name=name, - artifact_location=artifact_location, tags=tags) + artifact_location=artifact_location, tags=(tags |> parse)) return result["experiment_id"] catch e if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 400 @@ -51,7 +49,7 @@ Get metadata for an experiment. This method works on deleted experiments. # Returns An instance of type [`Experiment`](@ref). """ -function getexperiment(instance::MLFlow, experiment_id::String) +function getexperiment(instance::MLFlow, experiment_id::String)::Experiment try arguments = (:experiment_id => experiment_id,) result = mlfget(instance, "experiments/get"; arguments...) 
@@ -60,7 +58,7 @@ function getexperiment(instance::MLFlow, experiment_id::String) throw(e) end end -getexperiment(instance::MLFlow, experiment_id::Integer) = +getexperiment(instance::MLFlow, experiment_id::Integer)::Experiment = getexperiment(instance, string(experiment_id)) """ @@ -79,7 +77,8 @@ deleted experiments share the same name, the API will return one of them. # Returns An instance of type [`Experiment`](@ref). """ -function getexperimentbyname(instance::MLFlow, experiment_name::String) +function getexperimentbyname(instance::MLFlow, + experiment_name::String)::Experiment try arguments = (:experiment_name => experiment_name,) result = mlfget(instance, "experiments/get-by-name"; arguments...) @@ -198,7 +197,7 @@ updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String) = updateexperiment(instance, experiment.experiment_id, new_name::String) """ - searchexperiments(instance::MLFlow; max_results::Integer=20000, + searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", filter::String="", order_by::Array{String}=[], view_type::ViewType=ACTIVE_ONLY) @@ -217,7 +216,7 @@ unspecified, return only active experiments. 
# Returns - vector of [`MLFlowExperiment`](@ref) experiments that were found in the MLFlow instance """ -function searchexperiments(instance::MLFlow; max_results::Integer=20000, +function searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", filter::String="", order_by::Array{String}=String[], view_type::ViewType=ACTIVE_ONLY)::Tuple{Array{Experiment}, Union{String, Nothing}} endpoint = "experiments/search" diff --git a/src/services/loggers.jl b/src/services/loggers.jl new file mode 100644 index 0000000..2ce153d --- /dev/null +++ b/src/services/loggers.jl @@ -0,0 +1,44 @@ +""" + logmetric(instance::MLFlow, run_id::String, key::String, value::Float64; + timestamp::Int64=round(Int, now() |> datetime2unix), + step::Union{Int64, Missing}=missing) + logmetric(instance::MLFlow, run::Run, key::String, value::Float64; + timestamp::Int64=round(Int, now() |> datetime2unix), + step::Union{Int64, Missing}=missing) + +Log a metric for a run. A metric is a key-value pair (string key, float value) +with an associated timestamp. Examples include the various metrics that +represent ML model accuracy. A metric can be logged multiple times. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run under which to log the metric. +- `key`: Name of the metric. +- `value`: Double value of the metric being logged. +- `timestamp`: Unix timestamp in milliseconds at the time metric was logged. +- `step`: Step at which to log the metric. 
+""" +logmetric(instance::MLFlow, run_id::String, key::String, value::Float64; + timestamp::Int64=round(Int, now() |> datetime2unix), + step::Union{Int64, Missing}=missing) = + mlfpost(instance, "runs/log-metric"; run_id=run_id, key=key, value=value, timestamp=timestamp, step=step) +logmetric(instance::MLFlow, run::Run, key::String, value::Float64; + timestamp::Int64=round(Int, now() |> datetime2unix), + step::Union{Int64, Missing}=missing) = + logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, step=step) + +""" + logbatch(instance::MLFlow, run_id::String, metrics::Array{Metric}, + params::Array{Param}, tags::Array{Tag}) + +Log a batch of metrics, params, and tags for a run. In case of error, partial +data may be written. + +For more information about this function, check [MLFlow official documentation](https://mlflow.org/docs/latest/rest-api.html#log-batch). +""" +function logbatch(instance::MLFlow, run_id::String; + metrics::Array{Metric}=Metric[], params::Array{Param}=Param[], + tags::MLFlowUpsertData{Tag}=Tag[]) + mlfpost(instance, "runs/log-batch"; run_id=run_id, metrics=metrics, + params=params, tags=(tags |> parse)) +end diff --git a/src/services/run.jl b/src/services/run.jl index 1baf38e..2019994 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -1,7 +1,7 @@ """ createrun(instance::MLFlow, experiment_id::String; run_name::Union{String, Missing}=missing, - start_time::Union{Integer, Missing}=missing, + start_time::Union{Int64, Missing}=missing, tags::Union{Dict{<:Any}, Array{<:Any}}=[]) Create a new run within an experiment. A run is usually a single execution of a @@ -19,13 +19,11 @@ An instance of type [`Run`](@ref). 
""" function createrun(instance::MLFlow, experiment_id::String; run_name::Union{String, Missing}=missing, - start_time::Union{Integer, Missing}=missing, - tags::Union{Dict{<:Any}, Array{<:Any}}=[]) - tags = tags |> parsetags - + start_time::Union{Int64, Missing}=missing, + tags::MLFlowUpsertData{Tag}=Tag[]) try result = mlfpost(instance, "runs/create"; experiment_id=experiment_id, - run_name=run_name, start_time=start_time, tags=tags) + run_name=run_name, start_time=start_time, tags=(tags |> parse)) return result["run"] |> Run catch e throw(e) @@ -34,13 +32,13 @@ end createrun(instance::MLFlow, experiment_id::Integer; run_name::Union{String, Missing}=missing, start_time::Union{Integer, Missing}=missing, - tags::Union{Dict{<:Any}, Array{<:Any}}=[]) = + tags::MLFlowUpsertData{Tag}=Tag[]) = createrun(instance, string(experiment_id); run_name=run_name, start_time=start_time, tags=tags) createrun(instance::MLFlow, experiment::Experiment; run_name::Union{String, Missing}=missing, start_time::Union{Integer, Missing}=missing, - tags::Union{Dict{<:Any}, Array{<:Any}}=[]) = + tags::MLFlowUpsertData{Tag}=Tag[]) = createrun(instance, string(experiment.experiment_id); run_name=run_name, start_time=start_time, tags=tags) diff --git a/src/types/mlflow.jl b/src/types/mlflow.jl index 326a0ff..34604c8 100644 --- a/src/types/mlflow.jl +++ b/src/types/mlflow.jl @@ -28,7 +28,7 @@ mlf = MLFlow(remote_url, headers=Dict("Authorization" => "Bearer string) Base.show(io::IO, t::Tag) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/utils.jl b/src/utils.jl index 25cdacc..a547ee5 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,89 +1,55 @@ -IntOrString = Union{Int, String} +const IntOrString = Union{Int, String} +const MLFlowUpsertData{T} = Union{Array{T}, Dict{String, String}, + Array{Pair{String, String}}, Array{Dict{String, String}}} const MLFLOW_ERROR_CODES = (; RESOURCE_ALREADY_EXISTS = "RESOURCE_ALREADY_EXISTS", RESOURCE_DOES_NOT_EXIST = "RESOURCE_DOES_NOT_EXIST", ) -""" - 
pairtags_to_dictarray(pair_array::Array{Pair{Any, Any}}) - -Transforms an array of `Pair` tags into an array of MLFlow compatible `Dict` -format tags. - -```@example -# Having an array of pairs -["foo" => "bar", "missy" => "gala"] - -# Will be transformed into an array of dictionaries -[Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] -``` -""" -function pairtags_to_dictarray(pair_array::Array{<:Pair})::Array{<:Dict} - dict_array = Dict[] - for pair in pair_array - key = string(pair.first) - value = string(pair.second) - push!(dict_array, Dict("key" => key, "value" => value)) +function dict_to_array(dict::Dict{String, String})::MLFlowUpsertData + tags = Tag[] + for (key, value) in dict + push!(tags, Tag(key, value)) end - return dict_array + return tags end -""" - tagsdict_to_dictarray(dict::Dict{Any, Any}) - -Transforms a dictionary into an array of `Dict`. - -```@example -# Having a dictionary -Dict("foo" => "bar", "missy" => "gala") - -# Will be transformed into an array of dictionaries -[Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] -``` -""" -function tagsdict_to_dictarray(dict::Dict{<:Any})::Array{<:Dict} - dict_array = Dict[] - for (key, value) in dict - push!(dict_array, Dict("key" => key |> string, - "value" => value |> string)) +function pairsarray_to_array(pair_array::Array{<:Pair})::MLFlowUpsertData + entity_array = Tag[] + for pair in pair_array + println(pair) + key = pair.first |> string + value = pair.second |> string + push!(entity_array, Tag(key, value)) end - return dict_array + return entity_array end -""" - tagarray_to_dictarray(tag_array::Array{Tag}) - -Transforms an array of `Tag` into an array of `Dict`. 
- -```@example -# Having an array of tags -[Tag("foo", "bar"), Tag("missy", "gala")] - -# Will be transformed into an array of dictionaries -[Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] -``` -""" -function tagarray_to_dictarray(tag_array::Array{Tag})::Array{<:Dict} - dict_array = Dict[] - for tag in tag_array - push!(dict_array, Dict("key" => tag.key , "value" => tag.value)) +function dictarray_to_array(dict_array::Array{Dict{String, String}})::MLFlowUpsertData + tags = Tag[] + for dict in dict_array + push!(tags, Tag(dict["key"], dict["value"])) end - return dict_array + return tags end -function parsetags(tags::Union{Dict{<:Any}, Array{<:Any}})::Array{<:Dict} - parsed_tags = Dict[] - if tags isa Array{Tag} - parsed_tags = tags |> tagarray_to_dictarray - elseif tags isa Array{<:Pair} - parsed_tags = tags |> pairtags_to_dictarray - elseif tags isa Dict{<:Any} - parsed_tags = tags |> tagsdict_to_dictarray +function parse(entities::MLFlowUpsertData{T}) where T<:LoggingData + println(typeof(entities)) + if entities isa Dict{String, String} + return entities |> dict_to_array + elseif entities isa Array{Pair{String, String}} + return entities |> pairsarray_to_array + elseif entities isa Array{Dict{String, String}} + return entities |> dictarray_to_array end - - return parsed_tags + return entities end + +refresh(instance::MLFlow, experiment::Experiment)::Experiment = + getexperiment(instance, experiment.experiment_id) +refresh(instance::MLFlow, run::Run)::Run = + getrun(instance, run.info.run_id) diff --git a/test/runtests.jl b/test/runtests.jl index 967e74b..fd86f27 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,5 @@ end include("base.jl") include("services/experiment.jl") -include("services/run.jl") -# include("test_functional.jl") -# include("test_runs.jl") -# include("test_loggers.jl") +# include("services/run.jl") +# include("services/loggers.jl") diff --git a/test/services/experiment.jl 
b/test/services/experiment.jl index 176772b..6fe68e2 100644 --- a/test/services/experiment.jl +++ b/test/services/experiment.jl @@ -14,11 +14,11 @@ deleteexperiment(mlf, experiment.experiment_id) end - @testset "with tags as array of tags" begin - experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; - tags=[Tag("test_key", "test_value")]) - deleteexperiment(mlf, experiment_id) - end + # @testset "with tags as array of tags" begin + # experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; + # tags=[Tag("test_key", "test_value")]) + # deleteexperiment(mlf, experiment_id) + # end @testset "with tags as array of pairs" begin experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; @@ -26,17 +26,17 @@ deleteexperiment(mlf, experiment_id) end - @testset "with tags as array of dicts" begin - experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; - tags=[Dict("key" => "test_key", "value" => "test_value")]) - deleteexperiment(mlf, experiment_id) - end + # @testset "with tags as array of dicts" begin + # experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; + # tags=[Dict("key" => "test_key", "value" => "test_value")]) + # deleteexperiment(mlf, experiment_id) + # end - @testset "with tags as dict" begin - experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; - tags=Dict("test_key" => "test_value")) - deleteexperiment(mlf, experiment_id) - end + # @testset "with tags as dict" begin + # experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; + # tags=Dict("test_key" => "test_value")) + # deleteexperiment(mlf, experiment_id) + # end end @testset verbose = true "get experiment" begin @@ -53,6 +53,7 @@ end @test experiment.experiment_id == experiment_id @test experiment.name == experiment_name @test occursin(artifact_location, experiment.artifact_location) + @test experiment.tags |> !isempty @test (experiment.tags |> first).key == (tags |> first).key @test (experiment.tags |> first).value == (tags |> first).value end diff 
--git a/test/services/loggers.jl b/test/services/loggers.jl new file mode 100644 index 0000000..9968951 --- /dev/null +++ b/test/services/loggers.jl @@ -0,0 +1,50 @@ +@testset verbose = true "log metric" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment_id) + + @testset "with run id as string" begin + logmetric(mlf, run.info.run_id, "missy", 0.9) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + + @test last_metric isa Metric + @test last_metric.key == "missy" + @test last_metric.value == 0.9 + end + + @testset "with run" begin + logmetric(mlf, run, "gala", 0.1) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + + @test last_metric isa Metric + @test last_metric.key == "gala" + @test last_metric.value == 0.1 + end + + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "log batch" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment_id) + + @testset "with run id as string" begin + logbatch(mlf, run.info.run_id, metrics=[("gala", 0.1)]) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + + @test last_metric isa Metric + @test last_metric.key == "missy" + @test last_metric.value == 0.9 + end + + deleteexperiment(mlf, experiment_id) +end From c4bd0be3180466d1993aa4a036d01f7352cc11c0 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Tue, 29 Oct 2024 01:32:39 -0500 Subject: [PATCH 07/31] Finishing with `logbatch` functionality --- src/api.jl | 2 +- src/services/experiment.jl | 3 +- src/services/loggers.jl | 19 ++++-- src/services/run.jl | 2 +- src/types/run.jl | 8 +-- src/types/tag.jl | 2 +- src/utils.jl | 101 ++++++++++++++++++++++-------- test/runtests.jl | 4 +- test/services/experiment.jl | 30 ++++----- test/services/loggers.jl | 120 +++++++++++++++++++++++++++++++++++- 10 files changed, 235 insertions(+), 56 
deletions(-) diff --git a/src/api.jl b/src/api.jl index 3f74dc4..3c13496 100644 --- a/src/api.jl +++ b/src/api.jl @@ -9,7 +9,7 @@ MLFlowClient.uri(mlf, "experiments/get", Dict(:experiment_id=>10)) ``` """ uri(mlf::MLFlow, endpoint::String; - parameters::Dict{Symbol, <:Any}=Dict{Symbol, IntOrString}()) = + parameters::Dict{Symbol, <:Any}=Dict{Symbol, NumberOrString}()) = URI("$(mlf.apiroot)/$(mlf.apiversion)/mlflow/$(endpoint)"; query=parameters) diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 4708db0..3e5f7ef 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -23,7 +23,8 @@ function createexperiment(instance::MLFlow, name::String; tags::MLFlowUpsertData{Tag}=Tag[])::String try result = mlfpost(instance, "experiments/create"; name=name, - artifact_location=artifact_location, tags=(tags |> parse)) + artifact_location=artifact_location, + tags=parse(Tag, tags)) return result["experiment_id"] catch e if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 400 diff --git a/src/services/loggers.jl b/src/services/loggers.jl index 2ce153d..330d162 100644 --- a/src/services/loggers.jl +++ b/src/services/loggers.jl @@ -28,7 +28,9 @@ logmetric(instance::MLFlow, run::Run, key::String, value::Float64; logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, step=step) """ - logbatch(instance::MLFlow, run_id::String, metrics::Array{Metric}, + logbatch(instance::MLFlow, run_id::String; metrics::Array{Metric}, + params::Array{Param}, tags::Array{Tag}) + logbatch(instance::MLFlow, run::Run; metrics::Array{Metric}, params::Array{Param}, tags::Array{Tag}) Log a batch of metrics, params, and tags for a run. In case of error, partial @@ -37,8 +39,15 @@ data may be written. For more information about this function, check [MLFlow official documentation](https://mlflow.org/docs/latest/rest-api.html#log-batch). 
""" function logbatch(instance::MLFlow, run_id::String; - metrics::Array{Metric}=Metric[], params::Array{Param}=Param[], - tags::MLFlowUpsertData{Tag}=Tag[]) - mlfpost(instance, "runs/log-batch"; run_id=run_id, metrics=metrics, - params=params, tags=(tags |> parse)) + metrics::MLFlowUpsertData{Metric}=Metric[], + params::MLFlowUpsertData{Param}=Param[], tags::MLFlowUpsertData{Tag}=Tag[]) + mlfpost(instance, "runs/log-batch"; run_id=run_id, + metrics=parse(Metric, metrics), params=parse(Param, params), + tags=parse(Tag, tags)) end +logbatch(instance::MLFlow, run::Run; + metrics::MLFlowUpsertData{Metric}=Metric[], + params::MLFlowUpsertData{Param}=Param[], + tags::MLFlowUpsertData{Tag}=Tag[]) = + logbatch(instance, run.info.run_id; metrics=metrics, params=params, + tags=tags) diff --git a/src/services/run.jl b/src/services/run.jl index 2019994..d58984b 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -23,7 +23,7 @@ function createrun(instance::MLFlow, experiment_id::String; tags::MLFlowUpsertData{Tag}=Tag[]) try result = mlfpost(instance, "runs/create"; experiment_id=experiment_id, - run_name=run_name, start_time=start_time, tags=(tags |> parse)) + run_name=run_name, start_time=start_time, tags=parse(Tag, tags)) return result["run"] |> Run catch e throw(e) diff --git a/src/types/run.jl b/src/types/run.jl index 27c8fb4..dcb8581 100644 --- a/src/types/run.jl +++ b/src/types/run.jl @@ -1,5 +1,5 @@ """ - Metric + Metric <: LoggingData Metric associated with a run, represented as a key-value pair. @@ -7,20 +7,20 @@ Metric associated with a run, represented as a key-value pair. - `key::String`: Key identifying this metric. - `value::Float64`: Value associated with this metric. - `timestamp::Int64`: The timestamp at which this metric was recorded. -- `step::Int64`: Step at which to log the metric. +- `step::Union{Int64, Missing}`: Step at which to log the metric. 
""" struct Metric <: LoggingData key::String value::Float64 timestamp::Int64 - step::Int64 + step::Union{Int64, Missing} end Metric(data::Dict{String, Any}) = Metric(data["key"], data["value"], data["timestamp"], data["step"]) Base.show(io::IO, t::Metric) = show(io, ShowCase(t, new_lines=true)) """ - Param + Param <: LoggingData Param associated with a run. diff --git a/src/types/tag.jl b/src/types/tag.jl index fbd177c..5dffe63 100644 --- a/src/types/tag.jl +++ b/src/types/tag.jl @@ -1,5 +1,5 @@ """ - Tag + Tag <: LoggingData Generic tag type for MLFlow entities. diff --git a/src/utils.jl b/src/utils.jl index a547ee5..85c0e66 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,50 +1,101 @@ -const IntOrString = Union{Int, String} -const MLFlowUpsertData{T} = Union{Array{T}, Dict{String, String}, - Array{Pair{String, String}}, Array{Dict{String, String}}} +const NumberOrString = Union{Number, String} +const MLFlowUpsertData{T} = Union{ + Array{T}, + Array{<:Dict{String, <:Any}}, + Dict{String, <:NumberOrString}, + Array{<:Pair{String, <:NumberOrString}}, + Array{<:Tuple{String, <:NumberOrString}} +} const MLFLOW_ERROR_CODES = (; RESOURCE_ALREADY_EXISTS = "RESOURCE_ALREADY_EXISTS", RESOURCE_DOES_NOT_EXIST = "RESOURCE_DOES_NOT_EXIST", ) -function dict_to_array(dict::Dict{String, String})::MLFlowUpsertData - tags = Tag[] +function dict_to_T_array(::Type{T}, dict::Dict{String, <:NumberOrString}) where T<:LoggingData + entities = T[] for (key, value) in dict - push!(tags, Tag(key, value)) + if T<:Metric + push!(entities, Metric(key, Float64(value), + round(Int, now() |> datetime2unix), missing)) + else + push!(entities, T(key, value |> string)) + end end - return tags + return entities end -function pairsarray_to_array(pair_array::Array{<:Pair})::MLFlowUpsertData - entity_array = Tag[] +function pairarray_to_T_array(::Type{T}, pair_array::Array{<:Pair}) where T<:LoggingData + entities = T[] for pair in pair_array - println(pair) key = pair.first |> string - value = 
pair.second |> string - push!(entity_array, Tag(key, value)) + if T<:Metric + value = pair.second + push!(entities, Metric(key, Float64(value), + round(Int, now() |> datetime2unix), missing)) + else + value = pair.second |> string + push!(entities, T(key, value)) + end end - return entity_array + return entities end -function dictarray_to_array(dict_array::Array{Dict{String, String}})::MLFlowUpsertData - tags = Tag[] +function tuplearray_to_T_array(::Type{T}, + tuple_array::Array{<:Tuple{String, <:NumberOrString}}) where T<:LoggingData + entities = T[] + for tuple in tuple_array + if length(tuple) != 2 + error("Tuple must have exactly two elements (format: (key, value))") + end + + key = tuple |> first |> string + if T<: Metric + value = tuple |> last + push!(entities, Metric(key, Float64(value), + round(Int, now() |> datetime2unix), missing)) + else + value = tuple |> last |> string + push!(entities, T(key, value)) + end + end + + return entities +end + +function dictarray_to_T_array(::Type{T}, + dict_array::Array{<:Dict{String, <:Any}}) where T<:LoggingData + entities = T[] for dict in dict_array - push!(tags, Tag(dict["key"], dict["value"])) + key = dict["key"] |> string + if T<:Metric + value = Float64(dict["value"]) + if haskey(dict, "timestamp") + timestamp = dict["timestamp"] + else + timestamp = round(Int, now() |> datetime2unix) + end + push!(entities, Metric(key, value, timestamp, missing)) + else + value = dict["value"] |> string + push!(entities, T(key, value)) + end end - return tags + return entities end -function parse(entities::MLFlowUpsertData{T}) where T<:LoggingData - println(typeof(entities)) - if entities isa Dict{String, String} - return entities |> dict_to_array - elseif entities isa Array{Pair{String, String}} - return entities |> pairsarray_to_array - elseif entities isa Array{Dict{String, String}} - return entities |> dictarray_to_array +function parse(::Type{T}, entities::MLFlowUpsertData{T}) where T<:LoggingData + if entities isa 
Dict{String, <:NumberOrString} + return dict_to_T_array(T, entities) + elseif entities isa Array{<:Dict{String, <:Any}} + return dictarray_to_T_array(T, entities) + elseif entities isa Array{<:Pair{String, <:NumberOrString}} + return pairarray_to_T_array(T, entities) + elseif entities isa Array{<:Tuple{String, <:NumberOrString}} + return tuplearray_to_T_array(T, entities) end return entities end diff --git a/test/runtests.jl b/test/runtests.jl index fd86f27..22b023e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,5 +5,5 @@ end include("base.jl") include("services/experiment.jl") -# include("services/run.jl") -# include("services/loggers.jl") +include("services/run.jl") +include("services/loggers.jl") diff --git a/test/services/experiment.jl b/test/services/experiment.jl index 6fe68e2..83f8c7e 100644 --- a/test/services/experiment.jl +++ b/test/services/experiment.jl @@ -14,11 +14,11 @@ deleteexperiment(mlf, experiment.experiment_id) end - # @testset "with tags as array of tags" begin - # experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; - # tags=[Tag("test_key", "test_value")]) - # deleteexperiment(mlf, experiment_id) - # end + @testset "with tags as array of tags" begin + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; + tags=[Tag("test_key", "test_value")]) + deleteexperiment(mlf, experiment_id) + end @testset "with tags as array of pairs" begin experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; @@ -26,17 +26,17 @@ deleteexperiment(mlf, experiment_id) end - # @testset "with tags as array of dicts" begin - # experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; - # tags=[Dict("key" => "test_key", "value" => "test_value")]) - # deleteexperiment(mlf, experiment_id) - # end + @testset "with tags as array of dicts" begin + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; + tags=[Dict("key" => "test_key", "value" => "test_value")]) + deleteexperiment(mlf, experiment_id) + end - # @testset "with 
tags as dict" begin - # experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; - # tags=Dict("test_key" => "test_value")) - # deleteexperiment(mlf, experiment_id) - # end + @testset "with tags as dict" begin + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string; + tags=Dict("test_key" => "test_value")) + deleteexperiment(mlf, experiment_id) + end end @testset verbose = true "get experiment" begin diff --git a/test/services/loggers.jl b/test/services/loggers.jl index 9968951..a9654a1 100644 --- a/test/services/loggers.jl +++ b/test/services/loggers.jl @@ -33,17 +33,135 @@ end @ensuremlf experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) - run = createrun(mlf, experiment_id) @testset "with run id as string" begin + run = createrun(mlf, experiment_id) logbatch(mlf, run.info.run_id, metrics=[("gala", 0.1)]) run = refresh(mlf, run) last_metric = run.data.metrics |> last + @test last_metric isa Metric + @test last_metric.key == "gala" + @test last_metric.value == 0.1 + deleterun(mlf, run) + end + + @testset "with run" begin + run = createrun(mlf, experiment_id) + logbatch(mlf, run, metrics=[("missy", 0.9)]) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + @test last_metric isa Metric @test last_metric.key == "missy" @test last_metric.value == 0.9 + deleterun(mlf, run) + end + + @testset "with metrics, params and tags as dict" begin + run = createrun(mlf, experiment_id) + logbatch(mlf, run, metrics=Dict("ana" => 0.5), + params=Dict("test_param" => "0.9"), + tags=Dict("test_tag" => "gala")) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + last_param = run.data.params |> last + last_tag = run.data.tags |> first + + @test last_metric isa Metric + @test last_metric.key == "ana" + @test last_metric.value == 0.5 + + @test last_param isa Param + @test last_param.key == "test_param" + @test last_param.value == "0.9" + + @test last_tag isa Tag + @test last_tag.key == "test_tag" + @test last_tag.value == 
"gala" + + deleterun(mlf, run) + end + + @testset "with metrics, params and tags as pair array" begin + run = createrun(mlf, experiment_id) + logbatch(mlf, run, metrics=["ana" => 0.5], + params=["test_param" => "0.9"], tags=["test_tag" => "gala"]) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + last_param = run.data.params |> last + last_tag = run.data.tags |> first + + @test last_metric isa Metric + @test last_metric.key == "ana" + @test last_metric.value == 0.5 + + @test last_param isa Param + @test last_param.key == "test_param" + @test last_param.value == "0.9" + + @test last_tag isa Tag + @test last_tag.key == "test_tag" + @test last_tag.value == "gala" + + deleterun(mlf, run) + end + + @testset "with metrics, params and tags as tuple array" begin + run = createrun(mlf, experiment_id) + logbatch(mlf, run, metrics=[("ana", 0.5)], + params=[("test_param", "0.9")], tags=[("test_tag", "gala")]) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + last_param = run.data.params |> last + last_tag = run.data.tags |> first + + @test last_metric isa Metric + @test last_metric.key == "ana" + @test last_metric.value == 0.5 + + @test last_param isa Param + @test last_param.key == "test_param" + @test last_param.value == "0.9" + + @test last_tag isa Tag + @test last_tag.key == "test_tag" + @test last_tag.value == "gala" + + deleterun(mlf, run) + end + + @testset "with metrics, params and tags as dict array" begin + run = createrun(mlf, experiment_id) + logbatch(mlf, run, + metrics=[Dict("key" => "ana", "value" => 0.5, "timestamp" => 123)], + params=[Dict("key" => "test_param", "value" => "0.9")], + tags=[Dict("key" => "test_tag", "value" => "gala")]) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + last_param = run.data.params |> last + last_tag = run.data.tags |> first + + @test last_metric isa Metric + @test last_metric.key == "ana" + @test last_metric.value == 0.5 + @test last_metric.timestamp == 123 + + 
@test last_param isa Param + @test last_param.key == "test_param" + @test last_param.value == "0.9" + + @test last_tag isa Tag + @test last_tag.key == "test_tag" + @test last_tag.value == "gala" + + deleterun(mlf, run) end deleteexperiment(mlf, experiment_id) From 22767d4d61a960f99ae71a0ffaa0b67871b0deaa Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:30:54 -0500 Subject: [PATCH 08/31] Adding `loginputs` function --- src/MLFlowClient.jl | 3 ++- src/services/loggers.jl | 27 ++++++++++++++++++----- src/types/run.jl | 4 ++-- src/utils.jl | 8 +++---- test/services/loggers.jl | 47 +++++++++++++++++++++++++++++++++++----- 5 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 3fa839f..dc12a28 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -71,6 +71,7 @@ export include("services/loggers.jl") export logmetric, - logbatch + logbatch, + loginputs end diff --git a/src/services/loggers.jl b/src/services/loggers.jl index 330d162..f9083c4 100644 --- a/src/services/loggers.jl +++ b/src/services/loggers.jl @@ -28,26 +28,41 @@ logmetric(instance::MLFlow, run::Run, key::String, value::Float64; logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, step=step) """ - logbatch(instance::MLFlow, run_id::String; metrics::Array{Metric}, - params::Array{Param}, tags::Array{Tag}) + logbatch(instance::MLFlow, run_id::String; + metrics::MLFlowUpsertData{Metric}, params::MLFlowUpsertData{Param}, + tags::MLFlowUpsertData{Tag}) logbatch(instance::MLFlow, run::Run; metrics::Array{Metric}, - params::Array{Param}, tags::Array{Tag}) + params::MLFlowUpsertData{Param}, tags::MLFlowUpsertData{Tag}) Log a batch of metrics, params, and tags for a run. In case of error, partial data may be written. For more information about this function, check [MLFlow official documentation](https://mlflow.org/docs/latest/rest-api.html#log-batch). 
""" -function logbatch(instance::MLFlow, run_id::String; +logbatch(instance::MLFlow, run_id::String; metrics::MLFlowUpsertData{Metric}=Metric[], - params::MLFlowUpsertData{Param}=Param[], tags::MLFlowUpsertData{Tag}=Tag[]) + params::MLFlowUpsertData{Param}=Param[], + tags::MLFlowUpsertData{Tag}=Tag[]) = mlfpost(instance, "runs/log-batch"; run_id=run_id, metrics=parse(Metric, metrics), params=parse(Param, params), tags=parse(Tag, tags)) -end logbatch(instance::MLFlow, run::Run; metrics::MLFlowUpsertData{Metric}=Metric[], params::MLFlowUpsertData{Param}=Param[], tags::MLFlowUpsertData{Tag}=Tag[]) = logbatch(instance, run.info.run_id; metrics=metrics, params=params, tags=tags) + +""" + loginputs(instance::MLFlow, run_id::String; datasets::Array{DatasetInput}) + loginputs(instance::MLFlow, run::Run; datasets::Array{DatasetInput}) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run to log under This field is required. +- `datasets`: Dataset inputs. +""" +loginputs(instance::MLFlow, run_id::String, datasets::Array{DatasetInput}) = + mlfpost(instance, "runs/log-inputs"; run_id=run_id, datasets=datasets) +loginputs(instance::MLFlow, run::Run, datasets::Array{DatasetInput}) = + loginputs(instance, run.info.run_id, datasets) diff --git a/src/types/run.jl b/src/types/run.jl index dcb8581..e15a0eb 100644 --- a/src/types/run.jl +++ b/src/types/run.jl @@ -7,13 +7,13 @@ Metric associated with a run, represented as a key-value pair. - `key::String`: Key identifying this metric. - `value::Float64`: Value associated with this metric. - `timestamp::Int64`: The timestamp at which this metric was recorded. -- `step::Union{Int64, Missing}`: Step at which to log the metric. +- `step::Union{Int64, Nothing}`: Step at which to log the metric. 
""" struct Metric <: LoggingData key::String value::Float64 timestamp::Int64 - step::Union{Int64, Missing} + step::Union{Int64, Nothing} end Metric(data::Dict{String, Any}) = Metric(data["key"], data["value"], data["timestamp"], data["step"]) diff --git a/src/utils.jl b/src/utils.jl index 85c0e66..6006f32 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -17,7 +17,7 @@ function dict_to_T_array(::Type{T}, dict::Dict{String, <:NumberOrString}) where for (key, value) in dict if T<:Metric push!(entities, Metric(key, Float64(value), - round(Int, now() |> datetime2unix), missing)) + round(Int, now() |> datetime2unix), nothing)) else push!(entities, T(key, value |> string)) end @@ -33,7 +33,7 @@ function pairarray_to_T_array(::Type{T}, pair_array::Array{<:Pair}) where T<:Log if T<:Metric value = pair.second push!(entities, Metric(key, Float64(value), - round(Int, now() |> datetime2unix), missing)) + round(Int, now() |> datetime2unix), nothing)) else value = pair.second |> string push!(entities, T(key, value)) @@ -55,7 +55,7 @@ function tuplearray_to_T_array(::Type{T}, if T<: Metric value = tuple |> last push!(entities, Metric(key, Float64(value), - round(Int, now() |> datetime2unix), missing)) + round(Int, now() |> datetime2unix), nothing)) else value = tuple |> last |> string push!(entities, T(key, value)) @@ -77,7 +77,7 @@ function dictarray_to_T_array(::Type{T}, else timestamp = round(Int, now() |> datetime2unix) end - push!(entities, Metric(key, value, timestamp, missing)) + push!(entities, Metric(key, value, timestamp, nothing)) else value = dict["value"] |> string push!(entities, T(key, value)) diff --git a/test/services/loggers.jl b/test/services/loggers.jl index a9654a1..9b8a3c8 100644 --- a/test/services/loggers.jl +++ b/test/services/loggers.jl @@ -36,7 +36,7 @@ end @testset "with run id as string" begin run = createrun(mlf, experiment_id) - logbatch(mlf, run.info.run_id, metrics=[("gala", 0.1)]) + logbatch(mlf, run.info.run_id; metrics=[("gala", 0.1)]) run = 
refresh(mlf, run) last_metric = run.data.metrics |> last @@ -49,7 +49,7 @@ end @testset "with run" begin run = createrun(mlf, experiment_id) - logbatch(mlf, run, metrics=[("missy", 0.9)]) + logbatch(mlf, run; metrics=[("missy", 0.9)]) run = refresh(mlf, run) last_metric = run.data.metrics |> last @@ -62,7 +62,7 @@ end @testset "with metrics, params and tags as dict" begin run = createrun(mlf, experiment_id) - logbatch(mlf, run, metrics=Dict("ana" => 0.5), + logbatch(mlf, run; metrics=Dict("ana" => 0.5), params=Dict("test_param" => "0.9"), tags=Dict("test_tag" => "gala")) @@ -88,7 +88,7 @@ end @testset "with metrics, params and tags as pair array" begin run = createrun(mlf, experiment_id) - logbatch(mlf, run, metrics=["ana" => 0.5], + logbatch(mlf, run; metrics=["ana" => 0.5], params=["test_param" => "0.9"], tags=["test_tag" => "gala"]) run = refresh(mlf, run) @@ -113,7 +113,7 @@ end @testset "with metrics, params and tags as tuple array" begin run = createrun(mlf, experiment_id) - logbatch(mlf, run, metrics=[("ana", 0.5)], + logbatch(mlf, run; metrics=[("ana", 0.5)], params=[("test_param", "0.9")], tags=[("test_tag", "gala")]) run = refresh(mlf, run) @@ -138,7 +138,7 @@ end @testset "with metrics, params and tags as dict array" begin run = createrun(mlf, experiment_id) - logbatch(mlf, run, + logbatch(mlf, run; metrics=[Dict("key" => "ana", "value" => 0.5, "timestamp" => 123)], params=[Dict("key" => "test_param", "value" => "0.9")], tags=[Dict("key" => "test_tag", "value" => "gala")]) @@ -166,3 +166,38 @@ end deleteexperiment(mlf, experiment_id) end + +@testset verbose = true "log inputs" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + + @testset "with run id as string" begin + run = createrun(mlf, experiment_id) + inputs = [DatasetInput([Tag("tag_key", "tag_value")], + Dataset("dataset_name", "dataset_digest", "dataset_source_type", + "dataset_source", nothing, nothing))] + loginputs(mlf, run.info.run_id, inputs) + + run = 
refresh(mlf, run) + + @test run.inputs.dataset_inputs |> length == 1 + + dataset_input = run.inputs.dataset_inputs |> first + dataset_input_tag = dataset_input.tags |> first + + @test dataset_input_tag isa Tag + @test dataset_input_tag.key == "tag_key" + @test dataset_input_tag.value == "tag_value" + + @test dataset_input.dataset isa Dataset + @test dataset_input.dataset.name == "dataset_name" + @test dataset_input.dataset.digest == "dataset_digest" + @test dataset_input.dataset.source_type == "dataset_source_type" + @test dataset_input.dataset.source == "dataset_source" + @test dataset_input.dataset.schema |> isnothing + @test dataset_input.dataset.profile |> isnothing + end + + deleteexperiment(mlf, experiment_id) +end From 5452ee2d5523a0a5671030d9e3425e204a847090 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Thu, 31 Oct 2024 01:03:15 -0500 Subject: [PATCH 09/31] Implementing `setexperimenttag --- src/MLFlowClient.jl | 1 + src/api.jl | 16 +++-- src/services/experiment.jl | 121 ++++++++++++++---------------------- src/services/run.jl | 39 +++--------- test/services/experiment.jl | 37 ++++++++++- 5 files changed, 105 insertions(+), 109 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index dc12a28..0cdcaf3 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -58,6 +58,7 @@ export createexperiment, deleteexperiment, updateexperiment, + setexperimenttag, restoreexperiment, searchexperiments, getexperimentbyname diff --git a/src/api.jl b/src/api.jl index 3c13496..5bac07d 100644 --- a/src/api.jl +++ b/src/api.jl @@ -36,9 +36,13 @@ function mlfget(mlf, endpoint; kwargs...) 
try response = HTTP.get(apiuri, apiheaders) - return JSON.parse(String(response.body)) + return response.body |> String |> JSON.parse catch e - throw(e) + error_response = e.response.body |> String |> JSON.parse + error_message = + "$(error_response["error_code"]) - $(error_response["message"])" + @error error_message + throw(ErrorException(error_message)) end end @@ -54,8 +58,12 @@ function mlfpost(mlf, endpoint; kwargs...) try response = HTTP.post(apiuri, apiheaders, body) - return JSON.parse(String(response.body)) + return response.body |> String |> JSON.parse catch e - throw(e) + error_response = e.response.body |> String |> JSON.parse + error_message = + "$(error_response["error_code"]) - $(error_response["message"])" + @error error_message + throw(ErrorException(error_message)) end end diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 3e5f7ef..99dd321 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -21,20 +21,9 @@ The ID of the newly created experiment. function createexperiment(instance::MLFlow, name::String; artifact_location::Union{String, Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[])::String - try - result = mlfpost(instance, "experiments/create"; name=name, - artifact_location=artifact_location, - tags=parse(Tag, tags)) - return result["experiment_id"] - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 400 - error_code = (e.response.body |> String |> JSON.parse)["error_code"] - if error_code == MLFLOW_ERROR_CODES.RESOURCE_ALREADY_EXISTS - error("Experiment with name \"$name\" already exists") - end - end - throw(e) - end + result = mlfpost(instance, "experiments/create"; name=name, + artifact_location=artifact_location, tags=parse(Tag, tags)) + return result["experiment_id"] end """ @@ -51,13 +40,8 @@ Get metadata for an experiment. This method works on deleted experiments. An instance of type [`Experiment`](@ref). 
""" function getexperiment(instance::MLFlow, experiment_id::String)::Experiment - try - arguments = (:experiment_id => experiment_id,) - result = mlfget(instance, "experiments/get"; arguments...) - return result["experiment"] |> Experiment - catch e - throw(e) - end + result = mlfget(instance, "experiments/get"; experiment_id=experiment_id) + return result["experiment"] |> Experiment end getexperiment(instance::MLFlow, experiment_id::Integer)::Experiment = getexperiment(instance, string(experiment_id)) @@ -80,16 +64,9 @@ An instance of type [`Experiment`](@ref). """ function getexperimentbyname(instance::MLFlow, experiment_name::String)::Experiment - try - arguments = (:experiment_name => experiment_name,) - result = mlfget(instance, "experiments/get-by-name"; arguments...) - return result["experiment"] |> Experiment - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 - return missing - end - throw(e) - end + result = mlfget(instance, "experiments/get-by-name"; + experiment_name=experiment_name) + return result["experiment"] |> Experiment end """ @@ -106,21 +83,11 @@ experiment are also deleted. - `experiment_id`: ID of the associated experiment. # Returns - `true` if successful. Otherwise, raises exception. """ function deleteexperiment(instance::MLFlow, experiment_id::String) - endpoint = "experiments/delete" - try - mlfpost(instance, endpoint; experiment_id=experiment_id) - return true - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 - # experiment already deleted - return true - end - throw(e) - end + mlfpost(instance, "experiments/delete"; experiment_id=experiment_id) + return true end deleteexperiment(instance::MLFlow, experiment_id::Integer) = deleteexperiment(instance, string(experiment_id)) @@ -141,23 +108,11 @@ underlying artifacts associated with experiment are also restored. - `experiment_id`: ID of the associated experiment. # Returns - `true` if successful. Otherwise, raises exception. 
""" function restoreexperiment(instance::MLFlow, experiment_id::String) - endpoint = "experiments/restore" - try - mlfpost(instance, endpoint; experiment_id=experiment_id) - return true - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 - error_code = JSON.parse(String(e.response.body))["error_code"] - if error_code == MLFLOW_ERROR_CODES.RESOURCE_DOES_NOT_EXIST - error("Experiment with id \"$experiment_id\" does not exist") - end - end - throw(e) - end + mlfpost(instance, "experiments/restore"; experiment_id=experiment_id) + return true end restoreexperiment(instance::MLFlow, experiment_id::Integer) = restoreexperiment(instance, string(experiment_id)) @@ -184,13 +139,9 @@ The new name must be unique. """ function updateexperiment(instance::MLFlow, experiment_id::String, new_name::String) - endpoint = "experiments/update" - try - mlfpost(instance, endpoint; experiment_id=experiment_id, new_name=new_name) - return true - catch e - throw(e) - end + mlfpost(instance, "experiments/update"; experiment_id=experiment_id, + new_name=new_name) + return true end updateexperiment(instance::MLFlow, experiment_id::Integer, new_name::String) = updateexperiment(instance, string(experiment_id), new_name) @@ -219,8 +170,8 @@ unspecified, return only active experiments. """ function searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", filter::String="", order_by::Array{String}=String[], - view_type::ViewType=ACTIVE_ONLY)::Tuple{Array{Experiment}, Union{String, Nothing}} - endpoint = "experiments/search" + view_type::ViewType=ACTIVE_ONLY +)::Tuple{Array{Experiment}, Union{String, Nothing}} parameters = (; max_results, page_token, filter, :view_type => view_type |> Integer) @@ -228,14 +179,36 @@ function searchexperiments(instance::MLFlow; max_results::Int64=20000, parameters = (; order_by, parameters...) end - try - result = mlfget(instance, endpoint; parameters...) 
+ result = mlfget(instance, "experiments/search"; parameters...) - experiments = result["experiments"] |> (x -> [Experiment(y) for y in x]) - next_page_token = get(result, "next_page_token", nothing) + experiments = result["experiments"] |> (x -> [Experiment(y) for y in x]) + next_page_token = get(result, "next_page_token", nothing) - return experiments, next_page_token - catch e - throw(e) - end + return experiments, next_page_token end + +""" + setexperimenttag(instance::MLFlow, experiment_id::String, key::String, + value::String) + setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, + value::String) + setexperimenttag(instance::MLFlow, experiment::Experiment, key::String, + value::String) + +Set a tag on an experiment. Experiment tags are metadata that can be updated. + +# Arguments +- `experiment_id`: ID of the experiment under which to log the tag. +- `key`: Name of the tag. +- `value`: String value of the tag being logged. +""" +setexperimenttag(instance::MLFlow, experiment_id::String, key::String, + value::String) = + mlfpost(instance, "experiments/set-experiment-tag"; + experiment_id=experiment_id, key=key, value=value) +setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, + value::String) = + setexperimenttag(instance, string(experiment_id), key, value) +setexperimenttag(instance::MLFlow, experiment::Experiment, key::String, + value::String) = + setexperimenttag(instance, experiment.experiment_id, key, value) diff --git a/src/services/run.jl b/src/services/run.jl index d58984b..05db82b 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -21,13 +21,9 @@ function createrun(instance::MLFlow, experiment_id::String; run_name::Union{String, Missing}=missing, start_time::Union{Int64, Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[]) - try - result = mlfpost(instance, "runs/create"; experiment_id=experiment_id, - run_name=run_name, start_time=start_time, tags=parse(Tag, tags)) - return result["run"] |> Run - 
catch e - throw(e) - end + result = mlfpost(instance, "runs/create"; experiment_id=experiment_id, + run_name=run_name, start_time=start_time, tags=parse(Tag, tags)) + return result["run"] |> Run end createrun(instance::MLFlow, experiment_id::Integer; run_name::Union{String, Missing}=missing, @@ -53,17 +49,11 @@ Mark a run for deletion. - `run_id`: ID of the run to delete. # Returns - `true` if successful. Otherwise, raises exception. """ function deleterun(instance::MLFlow, run_id::String) - endpoint = "runs/delete" - try - mlfpost(instance, endpoint; run_id=run_id) - return true - catch e - throw(e) - end + mlfpost(instance, "runs/delete"; run_id=run_id) + return true end deleterun(instance::MLFlow, run::Run) = deleterun(instance, run.info.run_id) @@ -78,17 +68,11 @@ Restore a deleted run. - `run_id`: ID of the run to restore. # Returns - `true` if successful. Otherwise, raises exception. """ function restorerun(instance::MLFlow, run_id::String) - endpoint = "runs/restore" - try - mlfpost(instance, endpoint; run_id=run_id) - return true - catch e - throw(e) - end + mlfpost(instance, "runs/restore"; run_id=run_id) + return true end restorerun(instance::MLFlow, run::Run) = restorerun(instance, run.info.run_id) @@ -108,11 +92,6 @@ return the maximum of these values. An instance of type [`Run`](@ref). """ function getrun(instance::MLFlow, run_id::String) - try - arguments = (:run_id => run_id,) - result = mlfget(instance, "runs/get"; arguments...) 
- return result["run"] |> Run - catch e - throw(e) - end + result = mlfget(instance, "runs/get"; run_id=run_id) + return result["run"] |> Run end diff --git a/test/services/experiment.jl b/test/services/experiment.jl index 83f8c7e..e020f8d 100644 --- a/test/services/experiment.jl +++ b/test/services/experiment.jl @@ -93,7 +93,7 @@ end @testset "delete already deleted" begin deleteexperiment(mlf, experiment_id) - @test deleteexperiment(mlf, experiment_id) + @test_throws ErrorException deleteexperiment(mlf, experiment_id) end end @@ -179,3 +179,38 @@ end experiment_ids .|> (id -> deleteexperiment(mlf, id)) end + +@testset verbose = true "set experiment tag" begin + @ensuremlf + + @testset "set tag with string id" begin + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + setexperimenttag(mlf, experiment_id, "test_key", "test_value") + experiment = getexperiment(mlf, experiment_id) + @test experiment.tags |> !isempty + @test (experiment.tags |> first).key == "test_key" + @test (experiment.tags |> first).value == "test_value" + deleteexperiment(mlf, experiment_id) + end + + @testset "set tag with integer id" begin + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + setexperimenttag(mlf, parse(Int, experiment_id), "test_key", "test_value") + experiment = getexperiment(mlf, experiment_id) + @test experiment.tags |> !isempty + @test (experiment.tags |> first).key == "test_key" + @test (experiment.tags |> first).value == "test_value" + deleteexperiment(mlf, experiment_id) + end + + @testset "set tag with Experiment" begin + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + experiment = getexperiment(mlf, experiment_id) + setexperimenttag(mlf, experiment, "test_key", "test_value") + experiment = getexperiment(mlf, experiment_id) + @test experiment.tags |> !isempty + @test (experiment.tags |> first).key == "test_key" + @test (experiment.tags |> first).value == "test_value" + deleteexperiment(mlf, experiment_id) + end +end From 
f3e651c2b9df5c6f35a16dd1925042fb11642d44 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Thu, 31 Oct 2024 01:26:35 -0500 Subject: [PATCH 10/31] Implement `setruntag` --- src/MLFlowClient.jl | 11 +++++---- src/services/experiment.jl | 35 +++++++++++++++----------- src/services/loggers.jl | 50 +++++++++++++++++++++++++++++--------- src/services/run.jl | 42 ++++++++++++++++++++++++++------ test/services/run.jl | 33 +++++++++++++++++++++++++ 5 files changed, 133 insertions(+), 38 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 0cdcaf3..9e4ca7a 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -57,22 +57,23 @@ export getexperiment, createexperiment, deleteexperiment, - updateexperiment, setexperimenttag, + updateexperiment, restoreexperiment, searchexperiments, getexperimentbyname include("services/run.jl") export + getrun, createrun, deleterun, - restorerun, - getrun + setruntag, + restorerun include("services/loggers.jl") export - logmetric, logbatch, - loginputs + loginputs, + logmetric end diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 99dd321..4793c77 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -85,13 +85,13 @@ experiment are also deleted. # Returns `true` if successful. Otherwise, raises exception. 
""" -function deleteexperiment(instance::MLFlow, experiment_id::String) +function deleteexperiment(instance::MLFlow, experiment_id::String)::Bool mlfpost(instance, "experiments/delete"; experiment_id=experiment_id) return true end -deleteexperiment(instance::MLFlow, experiment_id::Integer) = +deleteexperiment(instance::MLFlow, experiment_id::Integer)::Bool = deleteexperiment(instance, string(experiment_id)) -deleteexperiment(instance::MLFlow, experiment::Experiment) = +deleteexperiment(instance::MLFlow, experiment::Experiment)::Bool = deleteexperiment(instance, experiment.experiment_id) """ @@ -110,13 +110,13 @@ underlying artifacts associated with experiment are also restored. # Returns `true` if successful. Otherwise, raises exception. """ -function restoreexperiment(instance::MLFlow, experiment_id::String) +function restoreexperiment(instance::MLFlow, experiment_id::String)::Bool mlfpost(instance, "experiments/restore"; experiment_id=experiment_id) return true end -restoreexperiment(instance::MLFlow, experiment_id::Integer) = +restoreexperiment(instance::MLFlow, experiment_id::Integer)::Bool = restoreexperiment(instance, string(experiment_id)) -restoreexperiment(instance::MLFlow, experiment::Experiment) = +restoreexperiment(instance::MLFlow, experiment::Experiment)::Bool = restoreexperiment(instance, experiment.experiment_id) """ @@ -138,15 +138,17 @@ The new name must be unique. `true` if successful. Otherwise, raises exception. 
""" function updateexperiment(instance::MLFlow, experiment_id::String, - new_name::String) + new_name::String)::Bool mlfpost(instance, "experiments/update"; experiment_id=experiment_id, new_name=new_name) return true end -updateexperiment(instance::MLFlow, experiment_id::Integer, new_name::String) = +updateexperiment(instance::MLFlow, experiment_id::Integer, + new_name::String)::Bool = updateexperiment(instance, string(experiment_id), new_name) -updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String) = - updateexperiment(instance, experiment.experiment_id, new_name::String) +updateexperiment(instance::MLFlow, experiment::Experiment, + new_name::String)::Bool = + updateexperiment(instance, experiment.experiment_id, new_name) """ searchexperiments(instance::MLFlow; max_results::Int64=20000, @@ -201,14 +203,19 @@ Set a tag on an experiment. Experiment tags are metadata that can be updated. - `experiment_id`: ID of the experiment under which to log the tag. - `key`: Name of the tag. - `value`: String value of the tag being logged. + +# Returns +`true` if successful. Otherwise, raises exception. 
""" -setexperimenttag(instance::MLFlow, experiment_id::String, key::String, - value::String) = +function setexperimenttag(instance::MLFlow, experiment_id::String, key::String, + value::String)::Bool mlfpost(instance, "experiments/set-experiment-tag"; experiment_id=experiment_id, key=key, value=value) + return true +end setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, - value::String) = + value::String)::Bool = setexperimenttag(instance, string(experiment_id), key, value) setexperimenttag(instance::MLFlow, experiment::Experiment, key::String, - value::String) = + value::String)::Bool = setexperimenttag(instance, experiment.experiment_id, key, value) diff --git a/src/services/loggers.jl b/src/services/loggers.jl index f9083c4..9c6e86d 100644 --- a/src/services/loggers.jl +++ b/src/services/loggers.jl @@ -17,15 +17,22 @@ represent ML model accuracy. A metric can be logged multiple times. - `value`: Double value of the metric being logged. - `timestamp`: Unix timestamp in milliseconds at the time metric was logged. - `step`: Step at which to log the metric. + +# Returns +`true` if successful. Otherwise, raises exception. 
""" -logmetric(instance::MLFlow, run_id::String, key::String, value::Float64; - timestamp::Int64=round(Int, now() |> datetime2unix), - step::Union{Int64, Missing}=missing) = - mlfpost(instance, "runs/log-metric"; run_id=run_id, key=key, value=value, timestamp=timestamp, step=step) +function logmetric(instance::MLFlow, run_id::String, key::String, + value::Float64; timestamp::Int64=round(Int, now() |> datetime2unix), + step::Union{Int64, Missing}=missing)::Bool + mlfpost(instance, "runs/log-metric"; run_id=run_id, key=key, value=value, + timestamp=timestamp, step=step) + return true +end logmetric(instance::MLFlow, run::Run, key::String, value::Float64; timestamp::Int64=round(Int, now() |> datetime2unix), - step::Union{Int64, Missing}=missing) = - logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, step=step) + step::Union{Int64, Missing}=missing)::Bool = + logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, + step=step) """ logbatch(instance::MLFlow, run_id::String; @@ -38,18 +45,33 @@ Log a batch of metrics, params, and tags for a run. In case of error, partial data may be written. For more information about this function, check [MLFlow official documentation](https://mlflow.org/docs/latest/rest-api.html#log-batch). + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run to log under. +- `metrics`: Metrics to log. +- `params`: Params to log. +- `tags`: Tags to log. + +**Note**: A single request can contain up to 1000 metrics, and up to 1000 +metrics, params, and tags in total. + +# Returns +`true` if successful. Otherwise, raises exception. 
""" -logbatch(instance::MLFlow, run_id::String; +function logbatch(instance::MLFlow, run_id::String; metrics::MLFlowUpsertData{Metric}=Metric[], params::MLFlowUpsertData{Param}=Param[], - tags::MLFlowUpsertData{Tag}=Tag[]) = + tags::MLFlowUpsertData{Tag}=Tag[])::Bool mlfpost(instance, "runs/log-batch"; run_id=run_id, metrics=parse(Metric, metrics), params=parse(Param, params), tags=parse(Tag, tags)) + return true +end logbatch(instance::MLFlow, run::Run; metrics::MLFlowUpsertData{Metric}=Metric[], params::MLFlowUpsertData{Param}=Param[], - tags::MLFlowUpsertData{Tag}=Tag[]) = + tags::MLFlowUpsertData{Tag}=Tag[])::Bool = logbatch(instance, run.info.run_id; metrics=metrics, params=params, tags=tags) @@ -61,8 +83,14 @@ logbatch(instance::MLFlow, run::Run; - `instance`: [`MLFlow`](@ref) configuration. - `run_id`: ID of the run to log under This field is required. - `datasets`: Dataset inputs. + +# Returns +`true` if successful. Otherwise, raises exception. """ -loginputs(instance::MLFlow, run_id::String, datasets::Array{DatasetInput}) = +function loginputs(instance::MLFlow, run_id::String, + datasets::Array{DatasetInput})::Bool mlfpost(instance, "runs/log-inputs"; run_id=run_id, datasets=datasets) -loginputs(instance::MLFlow, run::Run, datasets::Array{DatasetInput}) = + return true +end +loginputs(instance::MLFlow, run::Run, datasets::Array{DatasetInput})::Bool = loginputs(instance, run.info.run_id, datasets) diff --git a/src/services/run.jl b/src/services/run.jl index 05db82b..d1e7bc5 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -20,7 +20,7 @@ An instance of type [`Run`](@ref). 
function createrun(instance::MLFlow, experiment_id::String; run_name::Union{String, Missing}=missing, start_time::Union{Int64, Missing}=missing, - tags::MLFlowUpsertData{Tag}=Tag[]) + tags::MLFlowUpsertData{Tag}=Tag[])::Run result = mlfpost(instance, "runs/create"; experiment_id=experiment_id, run_name=run_name, start_time=start_time, tags=parse(Tag, tags)) return result["run"] |> Run @@ -28,13 +28,13 @@ end createrun(instance::MLFlow, experiment_id::Integer; run_name::Union{String, Missing}=missing, start_time::Union{Integer, Missing}=missing, - tags::MLFlowUpsertData{Tag}=Tag[]) = + tags::MLFlowUpsertData{Tag}=Tag[])::Run = createrun(instance, string(experiment_id); run_name=run_name, start_time=start_time, tags=tags) createrun(instance::MLFlow, experiment::Experiment; run_name::Union{String, Missing}=missing, start_time::Union{Integer, Missing}=missing, - tags::MLFlowUpsertData{Tag}=Tag[]) = + tags::MLFlowUpsertData{Tag}=Tag[])::Run = createrun(instance, string(experiment.experiment_id); run_name=run_name, start_time=start_time, tags=tags) @@ -51,11 +51,12 @@ Mark a run for deletion. # Returns `true` if successful. Otherwise, raises exception. """ -function deleterun(instance::MLFlow, run_id::String) +function deleterun(instance::MLFlow, run_id::String)::Bool mlfpost(instance, "runs/delete"; run_id=run_id) return true end -deleterun(instance::MLFlow, run::Run) = deleterun(instance, run.info.run_id) +deleterun(instance::MLFlow, run::Run)::Bool = + deleterun(instance, run.info.run_id) """ restorerun(instance::MLFlow, run_id::String) @@ -70,11 +71,12 @@ Restore a deleted run. # Returns `true` if successful. Otherwise, raises exception. 
""" -function restorerun(instance::MLFlow, run_id::String) +function restorerun(instance::MLFlow, run_id::String)::Bool mlfpost(instance, "runs/restore"; run_id=run_id) return true end -restorerun(instance::MLFlow, run::Run) = restorerun(instance, run.info.run_id) +restorerun(instance::MLFlow, run::Run)::Bool = + restorerun(instance, run.info.run_id) """ getrun(instance::MLFlow, run_id::String) @@ -91,7 +93,31 @@ return the maximum of these values. # Returns An instance of type [`Run`](@ref). """ -function getrun(instance::MLFlow, run_id::String) +function getrun(instance::MLFlow, run_id::String)::Run result = mlfget(instance, "runs/get"; run_id=run_id) return result["run"] |> Run end + +""" + setruntag(instance::MLFlow, run_id::String, key::String, value::String) + setruntag(instance::MLFlow, run::Run, key::String, value::String) + +Set a tag on a run. Tags are run metadata that can be updated during a run and +after a run completes. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run under which to log the tag. +- `key`: Name of the tag. +- `value`: String value of the tag being logged. + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function setruntag(instanceL::MLFlow, run_id::String, key::String, + value::String):Bool + mlfpost(instanceL, "runs/set-tag"; run_id=run_id, key=key, value=value) + return true +end +setruntag(instance::MLFlow, run::Run, key::String, value::String)::Bool = + setruntag(instance, run.info.run_id, key, value) diff --git a/test/services/run.jl b/test/services/run.jl index 20b6a03..8cb571c 100644 --- a/test/services/run.jl +++ b/test/services/run.jl @@ -90,3 +90,36 @@ end deleteexperiment(mlf, experiment_id) end + +@testset verbose = true "set run tag" begin + @ensuremlf + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + + @testset "set tag with run string id" begin + run = createrun(mlf, experiment_id) + setruntag(mlf, run.info.run_id, "tag", "value") + + run = refresh(mlf, run) + + @test run.data.tags |> !isempty + @test (run.data.tags |> first).key == "tag" + @test (run.data.tags |> first).value == "value" + + deleterun(mlf, run) + end + + @testset "set tag with run" begin + run = createrun(mlf, experiment_id) + setruntag(mlf, run, "tag", "value") + + run = refresh(mlf, run) + + @test run.data.tags |> !isempty + @test (run.data.tags |> first).key == "tag" + @test (run.data.tags |> first).value == "value" + + deleterun(mlf, run) + end + + deleteexperiment(mlf, experiment_id) +end From 0ac57fcf2efe45e04f1d99703ef884137bbc6f55 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Thu, 31 Oct 2024 01:47:54 -0500 Subject: [PATCH 11/31] Implementing `deleteruntag` --- src/MLFlowClient.jl | 4 +++- src/services/run.jl | 28 ++++++++++++++++++++++++---- test/services/run.jl | 28 ++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 9e4ca7a..2cbb186 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -69,7 +69,9 @@ export createrun, deleterun, setruntag, - restorerun + restorerun, + deleteruntag + 
include("services/loggers.jl") export logbatch, diff --git a/src/services/run.jl b/src/services/run.jl index d1e7bc5..8cabd70 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -102,8 +102,7 @@ end setruntag(instance::MLFlow, run_id::String, key::String, value::String) setruntag(instance::MLFlow, run::Run, key::String, value::String) -Set a tag on a run. Tags are run metadata that can be updated during a run and -after a run completes. +Set a tag on a run. # Arguments - `instance`: [`MLFlow`](@ref) configuration. @@ -114,10 +113,31 @@ after a run completes. # Returns `true` if successful. Otherwise, raises exception. """ -function setruntag(instanceL::MLFlow, run_id::String, key::String, +function setruntag(instance::MLFlow, run_id::String, key::String, value::String):Bool - mlfpost(instanceL, "runs/set-tag"; run_id=run_id, key=key, value=value) + mlfpost(instance, "runs/set-tag"; run_id=run_id, key=key, value=value) return true end setruntag(instance::MLFlow, run::Run, key::String, value::String)::Bool = setruntag(instance, run.info.run_id, key, value) + +""" + deletetag(instance::MLFlow, run_id::String, key::String) + deletetag(instance::MLFlow, run::Run, key::String) + +Delete a tag on a run. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run that the tag was logged under. +- `key`: Name of the tag. + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function deleteruntag(instance::MLFlow, run_id::String, key::String)::Bool + mlfpost(instance, "runs/delete-tag"; run_id=run_id, key=key) + return true +end +deleteruntag(instance::MLFlow, run::Run, key::String)::Bool = + deleteruntag(instance, run.info.run_id, key) diff --git a/test/services/run.jl b/test/services/run.jl index 8cb571c..19f74b6 100644 --- a/test/services/run.jl +++ b/test/services/run.jl @@ -123,3 +123,31 @@ end deleteexperiment(mlf, experiment_id) end + +@testset verbose = true "delete run tag" begin + @ensuremlf + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + + @testset "delete tag with run string id" begin + run = createrun(mlf, experiment_id) + setruntag(mlf, run.info.run_id, "tag", "value") + deleteruntag(mlf, run.info.run_id, "tag") + + run = refresh(mlf, run) + + @test (run.data.tags |> length) == 1 # The default tag + deleterun(mlf, run) + end + + @testset "delete tag with run string id" begin + run = createrun(mlf, experiment_id) + setruntag(mlf, run, "tag", "value") + deleteruntag(mlf, run, "tag") + + run = refresh(mlf, run) + + @test (run.data.tags |> length) == 1 # The default tag + deleterun(mlf, run) + end + deleteexperiment(mlf, experiment_id) +end From 6d094dcd4b328c714c1e2d5e1fd1dd43038b188c Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:53:04 -0500 Subject: [PATCH 12/31] Implemented `logparam` --- src/MLFlowClient.jl | 3 +- src/services/loggers.jl | 35 +++++++++++++++ test/services/loggers.jl | 95 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 131 insertions(+), 2 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 2cbb186..567c535 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -76,6 +76,7 @@ include("services/loggers.jl") export logbatch, loginputs, - logmetric + logmetric, + logparam end diff --git a/src/services/loggers.jl b/src/services/loggers.jl index 9c6e86d..2dc0a39 100644 --- 
a/src/services/loggers.jl +++ b/src/services/loggers.jl @@ -33,6 +33,12 @@ logmetric(instance::MLFlow, run::Run, key::String, value::Float64; step::Union{Int64, Missing}=missing)::Bool = logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, step=step) +logmetric(instance::MLFlow, run_id::String, metric::Metric)::Bool = + logmetric(instance, run_id, metric.key, metric.value, timestamp=metric.timestamp, + step=metric.step) +logmetric(instance::MLFlow, run::Run, metric::Metric)::Bool = + logmetric(instance, run.info.run_id, metric.key, metric.value, + timestamp=metric.timestamp, step=metric.step) """ logbatch(instance::MLFlow, run_id::String; @@ -94,3 +100,32 @@ function loginputs(instance::MLFlow, run_id::String, end loginputs(instance::MLFlow, run::Run, datasets::Array{DatasetInput})::Bool = loginputs(instance, run.info.run_id, datasets) + +""" + logparam(instance::MLFlow, run_id::String, key::String, value::String) + logparam(instance::MLFlow, run::Run, key::String, value::String) + logparam(instance::MLFlow, run_id::String, param::Param) + logparam(instance::MLFlow, run::Run, param::Param) + +Log a param used for a run. A param is a key-value pair (string key, string +value). Examples include hyperparameters used for ML model training and +constant dates and values used in an ETL pipeline. A param can be logged only +once for a run. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run under which to log the param. +- `key`: Name of the param. +- `value`: String value of the param being logged. 
+""" +function logparam(instance::MLFlow, run_id::String, key::String, + value::String)::Bool + mlfpost(instance, "runs/log-parameter"; run_id=run_id, key=key, value=value) + return true +end +logparam(instance::MLFlow, run::Run, key::String, value::String)::Bool = + logparam(instance, run.info.run_id, key, value) +logparam(instance::MLFlow, run_id::String, param::Param)::Bool = + logparam(instance, run_id, param.key, param.value) +logparam(instance::MLFlow, run::Run, param::Param)::Bool = + logparam(instance, run.info.run_id, param.key, param.value) diff --git a/test/services/loggers.jl b/test/services/loggers.jl index 9b8a3c8..229e68b 100644 --- a/test/services/loggers.jl +++ b/test/services/loggers.jl @@ -2,9 +2,9 @@ @ensuremlf experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) - run = createrun(mlf, experiment_id) @testset "with run id as string" begin + run = createrun(mlf, experiment_id) logmetric(mlf, run.info.run_id, "missy", 0.9) run = refresh(mlf, run) @@ -13,9 +13,11 @@ @test last_metric isa Metric @test last_metric.key == "missy" @test last_metric.value == 0.9 + deleterun(mlf, run) end @testset "with run" begin + run = createrun(mlf, experiment_id) logmetric(mlf, run, "gala", 0.1) run = refresh(mlf, run) @@ -24,6 +26,37 @@ @test last_metric isa Metric @test last_metric.key == "gala" @test last_metric.value == 0.1 + deleterun(mlf, run) + end + + @testset "with run id as string and metric" begin + run = createrun(mlf, experiment_id) + logmetric(mlf, run.info.run_id, Metric("missy", 0.9, 123, 1)) + + run = refresh(mlf, run) + last_metric = run.data.metrics |> last + + @test last_metric isa Metric + @test last_metric.key == "missy" + @test last_metric.value == 0.9 + @test last_metric.timestamp == 123 + @test last_metric.step == 1 + deleterun(mlf, run) + end + + @testset "with run and metric" begin + run = createrun(mlf, experiment_id) + logmetric(mlf, run, Metric("gala", 0.1, 123, 1)) + + run = refresh(mlf, run) + last_metric = run.data.metrics 
|> last + + @test last_metric isa Metric + @test last_metric.key == "gala" + @test last_metric.value == 0.1 + @test last_metric.timestamp == 123 + @test last_metric.step == 1 + deleterun(mlf, run) end deleteexperiment(mlf, experiment_id) @@ -201,3 +234,63 @@ end deleteexperiment(mlf, experiment_id) end + +@testset verbose = true "log param" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + + @testset "with run id as string" begin + run = createrun(mlf, experiment_id) + logparam(mlf, run.info.run_id, "missy", "0.9") + + run = refresh(mlf, run) + last_param = run.data.params |> last + + @test last_param isa Param + @test last_param.key == "missy" + @test last_param.value == "0.9" + deleterun(mlf, run) + end + + @testset "with run" begin + run = createrun(mlf, experiment_id) + logparam(mlf, run, "gala", "0.1") + + run = refresh(mlf, run) + last_param = run.data.params |> last + + @test last_param isa Param + @test last_param.key == "gala" + @test last_param.value == "0.1" + deleterun(mlf, run) + end + + @testset "with run id as string and param" begin + run = createrun(mlf, experiment_id) + logparam(mlf, run.info.run_id, Param("missy", "0.9")) + + run = refresh(mlf, run) + last_param = run.data.params |> last + + @test last_param isa Param + @test last_param.key == "missy" + @test last_param.value == "0.9" + deleterun(mlf, run) + end + + @testset "with run and param" begin + run = createrun(mlf, experiment_id) + logparam(mlf, run, Param("gala", "0.1")) + + run = refresh(mlf, run) + last_param = run.data.params |> last + + @test last_param isa Param + @test last_param.key == "gala" + @test last_param.value == "0.1" + deleterun(mlf, run) + end + + deleteexperiment(mlf, experiment_id) +end From ddde82bf76cdc6803a0a1eae9fad380552c96826 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Fri, 1 Nov 2024 13:40:30 -0500 Subject: [PATCH 13/31] Implementing `getmetrichistory` --- src/MLFlowClient.jl 
| 4 + src/api.jl | 3 +- src/services/misc.jl | 28 +++++++ test/runtests.jl | 1 + test/services/misc.jl | 19 +++++ test/test_functional.jl | 122 ---------------------------- test/test_runs.jl | 172 ---------------------------------------- 7 files changed, 54 insertions(+), 295 deletions(-) create mode 100644 src/services/misc.jl create mode 100644 test/services/misc.jl delete mode 100644 test/test_functional.jl delete mode 100644 test/test_runs.jl diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 567c535..3054c88 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -79,4 +79,8 @@ export logmetric, logparam +include("services/misc.jl") +export + getmetrichistory + end diff --git a/src/api.jl b/src/api.jl index 5bac07d..501d708 100644 --- a/src/api.jl +++ b/src/api.jl @@ -31,7 +31,8 @@ headers(mlf::MLFlow, custom_headers::AbstractDict) = merge(mlf.headers, custom_h Performs a HTTP GET to a specified endpoint. kwargs are turned into GET params. """ function mlfget(mlf, endpoint; kwargs...) - apiuri = uri(mlf, endpoint; parameters=kwargs |> Dict) + apiuri = uri(mlf, endpoint; + parameters=Dict(k => v for (k, v) in kwargs if v !== missing)) apiheaders = headers(mlf, ("Content-Type" => "application/json") |> Dict) try diff --git a/src/services/misc.jl b/src/services/misc.jl new file mode 100644 index 0000000..e4c10b9 --- /dev/null +++ b/src/services/misc.jl @@ -0,0 +1,28 @@ +""" + getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; + page_token::String="", max_results::Int32=1) + +Get a list of all values for the specified metric for a given run. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run from which to fetch metric values. +- `metric_key`: Name of the metric. +- `page_token`: Token indicating the page of metric history to fetch. +- `max_results`: Maximum number of logged instances of a metric for a run to +return per call. 
+""" +function getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; + page_token::String="", + max_results::Union{Int64, Missing}=missing +)::Tuple{Array{Metric}, Union{String, Nothing}} + result = mlfget(instance, "metrics/get-history"; run_id=run_id, + metric_key=metric_key, page_token=page_token, + max_results= + (ismissing(max_results) ? max_results : (max_results |> Int32))) + + metrics = result["metrics"] |> (x -> [Metric(y) for y in x]) + next_page_token = get(result, "next_page_token", nothing) + + return metrics, next_page_token +end diff --git a/test/runtests.jl b/test/runtests.jl index 22b023e..146e94b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,3 +7,4 @@ include("base.jl") include("services/experiment.jl") include("services/run.jl") include("services/loggers.jl") +include("services/misc.jl") diff --git a/test/services/misc.jl b/test/services/misc.jl new file mode 100644 index 0000000..b3f3135 --- /dev/null +++ b/test/services/misc.jl @@ -0,0 +1,19 @@ +@testset verbose = true "get metric history" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment_id) + for i in 1:20 + logmetric(mlf, run, "missy", i |> Float64) + end + + @testset "default search" begin + metrics, next_page_token = getmetrichistory(mlf, run.info.run_id, + "missy") + + @test length(metrics) == 20 + @test next_page_token |> isnothing + end + + deleteexperiment(mlf, experiment_id) +end diff --git a/test/test_functional.jl b/test/test_functional.jl deleted file mode 100644 index 400ba06..0000000 --- a/test/test_functional.jl +++ /dev/null @@ -1,122 +0,0 @@ -@testset "MLFlow" begin - mlf = MLFlow() - @test mlf.apiroot == ENV["MLFLOW_TRACKING_URI"] - @test mlf.apiversion == 2.0 - @test mlf.headers == Dict() - mlf = MLFlow("https://localhost:5001/api", apiversion=3.0) - @test mlf.apiroot == "https://localhost:5001/api" - @test mlf.apiversion == 3.0 - @test mlf.headers == Dict() - let 
custom_headers = Dict("Authorization" => "Bearer EMPTY") - mlf = MLFlow("https://localhost:5001/api", apiversion=3.0, headers=custom_headers) - @test mlf.apiroot == "https://localhost:5001/api" - @test mlf.apiversion == 3.0 - @test mlf.headers == custom_headers - end -end - -# test that sensitive fields are not displayed by show() -@testset "MLFLow/show" begin - let io = IOBuffer(), - secret_token = "SECRET" - - custom_headers = Dict("Authorization" => "Bearer $secret_token") - mlf = MLFlow("https://localhost:5001/api", apiversion=3.0, headers=custom_headers) - @test mlf.apiroot == "https://localhost:5001/api" - @test mlf.apiversion == 3.0 - @test mlf.headers == custom_headers - show(io, mlf) - show_output = String(take!(io)) - @test !(occursin(secret_token, show_output)) - end -end - -@testset "utils" begin - using MLFlowClient: uri, headers - using URIs: URI - - let apiroot = "http://localhost:5001/api", apiversion = 2.0, endpoint = "experiments/get" - mlf = MLFlow(apiroot; apiversion=apiversion) - apiuri = uri(mlf, endpoint) - @test apiuri == URI("$apiroot/$apiversion/mlflow/$endpoint") - end - let apiroot = "http://localhost:5001/api", auth_headers = Dict("Authorization" => "Bearer 123456"), - custom_headers = Dict("Content-Type" => "application/json") - - mlf = MLFlow(apiroot; headers=auth_headers) - apiheaders = headers(mlf, custom_headers) - @test apiheaders == Dict("Authorization" => "Bearer 123456", "Content-Type" => "application/json") - end -end - -@testset "artifacts" begin - @ensuremlf - exp = createexperiment(mlf) - @test isa(exp, MLFlowExperiment) - exprun = createrun(mlf, exp) - @test isa(exprun, MLFlowRun) - # only run the below if artifact_uri is a local directory - # i.e. 
when running mlflow server as a separate process next to the testset - # when running mlflow in a container, the below tests will be skipped - # this is what happens in github actions - mlflow runs in a container, the artifact_uri is not immediately available, and tests are skipped - artifact_uri = exprun.info.artifact_uri - if isdir(artifact_uri) - @test_throws SystemError logartifact(mlf, exprun, "/etc/shadow") - - tmpfiletoupload = "sometempfilename.txt" - f = open(tmpfiletoupload, "w") - write(f, "samplecontents") - close(f) - artifactpath = logartifact(mlf, exprun, tmpfiletoupload) - @test isfile(artifactpath) - rm(tmpfiletoupload) - artifactpath = logartifact(mlf, exprun, "randbytes.bin", b"some rand bytes here") - @test isfile(artifactpath) - - mkdir(joinpath(artifact_uri, "newdir")) - artifactpath = logartifact(mlf, exprun, joinpath("newdir", "randbytesindir.bin"), b"bytes here") - artifactpath = logartifact(mlf, exprun, joinpath("newdir", "randbytesindir2.bin"), b"bytes here") - mkdir(joinpath(artifact_uri, "newdir", "new2")) - artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "randbytesindir.bin"), b"bytes here") - artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "randbytesindir2.bin"), b"bytes here") - mkdir(joinpath(artifact_uri, "newdir", "new2", "new3")) - artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "new3", "randbytesindir.bin"), b"bytes here") - artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "new3", "randbytesindir2.bin"), b"bytes here") - mkdir(joinpath(artifact_uri, "newdir", "new2", "new3", "new4")) - artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "new3", "new4", "randbytesindir.bin"), b"bytes here") - artifactpath = logartifact(mlf, exprun, joinpath("newdir", "new2", "new3", "new4", "randbytesindir2.bin"), b"bytes here") - - # artifact tree should now look like this: - # - # ├── newdir - # │   ├── new2 - # │   │   ├── new3 - # │   │   │   ├── new4 - 
# │   │   │   │   ├── randbytesindir2.bin - # │   │   │   │   └── randbytesindir.bin - # │   │   │   ├── randbytesindir2.bin - # │   │   │   └── randbytesindir.bin - # │   │   ├── randbytesindir2.bin - # │   │   └── randbytesindir.bin - # │   ├── randbytesindir2.bin - # │   └── randbytesindir.bin - # ├── randbytes.bin - # └── sometempfilename.txt - - # 4 directories, 10 files - - artifactlist = listartifacts(mlf, exprun) - @test sort(basename.(get_path.(artifactlist))) == ["newdir", "randbytes.bin", "sometempfilename.txt"] - @test sort(get_size.(artifactlist)) == [0, 14, 20] - - ald2 = listartifacts(mlf, exprun, maxdepth=2) - @test length(ald2) == 6 - @test sort(basename.(get_path.(ald2))) == ["new2", "newdir", "randbytes.bin", "randbytesindir.bin", "randbytesindir2.bin", "sometempfilename.txt"] - aldrecursion = listartifacts(mlf, exprun, maxdepth=-1) - @test length(aldrecursion) == 14 # 4 directories, 10 files - @test sum(typeof.(aldrecursion) .== MLFlowArtifactDirInfo) == 4 # 4 directories - @test sum(typeof.(aldrecursion) .== MLFlowArtifactFileInfo) == 10 # 10 files - end - deleterun(mlf, exprun) - deleteexperiment(mlf, exp) -end diff --git a/test/test_runs.jl b/test/test_runs.jl deleted file mode 100644 index 06adae1..0000000 --- a/test/test_runs.jl +++ /dev/null @@ -1,172 +0,0 @@ -@testset verbose = true "createrun" begin - @ensuremlf - expname = "createrun-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - runname = "run-$(UUIDs.uuid4())" - - function runtests(run) - @test isa(run, MLFlowRun) - @test run.info.run_name == runname - end - - @testset "createrun_by_experiment_id" begin - r = createrun(mlf, e.experiment_id; run_name=runname) - runtests(r) - end - - @testset "createrun_by_experiment_type" begin - r = createrun(mlf, e; run_name=runname) - runtests(r) - end - - deleteexperiment(mlf, e) -end - -@testset "getrun" begin - @ensuremlf - expname = "getrun-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - runname = 
"run-$(UUIDs.uuid4())" - r = createrun(mlf, e.experiment_id; run_name=runname) - - retrieved_r = getrun(mlf, r.info.run_id) - - @test isa(retrieved_r, MLFlowRun) - @test retrieved_r.info.run_id == r.info.run_id - deleteexperiment(mlf, e) -end - -@testset verbose = true "updaterun" begin - @ensuremlf - expname = "updaterun-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - runname = "run-$(UUIDs.uuid4())" - r = createrun(mlf, e.experiment_id; run_name=runname) - - new_runname = "new_updaterun-$(UUIDs.uuid4())" - new_status = "FINISHED" - new_status_using_type = MLFlowRunStatus("FINISHED") - - function runtests(run_updated) - @test isa(run_updated, MLFlowRun) - @test run_updated.info.run_name != r.info.run_name - @test run_updated.info.status.status != r.info.status - @test run_updated.info.run_name == new_runname - @test run_updated.info.status.status == new_status - end - - @testset "updaterun_by_run_id" begin - r_updated = updaterun(mlf, r.info.run_id, new_status; run_name=new_runname) - runtests(r_updated) - end - @testset "updaterun_by_run_info" begin - r_updated = updaterun(mlf, r.info, new_status; run_name=new_runname) - runtests(r_updated) - end - @testset "updaterun_byrun" begin - r_updated = updaterun(mlf, r, new_status; run_name=new_runname) - runtests(r_updated) - end - - @testset "updaterun_by_run_info_and_defined_status" begin - r_updated = updaterun(mlf, r.info, new_status_using_type; run_name=new_runname) - runtests(r_updated) - end - @testset "updaterun_by_run_and_defined_status" begin - r_updated = updaterun(mlf, r, new_status_using_type; run_name=new_runname) - runtests(r_updated) - end - - deleteexperiment(mlf, e) -end - -@testset verbose = true "deleterun" begin - @ensuremlf - expname = "deleterun-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - - function runtests(run) - @test deleterun(mlf, run) - end - - @testset "deleterun_by_run_info" begin - r = createrun(mlf, e.experiment_id) - runtests(r.info) - end - - @testset 
"deleterun_by_run" begin - r = createrun(mlf, e.experiment_id) - runtests(r) - end - - deleteexperiment(mlf, e) -end - -@testset verbose = true "searchruns" begin - @ensuremlf - getexpname() = "searchruns-$(UUIDs.uuid4())" - e1 = getorcreateexperiment(mlf, getexpname()) - e2 = getorcreateexperiment(mlf, getexpname()) - - run_array1 = MLFlowRun[] - run_array2 = MLFlowRun[] - run_status = ["FINISHED", "FINISHED", "FAILED"] - failed_runs = 0 - - function addruns!(run_array, experiment, run_status) - for status in run_status - run = createrun(mlf, experiment.experiment_id) - run = updaterun(mlf, run, status) - if status == "FAILED" - logparam(mlf, run, "test", "failed") - failed_runs += 1 - else - logparam(mlf, run, "test", "test") - end - push!(run_array, run) - end - end - - addruns!(run_array1, e1, run_status) - addruns!(run_array2, e2, run_status) - - @testset "searchruns_by_experiment_id" begin - runs = searchruns(mlf, e1.experiment_id) - @test runs |> length == run_array1 |> length - end - - @testset "searchruns_by_experiment" begin - runs = searchruns(mlf, e1) - @test runs |> length == run_array1 |> length - end - - @testset "searchruns_by_experiments_array" begin - runs = searchruns(mlf, [e1, e2]) - @test runs |> length == (run_array1 |> length) + (run_array2 |> length) - end - - @testset "searchruns_by_filter" begin - runs = searchruns(mlf, [e1, e2]; filter="param.test = \"failed\"") - @test failed_runs == runs |> length - end - - @testset "searchruns_by_filter_params" begin - runs = searchruns(mlf, [e1, e2]; filter_params=Dict("test" => "failed")) - @test failed_runs == runs |> length - end - - @testset "searchruns_filter_exception" begin - @test_throws ErrorException searchruns(mlf, [e1, e2]; filter="test", filter_params=Dict("test" => "test")) - end - - @testset "runs_get_methods" begin - runs = searchruns(mlf, [e1, e2]; filter_params=Dict("test" => "failed")) - @test get_info(runs[1]) == runs[1].info - @test get_data(runs[1]) == runs[1].data - @test 
get_run_id(runs[1]) == runs[1].info.run_id - @test get_params(runs[1]) == runs[1].data.params - end - - deleteexperiment(mlf, e1) - deleteexperiment(mlf, e2) -end From a6579ec31cc842776f123aad8e76fb8edd0b2576 Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Fri, 1 Nov 2024 18:46:17 -0500 Subject: [PATCH 14/31] Implementing `searchruns` --- src/MLFlowClient.jl | 1 + src/services/experiment.jl | 5 +++-- src/services/run.jl | 45 ++++++++++++++++++++++++++++++++++++++ src/types/mlflow.jl | 11 ++++------ test/services/loggers.jl | 12 ++++++---- test/services/run.jl | 42 +++++++++++++++++++++++++++++++---- 6 files changed, 99 insertions(+), 17 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 3054c88..7adb4df 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -70,6 +70,7 @@ export deleterun, setruntag, restorerun, + searchruns, deleteruntag include("services/loggers.jl") diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 4793c77..95d98b1 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -168,7 +168,8 @@ is the default. unspecified, return only active experiments. # Returns -- vector of [`MLFlowExperiment`](@ref) experiments that were found in the MLFlow instance +- Vector of [`Experiment`](@ref) that were found in the MLFlow instance. +- The next page token if there are more results. """ function searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", filter::String="", order_by::Array{String}=String[], @@ -183,7 +184,7 @@ function searchexperiments(instance::MLFlow; max_results::Int64=20000, result = mlfget(instance, "experiments/search"; parameters...) 
- experiments = result["experiments"] |> (x -> [Experiment(y) for y in x]) + experiments = get(result, "experiments", []) |> (x -> [Experiment(y) for y in x]) next_page_token = get(result, "next_page_token", nothing) return experiments, next_page_token diff --git a/src/services/run.jl b/src/services/run.jl index 8cabd70..c275944 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -141,3 +141,48 @@ function deleteruntag(instance::MLFlow, run_id::String, key::String)::Bool end deleteruntag(instance::MLFlow, run::Run, key::String)::Bool = deleteruntag(instance, run.info.run_id, key) + +""" + searchruns(instance::MLFlow; experiment_ids::Array{String}=String[], + filter::String="", run_view_type::ViewType=ACTIVE_ONLY, + max_results::Int=1000, order_by::Array{String}=String[], + page_token::String="") + +Search for runs that satisfy expressions. Search expressions can use Metric and +Param keys. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `experiment_ids`: List of experiment IDs to search over. +- `filter`: A filter expression over params, metrics, and tags, that allows +returning a subset of runs. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). +- `run_view_type`: Whether to display only active, only deleted, or all runs. +Defaults to only active runs. +- `max_results`: Maximum number of runs desired. +- `order_by`: List of columns to be ordered by, including attributes, params, +metrics, and tags with an optional “DESC” or “ASC” annotation, where “ASC” is +the default. +- `page_token`: Token indicating the page of runs to fetch. + +# Returns +- Vector of [`Run`](@ref) that were found in the specified experiments. +- The next page token if there are more results. 
+""" +function searchruns(instance::MLFlow; experiment_ids::Array{String}=String[], + filter::String="", run_view_type::ViewType=ACTIVE_ONLY, + max_results::Int=1000, order_by::Array{String}=String[], + page_token::String="")::Tuple{Array{Run}, Union{String, Nothing}} + parameters = (; experiment_ids, filter, + :run_view_type => run_view_type |> Integer, max_results, page_token) + + if order_by |> !isempty + parameters = (; order_by, parameters...) + end + + result = mlfpost(instance, "runs/search"; parameters...) + + runs = get(result, "runs", []) |> (x -> [Run(y) for y in x]) + next_page_token = get(result, "next_page_token", nothing) + + return runs, next_page_token +end diff --git a/src/types/mlflow.jl b/src/types/mlflow.jl index 34604c8..1f9a7fa 100644 --- a/src/types/mlflow.jl +++ b/src/types/mlflow.jl @@ -32,13 +32,10 @@ struct MLFlow headers::Dict end MLFlow(apiroot; apiversion=2.0, headers=Dict()) = MLFlow(apiroot, apiversion, headers) -function MLFlow() - apiroot = "http://localhost:5000/api" - if haskey(ENV, "MLFLOW_TRACKING_URI") - apiroot = ENV["MLFLOW_TRACKING_URI"] - end - return MLFlow(apiroot) -end +MLFlow(; apiroot="http://localhost:5000/api", apiversion=2.0, headers=Dict()) = + MLFlow((haskey(ENV, "MLFLOW_TRACKING_URI") ? 
+ ENV["MLFLOW_TRACKING_URI"] : apiroot), apiversion, headers) + Base.show(io::IO, t::MLFlow) = show(io, ShowCase(t, [:apiroot,:apiversion], new_lines=true)) abstract type LoggingData end diff --git a/test/services/loggers.jl b/test/services/loggers.jl index 229e68b..4430673 100644 --- a/test/services/loggers.jl +++ b/test/services/loggers.jl @@ -102,7 +102,8 @@ end run = refresh(mlf, run) last_metric = run.data.metrics |> last last_param = run.data.params |> last - last_tag = run.data.tags |> first + last_tag = run.data.tags[ + findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] @test last_metric isa Metric @test last_metric.key == "ana" @@ -127,7 +128,8 @@ end run = refresh(mlf, run) last_metric = run.data.metrics |> last last_param = run.data.params |> last - last_tag = run.data.tags |> first + last_tag = run.data.tags[ + findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] @test last_metric isa Metric @test last_metric.key == "ana" @@ -152,7 +154,8 @@ end run = refresh(mlf, run) last_metric = run.data.metrics |> last last_param = run.data.params |> last - last_tag = run.data.tags |> first + last_tag = run.data.tags[ + findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] @test last_metric isa Metric @test last_metric.key == "ana" @@ -179,7 +182,8 @@ end run = refresh(mlf, run) last_metric = run.data.metrics |> last last_param = run.data.params |> last - last_tag = run.data.tags |> first + last_tag = run.data.tags[ + findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] @test last_metric isa Metric @test last_metric.key == "ana" diff --git a/test/services/run.jl b/test/services/run.jl index 19f74b6..19b2a03 100644 --- a/test/services/run.jl +++ b/test/services/run.jl @@ -102,8 +102,10 @@ end run = refresh(mlf, run) @test run.data.tags |> !isempty - @test (run.data.tags |> first).key == "tag" - @test (run.data.tags |> first).value == "value" + last_tag = run.data.tags[ + findall(x -> 
!occursin("mlflow.runName", x.key), run.data.tags)[1]] + @test last_tag.key == "tag" + @test last_tag.value == "value" deleterun(mlf, run) end @@ -115,8 +117,10 @@ end run = refresh(mlf, run) @test run.data.tags |> !isempty - @test (run.data.tags |> first).key == "tag" - @test (run.data.tags |> first).value == "value" + last_tag = run.data.tags[ + findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] + @test last_tag.key == "tag" + @test last_tag.value == "value" deleterun(mlf, run) end @@ -151,3 +155,33 @@ end end deleteexperiment(mlf, experiment_id) end + +@testset verbose = true "search runs" begin + @ensuremlf + + experiment_ids = [ + createexperiment(mlf, UUIDs.uuid4() |> string), + createexperiment(mlf, UUIDs.uuid4() |> string), + ] + for experiment_id in experiment_ids + createrun(mlf, experiment_id) + end + + @testset "default search" begin + runs, next_page_token = searchruns(mlf; experiment_ids=experiment_ids) + + @test length(runs) == 2 + @test next_page_token |> isnothing + end + + @testset "with pagination" begin + runs, next_page_token = searchruns(mlf; experiment_ids=experiment_ids, + max_results=1) + + @test length(runs) == 1 + @test next_page_token |> !isnothing + @test next_page_token isa String + end + + experiment_ids .|> (id -> deleteexperiment(mlf, id)) +end From 1552bb3be6f1b770b2bcda26acd811d66ea1df97 Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Sat, 2 Nov 2024 02:19:10 -0500 Subject: [PATCH 15/31] Some formatting changes and extra functions using types --- src/MLFlowClient.jl | 51 ++++---------------- src/api.jl | 12 ++--- src/services/experiment.jl | 91 ++++++++++++++--------------------- src/services/loggers.jl | 59 ++++++++++------------- src/services/misc.jl | 25 +++++++--- src/services/run.jl | 64 ++++++++++++------------ src/types/dataset.jl | 13 +++-- src/types/enums.jl | 4 +- src/types/experiment.jl | 14 +++--- src/types/mlflow.jl | 10 ++-- src/types/model_version.jl | 25 +++++----- src/types/registered_model.jl | 
11 ++--- src/types/run.jl | 26 +++++----- src/utils.jl | 35 +++++--------- test/services/misc.jl | 3 +- 15 files changed, 190 insertions(+), 253 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 7adb4df..67c6894 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -15,15 +15,10 @@ include("types/tag.jl") export Tag include("types/enums.jl") -export - ViewType, - RunStatus, - ModelVersionStatus +export ViewType, RunStatus, ModelVersionStatus include("types/dataset.jl") -export - Dataset, - DatasetInput +export Dataset, DatasetInput include("types/artifact.jl") export FileInfo @@ -32,56 +27,30 @@ include("types/model_version.jl") export ModelVersion include("types/registered_model.jl") -export - RegisteredModel, - RegisteredModelAlias +export RegisteredModel, RegisteredModelAlias include("types/experiment.jl") export Experiment include("types/run.jl") -export - Run, - Param, - Metric, - RunData, - RunInfo, - RunInputs +export Run, Param, Metric, RunData, RunInfo, RunInputs include("utils.jl") export refresh + include("api.jl") include("services/experiment.jl") -export - getexperiment, - createexperiment, - deleteexperiment, - setexperimenttag, - updateexperiment, - restoreexperiment, - searchexperiments, - getexperimentbyname +export getexperiment, createexperiment, deleteexperiment, setexperimenttag, + updateexperiment, restoreexperiment, searchexperiments, getexperimentbyname include("services/run.jl") -export - getrun, - createrun, - deleterun, - setruntag, - restorerun, - searchruns, - deleteruntag +export getrun, createrun, deleterun, setruntag, restorerun, searchruns, deleteruntag include("services/loggers.jl") -export - logbatch, - loginputs, - logmetric, - logparam +export logbatch, loginputs, logmetric, logparam include("services/misc.jl") -export - getmetrichistory +export getmetrichistory end diff --git a/src/api.jl b/src/api.jl index 501d708..a8ff985 100644 --- a/src/api.jl +++ b/src/api.jl @@ -10,8 +10,7 @@ 
MLFlowClient.uri(mlf, "experiments/get", Dict(:experiment_id=>10)) """ uri(mlf::MLFlow, endpoint::String; parameters::Dict{Symbol, <:Any}=Dict{Symbol, NumberOrString}()) = - URI("$(mlf.apiroot)/$(mlf.apiversion)/mlflow/$(endpoint)"; - query=parameters) + URI("$(mlf.apiroot)/$(mlf.apiversion)/mlflow/$(endpoint)"; query=parameters) """ headers(mlf::MLFlow,custom_headers::AbstractDict) @@ -40,8 +39,7 @@ function mlfget(mlf, endpoint; kwargs...) return response.body |> String |> JSON.parse catch e error_response = e.response.body |> String |> JSON.parse - error_message = - "$(error_response["error_code"]) - $(error_response["message"])" + error_message = "$(error_response["error_code"]) - $(error_response["message"])" @error error_message throw(ErrorException(error_message)) end @@ -50,7 +48,8 @@ end """ mlfpost(mlf, endpoint; kwargs...) -Performs a HTTP POST to the specified endpoint. kwargs are converted to JSON and become the POST body. +Performs a HTTP POST to the specified endpoint. kwargs are converted to JSON and become the +POST body. """ function mlfpost(mlf, endpoint; kwargs...) apiuri = uri(mlf, endpoint;) @@ -62,8 +61,7 @@ function mlfpost(mlf, endpoint; kwargs...) return response.body |> String |> JSON.parse catch e error_response = e.response.body |> String |> JSON.parse - error_message = - "$(error_response["error_code"]) - $(error_response["message"])" + error_message = "$(error_response["error_code"]) - $(error_response["message"])" @error error_message throw(ErrorException(error_message)) end diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 95d98b1..1233d48 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -1,11 +1,10 @@ """ - createexperiment(instance::MLFlow, name::String; - artifact_location::String="", + createexperiment(instance::MLFlow, name::String; artifact_location::String="", tags::Union{Dict{<:Any}, Array{<:Any}}=[]) -Create an experiment with a name. Returns the newly created experiment. 
-Validates that another experiment with the same name does not already exist and -fails if another experiment with the same name already exists. +Create an experiment with a name. Returns the newly created experiment. Validates that +another experiment with the same name does not already exist and fails if another +experiment with the same name already exists. # Arguments - `instance`: [`MLFlow`](@ref) configuration. @@ -51,9 +50,9 @@ getexperiment(instance::MLFlow, experiment_id::Integer)::Experiment = Get metadata for an experiment. -This endpoint will return deleted experiments, but prefers the active -experiment if an active and deleted experiment share the same name. If multiple -deleted experiments share the same name, the API will return one of them. +This endpoint will return deleted experiments, but prefers the active experiment if an +active and deleted experiment share the same name. If multiple deleted experiments share +the same name, the API will return one of them. # Arguments - `instance`: [`MLFlow`](@ref) configuration. @@ -62,10 +61,8 @@ deleted experiments share the same name, the API will return one of them. # Returns An instance of type [`Experiment`](@ref). """ -function getexperimentbyname(instance::MLFlow, - experiment_name::String)::Experiment - result = mlfget(instance, "experiments/get-by-name"; - experiment_name=experiment_name) +function getexperimentbyname(instance::MLFlow, experiment_name::String)::Experiment + result = mlfget(instance, "experiments/get-by-name"; experiment_name=experiment_name) return result["experiment"] |> Experiment end @@ -74,9 +71,8 @@ end deleteexperiment(instance::MLFlow, experiment_id::Integer) deleteexperiment(instance::MLFlow, experiment::Experiment) -Mark an experiment and associated metadata, runs, metrics, params, and tags for -deletion. If the experiment uses FileStore, artifacts associated with -experiment are also deleted. 
+Mark an experiment and associated metadata, runs, metrics, params, and tags for deletion. +If the experiment uses FileStore, artifacts associated with experiment are also deleted. # Arguments - `instance`: [`MLFlow`](@ref) configuration. @@ -99,9 +95,9 @@ deleteexperiment(instance::MLFlow, experiment::Experiment)::Bool = restoreexperiment(instance::MLFlow, experiment_id::Integer) restoreexperiment(instance::MLFlow, experiment::Experiment) -Restore an experiment marked for deletion. This also restores associated -metadata, runs, metrics, params, and tags. If experiment uses FileStore, -underlying artifacts associated with experiment are also restored. +Restore an experiment marked for deletion. This also restores associated metadata, runs, +metrics, params, and tags. If experiment uses FileStore, underlying artifacts associated +with experiment are also restored. # Arguments - `instance`: [`MLFlow`](@ref) configuration. @@ -121,51 +117,43 @@ restoreexperiment(instance::MLFlow, experiment::Experiment)::Bool = """ updateexperiment(instance::MLFlow, experiment_id::String, new_name::String) - updateexperiment(instance::MLFlow, experiment_id::Integer, - new_name::String) - updateexperiment(instance::MLFlow, experiment::Experiment, - new_name::String) + updateexperiment(instance::MLFlow, experiment_id::Integer, new_name::String) + updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String) Update experiment metadata. # Arguments - `instance`: [`MLFlow`](@ref) configuration. - `experiment_id`: ID of the associated experiment. -- `new_name`: If provided, the experiment’s name is changed to the new name. -The new name must be unique. +- `new_name`: If provided, the experiment’s name is changed to the new name. The new name +must be unique. # Returns `true` if successful. Otherwise, raises exception. 
""" -function updateexperiment(instance::MLFlow, experiment_id::String, - new_name::String)::Bool - mlfpost(instance, "experiments/update"; experiment_id=experiment_id, - new_name=new_name) +function updateexperiment(instance::MLFlow, experiment_id::String, new_name::String)::Bool + mlfpost(instance, "experiments/update"; experiment_id=experiment_id, new_name=new_name) return true end -updateexperiment(instance::MLFlow, experiment_id::Integer, - new_name::String)::Bool = +updateexperiment(instance::MLFlow, experiment_id::Integer, new_name::String)::Bool = updateexperiment(instance, string(experiment_id), new_name) -updateexperiment(instance::MLFlow, experiment::Experiment, - new_name::String)::Bool = +updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String)::Bool = updateexperiment(instance, experiment.experiment_id, new_name) """ - searchexperiments(instance::MLFlow; max_results::Int64=20000, - page_token::String="", filter::String="", order_by::Array{String}=[], - view_type::ViewType=ACTIVE_ONLY) + searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", + filter::String="", order_by::Array{String}=[], view_type::ViewType=ACTIVE_ONLY) # Arguments - `instance`: [`MLFlow`](@ref) configuration. - `max_results`: Maximum number of experiments desired. - `page_token`: Token indicating the page of experiments to fetch. -- `filter`: A filter expression over experiment attributes and tags that allows -returning a subset of experiments. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments). -- `order_by`: List of columns for ordering search results, which can include -experiment name and id with an optional “DESC” or “ASC” annotation, where “ASC” -is the default. -- `view_type`: Qualifier for type of experiments to be returned. If -unspecified, return only active experiments. 
+- `filter`: A filter expression over experiment attributes and tags that allows returning a +subset of experiments. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments). +- `order_by`: List of columns for ordering search results, which can include experiment +name and id with an optional “DESC” or “ASC” annotation, where “ASC” is the default. +- `view_type`: Qualifier for type of experiments to be returned. If unspecified, return +only active experiments. # Returns - Vector of [`Experiment`](@ref) that were found in the MLFlow instance. @@ -173,10 +161,8 @@ unspecified, return only active experiments. """ function searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", filter::String="", order_by::Array{String}=String[], - view_type::ViewType=ACTIVE_ONLY -)::Tuple{Array{Experiment}, Union{String, Nothing}} - parameters = (; max_results, page_token, filter, - :view_type => view_type |> Integer) + view_type::ViewType=ACTIVE_ONLY)::Tuple{Array{Experiment}, Union{String, Nothing}} + parameters = (; max_results, page_token, filter, :view_type => view_type |> Integer) if order_by |> !isempty parameters = (; order_by, parameters...) @@ -191,12 +177,9 @@ function searchexperiments(instance::MLFlow; max_results::Int64=20000, end """ - setexperimenttag(instance::MLFlow, experiment_id::String, key::String, - value::String) - setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, - value::String) - setexperimenttag(instance::MLFlow, experiment::Experiment, key::String, - value::String) + setexperimenttag(instance::MLFlow, experiment_id::String, key::String, value::String) + setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, value::String) + setexperimenttag(instance::MLFlow, experiment::Experiment, key::String, value::String) Set a tag on an experiment. Experiment tags are metadata that can be updated. @@ -210,8 +193,8 @@ Set a tag on an experiment. 
Experiment tags are metadata that can be updated. """ function setexperimenttag(instance::MLFlow, experiment_id::String, key::String, value::String)::Bool - mlfpost(instance, "experiments/set-experiment-tag"; - experiment_id=experiment_id, key=key, value=value) + mlfpost(instance, "experiments/set-experiment-tag"; experiment_id=experiment_id, + key=key, value=value) return true end setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, diff --git a/src/services/loggers.jl b/src/services/loggers.jl index 2dc0a39..ecab95a 100644 --- a/src/services/loggers.jl +++ b/src/services/loggers.jl @@ -6,9 +6,9 @@ timestamp::Int64=round(Int, now() |> datetime2unix), step::Union{Int64, Missing}=missing) -Log a metric for a run. A metric is a key-value pair (string key, float value) -with an associated timestamp. Examples include the various metrics that -represent ML model accuracy. A metric can be logged multiple times. +Log a metric for a run. A metric is a key-value pair (string key, float value) with an +associated timestamp. Examples include the various metrics that represent ML model +accuracy. A metric can be logged multiple times. # Arguments - `instance`: [`MLFlow`](@ref) configuration. @@ -21,8 +21,8 @@ represent ML model accuracy. A metric can be logged multiple times. # Returns `true` if successful. Otherwise, raises exception. 
""" -function logmetric(instance::MLFlow, run_id::String, key::String, - value::Float64; timestamp::Int64=round(Int, now() |> datetime2unix), +function logmetric(instance::MLFlow, run_id::String, key::String, value::Float64; + timestamp::Int64=round(Int, now() |> datetime2unix), step::Union{Int64, Missing}=missing)::Bool mlfpost(instance, "runs/log-metric"; run_id=run_id, key=key, value=value, timestamp=timestamp, step=step) @@ -31,8 +31,7 @@ end logmetric(instance::MLFlow, run::Run, key::String, value::Float64; timestamp::Int64=round(Int, now() |> datetime2unix), step::Union{Int64, Missing}=missing)::Bool = - logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, - step=step) + logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, step=step) logmetric(instance::MLFlow, run_id::String, metric::Metric)::Bool = logmetric(instance, run_id, metric.key, metric.value, timestamp=metric.timestamp, step=metric.step) @@ -41,14 +40,13 @@ logmetric(instance::MLFlow, run::Run, metric::Metric)::Bool = timestamp=metric.timestamp, step=metric.step) """ - logbatch(instance::MLFlow, run_id::String; - metrics::MLFlowUpsertData{Metric}, params::MLFlowUpsertData{Param}, - tags::MLFlowUpsertData{Tag}) + logbatch(instance::MLFlow, run_id::String; metrics::MLFlowUpsertData{Metric}, + params::MLFlowUpsertData{Param}, tags::MLFlowUpsertData{Tag}) logbatch(instance::MLFlow, run::Run; metrics::Array{Metric}, params::MLFlowUpsertData{Param}, tags::MLFlowUpsertData{Tag}) -Log a batch of metrics, params, and tags for a run. In case of error, partial -data may be written. +Log a batch of metrics, params, and tags for a run. In case of error, partial data may be +written. For more information about this function, check [MLFlow official documentation](https://mlflow.org/docs/latest/rest-api.html#log-batch). @@ -59,27 +57,22 @@ For more information about this function, check [MLFlow official documentation]( - `params`: Params to log. - `tags`: Tags to log. 
-**Note**: A single request can contain up to 1000 metrics, and up to 1000 -metrics, params, and tags in total. +**Note**: A single request can contain up to 1000 metrics, and up to 1000 metrics, params, +and tags in total. # Returns `true` if successful. Otherwise, raises exception. """ function logbatch(instance::MLFlow, run_id::String; - metrics::MLFlowUpsertData{Metric}=Metric[], - params::MLFlowUpsertData{Param}=Param[], + metrics::MLFlowUpsertData{Metric}=Metric[], params::MLFlowUpsertData{Param}=Param[], tags::MLFlowUpsertData{Tag}=Tag[])::Bool - mlfpost(instance, "runs/log-batch"; run_id=run_id, - metrics=parse(Metric, metrics), params=parse(Param, params), - tags=parse(Tag, tags)) + mlfpost(instance, "runs/log-batch"; run_id=run_id, metrics=parse(Metric, metrics), + params=parse(Param, params), tags=parse(Tag, tags)) return true end -logbatch(instance::MLFlow, run::Run; - metrics::MLFlowUpsertData{Metric}=Metric[], - params::MLFlowUpsertData{Param}=Param[], - tags::MLFlowUpsertData{Tag}=Tag[])::Bool = - logbatch(instance, run.info.run_id; metrics=metrics, params=params, - tags=tags) +logbatch(instance::MLFlow, run::Run; metrics::MLFlowUpsertData{Metric}=Metric[], + params::MLFlowUpsertData{Param}=Param[], tags::MLFlowUpsertData{Tag}=Tag[])::Bool = + logbatch(instance, run.info.run_id; metrics=metrics, params=params, tags=tags) """ loginputs(instance::MLFlow, run_id::String; datasets::Array{DatasetInput}) @@ -93,8 +86,7 @@ logbatch(instance::MLFlow, run::Run; # Returns `true` if successful. Otherwise, raises exception. 
""" -function loginputs(instance::MLFlow, run_id::String, - datasets::Array{DatasetInput})::Bool +function loginputs(instance::MLFlow, run_id::String, datasets::Array{DatasetInput})::Bool mlfpost(instance, "runs/log-inputs"; run_id=run_id, datasets=datasets) return true end @@ -107,19 +99,20 @@ loginputs(instance::MLFlow, run::Run, datasets::Array{DatasetInput})::Bool = logparam(instance::MLFlow, run_id::String, param::Param) logparam(instance::MLFlow, run::Run, param::Param) -Log a param used for a run. A param is a key-value pair (string key, string -value). Examples include hyperparameters used for ML model training and -constant dates and values used in an ETL pipeline. A param can be logged only -once for a run. +Log a param used for a run. A param is a key-value pair (string key, string value). +Examples include hyperparameters used for ML model training and constant dates and values +used in an ETL pipeline. A param can be logged only once for a run. # Arguments - `instance`: [`MLFlow`](@ref) configuration. - `run_id`: ID of the run under which to log the param. - `key`: Name of the param. - `value`: String value of the param being logged. + +# Returns +`true` if successful. Otherwise, raises exception. """ -function logparam(instance::MLFlow, run_id::String, key::String, - value::String)::Bool +function logparam(instance::MLFlow, run_id::String, key::String, value::String)::Bool mlfpost(instance, "runs/log-parameter"; run_id=run_id, key=key, value=value) return true end diff --git a/src/services/misc.jl b/src/services/misc.jl index e4c10b9..7be2137 100644 --- a/src/services/misc.jl +++ b/src/services/misc.jl @@ -11,18 +11,31 @@ Get a list of all values for the specified metric for a given run. - `page_token`: Token indicating the page of metric history to fetch. - `max_results`: Maximum number of logged instances of a metric for a run to return per call. + +# Returns +- A list of all metric historical values for the specified metric in the +specified run. 
+- The next page token if there are more results. """ function getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; - page_token::String="", - max_results::Union{Int64, Missing}=missing + page_token::String="", max_results::Union{Int64, Missing}=missing )::Tuple{Array{Metric}, Union{String, Nothing}} - result = mlfget(instance, "metrics/get-history"; run_id=run_id, - metric_key=metric_key, page_token=page_token, - max_results= - (ismissing(max_results) ? max_results : (max_results |> Int32))) + result = mlfget(instance, "metrics/get-history"; run_id=run_id, metric_key=metric_key, + page_token=page_token, + max_results=(ismissing(max_results) ? max_results : (max_results |> Int32))) metrics = result["metrics"] |> (x -> [Metric(y) for y in x]) next_page_token = get(result, "next_page_token", nothing) return metrics, next_page_token end +getmetrichistory(instance::MLFlow, run::Run, metric_key::String; page_token::String="", + max_results::Union{Int64, Missing}=missing +)::Tuple{Array{Metric}, Union{String, Nothing}} = + getmetrichistory(instance, run.info.run_id, metric_key; page_token=page_token, + max_results=max_results) +getmetrichistory(instance::MLFlow, run::Run, metric::Metric; page_token::String="", + max_results::Union{Int64, Missing}=missing +)::Tuple{Array{Metric}, Union{String, Nothing}} = + getmetrichistory(instance, run.info.run_id, metric.key; page_token=page_token, + max_results=max_results) diff --git a/src/services/run.jl b/src/services/run.jl index c275944..2842b18 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -4,8 +4,8 @@ start_time::Union{Int64, Missing}=missing, tags::Union{Dict{<:Any}, Array{<:Any}}=[]) -Create a new run within an experiment. A run is usually a single execution of a -machine learning or data ETL pipeline. +Create a new run within an experiment. A run is usually a single execution of a machine +learning or data ETL pipeline. # Arguments - `instance`: [`MLFlow`](@ref) configuration. 
@@ -18,22 +18,19 @@ machine learning or data ETL pipeline. An instance of type [`Run`](@ref). """ function createrun(instance::MLFlow, experiment_id::String; - run_name::Union{String, Missing}=missing, - start_time::Union{Int64, Missing}=missing, + run_name::Union{String, Missing}=missing, start_time::Union{Int64, Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[])::Run result = mlfpost(instance, "runs/create"; experiment_id=experiment_id, run_name=run_name, start_time=start_time, tags=parse(Tag, tags)) return result["run"] |> Run end createrun(instance::MLFlow, experiment_id::Integer; - run_name::Union{String, Missing}=missing, - start_time::Union{Integer, Missing}=missing, + run_name::Union{String, Missing}=missing, start_time::Union{Integer, Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[])::Run = - createrun(instance, string(experiment_id); run_name=run_name, - start_time=start_time, tags=tags) + createrun(instance, string(experiment_id); run_name=run_name, start_time=start_time, + tags=tags) createrun(instance::MLFlow, experiment::Experiment; - run_name::Union{String, Missing}=missing, - start_time::Union{Integer, Missing}=missing, + run_name::Union{String, Missing}=missing, start_time::Union{Integer, Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[])::Run = createrun(instance, string(experiment.experiment_id); run_name=run_name, start_time=start_time, tags=tags) @@ -81,10 +78,9 @@ restorerun(instance::MLFlow, run::Run)::Bool = """ getrun(instance::MLFlow, run_id::String) -Get metadata, metrics, params, and tags for a run. In the case where multiple -metrics with the same key are logged for a run, return only the value with the -latest timestamp. If there are multiple values with the latest timestamp, -return the maximum of these values. +Get metadata, metrics, params, and tags for a run. In the case where multiple metrics with +the same key are logged for a run, return only the value with the latest timestamp. 
If +there are multiple values with the latest timestamp, return the maximum of these values. # Arguments - `instance`: [`MLFlow`](@ref) configuration. @@ -101,6 +97,7 @@ end """ setruntag(instance::MLFlow, run_id::String, key::String, value::String) setruntag(instance::MLFlow, run::Run, key::String, value::String) + setruntag(instance::MLFlow, run::Run, tag::Tag) Set a tag on a run. @@ -113,17 +110,19 @@ Set a tag on a run. # Returns `true` if successful. Otherwise, raises exception. """ -function setruntag(instance::MLFlow, run_id::String, key::String, - value::String):Bool +function setruntag(instance::MLFlow, run_id::String, key::String, value::String):Bool mlfpost(instance, "runs/set-tag"; run_id=run_id, key=key, value=value) return true end setruntag(instance::MLFlow, run::Run, key::String, value::String)::Bool = setruntag(instance, run.info.run_id, key, value) +setruntag(instance::MLFlow, run::Run, tag::Tag)::Bool = + setruntag(instance, run.info.run_id, tag.key, tag.value) """ deletetag(instance::MLFlow, run_id::String, key::String) deletetag(instance::MLFlow, run::Run, key::String) + deletetag(instance::MLFlow, run::Run, tag::Tag) Delete a tag on a run. @@ -141,27 +140,26 @@ function deleteruntag(instance::MLFlow, run_id::String, key::String)::Bool end deleteruntag(instance::MLFlow, run::Run, key::String)::Bool = deleteruntag(instance, run.info.run_id, key) +deleteruntag(instance::MLFlow, run::Run, tag::Tag)::Bool = + deleteruntag(instance, run.info.run_id, tag.key) """ - searchruns(instance::MLFlow; experiment_ids::Array{String}=String[], - filter::String="", run_view_type::ViewType=ACTIVE_ONLY, - max_results::Int=1000, order_by::Array{String}=String[], - page_token::String="") + searchruns(instance::MLFlow; experiment_ids::Array{String}=String[], filter::String="", + run_view_type::ViewType=ACTIVE_ONLY, max_results::Int=1000, + order_by::Array{String}=String[], page_token::String="") -Search for runs that satisfy expressions. 
Search expressions can use Metric and -Param keys. +Search for runs that satisfy expressions. Search expressions can use Metric and Param keys. # Arguments - `instance`: [`MLFlow`](@ref) configuration. - `experiment_ids`: List of experiment IDs to search over. -- `filter`: A filter expression over params, metrics, and tags, that allows -returning a subset of runs. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). -- `run_view_type`: Whether to display only active, only deleted, or all runs. -Defaults to only active runs. +- `filter`: A filter expression over params, metrics, and tags, that allows returning a +subset of runs. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). +- `run_view_type`: Whether to display only active, only deleted, or all runs. Defaults to +only active runs. - `max_results`: Maximum number of runs desired. -- `order_by`: List of columns to be ordered by, including attributes, params, -metrics, and tags with an optional “DESC” or “ASC” annotation, where “ASC” is -the default. +- `order_by`: List of columns to be ordered by, including attributes, params, metrics, and +tags with an optional “DESC” or “ASC” annotation, where “ASC” is the default. - `page_token`: Token indicating the page of runs to fetch. # Returns @@ -169,11 +167,11 @@ the default. - The next page token if there are more results. 
""" function searchruns(instance::MLFlow; experiment_ids::Array{String}=String[], - filter::String="", run_view_type::ViewType=ACTIVE_ONLY, - max_results::Int=1000, order_by::Array{String}=String[], + filter::String="", run_view_type::ViewType=ACTIVE_ONLY, max_results::Int=1000, + order_by::Array{String}=String[], page_token::String="")::Tuple{Array{Run}, Union{String, Nothing}} - parameters = (; experiment_ids, filter, - :run_view_type => run_view_type |> Integer, max_results, page_token) + parameters = (; experiment_ids, filter, :run_view_type => run_view_type |> Integer, + max_results, page_token) if order_by |> !isempty parameters = (; order_by, parameters...) diff --git a/src/types/dataset.jl b/src/types/dataset.jl index 37380ed..7843b2a 100644 --- a/src/types/dataset.jl +++ b/src/types/dataset.jl @@ -1,8 +1,8 @@ """ Dataset -Represents a reference to data used for training, testing, or evaluation during -the model development process. +Represents a reference to data used for training, testing, or evaluation during the model +development process. # Fields - `name::String`: The name of the dataset. 
@@ -24,9 +24,9 @@ struct Dataset schema::Union{String, Nothing} profile::Union{String, Nothing} end -Dataset(data::Dict{String, Any}) = Dataset( - data["name"], data["digest"], data["source_type"], data["source"], - get(data, "schema", nothing), get(data, "profile", nothing)) +Dataset(data::Dict{String, Any}) = Dataset(data["name"], data["digest"], + data["source_type"], data["source"], get(data, "schema", nothing), + get(data, "profile", nothing)) Base.show(io::IO, t::Dataset) = show(io, ShowCase(t, new_lines=true)) """ @@ -47,6 +47,5 @@ struct DatasetInput dataset::Dataset end DatasetInput(data::Dict{String, Any}) = DatasetInput( - [Tag(tag) for tag in get(data, "tags", [])], - Dataset(data["dataset"])) + [Tag(tag) for tag in get(data, "tags", [])], Dataset(data["dataset"])) Base.show(io::IO, t::DatasetInput) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/enums.jl b/src/types/enums.jl index fc2cbd5..4c2cb69 100644 --- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -2,8 +2,8 @@ ModelVersionStatus # Members -- `PENDING_REGISTRATION`: Request to register a new model version is pending as -server performs background tasks. +- `PENDING_REGISTRATION`: Request to register a new model version is pending as server +performs background tasks. - `FAILED_REGISTRATION`: Request to register a new model version has failed. - `READY`: Model version is ready for use. """ diff --git a/src/types/experiment.jl b/src/types/experiment.jl index b59dbd4..1529dbc 100644 --- a/src/types/experiment.jl +++ b/src/types/experiment.jl @@ -4,10 +4,9 @@ # Fields - `experiment_id::Integer`: Unique identifier for the experiment. - `name::String`: Human readable name that identifies the experiment. -- `artifact_location::String`: Location where artifacts for the experiment are -stored. -- `lifecycle_stage::String`: Current life cycle stage of the experiment: -“active” or “deleted”. Deleted experiments are not returned by APIs. 
+- `artifact_location::String`: Location where artifacts for the experiment are stored. +- `lifecycle_stage::String`: Current life cycle stage of the experiment: “active” or +“deleted”. Deleted experiments are not returned by APIs. - `last_update_time::Int64`: Last update time. - `creation_time::Int64`: Creation time. - `tags::Array{Tag}`: Additional metadata key-value pairs. @@ -21,8 +20,7 @@ struct Experiment creation_time::Int64 tags::Array{Tag} end -Experiment(data::Dict{String, Any}) = Experiment(data["experiment_id"], - data["name"], data["artifact_location"], data["lifecycle_stage"], - data["last_update_time"], data["creation_time"], - [Tag(tag) for tag in get(data, "tags", [])]) +Experiment(data::Dict{String, Any}) = Experiment(data["experiment_id"], data["name"], + data["artifact_location"], data["lifecycle_stage"], data["last_update_time"], + data["creation_time"], [Tag(tag) for tag in get(data, "tags", [])]) Base.show(io::IO, t::Experiment) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/mlflow.jl b/src/types/mlflow.jl index 1f9a7fa..9ba4e6a 100644 --- a/src/types/mlflow.jl +++ b/src/types/mlflow.jl @@ -6,13 +6,14 @@ Base type which defines location and version for MLFlow API service. # Fields - `apiroot::String`: API root URL, e.g. `http://localhost:5000/api` - `apiversion::Union{Integer, AbstractFloat}`: used API version, e.g. `2.0` -- `headers::Dict`: HTTP headers to be provided with the REST API requests (useful for authetication tokens) -Default is `false`, using the REST API endpoint. +- `headers::Dict`: HTTP headers to be provided with the REST API requests (useful for +authetication tokens) Default is `false`, using the REST API endpoint. 
# Constructors - `MLFlow(apiroot; apiversion=2.0,headers=Dict())` -- `MLFlow()` - defaults to `MLFlow(ENV["MLFLOW_TRACKING_URI"])` or `MLFlow("http://localhost:5000/api")` +- `MLFlow()` - defaults to `MLFlow(ENV["MLFLOW_TRACKING_URI"])` or +`MLFlow("http://localhost:5000/api")` # Examples @@ -36,6 +37,7 @@ MLFlow(; apiroot="http://localhost:5000/api", apiversion=2.0, headers=Dict()) = MLFlow((haskey(ENV, "MLFLOW_TRACKING_URI") ? ENV["MLFLOW_TRACKING_URI"] : apiroot), apiversion, headers) -Base.show(io::IO, t::MLFlow) = show(io, ShowCase(t, [:apiroot,:apiversion], new_lines=true)) +Base.show(io::IO, t::MLFlow) = + show(io, ShowCase(t, [:apiroot,:apiversion], new_lines=true)) abstract type LoggingData end diff --git a/src/types/model_version.jl b/src/types/model_version.jl index fcbd146..3b07655 100644 --- a/src/types/model_version.jl +++ b/src/types/model_version.jl @@ -4,25 +4,22 @@ # Fields - `name::String`: Unique name of the model. - `version::String`: Model’s version number. -- `creation_timestamp::Int64`: Timestamp recorded when this model_version was -created. -- `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this -model_version was last updated. +- `creation_timestamp::Int64`: Timestamp recorded when this model_version was created. +- `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this model_version +was last updated. - `user_id::String`: User that created this model_version. - `current_stage::String`: Current stage for this model_version. - `description::String`: Description of this model_version. -- `source::String`: URI indicating the location of the source model artifacts, -used when creating model_version. -- `run_id::String`: MLflow run ID used when creating model_version, if source -was generated by an experiment run stored in MLflow tracking server. +- `source::String`: URI indicating the location of the source model artifacts, used when +creating model_version. 
+- `run_id::String`: MLflow run ID used when creating model_version, if source was generated +by an experiment run stored in MLflow tracking server. - `status::ModelVersionStatus`: Current status of model_version. -- `status_message::String`: Details on current status, if it is pending or -failed. +- `status_message::String`: Details on current status, if it is pending or failed. - `tags::Array{Tag}`: Additional metadata key-value pairs. -- `run_link::String`: Direct link to the run that generated this version. This -field is set at model version creation time only for model versions whose -source run is from a tracking server that is different from the registry -server. +- `run_link::String`: Direct link to the run that generated this version. This field is set +at model version creation time only for model versions whose source run is from a tracking +server that is different from the registry server. - `aliases::Array{String}`: Aliases pointing to this model_version. """ struct ModelVersion diff --git a/src/types/registered_model.jl b/src/types/registered_model.jl index 8420538..ebdd865 100644 --- a/src/types/registered_model.jl +++ b/src/types/registered_model.jl @@ -18,17 +18,16 @@ Base.show(io::IO, t::RegisteredModelAlias) = show(io, ShowCase(t, new_lines=true # Fields - `name::String`: Unique name for the model. -- `creation_timestamp::Int64`: Timestamp recorded when this RegisteredModel was -created. +- `creation_timestamp::Int64`: Timestamp recorded when this RegisteredModel was created. - `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this RegisteredModel was last updated. - `user_id::String`: User that created this RegisteredModel. - `description::String`: Description of this RegisteredModel. -- `latest_versions::Array{ModelVersion}`: Collection of latest model versions -for each stage. Only contains models with current READY status. +- `latest_versions::Array{ModelVersion}`: Collection of latest model versions for each +stage. 
Only contains models with current READY status. - `tags::Array{Tag}`: Additional metadata key-value pairs. -- `aliases::Array{RegisteredModelAlias}`: Aliases pointing to model versions -associated with this RegisteredModel. +- `aliases::Array{RegisteredModelAlias}`: Aliases pointing to model versions associated +with this RegisteredModel. """ struct RegisteredModel name::String diff --git a/src/types/run.jl b/src/types/run.jl index e15a0eb..44d51b6 100644 --- a/src/types/run.jl +++ b/src/types/run.jl @@ -15,8 +15,8 @@ struct Metric <: LoggingData timestamp::Int64 step::Union{Int64, Nothing} end -Metric(data::Dict{String, Any}) = Metric(data["key"], data["value"], - data["timestamp"], data["step"]) +Metric(data::Dict{String, Any}) = Metric(data["key"], data["value"], data["timestamp"], + data["step"]) Base.show(io::IO, t::Metric) = show(io, ShowCase(t, new_lines=true)) """ @@ -47,12 +47,12 @@ Metadata of a single run. - `status::RunStatus`: Current status of the run. - `start_time::Int64`: Unix timestamp of when the run started in milliseconds. - `end_time::Int64`: Unix timestamp of when the run ended in milliseconds. -- `artifact_uri::String`: URI of the directory where artifacts should be -uploaded. This can be a local path (starting with “/”), or a distributed file -system (DFS) path, like s3://bucket/directory or dbfs:/my/directory. If not -set, the local ./mlruns directory is chosen. -- `lifecycle_stage::String`: Current life cycle stage of the experiment: -"active" or "deleted". +- `artifact_uri::String`: URI of the directory where artifacts should be uploaded. This can +be a local path (starting with “/”), or a distributed file system (DFS) path, +like s3://bucket/directory or dbfs:/my/directory. If not set, the local ./mlruns directory +is chosen. +- `lifecycle_stage::String`: Current life cycle stage of the experiment: "active" or +"deleted". 
""" struct RunInfo run_id::String @@ -66,8 +66,7 @@ struct RunInfo end RunInfo(data::Dict{String, Any}) = RunInfo(data["run_id"], data["run_name"], data["experiment_id"], RunStatus(data["status"]), data["start_time"], - get(data, "end_time", nothing), data["artifact_uri"], - data["lifecycle_stage"]) + get(data, "end_time", nothing), data["artifact_uri"], data["lifecycle_stage"]) Base.show(io::IO, t::RunInfo) = show(io, ShowCase(t, new_lines=true)) """ @@ -103,8 +102,7 @@ struct RunInputs dataset_inputs::Array{DatasetInput} end RunInputs(data::Dict{String, Any}) = RunInputs( - [DatasetInput(dataset_input) for dataset_input in - get(data, "dataset_inputs", [])]) + [DatasetInput(dataset_input) for dataset_input in get(data, "dataset_inputs", [])]) Base.show(io::IO, t::RunInputs) = show(io, ShowCase(t, new_lines=true)) """ @@ -117,6 +115,6 @@ struct Run data::RunData inputs::RunInputs end -Run(data::Dict{String, Any}) = Run(RunInfo(data["info"]), - RunData(data["data"]), RunInputs(data["inputs"])) +Run(data::Dict{String, Any}) = Run(RunInfo(data["info"]), RunData(data["data"]), + RunInputs(data["inputs"])) Base.show(io::IO, t::Run) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/utils.jl b/src/utils.jl index 6006f32..7a28a60 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,23 +1,15 @@ const NumberOrString = Union{Number, String} -const MLFlowUpsertData{T} = Union{ - Array{T}, - Array{<:Dict{String, <:Any}}, - Dict{String, <:NumberOrString}, - Array{<:Pair{String, <:NumberOrString}}, - Array{<:Tuple{String, <:NumberOrString}} -} +const MLFlowUpsertData{T} = Union{Array{T}, Array{<:Dict{String, <:Any}}, + Dict{String, <:NumberOrString}, Array{<:Pair{String, <:NumberOrString}}, + Array{<:Tuple{String, <:NumberOrString}}} -const MLFLOW_ERROR_CODES = (; - RESOURCE_ALREADY_EXISTS = "RESOURCE_ALREADY_EXISTS", - RESOURCE_DOES_NOT_EXIST = "RESOURCE_DOES_NOT_EXIST", -) - -function dict_to_T_array(::Type{T}, dict::Dict{String, <:NumberOrString}) where T<:LoggingData 
+function dict_to_T_array(::Type{T}, + dict::Dict{String, <:NumberOrString}) where T<:LoggingData entities = T[] for (key, value) in dict if T<:Metric - push!(entities, Metric(key, Float64(value), - round(Int, now() |> datetime2unix), nothing)) + push!(entities, Metric(key, Float64(value), round(Int, now() |> datetime2unix), + nothing)) else push!(entities, T(key, value |> string)) end @@ -32,8 +24,8 @@ function pairarray_to_T_array(::Type{T}, pair_array::Array{<:Pair}) where T<:Log key = pair.first |> string if T<:Metric value = pair.second - push!(entities, Metric(key, Float64(value), - round(Int, now() |> datetime2unix), nothing)) + push!(entities, Metric(key, Float64(value), round(Int, now() |> datetime2unix), + nothing)) else value = pair.second |> string push!(entities, T(key, value)) @@ -54,8 +46,8 @@ function tuplearray_to_T_array(::Type{T}, key = tuple |> first |> string if T<: Metric value = tuple |> last - push!(entities, Metric(key, Float64(value), - round(Int, now() |> datetime2unix), nothing)) + push!(entities, Metric(key, Float64(value), round(Int, now() |> datetime2unix), + nothing)) else value = tuple |> last |> string push!(entities, T(key, value)) @@ -100,7 +92,6 @@ function parse(::Type{T}, entities::MLFlowUpsertData{T}) where T<:LoggingData return entities end -refresh(instance::MLFlow, experiment::Experiment)::Experiment = +refresh(instance::MLFlow, experiment::Experiment)::Experiment = getexperiment(instance, experiment.experiment_id) -refresh(instance::MLFlow, run::Run)::Run = - getrun(instance, run.info.run_id) +refresh(instance::MLFlow, run::Run)::Run = getrun(instance, run.info.run_id) diff --git a/test/services/misc.jl b/test/services/misc.jl index b3f3135..d53a34c 100644 --- a/test/services/misc.jl +++ b/test/services/misc.jl @@ -8,8 +8,7 @@ end @testset "default search" begin - metrics, next_page_token = getmetrichistory(mlf, run.info.run_id, - "missy") + metrics, next_page_token = getmetrichistory(mlf, run, "missy") @test 
length(metrics) == 20 @test next_page_token |> isnothing From 92e2bb00ea8a316c8962351082b8921e502397e3 Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Sat, 2 Nov 2024 23:50:40 -0500 Subject: [PATCH 16/31] Implementing `listartifacts` --- src/MLFlowClient.jl | 3 + src/experiments.jl | 247 ----------------------------------- src/loggers.jl | 264 -------------------------------------- src/runs.jl | 190 --------------------------- src/services/artifact.jl | 31 +++++ test/runtests.jl | 5 +- test/services/artifact.jl | 25 ++++ 7 files changed, 62 insertions(+), 703 deletions(-) delete mode 100644 src/experiments.jl delete mode 100644 src/loggers.jl delete mode 100644 src/runs.jl create mode 100644 src/services/artifact.jl create mode 100644 test/services/artifact.jl diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 67c6894..5d750ed 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -50,6 +50,9 @@ export getrun, createrun, deleterun, setruntag, restorerun, searchruns, deleteru include("services/loggers.jl") export logbatch, loginputs, logmetric, logparam +include("services/artifact.jl") +export listartifacts + include("services/misc.jl") export getmetrichistory diff --git a/src/experiments.jl b/src/experiments.jl deleted file mode 100644 index 7b34efd..0000000 --- a/src/experiments.jl +++ /dev/null @@ -1,247 +0,0 @@ -""" - createexperiment(mlf::MLFlow; name=missing, artifact_location=missing, tags=missing) - -Creates an MLFlow experiment. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `name`: experiment name. If not specified, MLFlow sets it. -- `artifact_location`: directory where artifacts of this experiment will be stored. If not specified, MLFlow uses its default configuration. -- `tags`: a Vector of Dictionaries which tag the experiment. - - example tags: [Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] - -# Returns -An object of type [`MLFlowExperiment`](@ref). 
- -""" -function createexperiment(mlf::MLFlow; name=missing, artifact_location=missing, tags=missing) - endpoint = "experiments/create" - - if ismissing(name) - name = string(UUIDs.uuid4()) - end - - try - result = mlfpost(mlf, endpoint; name=name, artifact_location=artifact_location, tags=tags) - experiment_id = parse(Int, result["experiment_id"]) - return getexperiment(mlf, experiment_id) - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 400 - error_code = JSON.parse(String(e.response.body))["error_code"] - if error_code == MLFLOW_ERROR_CODES.RESOURCE_ALREADY_EXISTS - error("Experiment with name \"$name\" already exists") - end - end - throw(e) - end -end - -""" - getexperiment(mlf::MLFlow, experiment_id::Integer) - -Retrieves an MLFlow experiment by id. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `experiment_id`: Experiment identifier. - -# Returns -An instance of type [`MLFlowExperiment`](@ref) - -""" -function getexperiment(mlf::MLFlow, experiment_id::Integer) - try - endpoint = "experiments/get" - arguments = (:experiment_id => experiment_id,) - result = mlfget(mlf, endpoint; arguments...)["experiment"] - return MLFlowExperiment(result) - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 - return missing - end - throw(e) - end -end -""" - getexperiment(mlf::MLFlow, experiment_name::String) - -Retrieves an MLFlow experiment by name. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `experiment_name`: Experiment name. 
- -# Returns -An instance of type [`MLFlowExperiment`](@ref) - -""" -function getexperiment(mlf::MLFlow, experiment_name::String) - try - endpoint = "experiments/get-by-name" - arguments = (:experiment_name => experiment_name,) - result = mlfget(mlf, endpoint; arguments...)["experiment"] - return MLFlowExperiment(result) - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 - return missing - end - throw(e) - end -end - -""" - getorcreateexperiment(mlf::MLFlow, experiment_name::String; artifact_location=missing, tags=missing) - -Gets an experiment if one alrady exists, or creates a new one. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `experiment_name`: Experiment name. -- `artifact_location`: directory where artifacts of this experiment will be stored. If not specified, MLFlow uses its default configuration. -- `tags`: a Vector of Dictionaries which tag the experiment. - - example tags: [Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] - -# Returns -An instance of type [`MLFlowExperiment`](@ref) - -""" -function getorcreateexperiment(mlf::MLFlow, experiment_name::String; artifact_location=missing, tags=missing) - experiment = getexperiment(mlf, experiment_name) - - if ismissing(experiment) - return createexperiment(mlf, name=experiment_name, artifact_location=artifact_location, tags=tags) - end - return experiment -end - -""" - deleteexperiment(mlf::MLFlow, experiment_id::Integer) - -Deletes an MLFlow experiment. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `experiment_id`: experiment identifier. - -# Returns - -`true` if successful. Otherwise, raises exception. 
-""" -function deleteexperiment(mlf::MLFlow, experiment_id::Integer) - endpoint = "experiments/delete" - try - mlfpost(mlf, endpoint; experiment_id=experiment_id) - return true - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 - # experiment already deleted - return true - end - throw(e) - end -end - -""" - deleteexperiment(mlf::MLFlow, experiment::MLFlowExperiment) - -Deletes an MLFlow experiment. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `experiment`: an object of type [`MLFlowExperiment`](@ref) - -Dispatches to `deleteexperiment(mlf::MLFlow, experiment_id::Integer)`. - -""" -deleteexperiment(mlf::MLFlow, experiment::MLFlowExperiment) = - deleteexperiment(mlf, experiment.experiment_id) - -""" - restoreexperiment(mlf::MLFlow, experiment_id::Integer) - -Restores a deleted MLFlow experiment. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `experiment_id`: experiment identifier. - -# Returns - -`true` if successful. Otherwise, raises exception. -""" -function restoreexperiment(mlf::MLFlow, experiment_id::Integer) - endpoint = "experiments/restore" - try - mlfpost(mlf, endpoint; experiment_id=experiment_id) - return true - catch e - if isa(e, HTTP.ExceptionRequest.StatusError) && e.status == 404 - error_code = JSON.parse(String(e.response.body))["error_code"] - if error_code == MLFLOW_ERROR_CODES.RESOURCE_DOES_NOT_EXIST - error("Experiment with id \"$experiment_id\" does not exist") - end - end - throw(e) - end -end - -restoreexperiment(mlf::MLFlow, experiment::MLFlowExperiment) = - restoreexperiment(mlf, experiment.experiment_id) - -""" - searchexperiments(mlf::MLFlow) - -Searches for experiments in an MLFlow instance. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. 
- -# Keywords -- `filter::String`: filter as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments) -- `filter_attributes::AbstractDict{K,V}`: if provided, `filter` is automatically generated based on `filter_attributes` using [`generatefilterfromattributes`](@ref). One can only provide either `filter` or `filter_attributes`, but not both. -- `run_view_type::String`: one of `ACTIVE_ONLY`, `DELETED_ONLY`, or `ALL`. -- `max_results::Integer`: 50,000 by default. -- `order_by::String`: as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments) -- `page_token::String`: paging functionality, handled automatically. Not meant to be passed by the user. - -# Returns -- vector of [`MLFlowExperiment`](@ref) experiments that were found in the MLFlow instance - -""" -function searchexperiments(mlf::MLFlow; - filter::String="", - filter_attributes::AbstractDict{K,V}=Dict{}(), - run_view_type::String="ACTIVE_ONLY", - max_results::Int64=50000, - order_by::AbstractVector{<:String}=["attribute.last_update_time"], - page_token::String="" -) where {K,V} - endpoint = "experiments/search" - run_view_type ∈ ["ACTIVE_ONLY", "DELETED_ONLY", "ALL"] || error("Unsupported run_view_type = $run_view_type") - - if length(filter_attributes) > 0 && length(filter) > 0 - error("Cannot specify both filter and filter_attributes") - end - - if length(filter_attributes) > 0 - filter = generatefilterfromattributes(filter_attributes) - end - - kwargs = (; filter, run_view_type, max_results, order_by) - if !isempty(page_token) - kwargs = (; kwargs..., page_token=page_token) - end - - result = mlfpost(mlf, endpoint; kwargs...) 
- haskey(result, "experiments") || return MLFlowExperiment[] - - experiments = map(x -> MLFlowExperiment(x), result["experiments"]) - - if haskey(result, "next_page_token") && !isempty(result["next_page_token"]) - kwargs = (; filter, run_view_type, max_results, order_by, page_token=result["next_page_token"]) - next_experiments = searchexperiments(mlf; kwargs...) - return vcat(experiments, next_experiments) - end - - experiments -end diff --git a/src/loggers.jl b/src/loggers.jl deleted file mode 100644 index b8f0639..0000000 --- a/src/loggers.jl +++ /dev/null @@ -1,264 +0,0 @@ -""" - settag(mlf::MLFlow, run, key, value) - settag(mlf::MLFlow, run, kv) - -Associates a tag (a key and a value) to the particular run. - -Refer to [the official MLflow REST API -docs](https://mlflow.org/docs/latest/rest-api.html#set-tag) for restrictions on -`key` and `value`. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String`. -- `key`: tag key (name). Automatically converted to string before sending to MLFlow because this is the only type that MLFlow supports. -- `value`: parameter value. Automatically converted to string before sending to MLFlow because this is the only type that MLFlow supports. - -One could also specify `kv::Dict` instead of separate `key` and `value` arguments. 
-""" -function settag(mlf::MLFlow, run_id::String, key, value) - endpoint ="runs/set-tag" - mlfpost(mlf, endpoint; run_id=run_id, key=string(key), value=string(value)) -end -settag(mlf::MLFlow, run_info::MLFlowRunInfo, key, value) = - settag(mlf, run_info.run_id, key, value) -settag(mlf::MLFlow, run::MLFlowRun, key, value) = - settag(mlf, run.info, key, value) -function settag(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, kv) - for (k, v) in kv - logparam(mlf, run, k, v) - end -end - - -""" - logparam(mlf::MLFlow, run, key, value) - logparam(mlf::MLFlow, run, kv) - -Associates a key/value pair of parameters to the particular run. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String`. -- `key`: parameter key (name). Automatically converted to string before sending to MLFlow because this is the only type that MLFlow supports. -- `value`: parameter value. Automatically converted to string before sending to MLFlow because this is the only type that MLFlow supports. - -One could also specify `kv::Dict` instead of separate `key` and `value` arguments. -""" -function logparam(mlf::MLFlow, run_id::String, key, value) - endpoint = "runs/log-parameter" - mlfpost(mlf, endpoint; run_id=run_id, key=string(key), value=string(value)) -end -logparam(mlf::MLFlow, run_info::MLFlowRunInfo, key, value) = - logparam(mlf, run_info.run_id, key, value) -logparam(mlf::MLFlow, run::MLFlowRun, key, value) = - logparam(mlf, run.info, key, value) -function logparam(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, kv) - for (k, v) in kv - logparam(mlf, run, k, v) - end -end - - -""" - logmetric(mlf::MLFlow, run, key, value::T; timestamp, step) where T<:Real - logmetric(mlf::MLFlow, run, key, values::AbstractArray{T}; timestamp, step) where T<:Real - -Logs a metric value (or values) against a particular run. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. 
-- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String` -- `key`: metric name. -- `value`: metric value, must be numeric. - -# Keywords -- `timestamp`: if provided, must be a UNIX timestamp in milliseconds. By default, set to current time. -- `step`: step at which the metric value has been taken. -""" -function logmetric(mlf::MLFlow, run_id::String, key, value::T; timestamp=missing, step=missing) where {T<:Real} - endpoint = "runs/log-metric" - if ismissing(timestamp) - timestamp = Int(trunc(datetime2unix(now(UTC)) * 1000)) - end - mlfpost(mlf, endpoint; run_id=run_id, key=key, value=value, timestamp=timestamp, step=step) -end -logmetric(mlf::MLFlow, run_info::MLFlowRunInfo, key, value::T; timestamp=missing, step=missing) where {T<:Real} = - logmetric(mlf::MLFlow, run_info.run_id, key, value; timestamp=timestamp, step=step) -logmetric(mlf::MLFlow, run::MLFlowRun, key, value::T; timestamp=missing, step=missing) where {T<:Real} = - logmetric(mlf, run.info, key, value; timestamp=timestamp, step=step) - -function logmetric(mlf::MLFlow, run::Union{String,MLFlowRun,MLFlowRunInfo}, key, values::AbstractArray{T}; timestamp=missing, step=missing) where {T<:Real} - for v in values - logmetric(mlf, run, key, v; timestamp=timestamp, step=step) - end -end - - -""" - logartifact(mlf::MLFlow, run, basefilename, data) - -Stores an artifact (file) in the run's artifact location. - -!!! note - Assumes that artifact_uri is mapped to a local directory. - At the moment, this only works if both MLFlow and the client are running on the same host or they map a directory that leads to the same location over NFS, for example. - -# Arguments -- `mlf::MLFlow`: [`MLFlow`](@ref) onfiguration. Currently not used, but when this method is extended to support `S3`, information from `mlf` will be needed. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`. -- `basefilename`: name of the file to be written. 
-- `data`: artifact content, an object that can be written directly to a file handle. - -# Throws -- an `ErrorException` if an exception occurs during writing artifact. - -# Returns -path of the artifact that was created. -""" -function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data) - mlflowrun = getrun(mlf, run_id) - artifact_uri = mlflowrun.info.artifact_uri - mkpath(artifact_uri) - filepath = joinpath(artifact_uri, basefilename) - try - f = open(filepath, "w") - write(f, data) - close(f) - catch e - error("Unable to create artifact $(filepath): $e") - end - filepath -end -logartifact(mlf::MLFlow, run::MLFlowRun, basefilename::AbstractString, data) = - logartifact(mlf, run.info, basefilename, data) -logartifact(mlf::MLFlow, run_info::MLFlowRunInfo, basefilename::AbstractString, data) = - logartifact(mlf, run_info.run_id, basefilename, data) - -""" - logartifact(mlf::MLFlow, run, filepath) - -Stores an artifact (file) in the run's artifact location. -The name of the artifact is calculated using `basename(filepath)`. - -Dispatches on `logartifact(mlf::MLFlow, run, basefilename, data)` where `data` is the contents of `filepath`. - -# Throws -- an `ErrorException` if `filepath` does not exist. -- an exception if such occurs while trying to read the contents of `filepath`. 
- -""" -function logartifact(mlf::MLFlow, run_id::AbstractString, filepath::Union{AbstractPath,AbstractString}) - isfile(filepath) || error("File $filepath does not exist.") - try - f = open(filepath, "r") - data = read(f) - close(f) - return logartifact(mlf, run_id, basename(filepath), data) - catch e - throw(e) - finally - if @isdefined f - close(f) - end - end -end -logartifact(mlf::MLFlow, run::MLFlowRun, filepath::Union{AbstractPath,AbstractString}) = - logartifact(mlf, run.info, filepath) -logartifact(mlf::MLFlow, run_info::MLFlowRunInfo, filepath::Union{AbstractPath,AbstractString}) = - logartifact(mlf, run_info.run_id, filepath) - -""" - listartifacts(mlf::MLFlow, run) - -Lists the artifacts associated with an experiment run. -According to [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#list-artifacts), this API endpoint should return paged results, similar to [`searchruns`](@ref). -However, after some experimentation, this doesn't seem to be the case. Therefore, the paging functionality is not implemented here. - -# Arguments -- `mlf::MLFlow`: [`MLFlow`](@ref) onfiguration. Currently not used, but when this method is extended to support `S3`, information from `mlf` will be needed. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`. - -# Keywords -- `path::String`: path of a directory within the artifact location. If set, returns the contents of the directory. By default, this is the root directory of the artifacts. -- `maxdepth::Int64`: depth of listing. Default is 1. This will only return the files/directories in the current `path`. To return all artifacts files and directories, use `maxdepth=-1`. - -# Returns -A vector of `Union{MLFlowArtifactFileInfo,MLFlowArtifactDirInfo}`. 
-""" -function listartifacts(mlf::MLFlow, run_id::String; path::String="", maxdepth::Int64=1) - endpoint = "artifacts/list" - kwargs = ( - run_id=run_id, - ) - kwargs = (; kwargs..., path=path) - httpresult = mlfget(mlf, endpoint; kwargs...) - "files" ∈ keys(httpresult) || return Vector{Union{MLFlowArtifactFileInfo,MLFlowArtifactDirInfo}}() - "root_uri" ∈ keys(httpresult) || error("Malformed response from MLFlow REST API.") - root_uri = httpresult["root_uri"] - result = Vector{Union{MLFlowArtifactFileInfo,MLFlowArtifactDirInfo}}() - maxdepth == 0 && return result - - for resultentry ∈ httpresult["files"] - if resultentry["is_dir"] == false - filepath = joinpath(root_uri, resultentry["path"]) - file_size = resultentry["file_size"] - if typeof(file_size) <: Int - filesize = file_size - else - filesize = parse(Int, file_size) - end - push!(result, MLFlowArtifactFileInfo(filepath, filesize)) - elseif resultentry["is_dir"] == true - dirpath = joinpath(root_uri, resultentry["path"]) - push!(result, MLFlowArtifactDirInfo(dirpath)) - if maxdepth != 0 - nextdepthresult = listartifacts(mlf, run_id, path=resultentry["path"], maxdepth=maxdepth - 1) - result = vcat(result, nextdepthresult) - end - else - isdirval = resultentry["is_dir"] - @warn "Malformed response from MLFlow REST API is_dir=$isdirval - skipping" - continue - end - end - result -end -listartifacts(mlf::MLFlow, run::MLFlowRun; kwargs...) = - listartifacts(mlf, run.info.run_id; kwargs...) -listartifacts(mlf::MLFlow, run_info::MLFlowRunInfo; kwargs...) = - listartifacts(mlf, run_info.run_id; kwargs...) - -""" - logbatch(mlf::MLFlow, run_id::String, metrics, params, tags) - -Logs a batch of metrics, parameters and tags to an experiment run. - -# Arguments -- `mlf::MLFlow`: [`MLFlow`](@ref) onfiguration. -- `run_id::String`: ID of the run to log to. -- `metrics`: a vector of [`MLFlowRunDataMetric`](@ref) or a vector of -NamedTuples of `(name, value, timestamp)`. 
-- `params`: a vector of [`MLFlowRunDataParam`](@ref) or a vector of NamedTuples -of `(name, value)`. -- `tags`: a vector of strings. -""" -logbatch(mlf::MLFlow, run_id::String; tags=String[], metrics=Any[], - params=Any[]) = logbatch(mlf, run_id, tags, metrics, params) -function logbatch(mlf::MLFlow, run_id::String, - tags::Union{AbstractVector{<:String}, AbstractVector{Any}}, - metrics::Union{AbstractVector{<:MLFlowRunDataMetric}, AbstractVector{Any}}, - params::Union{AbstractVector{<:MLFlowRunDataParam}, AbstractVector{Any}}) - endpoint = "runs/log-batch" - mlfpost(mlf, endpoint; - run_id=run_id, metrics=metrics, params=params, tags=tags) -end -function logbatch(mlf::MLFlow, run_id::String, - tags::Union{AbstractVector{<:String}, AbstractVector{Any}}, - metrics::Union{AbstractVector{<:AbstractDict}, AbstractVector{Any}}, - params::Union{AbstractVector{<:AbstractDict}, AbstractVector{Any}}) - endpoint = "runs/log-batch" - mlfpost(mlf, endpoint; run_id=run_id, - metrics=MLFlowRunDataMetric.(metrics), - params=MLFlowRunDataParam.(params), tags=tags) -end diff --git a/src/runs.jl b/src/runs.jl deleted file mode 100644 index 1caa140..0000000 --- a/src/runs.jl +++ /dev/null @@ -1,190 +0,0 @@ -""" - createrun(mlf::MLFlow, experiment_id; run_name=missing, start_time=missing, tags=missing) - -Creates a run associated to an experiment. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `experiment_id`: experiment identifier. - -# Keywords -- `run_name`: run name. If not specified, MLFlow sets it. -- `start_time`: if provided, must be a UNIX timestamp in milliseconds. By default, set to current time. 
-- `tags`: if provided, must be a key-value structure such as for example: - - [Dict("key" => "foo", "value" => "bar"), Dict("key" => "missy", "value" => "gala")] - -# Returns -- an instance of type [`MLFlowRun`](@ref) -""" -function createrun(mlf::MLFlow, experiment_id; run_name=missing, start_time=missing, tags=missing) - endpoint = "runs/create" - if ismissing(start_time) - start_time = Int(trunc(datetime2unix(now(UTC)) * 1000)) - end - result = mlfpost(mlf, endpoint; experiment_id=experiment_id, run_name=run_name, start_time=start_time, tags=tags) - MLFlowRun(result["run"]["info"], result["run"]["data"]) -end -""" - createrun(mlf::MLFlow, experiment::MLFlowExperiment; run_name=missing, start_time=missing, tags=missing) - -Dispatches to `createrun(mlf::MLFlow, experiment_id; run_name=run_name, start_time=start_time, tags=tags)` -""" -createrun(mlf::MLFlow, experiment::MLFlowExperiment; run_name=missing, start_time=missing, tags=missing) = - createrun(mlf, experiment.experiment_id; run_name=run_name, start_time=start_time, tags=tags) - -""" - getrun(mlf::MLFlow, run_id) - -Retrieves information about an MLFlow run. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `run_id::String`: run identifier. - -# Returns -- an instance of type [`MLFlowRun`](@ref) -""" -function getrun(mlf::MLFlow, run_id) - endpoint = "runs/get" - result = mlfget(mlf, endpoint; run_id=run_id) - MLFlowRun(result["run"]["info"], result["run"]["data"]) -end - -""" - updaterun(mlf::MLFlow, run, status; end_time=missing) - -Updates the status of an experiment's run. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String`. -- `status`: either `String` and one of ["RUNNING", "SCHEDULED", "FINISHED", "FAILED", "KILLED"], or an instance of `MLFlowRunStatus` - -# Keywords -- `run_name`: if provided, must be a String. By default, not set. -- `end_time`: if provided, must be a UNIX timestamp in milliseconds. 
By default, set to current time. -""" -function updaterun(mlf::MLFlow, run_id::String, status::MLFlowRunStatus; run_name=missing, end_time=missing) - endpoint = "runs/update" - kwargs = Dict( - :run_id => run_id, - :status => status.status, - :run_name => run_name, - :end_time => end_time - ) - if ismissing(end_time) && status.status == "FINISHED" - end_time = Int(trunc(datetime2unix(now(UTC)) * 1000)) - kwargs[:end_time] = string(end_time) - end - result = mlfpost(mlf, endpoint; kwargs...) - MLFlowRun(result["run_info"]) -end -updaterun(mlf::MLFlow, run_id::String, status::String; run_name=missing, end_time=missing) = - updaterun(mlf, run_id, MLFlowRunStatus(status); run_name=run_name, end_time=end_time) -updaterun(mlf::MLFlow, run_info::MLFlowRunInfo, status::String; run_name=missing, end_time=missing) = - updaterun(mlf, run_info.run_id, MLFlowRunStatus(status); run_name=run_name, end_time=end_time) -updaterun(mlf::MLFlow, run::MLFlowRun, status::String; run_name=missing, end_time=missing) = - updaterun(mlf, run.info, MLFlowRunStatus(status); run_name=run_name, end_time=end_time) -updaterun(mlf::MLFlow, run_info::MLFlowRunInfo, status::MLFlowRunStatus; run_name=missing, end_time=missing) = - updaterun(mlf, run_info.run_id, status; run_name=run_name, end_time=end_time) -updaterun(mlf::MLFlow, run::MLFlowRun, status::MLFlowRunStatus; run_name=missing, end_time=missing) = - updaterun(mlf, run.info, status; run_name=run_name, end_time=end_time) - -""" - deleterun(mlf::MLFlow, run) - -Deletes an experiment's run. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref), or `String`. - -# Returns -`true` if successful. 
- -""" -function deleterun(mlf::MLFlow, run_id::String) - endpoint = "runs/delete" - mlfpost(mlf, endpoint; run_id=run_id) - true -end -deleterun(mlf::MLFlow, run_info::MLFlowRunInfo) = deleterun(mlf, run_info.run_id) -deleterun(mlf::MLFlow, run::MLFlowRun) = deleterun(mlf, run.info) - -""" - searchruns(mlf::MLFlow, experiment_ids) - -Searches for runs in an experiment. - -# Arguments -- `mlf`: [`MLFlow`](@ref) configuration. -- `experiment_ids::AbstractVector{Integer}`: `experiment_id`s in which to search for runs. Can also be a single `Integer`. - -# Keywords -- `filter::String`: filter as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs) -- `filter_params::AbstractDict{K,V}`: if provided, `filter` is automatically generated based on `filter_params` using [`generatefilterfromparams`](@ref). One can only provide either `filter` or `filter_params`, but not both. -- `run_view_type::String`: one of `ACTIVE_ONLY`, `DELETED_ONLY`, or `ALL`. -- `max_results::Integer`: 50,000 by default. -- `order_by::String`: as defined in [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs) -- `page_token::String`: paging functionality, handled automatically. Not meant to be passed by the user. - -# Returns -- vector of [`MLFlowRun`](@ref) runs that were found in the list of experiments. 
- -""" -function searchruns(mlf::MLFlow, experiment_ids::AbstractVector{<:Integer}; - filter::String="", - filter_params::AbstractDict{K,V}=Dict{}(), - run_view_type::String="ACTIVE_ONLY", - max_results::Int64=50000, - order_by::AbstractVector{<:String}=["attribute.end_time"], - page_token::String="" -) where {K,V} - endpoint = "runs/search" - run_view_type ∈ ["ACTIVE_ONLY", "DELETED_ONLY", "ALL"] || error("Unsupported run_view_type = $run_view_type") - - if length(filter_params) > 0 && length(filter) > 0 - error("Can only use either filter or filter_params, but not both at the same time.") - end - - if length(filter_params) > 0 - filter = generatefilterfromparams(filter_params) - end - - kwargs = ( - experiment_ids=experiment_ids, - filter=filter, - run_view_type=run_view_type, - max_results=max_results, - order_by=order_by - ) - if !isempty(page_token) - kwargs = (; kwargs..., page_token=page_token) - end - - result = mlfpost(mlf, endpoint; kwargs...) - haskey(result, "runs") || return MLFlowRun[] - - runs = map(x -> MLFlowRun(x["info"], x["data"]), result["runs"]) - - # paging functionality using recursion - if haskey(result, "next_page_token") && !isempty(result["next_page_token"]) - kwargs = ( - filter=filter, - run_view_type=run_view_type, - max_results=max_results, - order_by=order_by, - page_token=result["next_page_token"] - ) - next_runs = searchruns(mlf, experiment_ids; kwargs...) - return vcat(runs, next_runs) - end - - runs -end -searchruns(mlf::MLFlow, experiment_id::Integer; kwargs...) = - searchruns(mlf, [experiment_id]; kwargs...) -searchruns(mlf::MLFlow, exp::MLFlowExperiment; kwargs...) = - searchruns(mlf, exp.experiment_id; kwargs...) -searchruns(mlf::MLFlow, exps::AbstractVector{MLFlowExperiment}; kwargs...) = - searchruns(mlf, getfield.(exps, :experiment_id); kwargs...) 
diff --git a/src/services/artifact.jl b/src/services/artifact.jl new file mode 100644 index 0000000..a56908b --- /dev/null +++ b/src/services/artifact.jl @@ -0,0 +1,31 @@ +""" + listartifacts(instance::MLFlow, run_id::String; path::String="", page_token::String="") + listartifacts(instance::MLFlow, run::Run; path::String="", page_token::String="") + +List artifacts for a run. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run whose artifacts to list. +- `path`: Filter artifacts matching this path (a relative path from the root artifact +directory). +- `page_token`: Token indicating the page of artifact results to fetch + +# Returns +- Root artifact directory for the run. +- List of file location and metadata for artifacts. +- Token that can be used to retrieve the next page of artifact results. +""" +function listartifacts(instance::MLFlow, run_id::String; path::String="", + page_token::String="")::Tuple{String, Array{FileInfo}, Union{String, Nothing}} + result = mlfget(instance, "artifacts/list"; run_id=run_id, path=path, + page_token=page_token) + + root_uri = get(result, "root_uri", "") + files = get(result, "files", []) |> (x -> [FileInfo(y) for y in x]) + next_page_token = get(result, "next_page_token", nothing) + + return root_uri, files, next_page_token +end +listartifacts(instance::MLFlow, run::Run; path::String="", page_token::String="") = + listartifacts(instance, run.info.run_id; path=path, page_token=page_token) diff --git a/test/runtests.jl b/test/runtests.jl index 146e94b..66a6a3c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,8 @@ end include("base.jl") -include("services/experiment.jl") include("services/run.jl") -include("services/loggers.jl") include("services/misc.jl") +include("services/loggers.jl") +include("services/artifact.jl") +include("services/experiment.jl") diff --git a/test/services/artifact.jl b/test/services/artifact.jl new file mode 100644 index 0000000..5024da8 --- /dev/null 
+++ b/test/services/artifact.jl @@ -0,0 +1,25 @@ +@testset verbose = true "list artifacts" begin + # TODO: Add more specific tests after implementing the complete artifact service + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment_id) + + @testset "using run id" begin + root_uri, files, next_page_token = listartifacts(mlf, run.info.run_id) + + @test run.info.artifact_uri == root_uri + @test isempty(files) + @test isnothing(next_page_token) + end + + @testset "using run" begin + root_uri, files, next_page_token = listartifacts(mlf, run) + + @test run.info.artifact_uri == root_uri + @test isempty(files) + @test isnothing(next_page_token) + end + + deleteexperiment(mlf, experiment_id) +end From 5e782c085ca9f67d062187e2a5506275f69f8a8a Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Sun, 3 Nov 2024 00:20:17 -0500 Subject: [PATCH 17/31] Implementing `updaterun` --- src/MLFlowClient.jl | 3 +- src/services/run.jl | 30 ++++++ test/services/run.jl | 32 +++++++ test/test_loggers.jl | 224 ------------------------------------------- 4 files changed, 64 insertions(+), 225 deletions(-) delete mode 100644 test/test_loggers.jl diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 5d750ed..88e60a7 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -45,7 +45,8 @@ export getexperiment, createexperiment, deleteexperiment, setexperimenttag, updateexperiment, restoreexperiment, searchexperiments, getexperimentbyname include("services/run.jl") -export getrun, createrun, deleterun, setruntag, restorerun, searchruns, deleteruntag +export getrun, createrun, deleterun, setruntag, updaterun, restorerun, searchruns, + deleteruntag include("services/loggers.jl") export logbatch, loginputs, logmetric, logparam diff --git a/src/services/run.jl b/src/services/run.jl index 2842b18..ef27c3a 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -184,3 +184,33 @@ 
function searchruns(instance::MLFlow; experiment_ids::Array{String}=String[], return runs, next_page_token end + +""" + updaterun(instance::MLFlow, run_id::String; status::Union{RunStatus, Missing}=missing, + end_time::Union{Int64, Missing}=missing, run_name::Union{String, Missing}=missing) + updaterun(instance::MLFlow, run::Run; status::Union{RunStatus, Missing}=missing, + end_time::Union{Int64, Missing}=missing, run_name::Union{String, Missing}=missing) + +Update run metadata. Every keyword is optional and defaults to `missing`. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run_id`: ID of the run to update. +- `status`: Updated status of the run. +- `end_time`: Unix timestamp in milliseconds of when the run ended. +- `run_name`: Updated name of the run. + +# Returns +- An instance of type [`RunInfo`](@ref) with the updated metadata. +""" +function updaterun(instance::MLFlow, run_id::String; + status::Union{RunStatus, Missing}=missing, end_time::Union{Int64, Missing}=missing, + run_name::Union{String, Missing}=missing)::RunInfo + result = mlfpost(instance, "runs/update"; run_id=run_id, end_time=end_time, + run_name=run_name, status=(ismissing(status) ? missing : Integer(status))) + return result["run_info"] |> RunInfo +end +updaterun(instance::MLFlow, run::Run; status::Union{RunStatus, Missing}=missing, + end_time::Union{Int64, Missing}=missing, run_name::Union{String, Missing}=missing)::RunInfo = + updaterun(instance, run.info.run_id; status=status, end_time=end_time, + run_name=run_name) diff --git a/test/services/run.jl b/test/services/run.jl index 19b2a03..fb7af1b 100644 --- a/test/services/run.jl +++ b/test/services/run.jl @@ -185,3 +185,35 @@ end experiment_ids .|> (id -> deleteexperiment(mlf, id)) end + +@testset verbose = true "update run" begin + @ensuremlf + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment_id) + + @testset "update with string id" begin + status = MLFlowClient.FINISHED + end_time = 123 + run_name = "missy" + + run_info = updaterun(mlf, run.info.run_id; status=status,
end_time=end_time, run_name=run_name) + + @test run_info.status == status + @test run_info.end_time == end_time + @test run_info.run_name == run_name + end + + @testset "update with Run" begin + status = MLFlowClient.FAILED + end_time = 456 + run_name = "gala" + + run_info = updaterun(mlf, run; status=status, end_time=end_time, run_name=run_name) + + @test run_info.status == status + @test run_info.end_time == end_time + @test run_info.run_name == run_name + end + + deleteexperiment(mlf, experiment_id) +end diff --git a/test/test_loggers.jl b/test/test_loggers.jl deleted file mode 100644 index bf3ea00..0000000 --- a/test/test_loggers.jl +++ /dev/null @@ -1,224 +0,0 @@ -@testset verbose = true "settag" begin - @ensuremlf - expname = "settag-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - runname = "run-$(UUIDs.uuid4())" - r = createrun(mlf, e.experiment_id) - - @testset "settag_by_run_id_and_key_value" begin - settag(mlf, r.info.run_id, "run_id_key_value", "test") - retrieved_run = searchruns(mlf, e; filter="tags.run_id_key_value = 'test'") - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - @testset "settag_by_run_info_and_key_value" begin - settag(mlf, r.info, "run_id_key_value", "test") - retrieved_run = searchruns(mlf, e; filter="tags.run_id_key_value = 'test'") - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - @testset "settag_by_run_and_key_value" begin - settag(mlf, r, "run_id_key_value", "test") - retrieved_run = searchruns(mlf, e; filter="tags.run_id_key_value = 'test'") - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - @testset "settag_by_union_and_dict_key_value" begin - settag(mlf, r, Dict("run_id_key_value" => "test")) - retrieved_run = searchruns(mlf, e; filter="tags.run_id_key_value = 'test'") - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id -
end - - deleteexperiment(mlf, e) -end - -@testset verbose = true "logparam" begin - @ensuremlf - expname = "logparam-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - runname = "run-$(UUIDs.uuid4())" - r = createrun(mlf, e.experiment_id) - - @testset "logparam_by_run_id_and_key_value" begin - logparam(mlf, r.info.run_id, "run_id_key_value", "test") - retrieved_run = searchruns(mlf, e; filter_params=Dict("run_id_key_value" => "test")) - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - @testset "logparam_by_run_info_and_key_value" begin - logparam(mlf, r.info, "run_id_key_value", "test") - retrieved_run = searchruns(mlf, e; filter_params=Dict("run_id_key_value" => "test")) - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - @testset "logparam_by_run_and_key_value" begin - logparam(mlf, r, "run_id_key_value", "test") - retrieved_run = searchruns(mlf, e; filter_params=Dict("run_id_key_value" => "test")) - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - @testset "logparam_by_union_and_dict_key_value" begin - logparam(mlf, r, Dict("run_id_key_value" => "test")) - retrieved_run = searchruns(mlf, e; filter_params=Dict("run_id_key_value" => "test")) - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - deleteexperiment(mlf, e) -end - -@testset verbose = true "logmetric" begin - @ensuremlf - expname = "logmetric-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - runname = "run-$(UUIDs.uuid4())" - r = createrun(mlf, e.experiment_id) - - @testset "logmetric_by_run_id_and_key_value" begin - logmetric(mlf, r.info.run_id, "run_id_key_value", 1) - retrieved_run = searchruns(mlf, e) - @test length(retrieved_run) == 1 - @test isa(retrieved_run[1].data.metrics["run_id_key_value"], MLFlowRunDataMetric) - @test retrieved_run[1].data.metrics["run_id_key_value"].value == 1 - end 
- - @testset "logmetric_by_run_info_and_key_value" begin - logmetric(mlf, r.info, "run_id_key_value", 1) - retrieved_run = searchruns(mlf, e) - @test length(retrieved_run) == 1 - @test isa(retrieved_run[1].data.metrics["run_id_key_value"], MLFlowRunDataMetric) - @test retrieved_run[1].data.metrics["run_id_key_value"].value == 1 - end - - @testset "logmetric_by_run_and_key_value" begin - logmetric(mlf, r, "run_id_key_value", 1) - retrieved_run = searchruns(mlf, e) - @test length(retrieved_run) == 1 - @test isa(retrieved_run[1].data.metrics["run_id_key_value"], MLFlowRunDataMetric) - @test retrieved_run[1].data.metrics["run_id_key_value"].value == 1 - end - - @testset "logmetric_by_union_and_key_arrayvalue" begin - logmetric(mlf, r, "run_id_key_value", [1, 2, 3]) - retrieved_run = searchruns(mlf, e) - @test length(retrieved_run) == 1 - @test isa(retrieved_run[1].data.metrics["run_id_key_value"], MLFlowRunDataMetric) - @test retrieved_run[1].data.metrics["run_id_key_value"].value == 3 - end - - deleteexperiment(mlf, e) -end - -@testset verbose = true "logartifact" begin - @ensuremlf - expname = "logartifact-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname; artifact_location="/tmp/mlflow") - runname = "run-$(UUIDs.uuid4())" - r = createrun(mlf, e.experiment_id) - artifact_uri = r.info.artifact_uri - - tmpfile = "/tmp/mlflowclient-tempfile.txt" - open(tmpfile, "w") do f - write(f, "test") - end - - @testset "logartifact_by_run_and_filenameanddata" begin - artifact = logartifact(mlf, r, tmpfile, "testing") - @test isfile(artifact) - end - - @testset "logartifact_by_run_id_and_file" begin - artifact = logartifact(mlf, r.info.run_id, tmpfile) - @test isfile(artifact) - end - - @testset "logartifact_by_run_and_file" begin - artifact = logartifact(mlf, r, tmpfile) - @test isfile(artifact) - end - - @testset "logartifact_by_run_info_and_file" begin - artifact = logartifact(mlf, r.info, tmpfile) - @test isfile(artifact) - end - - @testset "logartifact_using_IOBuffer" 
begin - io = IOBuffer() - write(io, "testing IOBuffer") - seekstart(io) - artifact = logartifact(mlf, r, tmpfile, io) - @test isfile(artifact) - end - - @testset "logartifact_error" begin - @test_broken logartifact(mlf, r, "/etc/shadow") - end - - deleteexperiment(mlf, e) -end - -@testset verbose=true "logbatch" begin - @ensuremlf - expname = "logbatch-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - runname = "run-$(UUIDs.uuid4())" - r = createrun(mlf, e.experiment_id) - - @testset "logbatch_by_types" begin - param_array = [MLFlowRunDataParam("test_param_type", "test")] - metric_array = [MLFlowRunDataMetric("test_metric", 5, 3, 1)] - logbatch(mlf, r.info.run_id; params=param_array, metrics=metric_array) - - retrieved_run = searchruns(mlf, e; - filter_params=Dict("test_param_type" => "test")) - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - @testset "logbatch_by_dicts" begin - param_dict_array = [Dict("key"=>"test_param_dict", "value"=>"test")] - metric_dict_array = [ - Dict("key"=>"test_metric", "value"=>5, "step"=>3, "timestamp"=>1)] - logbatch(mlf, r.info.run_id; - params=param_dict_array, metrics=metric_dict_array) - - retrieved_run = searchruns(mlf, e; - filter_params=Dict("test_param_dict" => "test")) - @test length(retrieved_run) == 1 - @test retrieved_run[1].info.run_id == r.info.run_id - end - - deleteexperiment(mlf, e) -end - -@testset verbose=true "listartifacts" begin - @ensuremlf - expname = "listartifacts-$(UUIDs.uuid4())" - e = getorcreateexperiment(mlf, expname) - runname = "run-$(UUIDs.uuid4())" - r = createrun(mlf, e.experiment_id) - - @testset "listartifacts_by_run_id" begin - artifacts = listartifacts(mlf, r.info.run_id) - @test length(artifacts) == 0 - end - - @testset "listartifacts_by_run" begin - artifacts = listartifacts(mlf, r) - @test length(artifacts) == 0 - end - - @testset "listartifacts_by_run_info" begin - artifacts = listartifacts(mlf, r.info) - @test length(artifacts) 
== 0 - end - - deleteexperiment(mlf, e) -end From 78ccb90d73ab9d353f0a6b5a535f752da451c898 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Sun, 3 Nov 2024 01:07:30 -0500 Subject: [PATCH 18/31] Updating `CI.yml` with `MLJFlow.jl` pipeline style --- .github/workflows/CI.yml | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index a3acf34..c6a105e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,38 +1,41 @@ name: CI on: + pull_request: push: branches: - main tags: '*' - pull_request: -concurrency: - # Skip intermediate builds: always. - # Cancel intermediate builds: only if it is a pull request build. - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} jobs: test: - - services: - mlflow: - image: adacotechjp/mlflow:2.3.1 - ports: - - 5000:5000 - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false + max-parallel: 1 matrix: version: - - '1.6' - - '1' + - '1.10' + - '1' # automatically expands to the latest stable 1.x release of Julia. 
os: - ubuntu-latest arch: - x64 steps: - uses: actions/checkout@v2 + - name: Setup custom python requirements + if: hashFiles('**/requirements.txt', '**/pyproject.toml') == '' + run: | + touch ./requirements.txt + echo "mlflow==2.17.2" > ./requirements.txt + - uses: actions/setup-python@v4 + with: + python-version: '3.10.13' + cache: 'pip' + - name: Setup mlflow locally + run: | + pip install -r ./requirements.txt + python3 /opt/hostedtoolcache/Python/3.10.13/x64/bin/mlflow server --host 0.0.0.0 --port 5000 & + sleep 5 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} @@ -50,9 +53,10 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 env: + JULIA_NUM_THREADS: '2' MLFLOW_TRACKING_URI: "http://localhost:5000/api" - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v2 + - uses: codecov/codecov-action@v3 with: files: lcov.info docs: From 50004ce680003863469d23def4a8be95645bb4b0 Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Sun, 3 Nov 2024 03:26:22 -0500 Subject: [PATCH 19/31] Fixing documentation --- Project.toml | 2 - README.md | 4 -- docs/Project.toml | 4 +- docs/make.jl | 32 +++------ docs/src/{ => images}/mlflowexp.png | Bin docs/src/{ => images}/mlflowexpmetric1.png | Bin docs/src/{ => images}/withoutmlflow.png | Bin docs/src/reference.md | 59 --------------- docs/src/reference/artifact.md | 4 ++ docs/src/reference/experiment.md | 11 +++ docs/src/reference/loggers.md | 7 ++ docs/src/reference/misc.md | 5 ++ docs/src/reference/run.md | 11 +++ docs/src/reference/types.md | 21 ++++++ docs/src/tutorial.md | 6 +- src/MLFlowClient.jl | 19 +++-- src/services/artifact.jl | 8 +-- src/services/experiment.jl | 80 +++++++++++---------- src/services/loggers.jl | 45 ++++++------ src/services/misc.jl | 35 ++++++--- src/services/run.jl | 70 +++++++++--------- src/types/dataset.jl | 10 +-- src/types/enums.jl | 2 +- src/types/experiment.jl | 2 +- 
src/types/mlflow.jl | 8 +-- src/types/model_version.jl | 10 +-- src/types/registered_model.jl | 6 +- src/types/run.jl | 13 ++-- src/utils.jl | 4 -- 29 files changed, 239 insertions(+), 239 deletions(-) rename docs/src/{ => images}/mlflowexp.png (100%) rename docs/src/{ => images}/mlflowexpmetric1.png (100%) rename docs/src/{ => images}/withoutmlflow.png (100%) delete mode 100644 docs/src/reference.md create mode 100644 docs/src/reference/artifact.md create mode 100644 docs/src/reference/experiment.md create mode 100644 docs/src/reference/loggers.md create mode 100644 docs/src/reference/misc.md create mode 100644 docs/src/reference/run.md create mode 100644 docs/src/reference/types.md diff --git a/Project.toml b/Project.toml index 59d3b69..00d90f9 100644 --- a/Project.toml +++ b/Project.toml @@ -5,7 +5,6 @@ version = "0.5.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" ShowCases = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" @@ -13,7 +12,6 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] -FilePathsBase = "0.9" HTTP = "1.9" JSON = "0.21" ShowCases = "0.1" diff --git a/README.md b/README.md index 51e9194..4cf00a3 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,3 @@ Julia client for [MLFlow](https://www.mlflow.org/) - -This package is still under development and interfaces may change. See the documentation for current features and limitations. - -Tested against `mlflow==1.21.0` and `mlflow==1.22.0`. 
diff --git a/docs/Project.toml b/docs/Project.toml index dee67d4..d364e82 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,8 +1,6 @@ [deps] -Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -MLFlowClient = "64a0f543-368b-4a9a-827a-e71edb2a0b83" +ShowCases = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" diff --git a/docs/make.jl b/docs/make.jl index 3e6244d..264bc6d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,27 +1,15 @@ -using MLFlowClient +push!(LOAD_PATH,"../src/") using Documenter - -DocMeta.setdocmeta!(MLFlowClient, :DocTestSetup, :(using MLFlowClient); recursive=true) +using MLFlowClient makedocs(; - modules=[MLFlowClient], - authors="@deyandyankov and contributors", - repo="https://github.com/JuliaAI.jl/blob/{commit}{path}#{line}", sitename="MLFlowClient.jl", - format=Documenter.HTML(; - prettyurls=get(ENV, "CI", "false") == "true", - canonical="https://juliaai.github.io/MLFlowClient.jl", - assets=String[] - ), - pages=[ - "Home" => "index.md", - "Tutorial" => "tutorial.md", - "Reference" => "reference.md" - ], - checkdocs=:exports -) + authors="@deyandyankov and contributors", + pages=["Home" => "index.md", "Tutorial" => "tutorial.md", "Reference" => [ + "Types" => "reference/types.md", "Artifact operations" => "reference/artifact.md", + "Experiment operations" => "reference/experiment.md", + "Logging operations" => "reference/loggers.md", + "Miscellaneous operations" => "reference/misc.md", + "Run operations" => "reference/run.md"]]) -deploydocs(; - repo="github.com/JuliaAI/MLFlowClient.jl", - devbranch="main" -) +deploydocs(; repo="github.com/JuliaAI/MLFlowClient.jl", devbranch="main") diff --git a/docs/src/mlflowexp.png b/docs/src/images/mlflowexp.png similarity index 100% rename from docs/src/mlflowexp.png 
rename to docs/src/images/mlflowexp.png diff --git a/docs/src/mlflowexpmetric1.png b/docs/src/images/mlflowexpmetric1.png similarity index 100% rename from docs/src/mlflowexpmetric1.png rename to docs/src/images/mlflowexpmetric1.png diff --git a/docs/src/withoutmlflow.png b/docs/src/images/withoutmlflow.png similarity index 100% rename from docs/src/withoutmlflow.png rename to docs/src/images/withoutmlflow.png diff --git a/docs/src/reference.md b/docs/src/reference.md deleted file mode 100644 index 7f79daf..0000000 --- a/docs/src/reference.md +++ /dev/null @@ -1,59 +0,0 @@ -# Reference - -```@meta -CurrentModule = MLFlowClient -``` - -# Types - -TODO: Document accessors. - -```@docs -MLFlow -MLFlowExperiment -MLFlowRun -MLFlowRunInfo -MLFlowRunData -MLFlowRunDataParam -MLFlowRunDataMetric -MLFlowRunStatus -MLFlowArtifactFileInfo -MLFlowArtifactDirInfo -``` - -# Experiments - -```@docs -createexperiment -getexperiment -getorcreateexperiment -deleteexperiment -searchexperiments -restoreexperiment -``` - -# Runs - -```@docs -createrun -getrun -updaterun -deleterun -searchruns -logparam -logmetric -logbatch -logartifact -listartifacts -``` - -# Utilities - -```@docs -mlfget -mlfpost -uri -generatefilterfromentity_type -generatefilterfromparams -generatefilterfromattributes -``` diff --git a/docs/src/reference/artifact.md b/docs/src/reference/artifact.md new file mode 100644 index 0000000..0e28093 --- /dev/null +++ b/docs/src/reference/artifact.md @@ -0,0 +1,4 @@ +# Artifact operations +```@docs +listartifacts +``` diff --git a/docs/src/reference/experiment.md b/docs/src/reference/experiment.md new file mode 100644 index 0000000..ba98435 --- /dev/null +++ b/docs/src/reference/experiment.md @@ -0,0 +1,11 @@ +# Experiment operations +```@docs +createexperiment +getexperiment +getexperimentbyname +deleteexperiment +restoreexperiment +updateexperiment +searchexperiments +setexperimenttag +``` diff --git a/docs/src/reference/loggers.md b/docs/src/reference/loggers.md new 
file mode 100644 index 0000000..2bad6ce --- /dev/null +++ b/docs/src/reference/loggers.md @@ -0,0 +1,7 @@ +# Logging operations +```@docs +logmetric +logbatch +loginputs +logparam +``` diff --git a/docs/src/reference/misc.md b/docs/src/reference/misc.md new file mode 100644 index 0000000..7baadfc --- /dev/null +++ b/docs/src/reference/misc.md @@ -0,0 +1,5 @@ +# Miscellaneous operations +```@docs +getmetrichistory +refresh +``` diff --git a/docs/src/reference/run.md b/docs/src/reference/run.md new file mode 100644 index 0000000..5fbd25c --- /dev/null +++ b/docs/src/reference/run.md @@ -0,0 +1,11 @@ +# Run operations +```@docs +createrun +deleterun +restorerun +getrun +setruntag +deleteruntag +searchruns +updaterun +``` diff --git a/docs/src/reference/types.md b/docs/src/reference/types.md new file mode 100644 index 0000000..e7db289 --- /dev/null +++ b/docs/src/reference/types.md @@ -0,0 +1,21 @@ +# Types +```@docs +MLFlow +Tag +ViewType +RunStatus +ModelVersionStatus +Dataset +DatasetInput +FileInfo +ModelVersion +RegisteredModel +RegisteredModelAlias +Experiment +Run +Param +Metric +RunData +RunInfo +RunInputs +``` diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md index 3fbf101..d7565c3 100644 --- a/docs/src/tutorial.md +++ b/docs/src/tutorial.md @@ -40,7 +40,7 @@ p This could result in the following plot: -![](withoutmlflow.png) +![](images/withoutmlflow.png) Now, suppose that you are interested in turning this into an experiment which stores its metadata and results in MLFlow using `MLFlowClient`. You could amend the code like this: @@ -114,8 +114,8 @@ updaterun(mlf, exprun, "FINISHED") This will result in the folowing experiment created in your `MLFlow` which is running on `http://localhost/`: -![](mlflowexp.png) +![](images/mlflowexp.png) You can also observe series logged against individual metrics, i.e. 
`pricepath1` looks like this in `MLFlow`: -![](mlflowexpmetric1.png) +![](images/mlflowexpmetric1.png) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 88e60a7..99638e7 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -1,3 +1,14 @@ +""" + MLFlowClient + +[MLFlowClient](https://github.com/JuliaAI/MLFlowClient.jl) is a [Julia](https://julialang.org/) package +for working with [MLFlow](https://mlflow.org/) using the REST +[API v2.0](https://www.mlflow.org/docs/latest/rest-api.html). + +`MLFlowClient` allows you to create and manage `MLFlow` experiments, runs, and log metrics +and artifacts. If you are not familiar with `MLFlow` and its concepts, please refer to +[MLFlow documentation](https://mlflow.org/docs/latest/index.html). +""" module MLFlowClient using Dates @@ -6,7 +17,6 @@ using HTTP using URIs using JSON using ShowCases -using FilePathsBase: AbstractPath include("types/mlflow.jl") export MLFlow @@ -35,11 +45,10 @@ export Experiment include("types/run.jl") export Run, Param, Metric, RunData, RunInfo, RunInputs -include("utils.jl") -export refresh - include("api.jl") +include("utils.jl") + include("services/experiment.jl") export getexperiment, createexperiment, deleteexperiment, setexperimenttag, updateexperiment, restoreexperiment, searchexperiments, getexperimentbyname @@ -55,6 +64,6 @@ include("services/artifact.jl") export listartifacts include("services/misc.jl") -export getmetrichistory +export refresh, getmetrichistory end diff --git a/src/services/artifact.jl b/src/services/artifact.jl index a56908b..5a73613 100644 --- a/src/services/artifact.jl +++ b/src/services/artifact.jl @@ -2,17 +2,17 @@ listartifacts(instance::MLFlow, run_id::String; path::String="", page_token::String="") listartifacts(instance::MLFlow, run::Run; path::String="", page_token::String="") -List artifacts for a run. +List artifacts for a [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run whose artifacts to list.
+- `run_id`: ID of the [`Run`](@ref) whose artifacts to list. - `path`: Filter artifacts matching this path (a relative path from the root artifact -directory). + directory). - `page_token`: Token indicating the page of artifact results to fetch # Returns -- Root artifact directory for the run. +- Root artifact directory for the [`Run`](@ref). - List of file location and metadata for artifacts. - Token that can be used to retrieve the next page of artifact results. """ diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 1233d48..b0e99a3 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -1,21 +1,22 @@ """ - createexperiment(instance::MLFlow, name::String; artifact_location::String="", - tags::Union{Dict{<:Any}, Array{<:Any}}=[]) + createexperiment(instance::MLFlow, name::String; + artifact_location::Union{String, Missing}=missing, + tags::MLFlowUpsertData{Tag}=Tag[]) -Create an experiment with a name. Returns the newly created experiment. Validates that -another experiment with the same name does not already exist and fails if another -experiment with the same name already exists. +Create an [`Experiment`](@ref) with a name. Returns the newly created [`Experiment`](@ref). +Validates that another [`Experiment`](@ref) with the same name does not already exist and +fails if another [`Experiment`](@ref) with the same name already exists. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `name`: Experiment name. This field is required. -- `artifact_location`: Location where all artifacts for the experiment -are stored. If not provided, the remote server will select an appropriate -default. -- `tags`: A collection of tags to set on the experiment. +- `name`: [`Experiment`](@ref) name. This field is required. +- `artifact_location`: Location where all artifacts for the [`Experiment`](@ref) + are stored. If not provided, the remote server will select an appropriate + default. 
+- `tags`: A collection of [`Tag`](@ref) to set on the [`Experiment`](@ref). # Returns -The ID of the newly created experiment. +The ID of the newly created [`Experiment`](@ref). """ function createexperiment(instance::MLFlow, name::String; artifact_location::Union{String, Missing}=missing, @@ -29,11 +30,11 @@ end getexperiment(instance::MLFlow, experiment_id::String) getexperiment(instance::MLFlow, experiment_id::Integer) -Get metadata for an experiment. This method works on deleted experiments. +Get metadata for an [`Experiment`](@ref). This method works on deleted experiments. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. +- `experiment_id`: ID of the associated [`Experiment`](@ref). # Returns An instance of type [`Experiment`](@ref). @@ -48,15 +49,15 @@ getexperiment(instance::MLFlow, experiment_id::Integer)::Experiment = """ getexperimentbyname(instance::MLFlow, experiment_name::String) -Get metadata for an experiment. +Get metadata for an [`Experiment`](@ref). -This endpoint will return deleted experiments, but prefers the active experiment if an -active and deleted experiment share the same name. If multiple deleted experiments share -the same name, the API will return one of them. +This endpoint will return deleted experiments, but prefers the active [`Experiment`](@ref) +if an active and deleted [`Experiment`](@ref) share the same name. If multiple deleted +experiments share the same name, the API will return one of them. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_name`: Name of the associated experiment. +- `experiment_name`: Name of the associated [`Experiment`](@ref). # Returns An instance of type [`Experiment`](@ref). @@ -71,12 +72,13 @@ end deleteexperiment(instance::MLFlow, experiment_id::Integer) deleteexperiment(instance::MLFlow, experiment::Experiment) -Mark an experiment and associated metadata, runs, metrics, params, and tags for deletion. 
-If the experiment uses FileStore, artifacts associated with experiment are also deleted. +Mark an [`Experiment`](@ref) and associated metadata, runs, metrics, params, and tags for +deletion. If the [`Experiment`](@ref) uses FileStore, artifacts associated with +[`Experiment`](@ref) are also deleted. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. +- `experiment_id`: ID of the associated [`Experiment`](@ref). # Returns `true` if successful. Otherwise, raises exception. @@ -95,13 +97,13 @@ deleteexperiment(instance::MLFlow, experiment::Experiment)::Bool = restoreexperiment(instance::MLFlow, experiment_id::Integer) restoreexperiment(instance::MLFlow, experiment::Experiment) -Restore an experiment marked for deletion. This also restores associated metadata, runs, -metrics, params, and tags. If experiment uses FileStore, underlying artifacts associated -with experiment are also restored. +Restore an [`Experiment`](@ref) marked for deletion. This also restores associated +metadata, runs, metrics, params, and tags. If [`Experiment`](@ref) uses FileStore, +underlying artifacts associated with [`Experiment`](@ref) are also restored. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. +- `experiment_id`: ID of the associated [`Experiment`](@ref). # Returns `true` if successful. Otherwise, raises exception. @@ -120,13 +122,13 @@ restoreexperiment(instance::MLFlow, experiment::Experiment)::Bool = updateexperiment(instance::MLFlow, experiment_id::Integer, new_name::String) updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String) -Update experiment metadata. +Update [`Experiment`](@ref) metadata. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. -- `new_name`: If provided, the experiment’s name is changed to the new name. The new name -must be unique. 
+- `experiment_id`: ID of the associated [`Experiment`](@ref). +- `new_name`: If provided, the [`Experiment`](@ref) name is changed to the new name. The new name + must be unique. # Returns `true` if successful. Otherwise, raises exception. @@ -142,21 +144,22 @@ updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String)::Bo """ searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", - filter::String="", order_by::Array{String}=[], view_type::ViewType=ACTIVE_ONLY) + filter::String="", order_by::Array{String}=String[], + view_type::ViewType=ACTIVE_ONLY) # Arguments - `instance`: [`MLFlow`](@ref) configuration. - `max_results`: Maximum number of experiments desired. - `page_token`: Token indicating the page of experiments to fetch. -- `filter`: A filter expression over experiment attributes and tags that allows returning a -subset of experiments. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments). -- `order_by`: List of columns for ordering search results, which can include experiment -name and id with an optional “DESC” or “ASC” annotation, where “ASC” is the default. +- `filter`: A filter expression over [`Experiment`](@ref) attributes and tags that allows returning a + subset of experiments. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-experiments). +- `order_by`: List of columns for ordering search results, which can include [`Experiment`](@ref) + name and id with an optional “DESC” or “ASC” annotation, where “ASC” is the default. - `view_type`: Qualifier for type of experiments to be returned. If unspecified, return -only active experiments. + only active experiments. For more values, see [`ViewType`](@ref). # Returns -- Vector of [`Experiment`](@ref) that were found in the MLFlow instance. +- Vector of [`Experiment`](@ref) that were found in the [`MLFlow`](@ref) instance. - The next page token if there are more results. 
""" function searchexperiments(instance::MLFlow; max_results::Int64=20000, @@ -181,10 +184,11 @@ end setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, value::String) setexperimenttag(instance::MLFlow, experiment::Experiment, key::String, value::String) -Set a tag on an experiment. Experiment tags are metadata that can be updated. +Set a tag on an [`Experiment`](@ref). [`Experiment`](@ref) tags are metadata that can be +updated. # Arguments -- `experiment_id`: ID of the experiment under which to log the tag. +- `experiment_id`: ID of the [`Experiment`](@ref) under which to log the tag. - `key`: Name of the tag. - `value`: String value of the tag being logged. diff --git a/src/services/loggers.jl b/src/services/loggers.jl index ecab95a..ea1f57a 100644 --- a/src/services/loggers.jl +++ b/src/services/loggers.jl @@ -6,17 +6,17 @@ timestamp::Int64=round(Int, now() |> datetime2unix), step::Union{Int64, Missing}=missing) -Log a metric for a run. A metric is a key-value pair (string key, float value) with an -associated timestamp. Examples include the various metrics that represent ML model -accuracy. A metric can be logged multiple times. +Log a [`Metric`](@ref) for a [`Run`](@ref). A [`Metric`](@ref) is a key-value pair (string +key, float value) with an associated timestamp. Examples include the various metrics that +represent ML model accuracy. A [`Metric`](@ref) can be logged multiple times. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run under which to log the metric. -- `key`: Name of the metric. -- `value`: Double value of the metric being logged. -- `timestamp`: Unix timestamp in milliseconds at the time metric was logged. -- `step`: Step at which to log the metric. +- `run_id`: ID of the [`Run`](@ref) under which to log the [`Metric`](@ref). +- `key`: Name of the [`Metric`](@ref). +- `value`: Double value of the [`Metric`](@ref) being logged. 
+- `timestamp`: Unix timestamp in milliseconds at the time [`Metric`](@ref) was logged. +- `step`: Step at which to log the [`Metric`](@ref). # Returns `true` if successful. Otherwise, raises exception. @@ -45,17 +45,17 @@ logmetric(instance::MLFlow, run::Run, metric::Metric)::Bool = logbatch(instance::MLFlow, run::Run; metrics::Array{Metric}, params::MLFlowUpsertData{Param}, tags::MLFlowUpsertData{Tag}) -Log a batch of metrics, params, and tags for a run. In case of error, partial data may be -written. +Log a batch of metrics, params, and tags for a [`Run`](@ref). In case of error, partial +data may be written. For more information about this function, check [MLFlow official documentation](https://mlflow.org/docs/latest/rest-api.html#log-batch). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to log under. -- `metrics`: Metrics to log. -- `params`: Params to log. -- `tags`: Tags to log. +- `run_id`: ID of the [`Run`](@ref) to log under. +- `metrics`: A collection of [`Metric`](@ref) to log. +- `params`: A collection of [`Param`](@ref) to log. +- `tags`: A collection of [`Tag`](@ref) to log. **Note**: A single request can contain up to 1000 metrics, and up to 1000 metrics, params, and tags in total. @@ -80,8 +80,8 @@ logbatch(instance::MLFlow, run::Run; metrics::MLFlowUpsertData{Metric}=Metric[], # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to log under This field is required. -- `datasets`: Dataset inputs. +- `run_id`: ID of the [`Run`](@ref) to log under. This field is required. +- `datasets`: A collection of [`DatasetInput`](@ref) to log. # Returns `true` if successful. Otherwise, raises exception. @@ -99,15 +99,16 @@ loginputs(instance::MLFlow, run::Run, datasets::Array{DatasetInput})::Bool = logparam(instance::MLFlow, run_id::String, param::Param) logparam(instance::MLFlow, run::Run, param::Param) -Log a param used for a run. A param is a key-value pair (string key, string value).
-Examples include hyperparameters used for ML model training and constant dates and values -used in an ETL pipeline. A param can be logged only once for a run. +Log a [`Param`](@ref) used for a [`Run`](@ref). A [`Param`](@ref) is a key-value pair +(string key, string value). Examples include hyperparameters used for ML model training and +constant dates and values used in an ETL pipeline. A [`Param`](@ref) can be logged only +once for a [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run under which to log the param. -- `key`: Name of the param. -- `value`: String value of the param being logged. +- `run_id`: ID of the [`Run`](@ref) under which to log the [`Param`](@ref). +- `key`: Name of the [`Param`](@ref). +- `value`: String value of the [`Param`](@ref) being logged. # Returns `true` if successful. Otherwise, raises exception. diff --git a/src/services/misc.jl b/src/services/misc.jl index 7be2137..35f7a11 100644 --- a/src/services/misc.jl +++ b/src/services/misc.jl @@ -1,20 +1,20 @@ """ getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; - page_token::String="", max_results::Int32=1) + page_token::String="", max_results::Union{Int64, Missing}=missing) -Get a list of all values for the specified metric for a given run. +Get a list of all values for the specified [`Metric`](@ref) for a given [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run from which to fetch metric values. -- `metric_key`: Name of the metric. -- `page_token`: Token indicating the page of metric history to fetch. -- `max_results`: Maximum number of logged instances of a metric for a run to -return per call. +- `run_id`: ID of the [`Run`](@ref) from which to fetch [`Metric`](@ref) values. +- `metric_key`: Name of the [`Metric`](@ref) to fetch. +- `page_token`: Token indicating the page of [`Metric`](@ref) history to fetch. 
+- `max_results`: Maximum number of logged instances of a [`Metric`](@ref) for a + [`Run`](@ref) to return per call. # Returns -- A list of all metric historical values for the specified metric in the -specified run. +- A list of all historical values for the specified [`Metric`](@ref) in the specified + [`Run`](@ref). - The next page token if there are more results. """ function getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; @@ -39,3 +39,20 @@ getmetrichistory(instance::MLFlow, run::Run, metric::Metric; page_token::String= )::Tuple{Array{Metric}, Union{String, Nothing}} = getmetrichistory(instance, run.info.run_id, metric.key; page_token=page_token, max_results=max_results) + +""" + refresh(instance::MLFlow, run::Run) + refresh(instance::MLFlow, experiment::Experiment) + +Get the latest metadata for a [`Run`](@ref) or [`Experiment`](@ref). + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `run` or `experiment`: [`Run`](@ref) or [`Experiment`](@ref) to refresh. + +# Returns +An instance of type [`Run`](@ref) or [`Experiment`](@ref). +""" +refresh(instance::MLFlow, experiment::Experiment)::Experiment = + getexperiment(instance, experiment.experiment_id) +refresh(instance::MLFlow, run::Run)::Run = getrun(instance, run.info.run_id) diff --git a/src/services/run.jl b/src/services/run.jl index ef27c3a..caaef6e 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -4,15 +4,15 @@ start_time::Union{Int64, Missing}=missing, tags::Union{Dict{<:Any}, Array{<:Any}}=[]) -Create a new run within an experiment. A run is usually a single execution of a machine -learning or data ETL pipeline. +Create a new [`Run`](@ref) within an [`Experiment`](@ref). A [`Run`](@ref) is usually a +single execution of a machine learning or data ETL pipeline. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_id`: ID of the associated experiment. -- `run_name`: Name of the run. 
-- `start_time`: Unix timestamp in milliseconds of when the run started. -- `tags`: Additional metadata for run. +- `experiment_id`: ID of the associated [`Experiment`](@ref). +- `run_name`: Name of the [`Run`](@ref). +- `start_time`: Unix timestamp in milliseconds of when the [`Run`](@ref) started. +- `tags`: Additional metadata for [`Run`](@ref). # Returns An instance of type [`Run`](@ref). @@ -39,11 +39,11 @@ createrun(instance::MLFlow, experiment::Experiment; deleterun(instance::MLFlow, run_id::String) deleterun(instance::MLFlow, run::Run) -Mark a run for deletion. +Mark a [`Run`](@ref) for deletion. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to delete. +- `run_id`: ID of the [`Run`](@ref) to delete. # Returns `true` if successful. Otherwise, raises exception. @@ -59,11 +59,11 @@ deleterun(instance::MLFlow, run::Run)::Bool = restorerun(instance::MLFlow, run_id::String) restorerun(instance::MLFlow, run::Run) -Restore a deleted run. +Restore a deleted [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to restore. +- `run_id`: ID of the [`Run`](@ref) to restore. # Returns `true` if successful. Otherwise, raises exception. @@ -78,13 +78,14 @@ restorerun(instance::MLFlow, run::Run)::Bool = """ getrun(instance::MLFlow, run_id::String) -Get metadata, metrics, params, and tags for a run. In the case where multiple metrics with -the same key are logged for a run, return only the value with the latest timestamp. If -there are multiple values with the latest timestamp, return the maximum of these values. +Get metadata, metrics, params, and tags for a [`Run`](@ref). In the case where multiple +metrics with the same key are logged for a [`Run`](@ref), return only the value with the +latest timestamp. If there are multiple values with the latest timestamp, return the +maximum of these values. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to fetch. 
+- `run_id`: ID of the [`Run`](@ref) to fetch. # Returns An instance of type [`Run`](@ref). @@ -99,13 +100,13 @@ end setruntag(instance::MLFlow, run::Run, key::String, value::String) setruntag(instance::MLFlow, run::Run, tag::Tag) -Set a tag on a run. +Set a [`Tag`](@ref) on a [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run under which to log the tag. -- `key`: Name of the tag. -- `value`: String value of the tag being logged. +- `run_id`: ID of the [`Run`](@ref) under which to log the [`Tag`](@ref). +- `key`: Name of the [`Tag`](@ref). +- `value`: String value of the [`Tag`](@ref) being logged. # Returns `true` if successful. Otherwise, raises exception. @@ -120,16 +121,16 @@ setruntag(instance::MLFlow, run::Run, tag::Tag)::Bool = setruntag(instance, run.info.run_id, tag.key, tag.value) """ - deletetag(instance::MLFlow, run_id::String, key::String) - deletetag(instance::MLFlow, run::Run, key::String) - deletetag(instance::MLFlow, run::Run, tag::Tag) + deleteruntag(instance::MLFlow, run_id::String, key::String) + deleteruntag(instance::MLFlow, run::Run, key::String) + deleteruntag(instance::MLFlow, run::Run, tag::Tag) -Delete a tag on a run. +Delete a [`Tag`](@ref) on a [`Run`](@ref). # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run that the tag was logged under. -- `key`: Name of the tag. +- `run_id`: ID of the [`Run`](@ref) that the [`Tag`](@ref) was logged under. +- `key`: Name of the [`Tag`](@ref). # Returns `true` if successful. Otherwise, raises exception. @@ -148,18 +149,19 @@ deleteruntag(instance::MLFlow, run::Run, tag::Tag)::Bool = run_view_type::ViewType=ACTIVE_ONLY, max_results::Int=1000, order_by::Array{String}=String[], page_token::String="") -Search for runs that satisfy expressions. Search expressions can use Metric and Param keys. +Search for runs that satisfy expressions. Search expressions can use [`Metric`](@ref) and +[`Param`](@ref) keys. 
# Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `experiment_ids`: List of experiment IDs to search over. +- `experiment_ids`: List of [`Experiment`](@ref) IDs to search over. - `filter`: A filter expression over params, metrics, and tags, that allows returning a -subset of runs. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). + subset of runs. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-runs). - `run_view_type`: Whether to display only active, only deleted, or all runs. Defaults to -only active runs. + only active runs. - `max_results`: Maximum number of runs desired. - `order_by`: List of columns to be ordered by, including attributes, params, metrics, and -tags with an optional “DESC” or “ASC” annotation, where “ASC” is the default. + tags with an optional “DESC” or “ASC” annotation, where “ASC” is the default. - `page_token`: Token indicating the page of runs to fetch. # Returns @@ -191,14 +193,14 @@ end updaterun(instance::MLFlow, run::Run; status::Union{RunStatus, Missing}=missing, end_time::Union{Int64, Missing}=missing, run_name::Union{String, Missing}=missing) -Update run metadata. +Update [`Run`](@ref) metadata. # Arguments - `instance`: [`MLFlow`](@ref) configuration. -- `run_id`: ID of the run to update. -- `status`: Updated status of the run. -- `end_time`: Unix timestamp in milliseconds of when the run ended. -- `run_name`: Updated name of the run. +- `run_id`: ID of the [`Run`](@ref) to update. +- `status`: Updated status of the [`Run`](@ref). +- `end_time`: Unix timestamp in milliseconds of when the [`Run`](@ref) ended. +- `run_name`: Updated name of the [`Run`](@ref). # Returns - An instance of type [`RunInfo`](@ref) with the updated metadata. diff --git a/src/types/dataset.jl b/src/types/dataset.jl index 7843b2a..011fb33 100644 --- a/src/types/dataset.jl +++ b/src/types/dataset.jl @@ -11,10 +11,6 @@ development process. 
- `source::String`: Source information for the dataset. - `schema::String`: The schema of the dataset. This field is optional. - `profile::String`: The profile of the dataset. This field is optional. - -# Constructors -- `Dataset(name, digest, source_type, source, schema, profile)` -- `Dataset(name, digest, source_type, source; schema=nothing, profile=nothing)` """ struct Dataset name::String @@ -36,11 +32,7 @@ Represents a dataset and input tags. # Fields - `tags::Array{Tag}`: A list of tags for the dataset input. -- `dataset::Dataset`: The dataset being used as a Run input. - -# Constructors -- `DatasetInput(tags, dataset)` -- `DatasetInput(dataset; tags=[])` +- `dataset::Dataset`: The dataset being used as a run input. """ struct DatasetInput tags::Array{Tag} diff --git a/src/types/enums.jl b/src/types/enums.jl index 4c2cb69..b9c1104 100644 --- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -3,7 +3,7 @@ # Members - `PENDING_REGISTRATION`: Request to register a new model version is pending as server -performs background tasks. + performs background tasks. - `FAILED_REGISTRATION`: Request to register a new model version has failed. - `READY`: Model version is ready for use. """ diff --git a/src/types/experiment.jl b/src/types/experiment.jl index 1529dbc..105a981 100644 --- a/src/types/experiment.jl +++ b/src/types/experiment.jl @@ -6,7 +6,7 @@ - `name::String`: Human readable name that identifies the experiment. - `artifact_location::String`: Location where artifacts for the experiment are stored. - `lifecycle_stage::String`: Current life cycle stage of the experiment: “active” or -“deleted”. Deleted experiments are not returned by APIs. + “deleted”. Deleted experiments are not returned by APIs. - `last_update_time::Int64`: Last update time. - `creation_time::Int64`: Creation time. - `tags::Array{Tag}`: Additional metadata key-value pairs. 
diff --git a/src/types/mlflow.jl b/src/types/mlflow.jl index 9ba4e6a..ff0a282 100644 --- a/src/types/mlflow.jl +++ b/src/types/mlflow.jl @@ -7,13 +7,7 @@ Base type which defines location and version for MLFlow API service. - `apiroot::String`: API root URL, e.g. `http://localhost:5000/api` - `apiversion::Union{Integer, AbstractFloat}`: used API version, e.g. `2.0` - `headers::Dict`: HTTP headers to be provided with the REST API requests (useful for -authetication tokens) Default is `false`, using the REST API endpoint. - -# Constructors - -- `MLFlow(apiroot; apiversion=2.0,headers=Dict())` -- `MLFlow()` - defaults to `MLFlow(ENV["MLFLOW_TRACKING_URI"])` or -`MLFlow("http://localhost:5000/api")` + authentication tokens). Default is `false`, using the REST API endpoint. # Examples diff --git a/src/types/model_version.jl b/src/types/model_version.jl index 3b07655..1c17c05 100644 --- a/src/types/model_version.jl +++ b/src/types/model_version.jl @@ -6,20 +6,20 @@ - `version::String`: Model’s version number. - `creation_timestamp::Int64`: Timestamp recorded when this model_version was created. - `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this model_version -was last updated. + was last updated. - `user_id::String`: User that created this model_version. - `current_stage::String`: Current stage for this model_version. - `description::String`: Description of this model_version. - `source::String`: URI indicating the location of the source model artifacts, used when -creating model_version. + creating model_version. - `run_id::String`: MLflow run ID used when creating model_version, if source was generated -by an experiment run stored in MLflow tracking server. + by an experiment run stored in MLflow tracking server. - `status::ModelVersionStatus`: Current status of model_version. - `status_message::String`: Details on current status, if it is pending or failed. - `tags::Array{Tag}`: Additional metadata key-value pairs.
- `run_link::String`: Direct link to the run that generated this version. This field is set -at model version creation time only for model versions whose source run is from a tracking -server that is different from the registry server. + at model version creation time only for model versions whose source run is from a + tracking server that is different from the registry server. - `aliases::Array{String}`: Aliases pointing to this model_version. """ struct ModelVersion diff --git a/src/types/registered_model.jl b/src/types/registered_model.jl index ebdd865..7d1f217 100644 --- a/src/types/registered_model.jl +++ b/src/types/registered_model.jl @@ -20,14 +20,14 @@ Base.show(io::IO, t::RegisteredModelAlias) = show(io, ShowCase(t, new_lines=true - `name::String`: Unique name for the model. - `creation_timestamp::Int64`: Timestamp recorded when this RegisteredModel was created. - `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this -RegisteredModel was last updated. + RegisteredModel was last updated. - `user_id::String`: User that created this RegisteredModel. - `description::String`: Description of this RegisteredModel. - `latest_versions::Array{ModelVersion}`: Collection of latest model versions for each -stage. Only contains models with current READY status. + stage. Only contains models with current READY status. - `tags::Array{Tag}`: Additional metadata key-value pairs. - `aliases::Array{RegisteredModelAlias}`: Aliases pointing to model versions associated -with this RegisteredModel. + with this RegisteredModel. """ struct RegisteredModel name::String diff --git a/src/types/run.jl b/src/types/run.jl index 44d51b6..4b0e94c 100644 --- a/src/types/run.jl +++ b/src/types/run.jl @@ -48,11 +48,11 @@ Metadata of a single run. - `start_time::Int64`: Unix timestamp of when the run started in milliseconds. - `end_time::Int64`: Unix timestamp of when the run ended in milliseconds. 
- `artifact_uri::String`: URI of the directory where artifacts should be uploaded. This can -be a local path (starting with “/”), or a distributed file system (DFS) path, -like s3://bucket/directory or dbfs:/my/directory. If not set, the local ./mlruns directory -is chosen. + be a local path (starting with “/”), or a distributed file system (DFS) path, + like s3://bucket/directory or dbfs:/my/directory. If not set, the local ./mlruns + directory is chosen. - `lifecycle_stage::String`: Current life cycle stage of the experiment: "active" or -"deleted". + "deleted". """ struct RunInfo run_id::String @@ -109,6 +109,11 @@ Base.show(io::IO, t::RunInputs) = show(io, ShowCase(t, new_lines=true)) Run A single run. + +# Fields +- `info::RunInfo`: Metadata of the run. +- `data::RunData`: Run data (metrics, params, and tags). +- `inputs::RunInputs`: Run inputs. """ struct Run info::RunInfo diff --git a/src/utils.jl b/src/utils.jl index 7a28a60..c6993a7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -91,7 +91,3 @@ function parse(::Type{T}, entities::MLFlowUpsertData{T}) where T<:LoggingData end return entities end - -refresh(instance::MLFlow, experiment::Experiment)::Experiment = - getexperiment(instance, experiment.experiment_id) -refresh(instance::MLFlow, run::Run)::Run = getrun(instance, run.info.run_id) From ea5fe2928a51e87a655ca50530824ad33831368f Mon Sep 17 00:00:00 2001 From: Jose Esparza <28990958+pebeto@users.noreply.github.com> Date: Mon, 4 Nov 2024 00:08:50 -0500 Subject: [PATCH 20/31] Implementing `registered_model` service --- docs/make.jl | 3 +- docs/src/reference/registered_model.md | 8 +++ src/MLFlowClient.jl | 4 ++ src/api.jl | 45 +++++++++++++ src/services/registered_model.jl | 91 ++++++++++++++++++++++++++ src/types/enums.jl | 1 + src/types/model_version.jl | 6 ++ src/types/registered_model.jl | 16 +++-- test/runtests.jl | 1 + test/services/registered_model.jl | 87 ++++++++++++++++++++++++ 10 files changed, 257 insertions(+), 5 deletions(-) create mode 
100644 docs/src/reference/registered_model.md create mode 100644 src/services/registered_model.jl create mode 100644 test/services/registered_model.jl diff --git a/docs/make.jl b/docs/make.jl index 264bc6d..48f7878 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,6 +10,7 @@ makedocs(; "Experiment operations" => "reference/experiment.md", "Logging operations" => "reference/loggers.md", "Miscellaneous operations" => "reference/misc.md", - "Run operations" => "reference/run.md"]]) + "Run operations" => "reference/run.md", + "Registered model operations" => "reference/registered_model.md"]]) deploydocs(; repo="github.com/JuliaAI/MLFlowClient.jl", devbranch="main") diff --git a/docs/src/reference/registered_model.md b/docs/src/reference/registered_model.md new file mode 100644 index 0000000..f0a2739 --- /dev/null +++ b/docs/src/reference/registered_model.md @@ -0,0 +1,8 @@ +# Registered model operations +```@docs +createregisteredmodel +getregisteredmodel +renameregisteredmodel +updateregisteredmodel +deleteregisteredmodel +``` diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 99638e7..7be85f6 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -66,4 +66,8 @@ export listartifacts include("services/misc.jl") export refresh, getmetrichistory +include("services/registered_model.jl") +export getregisteredmodel, createregisteredmodel, deleteregisteredmodel, + renameregisteredmodel, updateregisteredmodel + end diff --git a/src/api.jl b/src/api.jl index a8ff985..44236bf 100644 --- a/src/api.jl +++ b/src/api.jl @@ -66,3 +66,48 @@ function mlfpost(mlf, endpoint; kwargs...) throw(ErrorException(error_message)) end end + +""" + mlfpatch(mlf, endpoint; kwargs...) + +Performs a HTTP PATCH to the specified endpoint. kwargs are converted to JSON and become +the PATCH body. +""" +function mlfpatch(mlf, endpoint; kwargs...) 
+ apiuri = uri(mlf, endpoint;) + apiheaders = headers(mlf, Dict("Content-Type" => "application/json")) + body = JSON.json(kwargs) + + try + response = HTTP.patch(apiuri, apiheaders, body) + return response.body |> String |> JSON.parse + catch e + error_response = e.response.body |> String |> JSON.parse + error_message = "$(error_response["error_code"]) - $(error_response["message"])" + @error error_message + throw(ErrorException(error_message)) + end +end + +""" + mlfdelete(mlf, endpoint; kwargs...) + +Performs an HTTP DELETE to the specified endpoint. Non-`missing` kwargs are passed to `uri` +as `parameters`; all kwargs are also converted to JSON and become the DELETE body. +""" +function mlfdelete(mlf, endpoint; kwargs...) + apiuri = uri(mlf, endpoint; + parameters=Dict(k => v for (k, v) in kwargs if v !== missing)) + apiheaders = headers(mlf, Dict("Content-Type" => "application/json")) + body = JSON.json(kwargs) + + try + response = HTTP.delete(apiuri, apiheaders, body) + return response.body |> String |> JSON.parse + catch e + error_response = e.response.body |> String |> JSON.parse + error_message = "$(error_response["error_code"]) - $(error_response["message"])" + @error error_message + throw(ErrorException(error_message)) + end +end diff --git a/src/services/registered_model.jl b/src/services/registered_model.jl new file mode 100644 index 0000000..f8c1380 --- /dev/null +++ b/src/services/registered_model.jl @@ -0,0 +1,91 @@ +""" + createregisteredmodel(instance::MLFlow, name::String; + tags::MLFlowUpsertData{Tag}=Tag[], description::Union{String, Missing}=missing) + +Create a [`RegisteredModel`](@ref) with a name. Returns the newly created +[`RegisteredModel`](@ref). Validates that another [`RegisteredModel`](@ref) with the same +name does not already exist and fails if another [`RegisteredModel`](@ref) with the same +name already exists. + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `name`: Register models under this name. +- `tags`: A collection of [`Tag`](@ref). 
+- `description`: Optional description for [`RegisteredModel`](@ref). + +# Returns +An instance of type [`RegisteredModel`](@ref). +""" +function createregisteredmodel(instance::MLFlow, name::String; + tags::MLFlowUpsertData{Tag}=Tag[], + description::Union{String, Missing}=missing)::RegisteredModel + result = mlfpost(instance, "registered-models/create"; name=name, + tags=parse(Tag, tags), description=description) + return result["registered_model"] |> RegisteredModel +end + +""" + getregisteredmodel(instance::MLFlow, name::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `name`: [`RegisteredModel`](@ref) model unique name identifier. + +# Returns +An instance of type [`RegisteredModel`](@ref). +""" +function getregisteredmodel(instance::MLFlow, name::String)::RegisteredModel + result = mlfget(instance, "registered-models/get"; name=name) + return result["registered_model"] |> RegisteredModel +end + +""" + renameregisteredmodel(instance::MLFlow, name::String, new_name::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `name`: [`RegisteredModel`](@ref) unique name identifier. +- `new_name`: If provided, updates the name for this [`RegisteredModel`](@ref). + +# Returns +An instance of type [`RegisteredModel`](@ref). +""" +function renameregisteredmodel(instance::MLFlow, name::String, + new_name::String)::RegisteredModel + result = mlfpost(instance, "registered-models/rename"; name=name, new_name=new_name) + return result["registered_model"] |> RegisteredModel +end + +""" + updateregisteredmodel(instance::MLFlow, name::String; + description::Union{String, Missing}=missing) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `name`: [`RegisteredModel`](@ref) unique name identifier. +- `description`: If provided, updates the description for this [`RegisteredModel`](@ref). + +# Returns +An instance of type [`RegisteredModel`](@ref). 
+""" +function updateregisteredmodel(instance::MLFlow, name::String; + description::Union{String, Missing}=missing)::RegisteredModel + result = mlfpatch(instance, "registered-models/update"; name=name, + description=description) + return result["registered_model"] |> RegisteredModel +end + +""" + deleteregisteredmodel(instance::MLFlow, name::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `name`: [`RegisteredModel`](@ref) unique name identifier. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function deleteregisteredmodel(instance::MLFlow, name::String)::Bool + mlfdelete(instance, "registered-models/delete"; name=name) + return true +end diff --git a/src/types/enums.jl b/src/types/enums.jl index b9c1104..70cb598 100644 --- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -12,6 +12,7 @@ FAILED_REGISTRATION=2 READY=3 end +ModelVersionStatus(status::String) = Dict(value => key for (key, value) in ModelVersionStatus |> Base.Enums.namemap)[status |> Symbol] |> ModelVersionStatus """ RunStatus diff --git a/src/types/model_version.jl b/src/types/model_version.jl index 1c17c05..2c572d8 100644 --- a/src/types/model_version.jl +++ b/src/types/model_version.jl @@ -38,4 +38,10 @@ struct ModelVersion run_link::String aliases::Array{String} end +ModelVersion(data::Dict{String, Any}) = ModelVersion(data["name"], data["version"], + data["creation_timestamp"], data["last_updated_timestamp"], data["user_id"], + data["current_stage"], data["description"], data["source"], data["run_id"], + ModelVersionStatus(data["status"]), data["status_message"], + [Tag(tag) for tag in get(data, "tags", [])], data["run_link"], + get(data, "aliases", [])) Base.show(io::IO, t::ModelVersion) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/registered_model.jl b/src/types/registered_model.jl index 7d1f217..0d909c4 100644 --- a/src/types/registered_model.jl +++ b/src/types/registered_model.jl @@ -11,6 +11,8 @@ struct RegisteredModelAlias 
alias::String version::String end +RegisteredModelAlias(data::Dict{String, Any}) = RegisteredModelAlias(data["alias"], + data["version"]) Base.show(io::IO, t::RegisteredModelAlias) = show(io, ShowCase(t, new_lines=true)) """ @@ -21,8 +23,8 @@ Base.show(io::IO, t::RegisteredModelAlias) = show(io, ShowCase(t, new_lines=true - `creation_timestamp::Int64`: Timestamp recorded when this RegisteredModel was created. - `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this RegisteredModel was last updated. -- `user_id::String`: User that created this RegisteredModel. -- `description::String`: Description of this RegisteredModel. +- `user_id::Union{String, Nothing}`: User that created this RegisteredModel. +- `description::Union{String, Nothing}`: Description of this RegisteredModel. - `latest_versions::Array{ModelVersion}`: Collection of latest model versions for each stage. Only contains models with current READY status. - `tags::Array{Tag}`: Additional metadata key-value pairs. 
@@ -33,10 +35,16 @@ struct RegisteredModel name::String creation_timestamp::Int64 last_updated_timestamp::Int64 - user_id::String - description::String + user_id::Union{String, Nothing} + description::Union{String, Nothing} latest_versions::Array{ModelVersion} tags::Array{Tag} aliases::Array{RegisteredModelAlias} end +RegisteredModel(data::Dict{String, Any}) = RegisteredModel(data["name"], + data["creation_timestamp"], data["last_updated_timestamp"], + get(data, "user_id", nothing), get(data, "description", nothing), + [ModelVersion(version) for version in get(data, "latest_versions", [])], + [Tag(tag) for tag in get(data, "tags", [])], + [RegisteredModelAlias(alias) for alias in get(data, "aliases", [])]) Base.show(io::IO, t::RegisteredModel) = show(io, ShowCase(t, new_lines=true)) diff --git a/test/runtests.jl b/test/runtests.jl index 66a6a3c..cc0d5fe 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,3 +9,4 @@ include("services/misc.jl") include("services/loggers.jl") include("services/artifact.jl") include("services/experiment.jl") +include("services/registered_model.jl") diff --git a/test/services/registered_model.jl b/test/services/registered_model.jl new file mode 100644 index 0000000..8028926 --- /dev/null +++ b/test/services/registered_model.jl @@ -0,0 +1,87 @@ +@testset verbose = true "create registered model" begin + @ensuremlf + + @testset "base" begin + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + + @test registered_model isa RegisteredModel + @test registered_model.name == "missy" + @test registered_model.description == "gala" + end + + @testset "name exists" begin + registered_model = getregisteredmodel(mlf, "missy") + @test_throws ErrorException createregisteredmodel(mlf, registered_model.name) + deleteregisteredmodel(mlf, "missy") + end + + @testset "with tags as array of tags" begin + createregisteredmodel(mlf, "missy"; tags=[Tag("test_key", "test_value")]) + deleteregisteredmodel(mlf, "missy") + end + + 
@testset "with tags as array of pairs" begin + createregisteredmodel(mlf, "missy"; tags=["test_key" => "test_value"]) + deleteregisteredmodel(mlf, "missy") + end + + @testset "with tags as array of dicts" begin + createregisteredmodel(mlf, "missy"; + tags=[Dict("key" => "test_key", "value" => "test_value")]) + deleteregisteredmodel(mlf, "missy") + end + + @testset "with tags as dict" begin + createregisteredmodel(mlf, "missy"; tags=Dict("test_key" => "test_value")) + deleteregisteredmodel(mlf, "missy") + end +end + +@testset verbose = true "get registered model" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + retrieved_registered_model = getregisteredmodel(mlf, registered_model.name) + + @test retrieved_registered_model isa RegisteredModel + @test retrieved_registered_model.name == registered_model.name + @test retrieved_registered_model.description == registered_model.description + + deleteregisteredmodel(mlf, "missy") +end + +@testset verbose = true "rename registered model" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + renamed_registered_model = renameregisteredmodel(mlf, registered_model.name, "mister") + + @test renamed_registered_model isa RegisteredModel + @test renamed_registered_model.name == "mister" + @test renamed_registered_model.description == registered_model.description + + deleteregisteredmodel(mlf, "mister") +end + +@testset verbose = true "update registered model" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + updated_registered_model = updateregisteredmodel(mlf, registered_model.name; + description="ana") + + @test updated_registered_model isa RegisteredModel + @test updated_registered_model.name == registered_model.name + @test updated_registered_model.description == "ana" + + deleteregisteredmodel(mlf, "missy") +end + +@testset verbose = true "delete registered model" begin + @ensuremlf 
+ + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + deleteregisteredmodel(mlf, "missy") + + @test_throws ErrorException getregisteredmodel(mlf, "missy") +end From 722f20f06ac2f7d7ea8e5c4764f979a89cccf09b Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 06:11:43 -0500 Subject: [PATCH 21/31] Implemented ModelVersion operations --- .gitignore | 2 + Project.toml | 2 +- docs/make.jl | 2 +- docs/src/reference/{loggers.md => logger.md} | 0 src/MLFlowClient.jl | 7 +- src/services/{loggers.jl => logger.jl} | 0 src/services/model_version.jl | 171 ++++++++++++++++++ src/types/model_version.jl | 16 +- test/runtests.jl | 3 +- test/services/{loggers.jl => logger.jl} | 0 test/services/model_version.jl | 173 +++++++++++++++++++ 11 files changed, 364 insertions(+), 12 deletions(-) rename docs/src/reference/{loggers.md => logger.md} (100%) rename src/services/{loggers.jl => logger.jl} (100%) create mode 100644 src/services/model_version.jl rename test/services/{loggers.jl => logger.jl} (100%) create mode 100644 test/services/model_version.jl diff --git a/.gitignore b/.gitignore index f4f6dab..87a8c44 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ Manifest.toml /docs/build/ mlruns coverage +Pipfile +Pipfile.lock diff --git a/Project.toml b/Project.toml index 00d90f9..c59e57a 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] -HTTP = "1.9" +HTTP = "1.0" JSON = "0.21" ShowCases = "0.1" URIs = "1.0" diff --git a/docs/make.jl b/docs/make.jl index 48f7878..671eeae 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -8,7 +8,7 @@ makedocs(; pages=["Home" => "index.md", "Tutorial" => "tutorial.md", "Reference" => [ "Types" => "reference/types.md", "Artifact operations" => "reference/artifact.md", "Experiment operations" => "reference/experiment.md", - "Logging operations" => "reference/loggers.md", + "Logging operations" => 
"reference/logger.md", "Miscellaneous operations" => "reference/misc.md", "Run operations" => "reference/run.md", "Registered model operations" => "reference/registered_model.md"]]) diff --git a/docs/src/reference/loggers.md b/docs/src/reference/logger.md similarity index 100% rename from docs/src/reference/loggers.md rename to docs/src/reference/logger.md diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 7be85f6..d583477 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -57,7 +57,7 @@ include("services/run.jl") export getrun, createrun, deleterun, setruntag, updaterun, restorerun, searchruns, deleteruntag -include("services/loggers.jl") +include("services/logger.jl") export logbatch, loginputs, logmetric, logparam include("services/artifact.jl") @@ -70,4 +70,9 @@ include("services/registered_model.jl") export getregisteredmodel, createregisteredmodel, deleteregisteredmodel, renameregisteredmodel, updateregisteredmodel +include("services/model_version.jl") +export getlatestmodelversions, getmodelversion, createmodelversion, deletemodelversion, + updatemodelversion, searchmodelversions, getdownloaduriformodelversionartifacts, + transitionmodelversionstage + end diff --git a/src/services/loggers.jl b/src/services/logger.jl similarity index 100% rename from src/services/loggers.jl rename to src/services/logger.jl diff --git a/src/services/model_version.jl b/src/services/model_version.jl new file mode 100644 index 0000000..bad6d5f --- /dev/null +++ b/src/services/model_version.jl @@ -0,0 +1,171 @@ +""" + getlatestmodelversions(instance::MLFlow, name::String; + stages::Array{String}=String[]) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `stages:` List of stages. + +# Returns +Latest [`ModelVersion`](@ref) for each requests stage. 
+""" +function getlatestmodelversions(instance::MLFlow, name::String; + stages::Array{String}=String[])::Array{ModelVersion} +result = mlfpost(instance, "registered-models/get-latest-versions"; name=name, + stages=stages) + return result["model_versions"] .|> ModelVersion +end + +""" + createmodelversion(instance::MLFlow, name::String, source::String; + run_id::Union{String, Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[], + run_link::Union{String, Missing}=missing, + description::Union{String, Missing}=missing) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Register model under this name. +- `source:` URI indicating the location of the model artifacts. +- `run_id`: [`Run`](@ref) id for correlation. +- `tags:` List of [`Tag`](@ref) to associate with the model version. +- `run_link:` Link to the [`Run`](@ref) that generated the [`ModelVersion`](@ref). +- `description:` Optional description for [`ModelVersion`](@ref). + +# Returns +[`ModelVersion`](@ref) created. +""" +function createmodelversion(instance::MLFlow, name::String, source::String; + run_id::Union{String, Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[], + run_link::Union{String, Missing}=missing, + description::Union{String, Missing}=missing)::ModelVersion + result = mlfpost(instance, "model-versions/create"; name=name, source=source, + run_id=run_id, tags=parse(Tag, tags), run_link=run_link, description=description) + return result["model_version"] |> ModelVersion +end + +""" + getmodelversion(instance::MLFlow, name::String, version::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref). +- `version:` [`ModelVersion`](@ref) number. + +# Returns +[`ModelVersion`](@ref) requested. 
+""" +function getmodelversion(instance::MLFlow, name::String, version::String)::ModelVersion + result = mlfget(instance, "model-versions/get"; name=name, version=version) + return result["model_version"] |> ModelVersion +end + +""" + updatemodelversion(instance::MLFlow, name::String, version::String; + description::Union{String, Missing}=missing) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref). +- `version:` [`ModelVersion`](@ref) number. +- `description:` Optional description for [`ModelVersion`](@ref). + +# Returns +[`ModelVersion`](@ref) generated for this model in registry. +""" +function updatemodelversion(instance::MLFlow, name::String, version::String; + description::Union{String, Missing}=missing)::ModelVersion + result = mlfpatch(instance, "model-versions/update"; name=name, version=version, + description=description) + return result["model_version"] |> ModelVersion +end + +""" + deletemodelversion(instance::MLFlow, name::String, version::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref). +- `version:` [`ModelVersion`](@ref) number. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function deletemodelversion(instance::MLFlow, name::String, version::String)::Bool + mlfdelete(instance, "model-versions/delete"; name=name, version=version) + return true +end + +""" + searchmodelversions(instance::MLFlow, filter::String, max_results::Int64, + order_by::String, page_token::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `filter`: String filter condition. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-modelversions). +- `max_results`: Maximum number of models desired. +- `order_by`: List of columns to be ordered by including model name, version, stage with an +optional “DESC” or “ASC” annotation, where “ASC” is the default. 
Tiebreaks are done by +latest stage transition timestamp, followed by name ASC, followed by version DESC. +- `page_token`: Pagination token to go to next page based on previous search query. + +# Returns +- Vector of [`ModelVersion`](@ref) that were found in the [`MLFlow`](@ref) instance. +- The next page token if there are more results. +""" +function searchmodelversions(instance::MLFlow; filter::String="", + max_results::Int64=200000, order_by::Array{String}=String[], + page_token::String="")::Tuple{Array{ModelVersion}, Union{String, Nothing}} + parameters = (; max_results, page_token, filter) + + if order_by |> !isempty + parameters = (; order_by, parameters...) + end + + result = mlfget(instance, "model-versions/search"; parameters...) + + model_versions = get(result, "model_versions", []) |> (x -> [ModelVersion(y) for y in x]) + next_page_token = get(result, "next_page_token", nothing) + + return model_versions, next_page_token +end + +""" + getdownloaduriformodelversionartifacts(instance::MLFlow, name::String, version::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref). +- `version:` [`ModelVersion`](@ref) number. + +# Returns +URI corresponding to where artifacts for this [`ModelVersion`](@ref) are stored. +""" +function getdownloaduriformodelversionartifacts(instance::MLFlow, name::String, + version::String)::String + result = mlfget(instance, "model-versions/get-download-uri"; name=name, version=version) + return result["artifact_uri"] +end + +""" + transitionmodelversionstage(instance::MLFlow, name::String, version::String, + stage::String, archive_existing_versions::Bool) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref). +- `version:` [`ModelVersion`](@ref) number. +- `stage:` Transition [`ModelVersion`](@ref) to new stage. 
+- `archive_existing_versions:` When transitioning a model version to a particular stage, +this flag dictates whether all existing model versions in that stage should be atomically +moved to the “archived” stage. This ensures that at-most-one model version exists in the +target stage. + +# Returns +Updated [`ModelVersion`](@ref). +""" +function transitionmodelversionstage(instance::MLFlow, name::String, version::String, + stage::String, archive_existing_versions::Bool)::ModelVersion + result = mlfpost(instance, "model-versions/transition-stage"; name=name, + version=version, stage=stage, archive_existing_versions=archive_existing_versions) + return result["model_version"] |> ModelVersion +end diff --git a/src/types/model_version.jl b/src/types/model_version.jl index 2c572d8..d2993fa 100644 --- a/src/types/model_version.jl +++ b/src/types/model_version.jl @@ -7,7 +7,7 @@ - `creation_timestamp::Int64`: Timestamp recorded when this model_version was created. - `last_updated_timestamp::Int64`: Timestamp recorded when metadata for this model_version was last updated. -- `user_id::String`: User that created this model_version. +- `user_id::Union{String, Nothing}`: User that created this model_version. - `current_stage::String`: Current stage for this model_version. - `description::String`: Description of this model_version. 
- `source::String`: URI indicating the location of the source model artifacts, used when @@ -27,21 +27,21 @@ struct ModelVersion version::String creation_timestamp::Int64 last_updated_timestamp::Int64 - user_id::String + user_id::Union{String, Nothing} current_stage::String description::String source::String run_id::String status::ModelVersionStatus - status_message::String + status_message::Union{String, Nothing} tags::Array{Tag} run_link::String aliases::Array{String} end ModelVersion(data::Dict{String, Any}) = ModelVersion(data["name"], data["version"], - data["creation_timestamp"], data["last_updated_timestamp"], data["user_id"], - data["current_stage"], data["description"], data["source"], data["run_id"], - ModelVersionStatus(data["status"]), data["status_message"], - [Tag(tag) for tag in get(data, "tags", [])], data["run_link"], - get(data, "aliases", [])) + data["creation_timestamp"], data["last_updated_timestamp"], + get(data, "user_id", nothing), data["current_stage"], data["description"], + data["source"], data["run_id"], ModelVersionStatus(data["status"]), + get(data, "status_message", nothing), [Tag(tag) for tag in get(data, "tags", [])], + data["run_link"], get(data, "aliases", [])) Base.show(io::IO, t::ModelVersion) = show(io, ShowCase(t, new_lines=true)) diff --git a/test/runtests.jl b/test/runtests.jl index cc0d5fe..6e23148 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,7 +6,8 @@ include("base.jl") include("services/run.jl") include("services/misc.jl") -include("services/loggers.jl") +include("services/logger.jl") include("services/artifact.jl") include("services/experiment.jl") include("services/registered_model.jl") +include("services/model_version.jl") diff --git a/test/services/loggers.jl b/test/services/logger.jl similarity index 100% rename from test/services/loggers.jl rename to test/services/logger.jl diff --git a/test/services/model_version.jl b/test/services/model_version.jl new file mode 100644 index 0000000..de56fa5 --- 
/dev/null +++ b/test/services/model_version.jl @@ -0,0 +1,173 @@ +@testset verbose = true "get latest model versions" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + createmodelversion(mlf, "missy", run.info.artifact_uri) + createmodelversion(mlf, "missy", run.info.artifact_uri) + + model_versions = getlatestmodelversions(mlf, "missy") + + @test model_versions isa Array{ModelVersion} + @test length(model_versions) == 1 + @test (model_versions |> first).name == "missy" + @test (model_versions |> first).source == run.info.artifact_uri + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "create model version" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + @testset "base" begin + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + + @test model_version isa ModelVersion + @test model_version.name == "missy" + @test model_version.source == run.info.artifact_uri + end + + @testset "with all params" begin + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri; + run_id=run.info.run_id, tags=[Tag("test_key", "test_value")], + run_link="run.link", description="test_description") + end + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "get model version" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + retrieved_model_version = getmodelversion(mlf, "missy", model_version.version) + + @test retrieved_model_version isa ModelVersion + @test retrieved_model_version.name == model_version.name + @test 
retrieved_model_version.version == model_version.version + @test retrieved_model_version.source == model_version.source + @test retrieved_model_version.run_id == model_version.run_id + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "update model version" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + updated_model_version = updatemodelversion(mlf, "missy", model_version.version; + description="test_description") + + @test updated_model_version isa ModelVersion + @test updated_model_version.name == model_version.name + @test updated_model_version.version == model_version.version + @test updated_model_version.source == model_version.source + @test updated_model_version.run_id == model_version.run_id + @test updated_model_version.description != model_version.description + @test updated_model_version.description == "test_description" + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "delete model version" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + deletemodelversion(mlf, "missy", model_version.version) + + @test_throws ErrorException getmodelversion(mlf, "missy", model_version.version) + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "search model versions" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + createregisteredmodel(mlf, "gala") + + createmodelversion(mlf, "missy", run.info.artifact_uri) + 
createmodelversion(mlf, "gala", run.info.artifact_uri) + + @testset "default search" begin + model_versions, next_page_token = searchmodelversions(mlf) + + @test length(model_versions) == 2 # four because of the default experiment + @test next_page_token |> isnothing + end + + @testset "with pagination" begin + experiments, next_page_token = searchexperiments(mlf; max_results=1) + + @test length(experiments) == 1 + @test next_page_token |> !isnothing + @test next_page_token isa String + end + + deleteregisteredmodel(mlf, "missy") + deleteregisteredmodel(mlf, "gala") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "get download uri for model version artifacts" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + download_uri = getdownloaduriformodelversionartifacts(mlf, "missy", model_version.version) + + @test download_uri isa String + @test download_uri == run.info.artifact_uri + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "transition model version stage" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + updated_model_version = transitionmodelversionstage(mlf, "missy", model_version.version, "Production", true) + + @test updated_model_version isa ModelVersion + @test updated_model_version.name == model_version.name + @test updated_model_version.version == model_version.version + @test updated_model_version.source == model_version.source + @test updated_model_version.run_id == model_version.run_id + @test updated_model_version.current_stage == "Production" + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, 
experiment) +end From a81255b62afd41dd427bf01b9cb3882e8f66a0ca Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 06:21:27 -0500 Subject: [PATCH 22/31] Including model version documentation --- docs/make.jl | 3 ++- docs/src/reference/model_version.md | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 docs/src/reference/model_version.md diff --git a/docs/make.jl b/docs/make.jl index 671eeae..e6fcb8c 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,6 +11,7 @@ makedocs(; "Logging operations" => "reference/logger.md", "Miscellaneous operations" => "reference/misc.md", "Run operations" => "reference/run.md", - "Registered model operations" => "reference/registered_model.md"]]) + "Registered model operations" => "reference/registered_model.md", + "Model version operations" => "reference/model_version.md",]]) deploydocs(; repo="github.com/JuliaAI/MLFlowClient.jl", devbranch="main") diff --git a/docs/src/reference/model_version.md b/docs/src/reference/model_version.md new file mode 100644 index 0000000..6ee14b9 --- /dev/null +++ b/docs/src/reference/model_version.md @@ -0,0 +1,12 @@ +# Model version operations + +```@docs +getlatestmodelversions +createmodelversion +getmodelversion +updatemodelversion +deletemodelversion +searchmodelversions +getdownloaduriformodelversionartifacts +transitionmodelversionstage +``` From 645866f3d18acf0a8fe9e72b6ab6ef5181d437c9 Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 06:32:15 -0500 Subject: [PATCH 23/31] Implementing `searchregisteredmodels` --- docs/src/reference/registered_model.md | 1 + src/MLFlowClient.jl | 2 +- src/services/model_version.jl | 12 ++++----- src/services/registered_model.jl | 34 ++++++++++++++++++++++++++ test/services/registered_model.jl | 25 +++++++++++++++++++ 5 files changed, 67 insertions(+), 7 deletions(-) diff --git a/docs/src/reference/registered_model.md b/docs/src/reference/registered_model.md index f0a2739..307e008 100644 --- 
a/docs/src/reference/registered_model.md +++ b/docs/src/reference/registered_model.md @@ -5,4 +5,5 @@ getregisteredmodel renameregisteredmodel updateregisteredmodel deleteregisteredmodel +searchregisteredmodels ``` diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index d583477..cef94e0 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -68,7 +68,7 @@ export refresh, getmetrichistory include("services/registered_model.jl") export getregisteredmodel, createregisteredmodel, deleteregisteredmodel, - renameregisteredmodel, updateregisteredmodel + renameregisteredmodel, updateregisteredmodel, searchregisteredmodels include("services/model_version.jl") export getlatestmodelversions, getmodelversion, createmodelversion, deletemodelversion, diff --git a/src/services/model_version.jl b/src/services/model_version.jl index bad6d5f..e23cbb6 100644 --- a/src/services/model_version.jl +++ b/src/services/model_version.jl @@ -11,7 +11,7 @@ Latest [`ModelVersion`](@ref) for each requests stage. """ function getlatestmodelversions(instance::MLFlow, name::String; stages::Array{String}=String[])::Array{ModelVersion} -result = mlfpost(instance, "registered-models/get-latest-versions"; name=name, + result = mlfpost(instance, "registered-models/get-latest-versions"; name=name, stages=stages) return result["model_versions"] .|> ModelVersion end @@ -104,8 +104,8 @@ end - `filter`: String filter condition. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-modelversions). - `max_results`: Maximum number of models desired. - `order_by`: List of columns to be ordered by including model name, version, stage with an -optional “DESC” or “ASC” annotation, where “ASC” is the default. Tiebreaks are done by -latest stage transition timestamp, followed by name ASC, followed by version DESC. + optional “DESC” or “ASC” annotation, where “ASC” is the default. 
Tiebreaks are done by + latest stage transition timestamp, followed by name ASC, followed by version DESC. - `page_token`: Pagination token to go to next page based on previous search query. # Returns @@ -156,9 +156,9 @@ end - `version:` [`ModelVersion`](@ref) number. - `stage:` Transition [`ModelVersion`](@ref) to new stage. - `archive_existing_versions:` When transitioning a model version to a particular stage, -this flag dictates whether all existing model versions in that stage should be atomically -moved to the “archived” stage. This ensures that at-most-one model version exists in the -target stage. + this flag dictates whether all existing model versions in that stage should be atomically + moved to the “archived” stage. This ensures that at-most-one model version exists in the + target stage. # Returns Updated [`ModelVersion`](@ref). diff --git a/src/services/registered_model.jl b/src/services/registered_model.jl index f8c1380..2a59cd3 100644 --- a/src/services/registered_model.jl +++ b/src/services/registered_model.jl @@ -89,3 +89,37 @@ function deleteregisteredmodel(instance::MLFlow, name::String)::Bool mlfdelete(instance, "registered-models/delete"; name=name) return true end + +""" + searchregisteredmodels(instance::MLFlow, filter::String, max_results::Int64, + order_by::String, page_token::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `filter`: String filter condition. See [MLFlow documentation](https://mlflow.org/docs/latest/rest-api.html#search-registeredmodels). +- `max_results`: Maximum number of models desired. +- `order_by`: List of columns for ordering search results, which can include model name + and last updated timestamp with an optional “DESC” or “ASC” annotation, where “ASC” is + the default. Tiebreaks are done by model name ASC. +- `page_token`: Pagination token to go to the next page based on a previous search query. 
+ +# Returns +- Vector of [`RegisteredModel`](@ref) that were found in the [`MLFlow`](@ref) instance. +- The next page token if there are more results. +""" +function searchregisteredmodels(instance::MLFlow; filter::String="", + max_results::Int64=100, order_by::Array{String}=String[], + page_token::String="")::Tuple{Array{RegisteredModel}, Union{String, Nothing}} + parameters = (; max_results, page_token, filter) + + if order_by |> !isempty + parameters = (; order_by, parameters...) + end + + result = mlfget(instance, "registered-models/search"; parameters...) + + registered_models = get(result, "registered_models", []) |> (x -> [RegisteredModel(y) for y in x]) + next_page_token = get(result, "next_page_token", nothing) + + return registered_models, next_page_token +end diff --git a/test/services/registered_model.jl b/test/services/registered_model.jl index 8028926..2a9814b 100644 --- a/test/services/registered_model.jl +++ b/test/services/registered_model.jl @@ -85,3 +85,28 @@ end @test_throws ErrorException getregisteredmodel(mlf, "missy") end + +@testset verbose = true "search registered models" begin + @ensuremlf + + createregisteredmodel(mlf, "missy"; description="gala") + createregisteredmodel(mlf, "gala"; description="missy") + + @testset "default search" begin + registered_models, next_page_token = searchregisteredmodels(mlf) + + @test length(registered_models) == 2 # four because of the default experiment + @test next_page_token |> isnothing + end + + @testset "with pagination" begin + registered_models, next_page_token = searchregisteredmodels(mlf; max_results=1) + + @test length(registered_models) == 1 + @test next_page_token |> !isnothing + @test next_page_token isa String + end + + deleteregisteredmodel(mlf, "missy") + deleteregisteredmodel(mlf, "gala") +end From 03aa48aed23713e6de4f0810933fb63e971b5ed9 Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 07:16:14 -0500 Subject: [PATCH 24/31] Implementing tag operations for both 
`RegisteredModel` and `ModelVersion` --- docs/src/reference/model_version.md | 2 ++ docs/src/reference/registered_model.md | 2 ++ src/MLFlowClient.jl | 5 +-- src/services/model_version.jl | 38 ++++++++++++++++++++++ src/services/registered_model.jl | 33 +++++++++++++++++++ test/services/model_version.jl | 45 ++++++++++++++++++++++++-- test/services/registered_model.jl | 27 ++++++++++++++++ 7 files changed, 148 insertions(+), 4 deletions(-) diff --git a/docs/src/reference/model_version.md b/docs/src/reference/model_version.md index 6ee14b9..7ca454f 100644 --- a/docs/src/reference/model_version.md +++ b/docs/src/reference/model_version.md @@ -9,4 +9,6 @@ deletemodelversion searchmodelversions getdownloaduriformodelversionartifacts transitionmodelversionstage +setmodelversiontag +deletemodelversiontag ``` diff --git a/docs/src/reference/registered_model.md b/docs/src/reference/registered_model.md index 307e008..02e10f7 100644 --- a/docs/src/reference/registered_model.md +++ b/docs/src/reference/registered_model.md @@ -6,4 +6,6 @@ renameregisteredmodel updateregisteredmodel deleteregisteredmodel searchregisteredmodels +setregisteredmodeltag +deleteregisteredmodeltag ``` diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index cef94e0..f5a6a58 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -68,11 +68,12 @@ export refresh, getmetrichistory include("services/registered_model.jl") export getregisteredmodel, createregisteredmodel, deleteregisteredmodel, - renameregisteredmodel, updateregisteredmodel, searchregisteredmodels + renameregisteredmodel, updateregisteredmodel, searchregisteredmodels, + setregisteredmodeltag, deleteregisteredmodeltag include("services/model_version.jl") export getlatestmodelversions, getmodelversion, createmodelversion, deletemodelversion, updatemodelversion, searchmodelversions, getdownloaduriformodelversionartifacts, - transitionmodelversionstage + transitionmodelversionstage, setmodelversiontag, deletemodelversiontag end diff 
--git a/src/services/model_version.jl b/src/services/model_version.jl index e23cbb6..1599ff3 100644 --- a/src/services/model_version.jl +++ b/src/services/model_version.jl @@ -169,3 +169,41 @@ function transitionmodelversionstage(instance::MLFlow, name::String, version::St version=version, stage=stage, archive_existing_versions=archive_existing_versions) return result["model_version"] |> ModelVersion end + +""" + setmodelversiontag(instance::MLFlow, name::String, key::String, value::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Unique name of the model. +- `version:` Model version number. +- `key:` Name of the [`Tag`](@ref). +- `value:` String value of the tag being logged. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function setmodelversiontag(instance::MLFlow, name::String, version::String, key::String, + value::String)::Bool + mlfpost(instance, "model-versions/set-tag"; name=name, version=version, key=key, + value=value) + return true +end + +""" + deletemodelversiontag(instance::MLFlow, name::String, version::String, key::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref) that the tag was logged under. +- `version:` [`ModelVersion`](@ref) number that the tag was logged under. +- `key:` Name of the [`Tag`](@ref). + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function deletemodelversiontag(instance::MLFlow, name::String, version::String, + key::String)::Bool + mlfdelete(instance, "model-versions/delete-tag"; name=name, version=version, key=key) + return true +end diff --git a/src/services/registered_model.jl b/src/services/registered_model.jl index 2a59cd3..9b50e90 100644 --- a/src/services/registered_model.jl +++ b/src/services/registered_model.jl @@ -123,3 +123,36 @@ function searchregisteredmodels(instance::MLFlow; filter::String="", return registered_models, next_page_token end + +""" + setregisteredmodeltag(instance::MLFlow, name::String, key::String, value::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Unique name of the model. +- `key:` Name of the [`Tag`](@ref). +- `value:` String value of the [`Tag`](@ref) being logged. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function setregisteredmodeltag(instance::MLFlow, name::String, key::String, value::String)::Bool + mlfpost(instance, "registered-models/set-tag"; name=name, key=key, value=value) + return true +end + +""" + deleteregisteredmodeltag(instance::MLFlow, name::String, key::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref) that the tag was logged under. +- `key:` Name of the [`Tag`](@ref). + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function deleteregisteredmodeltag(instance::MLFlow, name::String, key::String)::Bool + mlfdelete(instance, "registered-models/delete-tag"; name=name, key=key) + return true +end diff --git a/test/services/model_version.jl b/test/services/model_version.jl index de56fa5..cd49bdf 100644 --- a/test/services/model_version.jl +++ b/test/services/model_version.jl @@ -142,7 +142,8 @@ end createregisteredmodel(mlf, "missy") model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) - download_uri = getdownloaduriformodelversionartifacts(mlf, "missy", model_version.version) + download_uri = getdownloaduriformodelversionartifacts(mlf, model_version.name, + model_version.version) @test download_uri isa String @test download_uri == run.info.artifact_uri @@ -159,7 +160,8 @@ end createregisteredmodel(mlf, "missy") model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) - updated_model_version = transitionmodelversionstage(mlf, "missy", model_version.version, "Production", true) + updated_model_version = transitionmodelversionstage(mlf, model_version.name, + model_version.version, "Production", true) @test updated_model_version isa ModelVersion @test updated_model_version.name == model_version.name @@ -171,3 +173,42 @@ end deleteregisteredmodel(mlf, "missy") deleteexperiment(mlf, experiment) end + +@testset verbose = true "set model version tag" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + setmodelversiontag(mlf, model_version.name, model_version.version, "test_key", + "test_value") + retrieved_model_version = getmodelversion(mlf, "missy", model_version.version) + + @test retrieved_model_version.tags |> length == 1 + @test (retrieved_model_version.tags |> first).key == "test_key" + @test (retrieved_model_version.tags |> first).value == "test_value" + + 
deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "delete model version tag" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + createregisteredmodel(mlf, "missy") + + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + setmodelversiontag(mlf, model_version.name, model_version.version, "test_key", + "test_value") + deletemodelversiontag(mlf, model_version.name, model_version.version, "test_key") + retrieved_model_version = getmodelversion(mlf, "missy", model_version.version) + + @test isempty(retrieved_model_version.tags) + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end diff --git a/test/services/registered_model.jl b/test/services/registered_model.jl index 2a9814b..0521e0b 100644 --- a/test/services/registered_model.jl +++ b/test/services/registered_model.jl @@ -110,3 +110,30 @@ end deleteregisteredmodel(mlf, "missy") deleteregisteredmodel(mlf, "gala") end + +@testset verbose = true "set registered model tag" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + setregisteredmodeltag(mlf, registered_model.name, "test_key", "test_value") + + retrieved_registered_model = getregisteredmodel(mlf, registered_model.name) + @test retrieved_registered_model.tags |> !isempty + @test (retrieved_registered_model.tags |> first).key == "test_key" + @test (retrieved_registered_model.tags |> first).value == "test_value" + + deleteregisteredmodel(mlf, "missy") +end + +@testset verbose = true "delete registered model tag" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + setregisteredmodeltag(mlf, registered_model.name, "test_key", "test_value") + deleteregisteredmodeltag(mlf, registered_model.name, "test_key") + + retrieved_registered_model = getregisteredmodel(mlf, registered_model.name) + @test 
retrieved_registered_model.tags |> isempty + + deleteregisteredmodel(mlf, "missy") +end From 7067af050ba8488520988e874d54a525da0b8eb0 Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 08:52:26 -0500 Subject: [PATCH 25/31] Implementing alias operations for `RegisteredModel` and `ModelVersion` --- docs/src/reference/model_version.md | 1 + docs/src/reference/registered_model.md | 2 ++ src/MLFlowClient.jl | 6 ++-- src/services/model_version.jl | 17 ++++++++++ src/services/registered_model.jl | 34 +++++++++++++++++++ test/services/model_version.jl | 19 +++++++++++ test/services/registered_model.jl | 45 ++++++++++++++++++++++++-- 7 files changed, 119 insertions(+), 5 deletions(-) diff --git a/docs/src/reference/model_version.md b/docs/src/reference/model_version.md index 7ca454f..3f6e4f7 100644 --- a/docs/src/reference/model_version.md +++ b/docs/src/reference/model_version.md @@ -11,4 +11,5 @@ getdownloaduriformodelversionartifacts transitionmodelversionstage setmodelversiontag deletemodelversiontag +getmodelversionbyalias ``` diff --git a/docs/src/reference/registered_model.md b/docs/src/reference/registered_model.md index 02e10f7..4d93da6 100644 --- a/docs/src/reference/registered_model.md +++ b/docs/src/reference/registered_model.md @@ -8,4 +8,6 @@ deleteregisteredmodel searchregisteredmodels setregisteredmodeltag deleteregisteredmodeltag +deleteregisteredmodelalias +setregisteredmodelalias ``` diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index f5a6a58..8eccae3 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -69,11 +69,13 @@ export refresh, getmetrichistory include("services/registered_model.jl") export getregisteredmodel, createregisteredmodel, deleteregisteredmodel, renameregisteredmodel, updateregisteredmodel, searchregisteredmodels, - setregisteredmodeltag, deleteregisteredmodeltag + setregisteredmodeltag, deleteregisteredmodeltag, deleteregisteredmodelalias, + setregisteredmodelalias include("services/model_version.jl") 
export getlatestmodelversions, getmodelversion, createmodelversion, deletemodelversion, updatemodelversion, searchmodelversions, getdownloaduriformodelversionartifacts, - transitionmodelversionstage, setmodelversiontag, deletemodelversiontag + transitionmodelversionstage, setmodelversiontag, deletemodelversiontag, + getmodelversionbyalias end diff --git a/src/services/model_version.jl b/src/services/model_version.jl index 1599ff3..9ff8ba4 100644 --- a/src/services/model_version.jl +++ b/src/services/model_version.jl @@ -207,3 +207,20 @@ function deletemodelversiontag(instance::MLFlow, name::String, version::String, mlfdelete(instance, "model-versions/delete-tag"; name=name, version=version, key=key) return true end + +""" + getmodelversionbyalias(instance::MLFlow, name::String, alias::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref). +- `alias:` Name of the alias. + +# Returns +[`ModelVersion`](@ref) requested. +""" +function getmodelversionbyalias(instance::MLFlow, name::String, + alias::String)::ModelVersion + result = mlfget(instance, "registered-models/alias"; name=name, alias=alias) + return result["model_version"] |> ModelVersion +end diff --git a/src/services/registered_model.jl b/src/services/registered_model.jl index 9b50e90..d547d74 100644 --- a/src/services/registered_model.jl +++ b/src/services/registered_model.jl @@ -156,3 +156,37 @@ function deleteregisteredmodeltag(instance::MLFlow, name::String, key::String):: mlfdelete(instance, "registered-models/delete-tag"; name=name, key=key) return true end + +""" + deleteregisteredmodelalias(instance::MLFlow, name::String, alias::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref). +- `alias:` Name of the alias. + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function deleteregisteredmodelalias(instance::MLFlow, name::String, alias::String)::Bool + mlfdelete(instance, "registered-models/alias"; name=name, alias=alias) + return true +end + +""" + setregisteredmodelalias(instance::MLFlow, name::String, alias::String, version::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` Name of the [`RegisteredModel`](@ref). +- `alias:` Name of the alias. +- `version:` [`ModelVersion`](@ref) number. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function setregisteredmodelalias(instance::MLFlow, name::String, alias::String, + version::String)::Bool + mlfpost(instance, "registered-models/alias"; name=name, alias=alias, version=version) + return true +end diff --git a/test/services/model_version.jl b/test/services/model_version.jl index cd49bdf..fbf9a20 100644 --- a/test/services/model_version.jl +++ b/test/services/model_version.jl @@ -212,3 +212,22 @@ end deleteregisteredmodel(mlf, "missy") deleteexperiment(mlf, experiment) end + +@testset verbose = true "get model version by alias" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + registered_model = createregisteredmodel(mlf, "missy") + + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + setregisteredmodelalias(mlf, registered_model.name, "gala", model_version.version) + + retrieved_model_version = getmodelversionbyalias(mlf, "missy", "gala") + + @assert retrieved_model_version.aliases |> !isempty + @assert (retrieved_model_version.aliases |> first) == "gala" + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end diff --git a/test/services/registered_model.jl b/test/services/registered_model.jl index 0521e0b..9b03163 100644 --- a/test/services/registered_model.jl +++ b/test/services/registered_model.jl @@ -54,13 +54,13 @@ end @ensuremlf registered_model = createregisteredmodel(mlf, "missy"; 
description="gala") - renamed_registered_model = renameregisteredmodel(mlf, registered_model.name, "mister") + renamed_registered_model = renameregisteredmodel(mlf, registered_model.name, "gala") @test renamed_registered_model isa RegisteredModel - @test renamed_registered_model.name == "mister" + @test renamed_registered_model.name == "gala" @test renamed_registered_model.description == registered_model.description - deleteregisteredmodel(mlf, "mister") + deleteregisteredmodel(mlf, "gala") end @testset verbose = true "update registered model" begin @@ -137,3 +137,42 @@ end deleteregisteredmodel(mlf, "missy") end + +@testset verbose = true "delete registered model alias" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + + setregisteredmodelalias(mlf, registered_model.name, "gala", model_version.version) + deleteregisteredmodelalias(mlf, registered_model.name, "gala") + + retrieved_registered_model = getregisteredmodel(mlf, registered_model.name) + @test retrieved_registered_model.aliases |> isempty + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end + +@testset verbose = true "delete registered model alias" begin + @ensuremlf + + experiment = createexperiment(mlf, UUIDs.uuid4() |> string) + run = createrun(mlf, experiment) + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + model_version = createmodelversion(mlf, "missy", run.info.artifact_uri) + + setregisteredmodelalias(mlf, registered_model.name, "gala", model_version.version) + setregisteredmodelalias(mlf, registered_model.name, "missy", model_version.version) + + retrieved_registered_model = getregisteredmodel(mlf, registered_model.name) + @test retrieved_registered_model.aliases |> !isempty + @test 
length(retrieved_registered_model.aliases) == 2 + + deleteregisteredmodel(mlf, "missy") + deleteexperiment(mlf, experiment) +end From ff8bd99e1793b3c0d1a068cd3163e9655629087b Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 08:57:50 -0500 Subject: [PATCH 26/31] Applying formatting from Julia LSP --- Project.toml | 1 - docs/make.jl | 2 +- src/api.jl | 2 +- src/services/artifact.jl | 2 +- src/services/experiment.jl | 4 ++-- src/services/logger.jl | 4 ++-- src/services/misc.jl | 12 +++++------ src/services/model_version.jl | 10 ++++----- src/services/registered_model.jl | 6 +++--- src/services/run.jl | 19 +++++++++-------- src/types/dataset.jl | 8 ++++---- src/types/enums.jl | 26 +++++++++++------------ src/types/experiment.jl | 2 +- src/types/mlflow.jl | 2 +- src/types/model_version.jl | 6 +++--- src/types/registered_model.jl | 8 ++++---- src/types/run.jl | 16 +++++++-------- src/types/tag.jl | 2 +- src/utils.jl | 34 +++++++++++++++---------------- test/services/experiment.jl | 2 +- test/services/logger.jl | 26 +++++++++++------------ test/services/registered_model.jl | 2 +- 22 files changed, 98 insertions(+), 98 deletions(-) diff --git a/Project.toml b/Project.toml index c59e57a..2456be1 100644 --- a/Project.toml +++ b/Project.toml @@ -23,4 +23,3 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["Test"] - diff --git a/docs/make.jl b/docs/make.jl index e6fcb8c..737fdcd 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,4 @@ -push!(LOAD_PATH,"../src/") +push!(LOAD_PATH, "../src/") using Documenter using MLFlowClient diff --git a/src/api.jl b/src/api.jl index 44236bf..13763be 100644 --- a/src/api.jl +++ b/src/api.jl @@ -9,7 +9,7 @@ MLFlowClient.uri(mlf, "experiments/get", Dict(:experiment_id=>10)) ``` """ uri(mlf::MLFlow, endpoint::String; - parameters::Dict{Symbol, <:Any}=Dict{Symbol, NumberOrString}()) = + parameters::Dict{Symbol,<:Any}=Dict{Symbol,NumberOrString}()) = URI("$(mlf.apiroot)/$(mlf.apiversion)/mlflow/$(endpoint)"; 
query=parameters) """ diff --git a/src/services/artifact.jl b/src/services/artifact.jl index 5a73613..5c8c7de 100644 --- a/src/services/artifact.jl +++ b/src/services/artifact.jl @@ -17,7 +17,7 @@ List artifacts for a [`Run`](@ref). - Token that can be used to retrieve the next page of artifact results. """ function listartifacts(instance::MLFlow, run_id::String; path::String="", - page_token::String="")::Tuple{String, Array{FileInfo}, Union{String, Nothing}} + page_token::String="")::Tuple{String,Array{FileInfo},Union{String,Nothing}} result = mlfget(instance, "artifacts/list"; run_id=run_id, path=path, page_token=page_token) diff --git a/src/services/experiment.jl b/src/services/experiment.jl index b0e99a3..766e77c 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -19,7 +19,7 @@ fails if another [`Experiment`](@ref) with the same name already exists. The ID of the newly created [`Experiment`](@ref). """ function createexperiment(instance::MLFlow, name::String; - artifact_location::Union{String, Missing}=missing, + artifact_location::Union{String,Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[])::String result = mlfpost(instance, "experiments/create"; name=name, artifact_location=artifact_location, tags=parse(Tag, tags)) @@ -164,7 +164,7 @@ updateexperiment(instance::MLFlow, experiment::Experiment, new_name::String)::Bo """ function searchexperiments(instance::MLFlow; max_results::Int64=20000, page_token::String="", filter::String="", order_by::Array{String}=String[], - view_type::ViewType=ACTIVE_ONLY)::Tuple{Array{Experiment}, Union{String, Nothing}} + view_type::ViewType=ACTIVE_ONLY)::Tuple{Array{Experiment},Union{String,Nothing}} parameters = (; max_results, page_token, filter, :view_type => view_type |> Integer) if order_by |> !isempty diff --git a/src/services/logger.jl b/src/services/logger.jl index ea1f57a..43c2a39 100644 --- a/src/services/logger.jl +++ b/src/services/logger.jl @@ -23,14 +23,14 @@ represent ML model accuracy. 
A [`Metric`](@ref) can be logged multiple times. """ function logmetric(instance::MLFlow, run_id::String, key::String, value::Float64; timestamp::Int64=round(Int, now() |> datetime2unix), - step::Union{Int64, Missing}=missing)::Bool + step::Union{Int64,Missing}=missing)::Bool mlfpost(instance, "runs/log-metric"; run_id=run_id, key=key, value=value, timestamp=timestamp, step=step) return true end logmetric(instance::MLFlow, run::Run, key::String, value::Float64; timestamp::Int64=round(Int, now() |> datetime2unix), - step::Union{Int64, Missing}=missing)::Bool = + step::Union{Int64,Missing}=missing)::Bool = logmetric(instance, run.info.run_id, key, value; timestamp=timestamp, step=step) logmetric(instance::MLFlow, run_id::String, metric::Metric)::Bool = logmetric(instance, run_id, metric.key, metric.value, timestamp=metric.timestamp, diff --git a/src/services/misc.jl b/src/services/misc.jl index 35f7a11..c7a023a 100644 --- a/src/services/misc.jl +++ b/src/services/misc.jl @@ -18,8 +18,8 @@ Get a list of all values for the specified [`Metric`](@ref) for a given [`Run`]( - The next page token if there are more results. """ function getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; - page_token::String="", max_results::Union{Int64, Missing}=missing -)::Tuple{Array{Metric}, Union{String, Nothing}} + page_token::String="", max_results::Union{Int64,Missing}=missing +)::Tuple{Array{Metric},Union{String,Nothing}} result = mlfget(instance, "metrics/get-history"; run_id=run_id, metric_key=metric_key, page_token=page_token, max_results=(ismissing(max_results) ? 
max_results : (max_results |> Int32))) @@ -30,13 +30,13 @@ function getmetrichistory(instance::MLFlow, run_id::String, metric_key::String; return metrics, next_page_token end getmetrichistory(instance::MLFlow, run::Run, metric_key::String; page_token::String="", - max_results::Union{Int64, Missing}=missing -)::Tuple{Array{Metric}, Union{String, Nothing}} = + max_results::Union{Int64,Missing}=missing +)::Tuple{Array{Metric},Union{String,Nothing}} = getmetrichistory(instance, run.info.run_id, metric_key; page_token=page_token, max_results=max_results) getmetrichistory(instance::MLFlow, run::Run, metric::Metric; page_token::String="", - max_results::Union{Int64, Missing}=missing -)::Tuple{Array{Metric}, Union{String, Nothing}} = + max_results::Union{Int64,Missing}=missing +)::Tuple{Array{Metric},Union{String,Nothing}} = getmetrichistory(instance, run.info.run_id, metric.key; page_token=page_token, max_results=max_results) diff --git a/src/services/model_version.jl b/src/services/model_version.jl index 9ff8ba4..2bf0ff0 100644 --- a/src/services/model_version.jl +++ b/src/services/model_version.jl @@ -35,9 +35,9 @@ end [`ModelVersion`](@ref) created. """ function createmodelversion(instance::MLFlow, name::String, source::String; - run_id::Union{String, Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[], - run_link::Union{String, Missing}=missing, - description::Union{String, Missing}=missing)::ModelVersion + run_id::Union{String,Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[], + run_link::Union{String,Missing}=missing, + description::Union{String,Missing}=missing)::ModelVersion result = mlfpost(instance, "model-versions/create"; name=name, source=source, run_id=run_id, tags=parse(Tag, tags), run_link=run_link, description=description) return result["model_version"] |> ModelVersion @@ -73,7 +73,7 @@ end [`ModelVersion`](@ref) generated for this model in registry. 
""" function updatemodelversion(instance::MLFlow, name::String, version::String; - description::Union{String, Missing}=missing)::ModelVersion + description::Union{String,Missing}=missing)::ModelVersion result = mlfpatch(instance, "model-versions/update"; name=name, version=version, description=description) return result["model_version"] |> ModelVersion @@ -114,7 +114,7 @@ end """ function searchmodelversions(instance::MLFlow; filter::String="", max_results::Int64=200000, order_by::Array{String}=String[], - page_token::String="")::Tuple{Array{ModelVersion}, Union{String, Nothing}} + page_token::String="")::Tuple{Array{ModelVersion},Union{String,Nothing}} parameters = (; max_results, page_token, filter) if order_by |> !isempty diff --git a/src/services/registered_model.jl b/src/services/registered_model.jl index d547d74..f178a85 100644 --- a/src/services/registered_model.jl +++ b/src/services/registered_model.jl @@ -18,7 +18,7 @@ An instance of type [`RegisteredModel`](@ref). """ function createregisteredmodel(instance::MLFlow, name::String; tags::MLFlowUpsertData{Tag}=Tag[], - description::Union{String, Missing}=missing)::RegisteredModel + description::Union{String,Missing}=missing)::RegisteredModel result = mlfpost(instance, "registered-models/create"; name=name, tags=parse(Tag, tags), description=description) return result["registered_model"] |> RegisteredModel @@ -69,7 +69,7 @@ end An instance of type [`RegisteredModel`](@ref). 
""" function updateregisteredmodel(instance::MLFlow, name::String; - description::Union{String, Missing}=missing)::RegisteredModel + description::Union{String,Missing}=missing)::RegisteredModel result = mlfpatch(instance, "registered-models/update"; name=name, description=description) return result["registered_model"] |> RegisteredModel @@ -109,7 +109,7 @@ end """ function searchregisteredmodels(instance::MLFlow; filter::String="", max_results::Int64=100, order_by::Array{String}=String[], - page_token::String="")::Tuple{Array{RegisteredModel}, Union{String, Nothing}} + page_token::String="")::Tuple{Array{RegisteredModel},Union{String,Nothing}} parameters = (; max_results, page_token, filter) if order_by |> !isempty diff --git a/src/services/run.jl b/src/services/run.jl index caaef6e..c2d5b5c 100644 --- a/src/services/run.jl +++ b/src/services/run.jl @@ -18,19 +18,19 @@ single execution of a machine learning or data ETL pipeline. An instance of type [`Run`](@ref). """ function createrun(instance::MLFlow, experiment_id::String; - run_name::Union{String, Missing}=missing, start_time::Union{Int64, Missing}=missing, + run_name::Union{String,Missing}=missing, start_time::Union{Int64,Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[])::Run result = mlfpost(instance, "runs/create"; experiment_id=experiment_id, run_name=run_name, start_time=start_time, tags=parse(Tag, tags)) return result["run"] |> Run end createrun(instance::MLFlow, experiment_id::Integer; - run_name::Union{String, Missing}=missing, start_time::Union{Integer, Missing}=missing, + run_name::Union{String,Missing}=missing, start_time::Union{Integer,Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[])::Run = createrun(instance, string(experiment_id); run_name=run_name, start_time=start_time, tags=tags) createrun(instance::MLFlow, experiment::Experiment; - run_name::Union{String, Missing}=missing, start_time::Union{Integer, Missing}=missing, + run_name::Union{String,Missing}=missing, 
start_time::Union{Integer,Missing}=missing, tags::MLFlowUpsertData{Tag}=Tag[])::Run = createrun(instance, string(experiment.experiment_id); run_name=run_name, start_time=start_time, tags=tags) @@ -111,7 +111,8 @@ Set a [`Tag`](@ref) on a [`Run`](@ref). # Returns `true` if successful. Otherwise, raises exception. """ -function setruntag(instance::MLFlow, run_id::String, key::String, value::String):Bool +function setruntag(instance::MLFlow, run_id::String, key::String, value::String) + :Bool mlfpost(instance, "runs/set-tag"; run_id=run_id, key=key, value=value) return true end @@ -171,7 +172,7 @@ Search for runs that satisfy expressions. Search expressions can use [`Metric`]( function searchruns(instance::MLFlow; experiment_ids::Array{String}=String[], filter::String="", run_view_type::ViewType=ACTIVE_ONLY, max_results::Int=1000, order_by::Array{String}=String[], - page_token::String="")::Tuple{Array{Run}, Union{String, Nothing}} + page_token::String="")::Tuple{Array{Run},Union{String,Nothing}} parameters = (; experiment_ids, filter, :run_view_type => run_view_type |> Integer, max_results, page_token) @@ -206,13 +207,13 @@ Update [`Run`](@ref) metadata. - An instance of type [`RunInfo`](@ref) with the updated metadata. 
""" function updaterun(instance::MLFlow, run_id::String; - status::Union{RunStatus, Missing}=missing, end_time::Union{Int64, Missing}=missing, - run_name::Union{String, Missing})::RunInfo + status::Union{RunStatus,Missing}=missing, end_time::Union{Int64,Missing}=missing, + run_name::Union{String,Missing})::RunInfo result = mlfpost(instance, "runs/update"; run_id=run_id, status=(status |> Integer), end_time=end_time, run_name=run_name) return result["run_info"] |> RunInfo end -updaterun(instance::MLFlow, run::Run; status::Union{RunStatus, Missing}=missing, - end_time::Union{Int64, Missing}=missing, run_name::Union{String, Missing})::RunInfo = +updaterun(instance::MLFlow, run::Run; status::Union{RunStatus,Missing}=missing, + end_time::Union{Int64,Missing}=missing, run_name::Union{String,Missing})::RunInfo = updaterun(instance, run.info.run_id; status=status, end_time=end_time, run_name=run_name) diff --git a/src/types/dataset.jl b/src/types/dataset.jl index 011fb33..5ff967a 100644 --- a/src/types/dataset.jl +++ b/src/types/dataset.jl @@ -17,10 +17,10 @@ struct Dataset digest::String source_type::String source::String - schema::Union{String, Nothing} - profile::Union{String, Nothing} + schema::Union{String,Nothing} + profile::Union{String,Nothing} end -Dataset(data::Dict{String, Any}) = Dataset(data["name"], data["digest"], +Dataset(data::Dict{String,Any}) = Dataset(data["name"], data["digest"], data["source_type"], data["source"], get(data, "schema", nothing), get(data, "profile", nothing)) Base.show(io::IO, t::Dataset) = show(io, ShowCase(t, new_lines=true)) @@ -38,6 +38,6 @@ struct DatasetInput tags::Array{Tag} dataset::Dataset end -DatasetInput(data::Dict{String, Any}) = DatasetInput( +DatasetInput(data::Dict{String,Any}) = DatasetInput( [Tag(tag) for tag in get(data, "tags", [])], Dataset(data["dataset"])) Base.show(io::IO, t::DatasetInput) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/enums.jl b/src/types/enums.jl index 70cb598..1ad1dcd 100644 
--- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -8,11 +8,11 @@ - `READY`: Model version is ready for use. """ @enum ModelVersionStatus begin - PENDING_REGISTRATION=1 - FAILED_REGISTRATION=2 - READY=3 + PENDING_REGISTRATION = 1 + FAILED_REGISTRATION = 2 + READY = 3 end -ModelVersionStatus(status::String) = Dict(value => key for (key, value) in ModelVersionStatus |> Base.Enums.namemap)[status |> Symbol] |> ModelVersionStatus +ModelVersionStatus(status::String) = Dict(value => key for (key, value) in ModelVersionStatus |> Base.Enums.namemap)[status|>Symbol] |> ModelVersionStatus """ RunStatus @@ -27,13 +27,13 @@ Status of a run. - `KILLED`: Run killed by user. """ @enum RunStatus begin - RUNNING=1 - SCHEDULED=2 - FINISHED=3 - FAILED=4 - KILLED=5 + RUNNING = 1 + SCHEDULED = 2 + FINISHED = 3 + FAILED = 4 + KILLED = 5 end -RunStatus(status::String) = Dict(value => key for (key, value) in RunStatus |> Base.Enums.namemap)[status |> Symbol] |> RunStatus +RunStatus(status::String) = Dict(value => key for (key, value) in RunStatus |> Base.Enums.namemap)[status|>Symbol] |> RunStatus """ ViewType @@ -46,7 +46,7 @@ View type for ListExperiments query. - `ALL`: Get all experiments. 
""" @enum ViewType begin - ACTIVE_ONLY=1 - DELETED_ONLY=2 - ALL=3 + ACTIVE_ONLY = 1 + DELETED_ONLY = 2 + ALL = 3 end diff --git a/src/types/experiment.jl b/src/types/experiment.jl index 105a981..b480122 100644 --- a/src/types/experiment.jl +++ b/src/types/experiment.jl @@ -20,7 +20,7 @@ struct Experiment creation_time::Int64 tags::Array{Tag} end -Experiment(data::Dict{String, Any}) = Experiment(data["experiment_id"], data["name"], +Experiment(data::Dict{String,Any}) = Experiment(data["experiment_id"], data["name"], data["artifact_location"], data["lifecycle_stage"], data["last_update_time"], data["creation_time"], [Tag(tag) for tag in get(data, "tags", [])]) Base.show(io::IO, t::Experiment) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/mlflow.jl b/src/types/mlflow.jl index ff0a282..111232e 100644 --- a/src/types/mlflow.jl +++ b/src/types/mlflow.jl @@ -32,6 +32,6 @@ MLFlow(; apiroot="http://localhost:5000/api", apiversion=2.0, headers=Dict()) = ENV["MLFLOW_TRACKING_URI"] : apiroot), apiversion, headers) Base.show(io::IO, t::MLFlow) = - show(io, ShowCase(t, [:apiroot,:apiversion], new_lines=true)) + show(io, ShowCase(t, [:apiroot, :apiversion], new_lines=true)) abstract type LoggingData end diff --git a/src/types/model_version.jl b/src/types/model_version.jl index d2993fa..a0542c7 100644 --- a/src/types/model_version.jl +++ b/src/types/model_version.jl @@ -27,18 +27,18 @@ struct ModelVersion version::String creation_timestamp::Int64 last_updated_timestamp::Int64 - user_id::Union{String, Nothing} + user_id::Union{String,Nothing} current_stage::String description::String source::String run_id::String status::ModelVersionStatus - status_message::Union{String, Nothing} + status_message::Union{String,Nothing} tags::Array{Tag} run_link::String aliases::Array{String} end -ModelVersion(data::Dict{String, Any}) = ModelVersion(data["name"], data["version"], +ModelVersion(data::Dict{String,Any}) = ModelVersion(data["name"], data["version"], 
data["creation_timestamp"], data["last_updated_timestamp"], get(data, "user_id", nothing), data["current_stage"], data["description"], data["source"], data["run_id"], ModelVersionStatus(data["status"]), diff --git a/src/types/registered_model.jl b/src/types/registered_model.jl index 0d909c4..71da3fa 100644 --- a/src/types/registered_model.jl +++ b/src/types/registered_model.jl @@ -11,7 +11,7 @@ struct RegisteredModelAlias alias::String version::String end -RegisteredModelAlias(data::Dict{String, Any}) = RegisteredModelAlias(data["alias"], +RegisteredModelAlias(data::Dict{String,Any}) = RegisteredModelAlias(data["alias"], data["version"]) Base.show(io::IO, t::RegisteredModelAlias) = show(io, ShowCase(t, new_lines=true)) @@ -35,13 +35,13 @@ struct RegisteredModel name::String creation_timestamp::Int64 last_updated_timestamp::Int64 - user_id::Union{String, Nothing} - description::Union{String, Nothing} + user_id::Union{String,Nothing} + description::Union{String,Nothing} latest_versions::Array{ModelVersion} tags::Array{Tag} aliases::Array{RegisteredModelAlias} end -RegisteredModel(data::Dict{String, Any}) = RegisteredModel(data["name"], +RegisteredModel(data::Dict{String,Any}) = RegisteredModel(data["name"], data["creation_timestamp"], data["last_updated_timestamp"], get(data, "user_id", nothing), get(data, "description", nothing), [ModelVersion(version) for version in get(data, "latest_versions", [])], diff --git a/src/types/run.jl b/src/types/run.jl index 4b0e94c..92ff484 100644 --- a/src/types/run.jl +++ b/src/types/run.jl @@ -13,9 +13,9 @@ struct Metric <: LoggingData key::String value::Float64 timestamp::Int64 - step::Union{Int64, Nothing} + step::Union{Int64,Nothing} end -Metric(data::Dict{String, Any}) = Metric(data["key"], data["value"], data["timestamp"], +Metric(data::Dict{String,Any}) = Metric(data["key"], data["value"], data["timestamp"], data["step"]) Base.show(io::IO, t::Metric) = show(io, ShowCase(t, new_lines=true)) @@ -32,7 +32,7 @@ struct Param <: 
LoggingData key::String value::String end -Param(data::Dict{String, Any}) = Param(data["key"], data["value"]) +Param(data::Dict{String,Any}) = Param(data["key"], data["value"]) Base.show(io::IO, t::Param) = show(io, ShowCase(t, new_lines=true)) """ @@ -60,11 +60,11 @@ struct RunInfo experiment_id::String status::RunStatus start_time::Int64 - end_time::Union{Int64, Nothing} + end_time::Union{Int64,Nothing} artifact_uri::String lifecycle_stage::String end -RunInfo(data::Dict{String, Any}) = RunInfo(data["run_id"], data["run_name"], +RunInfo(data::Dict{String,Any}) = RunInfo(data["run_id"], data["run_name"], data["experiment_id"], RunStatus(data["status"]), data["start_time"], get(data, "end_time", nothing), data["artifact_uri"], data["lifecycle_stage"]) Base.show(io::IO, t::RunInfo) = show(io, ShowCase(t, new_lines=true)) @@ -84,7 +84,7 @@ struct RunData params::Array{Param} tags::Array{Tag} end -RunData(data::Dict{String, Any}) = RunData( +RunData(data::Dict{String,Any}) = RunData( [Metric(metric) for metric in get(data, "metrics", [])], [Param(param) for param in get(data, "params", [])], [Tag(tag) for tag in get(data, "tags", [])]) @@ -101,7 +101,7 @@ Run inputs. 
struct RunInputs dataset_inputs::Array{DatasetInput} end -RunInputs(data::Dict{String, Any}) = RunInputs( +RunInputs(data::Dict{String,Any}) = RunInputs( [DatasetInput(dataset_input) for dataset_input in get(data, "dataset_inputs", [])]) Base.show(io::IO, t::RunInputs) = show(io, ShowCase(t, new_lines=true)) @@ -120,6 +120,6 @@ struct Run data::RunData inputs::RunInputs end -Run(data::Dict{String, Any}) = Run(RunInfo(data["info"]), RunData(data["data"]), +Run(data::Dict{String,Any}) = Run(RunInfo(data["info"]), RunData(data["data"]), RunInputs(data["inputs"])) Base.show(io::IO, t::Run) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/tag.jl b/src/types/tag.jl index 5dffe63..5eae9af 100644 --- a/src/types/tag.jl +++ b/src/types/tag.jl @@ -11,5 +11,5 @@ struct Tag <: LoggingData key::String value::String end -Tag(data::Dict{String, Any})::Tag = Tag(data["key"], data["value"] |> string) +Tag(data::Dict{String,Any})::Tag = Tag(data["key"], data["value"] |> string) Base.show(io::IO, t::Tag) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/utils.jl b/src/utils.jl index c6993a7..7543b4c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,13 +1,13 @@ -const NumberOrString = Union{Number, String} -const MLFlowUpsertData{T} = Union{Array{T}, Array{<:Dict{String, <:Any}}, - Dict{String, <:NumberOrString}, Array{<:Pair{String, <:NumberOrString}}, - Array{<:Tuple{String, <:NumberOrString}}} +const NumberOrString = Union{Number,String} +const MLFlowUpsertData{T} = Union{Array{T},Array{<:Dict{String,<:Any}}, + Dict{String,<:NumberOrString},Array{<:Pair{String,<:NumberOrString}}, + Array{<:Tuple{String,<:NumberOrString}}} function dict_to_T_array(::Type{T}, - dict::Dict{String, <:NumberOrString}) where T<:LoggingData + dict::Dict{String,<:NumberOrString}) where {T<:LoggingData} entities = T[] for (key, value) in dict - if T<:Metric + if T <: Metric push!(entities, Metric(key, Float64(value), round(Int, now() |> datetime2unix), nothing)) else @@ -18,11 +18,11 
@@ function dict_to_T_array(::Type{T}, return entities end -function pairarray_to_T_array(::Type{T}, pair_array::Array{<:Pair}) where T<:LoggingData +function pairarray_to_T_array(::Type{T}, pair_array::Array{<:Pair}) where {T<:LoggingData} entities = T[] for pair in pair_array key = pair.first |> string - if T<:Metric + if T <: Metric value = pair.second push!(entities, Metric(key, Float64(value), round(Int, now() |> datetime2unix), nothing)) @@ -36,7 +36,7 @@ function pairarray_to_T_array(::Type{T}, pair_array::Array{<:Pair}) where T<:Log end function tuplearray_to_T_array(::Type{T}, - tuple_array::Array{<:Tuple{String, <:NumberOrString}}) where T<:LoggingData + tuple_array::Array{<:Tuple{String,<:NumberOrString}}) where {T<:LoggingData} entities = T[] for tuple in tuple_array if length(tuple) != 2 @@ -44,7 +44,7 @@ function tuplearray_to_T_array(::Type{T}, end key = tuple |> first |> string - if T<: Metric + if T <: Metric value = tuple |> last push!(entities, Metric(key, Float64(value), round(Int, now() |> datetime2unix), nothing)) @@ -58,11 +58,11 @@ function tuplearray_to_T_array(::Type{T}, end function dictarray_to_T_array(::Type{T}, - dict_array::Array{<:Dict{String, <:Any}}) where T<:LoggingData + dict_array::Array{<:Dict{String,<:Any}}) where {T<:LoggingData} entities = T[] for dict in dict_array key = dict["key"] |> string - if T<:Metric + if T <: Metric value = Float64(dict["value"]) if haskey(dict, "timestamp") timestamp = dict["timestamp"] @@ -79,14 +79,14 @@ function dictarray_to_T_array(::Type{T}, return entities end -function parse(::Type{T}, entities::MLFlowUpsertData{T}) where T<:LoggingData - if entities isa Dict{String, <:NumberOrString} +function parse(::Type{T}, entities::MLFlowUpsertData{T}) where {T<:LoggingData} + if entities isa Dict{String,<:NumberOrString} return dict_to_T_array(T, entities) - elseif entities isa Array{<:Dict{String, <:Any}} + elseif entities isa Array{<:Dict{String,<:Any}} return dictarray_to_T_array(T, entities) - 
elseif entities isa Array{<:Pair{String, <:NumberOrString}} + elseif entities isa Array{<:Pair{String,<:NumberOrString}} return pairarray_to_T_array(T, entities) - elseif entities isa Array{<:Tuple{String, <:NumberOrString}} + elseif entities isa Array{<:Tuple{String,<:NumberOrString}} return tuplearray_to_T_array(T, entities) end return entities diff --git a/test/services/experiment.jl b/test/services/experiment.jl index e020f8d..66368df 100644 --- a/test/services/experiment.jl +++ b/test/services/experiment.jl @@ -42,7 +42,7 @@ end @testset verbose = true "get experiment" begin @ensuremlf experiment_name = UUIDs.uuid4() |> string - artifact_location="test_location" + artifact_location = "test_location" tags = [Tag("test_key", "test_value")] experiment_id = createexperiment(mlf, experiment_name; artifact_location=artifact_location, tags=tags) diff --git a/test/services/logger.jl b/test/services/logger.jl index 4430673..32bad21 100644 --- a/test/services/logger.jl +++ b/test/services/logger.jl @@ -6,7 +6,7 @@ @testset "with run id as string" begin run = createrun(mlf, experiment_id) logmetric(mlf, run.info.run_id, "missy", 0.9) - + run = refresh(mlf, run) last_metric = run.data.metrics |> last @@ -19,7 +19,7 @@ @testset "with run" begin run = createrun(mlf, experiment_id) logmetric(mlf, run, "gala", 0.1) - + run = refresh(mlf, run) last_metric = run.data.metrics |> last @@ -32,7 +32,7 @@ @testset "with run id as string and metric" begin run = createrun(mlf, experiment_id) logmetric(mlf, run.info.run_id, Metric("missy", 0.9, 123, 1)) - + run = refresh(mlf, run) last_metric = run.data.metrics |> last @@ -47,7 +47,7 @@ @testset "with run and metric" begin run = createrun(mlf, experiment_id) logmetric(mlf, run, Metric("gala", 0.1, 123, 1)) - + run = refresh(mlf, run) last_metric = run.data.metrics |> last @@ -70,7 +70,7 @@ end @testset "with run id as string" begin run = createrun(mlf, experiment_id) logbatch(mlf, run.info.run_id; metrics=[("gala", 0.1)]) - + run = 
refresh(mlf, run) last_metric = run.data.metrics |> last @@ -104,7 +104,7 @@ end last_param = run.data.params |> last last_tag = run.data.tags[ findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] - + @test last_metric isa Metric @test last_metric.key == "ana" @test last_metric.value == 0.5 @@ -130,7 +130,7 @@ end last_param = run.data.params |> last last_tag = run.data.tags[ findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] - + @test last_metric isa Metric @test last_metric.key == "ana" @test last_metric.value == 0.5 @@ -156,7 +156,7 @@ end last_param = run.data.params |> last last_tag = run.data.tags[ findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] - + @test last_metric isa Metric @test last_metric.key == "ana" @test last_metric.value == 0.5 @@ -184,7 +184,7 @@ end last_param = run.data.params |> last last_tag = run.data.tags[ findall(x -> !occursin("mlflow.runName", x.key), run.data.tags)[1]] - + @test last_metric isa Metric @test last_metric.key == "ana" @test last_metric.value == 0.5 @@ -247,7 +247,7 @@ end @testset "with run id as string" begin run = createrun(mlf, experiment_id) logparam(mlf, run.info.run_id, "missy", "0.9") - + run = refresh(mlf, run) last_param = run.data.params |> last @@ -260,7 +260,7 @@ end @testset "with run" begin run = createrun(mlf, experiment_id) logparam(mlf, run, "gala", "0.1") - + run = refresh(mlf, run) last_param = run.data.params |> last @@ -273,7 +273,7 @@ end @testset "with run id as string and param" begin run = createrun(mlf, experiment_id) logparam(mlf, run.info.run_id, Param("missy", "0.9")) - + run = refresh(mlf, run) last_param = run.data.params |> last @@ -286,7 +286,7 @@ end @testset "with run and param" begin run = createrun(mlf, experiment_id) logparam(mlf, run, Param("gala", "0.1")) - + run = refresh(mlf, run) last_param = run.data.params |> last diff --git a/test/services/registered_model.jl b/test/services/registered_model.jl index 9b03163..a5fa9af 100644 --- 
a/test/services/registered_model.jl +++ b/test/services/registered_model.jl @@ -52,7 +52,7 @@ end @testset verbose = true "rename registered model" begin @ensuremlf - + registered_model = createregisteredmodel(mlf, "missy"; description="gala") renamed_registered_model = renameregisteredmodel(mlf, registered_model.name, "gala") From 99b8eb8ea2d9f16960f12b961926758fa229a37f Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 09:22:41 -0500 Subject: [PATCH 27/31] Adding authentication data structures --- docs/src/reference/types.md | 4 ++++ src/MLFlowClient.jl | 9 ++++++--- src/types/enums.jl | 18 ++++++++++++++++++ src/types/experiment.jl | 17 +++++++++++++++++ src/types/registered_model.jl | 17 +++++++++++++++++ src/types/user.jl | 19 +++++++++++++++++++ 6 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 src/types/user.jl diff --git a/docs/src/reference/types.md b/docs/src/reference/types.md index e7db289..b6782ba 100644 --- a/docs/src/reference/types.md +++ b/docs/src/reference/types.md @@ -18,4 +18,8 @@ Metric RunData RunInfo RunInputs +User +Permission +ExperimentPermission +RegisteredModelPermission ``` diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 8eccae3..c8d108d 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -25,7 +25,7 @@ include("types/tag.jl") export Tag include("types/enums.jl") -export ViewType, RunStatus, ModelVersionStatus +export ViewType, RunStatus, ModelVersionStatus, Permission include("types/dataset.jl") export Dataset, DatasetInput @@ -37,14 +37,17 @@ include("types/model_version.jl") export ModelVersion include("types/registered_model.jl") -export RegisteredModel, RegisteredModelAlias +export RegisteredModel, RegisteredModelAlias, RegisteredModelPermission include("types/experiment.jl") -export Experiment +export Experiment, ExperimentPermission include("types/run.jl") export Run, Param, Metric, RunData, RunInfo, RunInputs +include("types/user.jl") +export User + include("api.jl") 
include("utils.jl") diff --git a/src/types/enums.jl b/src/types/enums.jl index 1ad1dcd..945de64 100644 --- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -50,3 +50,21 @@ View type for ListExperiments query. DELETED_ONLY = 2 ALL = 3 end + +""" + Permission + +Permission of a user to an experiment or a registered model. + +# Members +- `READ`: Can read. +- `EDIT`: Can read and update. +- `MANAGE`: Can read, update, delete and manage. +- `NO_PERMISSIONS`: No permissions. +""" +@enum Permission begin + READ = 1 + EDIT = 2 + MANAGE = 3 + NO_PERMISSIONS = 4 +end diff --git a/src/types/experiment.jl b/src/types/experiment.jl index b480122..c7e8b89 100644 --- a/src/types/experiment.jl +++ b/src/types/experiment.jl @@ -24,3 +24,20 @@ Experiment(data::Dict{String,Any}) = Experiment(data["experiment_id"], data["nam data["artifact_location"], data["lifecycle_stage"], data["last_update_time"], data["creation_time"], [Tag(tag) for tag in get(data, "tags", [])]) Base.show(io::IO, t::Experiment) = show(io, ShowCase(t, new_lines=true)) + +""" + ExperimentPermission + +# Fields +- `experiment_id::String`: [`Experiment`](@ref) id. +- `user_id::String`: [`User`](@ref) id. +- `permission::Permission`: [`Permission`](@ref) granted. 
+""" +struct ExperimentPermission + experiment_id::String + user_id::String + permission::Permission +end +ExperimentPermission(data::Dict{String,Any}) = ExperimentPermission(data["experiment_id"], + data["user_id"], Permission(data["permission"])) +Base.show(io::IO, t::ExperimentPermission) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/registered_model.jl b/src/types/registered_model.jl index 71da3fa..fbfc0ad 100644 --- a/src/types/registered_model.jl +++ b/src/types/registered_model.jl @@ -48,3 +48,20 @@ RegisteredModel(data::Dict{String,Any}) = RegisteredModel(data["name"], [Tag(tag) for tag in get(data, "tags", [])], [RegisteredModelAlias(alias) for alias in get(data, "aliases", [])]) Base.show(io::IO, t::RegisteredModel) = show(io, ShowCase(t, new_lines=true)) + +""" + RegisteredModelPermission + +# Fields +- `name::String`: [`RegisteredModel`](@ref) name. +- `user_id::String`: [`User`](@ref) id. +- `permission::Permission`: [`Permission`](@ref) granted. +""" +struct RegisteredModelPermission + name::String + user_id::String + permission::Permission +end +RegisteredModelPermission(data::Dict{String,Any}) = RegisteredModelPermission(data["name"], + data["user_id"], Permission(data["permission"])) +Base.show(io::IO, t::RegisteredModelPermission) = show(io, ShowCase(t, new_lines=true)) diff --git a/src/types/user.jl b/src/types/user.jl new file mode 100644 index 0000000..fc2c113 --- /dev/null +++ b/src/types/user.jl @@ -0,0 +1,19 @@ +""" + User + +# Fields +- `id::String`: User ID. +- `username::String`: Username. +- `is_admin::Bool`: Whether the user is an admin. +- `experiment_permissions::Array{ExperimentPermission}`: All experiment permissions + explicitly granted to the user. +- `registered_model_permissions::Array{RegisteredModelPermission}`: All registered model + permissions explicitly granted to the user. 
+""" +struct User + id::String + username::String + is_admin::Bool + experiment_permissions::Array{ExperimentPermission} + registered_model_permissions::Array{RegisteredModelPermission} +end From 30ea1fcbc19e79a5a67fa1613879b7e2b0cd84f9 Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 11:16:32 -0500 Subject: [PATCH 28/31] Adding user authentication functionality and including authorization in CI pipeline --- .github/workflows/CI.yml | 2 +- .gitignore | 1 + Project.toml | 3 +- src/MLFlowClient.jl | 3 ++ src/services/user.jl | 77 ++++++++++++++++++++++++++++++++++++++++ src/types/user.jl | 4 +++ test/base.jl | 8 +++-- test/runtests.jl | 1 + test/services/user.jl | 62 ++++++++++++++++++++++++++++++++ 9 files changed, 156 insertions(+), 5 deletions(-) create mode 100644 src/services/user.jl create mode 100644 test/services/user.jl diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c6a105e..5698431 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -34,7 +34,7 @@ jobs: - name: Setup mlflow locally run: | pip install -r ./requirements.txt - python3 /opt/hostedtoolcache/Python/3.10.13/x64/bin/mlflow server --host 0.0.0.0 --port 5000 & + python3 /opt/hostedtoolcache/Python/3.10.13/x64/bin/mlflow server --app-name basic-auth --host 0.0.0.0 --port 5000 & sleep 5 - uses: julia-actions/setup-julia@v1 with: diff --git a/.gitignore b/.gitignore index 87a8c44..24e4fdd 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ mlruns coverage Pipfile Pipfile.lock +*.db diff --git a/Project.toml b/Project.toml index 2456be1..f069775 100644 --- a/Project.toml +++ b/Project.toml @@ -19,7 +19,8 @@ URIs = "1.0" julia = "1.0" [extras] +Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Base64", "Test"] diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index c8d108d..3df2c29 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -81,4 +81,7 
@@ export getlatestmodelversions, getmodelversion, createmodelversion, deletemodelv transitionmodelversionstage, setmodelversiontag, deletemodelversiontag, getmodelversionbyalias +include("services/user.jl") +export createuser, getuser, updateuserpassword, updateuseradmin, deleteuser + end diff --git a/src/services/user.jl b/src/services/user.jl new file mode 100644 index 0000000..be0f23f --- /dev/null +++ b/src/services/user.jl @@ -0,0 +1,77 @@ +""" + createuser(instance::MLFlow, username::String, password::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `username`: Username. +- `password`: Password. + +# Returns +An [`User`](@ref) object. +""" +function createuser(instance::MLFlow, username::String, password::String)::User + result = mlfpost(instance, "users/create"; username=username, password=password) + return result["user"] |> User +end + +""" + getuser(instance::MLFlow, username::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `username`: Username. + +# Returns +An [`User`](@ref) object. +""" +function getuser(instance::MLFlow, username::String)::User + result = mlfget(instance, "users/get"; username=username) + return result["user"] |> User +end + +""" + deleteuser(instance::MLFlow, username::String, password::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `username`: Username. +- `password`: New password. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function updateuserpassword(instance::MLFlow, username::String, password::String)::Bool + mlfpatch(instance, "users/update-password"; username=username, password=password) + return true +end + +""" + updateuseradmin(instance::MLFlow, username::String, is_admin::Bool) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `username`: Username. +- `is_admin`: New admin status. + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function updateuseradmin(instance::MLFlow, username::String, is_admin::Bool)::Bool + mlfpatch(instance, "users/update-admin"; username=username, is_admin=is_admin) + return true +end + +""" + deleteuser(instance::MLFlow, username::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `username`: Username. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function deleteuser(instance::MLFlow, username::String)::Bool + mlfdelete(instance, "users/delete"; username=username) + return true +end diff --git a/src/types/user.jl b/src/types/user.jl index fc2c113..038ffae 100644 --- a/src/types/user.jl +++ b/src/types/user.jl @@ -17,3 +17,7 @@ struct User experiment_permissions::Array{ExperimentPermission} registered_model_permissions::Array{RegisteredModelPermission} end +User(data::Dict{String,Any}) = User(data["id"] |> string, data["username"], data["is_admin"], + [ExperimentPermission(permission) for permission in get(data, "experiment_permissions", [])], + [RegisteredModelPermission(permission) for permission in get(data, "registered_model_permissions", [])]) +Base.show(io::IO, t::User) = show(io, ShowCase(t, new_lines=true)) diff --git a/test/base.jl b/test/base.jl index 2561ed0..8206d5c 100644 --- a/test/base.jl +++ b/test/base.jl @@ -1,7 +1,8 @@ -using MLFlowClient using Test -using UUIDs using Dates +using UUIDs +using Base64 +using MLFlowClient function mlflow_server_is_running(mlf::MLFlow) try @@ -16,7 +17,8 @@ end # skips test if mlflow is not available on default location, ENV["MLFLOW_TRACKING_URI"] macro ensuremlf() e = quote - mlf = MLFlow() + encoded_credentials = Base64.base64encode("admin:password") + mlf = MLFlow(headers=Dict("Authorization" => "Basic $(encoded_credentials)")) mlflow_server_is_running(mlf) || return nothing end eval(e) diff --git a/test/runtests.jl b/test/runtests.jl index 6e23148..6ccb44f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,3 +11,4 @@ include("services/artifact.jl") 
include("services/experiment.jl") include("services/registered_model.jl") include("services/model_version.jl") +include("services/user.jl") diff --git a/test/services/user.jl b/test/services/user.jl new file mode 100644 index 0000000..7e81ea8 --- /dev/null +++ b/test/services/user.jl @@ -0,0 +1,62 @@ +@testset verbose = true "create user" begin + @ensuremlf + + user = createuser(mlf, "missy", "gala") + + @test user isa User + @test user.username == "missy" + @test user.is_admin == false + + deleteuser(mlf, user.username) +end + +@testset verbose = true "get user" begin + @ensuremlf + + user = createuser(mlf, "missy", "gala") + + retrieved_user = getuser(mlf, "missy") + + @test retrieved_user isa User + @test retrieved_user.username == "missy" + @test retrieved_user.is_admin == false + + deleteuser(mlf, retrieved_user.username) +end + +@testset verbose = true "update user password" begin + @ensuremlf + + getmlfinstance(encoded_credentials::String) = + MLFlow(headers=Dict("Authorization" => "Basic $(encoded_credentials)")) + + user = createuser(mlf, "missy", "gala") + encoded_credentials = Base64.base64encode("$(user.username):gala") + + updateuserpassword(getmlfinstance(encoded_credentials), "missy", "ana") + encoded_credentials = Base64.base64encode("$(user.username):ana") + + @test_nowarn searchexperiments(getmlfinstance(encoded_credentials)) + deleteuser(mlf, user.username) +end + +@testset verbose = true "update user admin" begin + @ensuremlf + + user = createuser(mlf, "missy", "gala") + updateuseradmin(mlf, "missy", true) + + retrieved_user = getuser(mlf, "missy") + @test retrieved_user.is_admin == true + + deleteuser(mlf, retrieved_user.username) +end + +@testset verbose = true "delete user" begin + @ensuremlf + + user = createuser(mlf, "missy", "gala") + deleteuser(mlf, "missy") + + @test_throws ErrorException getuser(mlf, "missy") +end From 133333881b54d5a9c0d9a5312017f84a462d4eec Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 11:40:12 
-0500 Subject: [PATCH 29/31] Upgrading python and mlflow versions --- .github/workflows/CI.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5698431..241bb99 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -26,15 +26,15 @@ jobs: if: hashFiles('**/requirements.txt', '**/pyproject.toml') == '' run: | touch ./requirements.txt - echo "mlflow==2.17.2" > ./requirements.txt + echo "mlflow==2.20.1" > ./requirements.txt - uses: actions/setup-python@v4 with: - python-version: '3.10.13' + python-version: '3.12.3' cache: 'pip' - name: Setup mlflow locally run: | pip install -r ./requirements.txt - python3 /opt/hostedtoolcache/Python/3.10.13/x64/bin/mlflow server --app-name basic-auth --host 0.0.0.0 --port 5000 & + python3 /opt/hostedtoolcache/Python/3.12.3/x64/bin/mlflow server --app-name basic-auth --host 0.0.0.0 --port 5000 & sleep 5 - uses: julia-actions/setup-julia@v1 with: From 3602bd9b3d30fe831c76a0973fc1c5fedc44453b Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 13:23:15 -0500 Subject: [PATCH 30/31] Implementing `ExperimentPermission` operations --- docs/make.jl | 3 +- docs/src/reference/experiment.md | 4 + docs/src/reference/user.md | 8 ++ src/MLFlowClient.jl | 4 +- src/services/experiment.jl | 112 +++++++++++++++++++++ src/services/user.jl | 2 +- src/types/enums.jl | 1 + src/types/experiment.jl | 2 +- test/services/experiment.jl | 163 +++++++++++++++++++++++++++++++ 9 files changed, 295 insertions(+), 4 deletions(-) create mode 100644 docs/src/reference/user.md diff --git a/docs/make.jl b/docs/make.jl index 737fdcd..905a055 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -12,6 +12,7 @@ makedocs(; "Miscellaneous operations" => "reference/misc.md", "Run operations" => "reference/run.md", "Registered model operations" => "reference/registered_model.md", - "Model version operations" => "reference/model_version.md",]]) + "Model version operations" 
=> "reference/model_version.md", + "User operations" => "reference/user.md",]]) deploydocs(; repo="github.com/JuliaAI/MLFlowClient.jl", devbranch="main") diff --git a/docs/src/reference/experiment.md b/docs/src/reference/experiment.md index ba98435..ad3ced6 100644 --- a/docs/src/reference/experiment.md +++ b/docs/src/reference/experiment.md @@ -8,4 +8,8 @@ restoreexperiment updateexperiment searchexperiments setexperimenttag +createexperimentpermission +getexperimentpermission +updateexperimentpermission +deleteexperimentpermission ``` diff --git a/docs/src/reference/user.md b/docs/src/reference/user.md new file mode 100644 index 0000000..9fcc78f --- /dev/null +++ b/docs/src/reference/user.md @@ -0,0 +1,8 @@ +# User operations +```@docs +createuser +getuser +updateuserpassword +updateuseradmin +deleteuser +``` diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 3df2c29..972547f 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -54,7 +54,9 @@ include("utils.jl") include("services/experiment.jl") export getexperiment, createexperiment, deleteexperiment, setexperimenttag, - updateexperiment, restoreexperiment, searchexperiments, getexperimentbyname + updateexperiment, restoreexperiment, searchexperiments, getexperimentbyname, + createexperimentpermission, getexperimentpermission, updateexperimentpermission, + deleteexperimentpermission include("services/run.jl") export getrun, createrun, deleterun, setruntag, updaterun, restorerun, searchruns, diff --git a/src/services/experiment.jl b/src/services/experiment.jl index 766e77c..f321116 100644 --- a/src/services/experiment.jl +++ b/src/services/experiment.jl @@ -207,3 +207,115 @@ setexperimenttag(instance::MLFlow, experiment_id::Integer, key::String, setexperimenttag(instance::MLFlow, experiment::Experiment, key::String, value::String)::Bool = setexperimenttag(instance, experiment.experiment_id, key, value) + +""" + createexperimentpermission(instance::MLFlow, experiment_id::String, username::String, 
+ permission::Permission) + createexperimentpermission(instance::MLFlow, experiment_id::Integer, username::String, + permission::Permission) + createexperimentpermission(instance::MLFlow, experiment::Experiment, username::String, + permission::Permission) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `experiment_id`: [`Experiment`](@ref) id. +- `username`: [`User`](@ref) username. +- `permission`: [`Permission`](@ref) to grant. + +# Returns +An instance of type [`ExperimentPermission`](@ref). +""" +function createexperimentpermission(instance::MLFlow, experiment_id::String, + username::String, permission::Permission)::ExperimentPermission + result = mlfpost(instance, "experiments/permissions/create"; + experiment_id=experiment_id, username=username, permission=permission) + return result["experiment_permission"] |> ExperimentPermission +end +createexperimentpermission(instance::MLFlow, experiment_id::Integer, + username::String, permission::Permission)::ExperimentPermission = + createexperimentpermission(instance, experiment_id |> string, username, permission) +createexperimentpermission(instance::MLFlow, experiment::Experiment, + username::String, permission::Permission)::ExperimentPermission = + createexperimentpermission(instance, experiment.experiment_id, username, permission) + +""" + getexperimentpermission(instance::MLFlow, experiment_id::String, username::String) + getexperimentpermission(instance::MLFlow, experiment_id::Integer, username::String) + getexperimentpermission(instance::MLFlow, experiment::Experiment, username::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `experiment_id`: [`Experiment`](@ref) id. +- `username`: [`User`](@ref) username. + +# Returns +An instance of type [`ExperimentPermission`](@ref). 
+""" +function getexperimentpermission(instance::MLFlow, experiment_id::String, + username::String)::ExperimentPermission + result = mlfget(instance, "experiments/permissions/get"; experiment_id=experiment_id, + username=username) + return result["experiment_permission"] |> ExperimentPermission +end +getexperimentpermission(instance::MLFlow, experiment_id::Integer, + username::String)::ExperimentPermission = + getexperimentpermission(instance, experiment_id |> string, username) +getexperimentpermission(instance::MLFlow, experiment::Experiment, + username::String)::ExperimentPermission = + getexperimentpermission(instance, experiment.experiment_id, username) + +""" + updateexperimentpermission(instance::MLFlow, experiment_id::String, username::String, + permission::Permission) + updateexperimentpermission(instance::MLFlow, experiment_id::Integer, username::String, + permission::Permission) + updateexperimentpermission(instance::MLFlow, experiment::Experiment, username::String, + permission::Permission) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `experiment_id`: [`Experiment`](@ref) id. +- `username`: [`User`](@ref) username. +- `permission`: [`Permission`](@ref) to grant. + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function updateexperimentpermission(instance::MLFlow, experiment_id::String, + username::String, permission::Permission)::Bool + mlfpatch(instance, "experiments/permissions/update"; experiment_id=experiment_id, + username=username, permission=permission) + return true +end +updateexperimentpermission(instance::MLFlow, experiment_id::Integer, + username::String, permission::Permission)::Bool = + updateexperimentpermission(instance, experiment_id |> string, username, permission) +updateexperimentpermission(instance::MLFlow, experiment::Experiment, + username::String, permission::Permission)::Bool = + updateexperimentpermission(instance, experiment.experiment_id, username, permission) + +""" + deleteexperimentpermission(instance::MLFlow, experiment_id::String, username::String) + deleteexperimentpermission(instance::MLFlow, experiment_id::Integer, username::String) + deleteexperimentpermission(instance::MLFlow, experiment::Experiment, username::String) + +# Arguments +- `instance`: [`MLFlow`](@ref) configuration. +- `experiment_id`: [`Experiment`](@ref) id. +- `username`: [`User`](@ref) username. + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function deleteexperimentpermission(instance::MLFlow, experiment_id::String, + username::String)::Bool + mlfdelete(instance, "experiments/permissions/delete"; experiment_id=experiment_id, + username=username) + return true +end +deleteexperimentpermission(instance::MLFlow, experiment_id::Integer, + username::String)::Bool = + deleteexperimentpermission(instance, experiment_id |> string, username) +deleteexperimentpermission(instance::MLFlow, experiment::Experiment, + username::String)::Bool = + deleteexperimentpermission(instance, experiment.experiment_id, username) diff --git a/src/services/user.jl b/src/services/user.jl index be0f23f..e8dd415 100644 --- a/src/services/user.jl +++ b/src/services/user.jl @@ -30,7 +30,7 @@ function getuser(instance::MLFlow, username::String)::User end """ - deleteuser(instance::MLFlow, username::String, password::String) + updateuserpassword(instance::MLFlow, username::String, password::String) # Arguments - `instance`: [`MLFlow`](@ref) configuration. diff --git a/src/types/enums.jl b/src/types/enums.jl index 945de64..4edfb6e 100644 --- a/src/types/enums.jl +++ b/src/types/enums.jl @@ -68,3 +68,4 @@ Permission of a user to an experiment or a registered model. 
MANAGE = 3 NO_PERMISSIONS = 4 end +Permission(permission::String) = Dict(value => key for (key, value) in Permission |> Base.Enums.namemap)[permission|>Symbol] |> Permission diff --git a/src/types/experiment.jl b/src/types/experiment.jl index c7e8b89..26ddcf4 100644 --- a/src/types/experiment.jl +++ b/src/types/experiment.jl @@ -39,5 +39,5 @@ struct ExperimentPermission permission::Permission end ExperimentPermission(data::Dict{String,Any}) = ExperimentPermission(data["experiment_id"], - data["user_id"], Permission(data["permission"])) + data["user_id"] |> string, Permission(data["permission"])) Base.show(io::IO, t::ExperimentPermission) = show(io, ShowCase(t, new_lines=true)) diff --git a/test/services/experiment.jl b/test/services/experiment.jl index 66368df..e713e92 100644 --- a/test/services/experiment.jl +++ b/test/services/experiment.jl @@ -214,3 +214,166 @@ end deleteexperiment(mlf, experiment_id) end end + +@testset verbose = true "create experiment permission" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + permission = Permission("READ") + + @testset "with string experiment id" begin + user = createuser(mlf, "missy", "gala") + experiment_permission = + createexperimentpermission(mlf, experiment_id, user.username, permission) + + @test experiment_permission isa ExperimentPermission + @test experiment_permission.experiment_id == experiment_id + @test experiment_permission.user_id == user.id + @test experiment_permission.permission == permission + deleteexperimentpermission(mlf, experiment_id, user.username) + deleteuser(mlf, user.username) + end + + @testset "with integer experiment id" begin + user = createuser(mlf, "missy", "gala") + experiment_permission = + createexperimentpermission(mlf, parse(Int, experiment_id), user.username, permission) + + @test experiment_permission isa ExperimentPermission + @test experiment_permission.experiment_id == experiment_id + @test experiment_permission.user_id == user.id + @test 
experiment_permission.permission == permission + deleteexperimentpermission(mlf, experiment_id, user.username) + deleteuser(mlf, user.username) + end + + @testset "with Experiment" begin + experiment = getexperiment(mlf, experiment_id) + user = createuser(mlf, "missy", "gala") + experiment_permission = + createexperimentpermission(mlf, experiment, user.username, permission) + + @test experiment_permission isa ExperimentPermission + @test experiment_permission.experiment_id == experiment_id + @test experiment_permission.user_id == user.id + @test experiment_permission.permission == permission + deleteexperimentpermission(mlf, experiment_id, user.username) + deleteuser(mlf, user.username) + end + + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "get experiment permission" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + permission = Permission("READ") + user = createuser(mlf, "missy", "gala") + + @testset "with string experiment id" begin + createexperimentpermission(mlf, experiment_id, user.username, permission) + experiment_permission = getexperimentpermission(mlf, experiment_id, user.username) + + @test experiment_permission isa ExperimentPermission + @test experiment_permission.experiment_id == experiment_id + @test experiment_permission.user_id == user.id + @test experiment_permission.permission == permission + deleteexperimentpermission(mlf, experiment_id, user.username) + end + + @testset "with integer experiment id" begin + createexperimentpermission(mlf, parse(Int, experiment_id), user.username, permission) + experiment_permission = getexperimentpermission(mlf, parse(Int, experiment_id), user.username) + + @test experiment_permission isa ExperimentPermission + @test experiment_permission.experiment_id == experiment_id + @test experiment_permission.user_id == user.id + @test experiment_permission.permission == permission + deleteexperimentpermission(mlf, experiment_id, user.username) + end + + 
@testset "with Experiment" begin + experiment = getexperiment(mlf, experiment_id) + createexperimentpermission(mlf, experiment, user.username, permission) + experiment_permission = getexperimentpermission(mlf, experiment, user.username) + + @test experiment_permission isa ExperimentPermission + @test experiment_permission.experiment_id == experiment_id + @test experiment_permission.user_id == user.id + @test experiment_permission.permission == permission + deleteexperimentpermission(mlf, experiment_id, user.username) + end + + deleteuser(mlf, user.username) + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "update experiment permission" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + permission = Permission("READ") + user = createuser(mlf, "missy", "gala") + + @testset "with string experiment id" begin + createexperimentpermission(mlf, experiment_id, user.username, permission) + updateexperimentpermission(mlf, experiment_id, user.username, Permission("EDIT")) + experiment_permission = getexperimentpermission(mlf, experiment_id, user.username) + + @test experiment_permission.permission == Permission("EDIT") + deleteexperimentpermission(mlf, experiment_id, user.username) + end + + @testset "with integer experiment id" begin + createexperimentpermission(mlf, parse(Int, experiment_id), user.username, permission) + updateexperimentpermission(mlf, parse(Int, experiment_id), user.username, Permission("EDIT")) + experiment_permission = getexperimentpermission(mlf, parse(Int, experiment_id), user.username) + + @test experiment_permission.permission == Permission("EDIT") + deleteexperimentpermission(mlf, experiment_id, user.username) + end + + @testset "with Experiment" begin + experiment = getexperiment(mlf, experiment_id) + createexperimentpermission(mlf, experiment, user.username, permission) + updateexperimentpermission(mlf, experiment, user.username, Permission("EDIT")) + experiment_permission = 
getexperimentpermission(mlf, experiment, user.username) + + @test experiment_permission.permission == Permission("EDIT") + deleteexperimentpermission(mlf, experiment_id, user.username) + end + + deleteuser(mlf, user.username) + deleteexperiment(mlf, experiment_id) +end + +@testset verbose = true "delete experiment permission" begin + @ensuremlf + + experiment_id = createexperiment(mlf, UUIDs.uuid4() |> string) + permission = Permission("READ") + user = createuser(mlf, "missy", "gala") + + @testset "with string experiment id" begin + createexperimentpermission(mlf, experiment_id, user.username, permission) + deleteexperimentpermission(mlf, experiment_id, user.username) + @test_throws ErrorException getexperimentpermission(mlf, experiment_id, user.username) + end + + @testset "with integer experiment id" begin + createexperimentpermission(mlf, parse(Int, experiment_id), user.username, permission) + deleteexperimentpermission(mlf, parse(Int, experiment_id), user.username) + @test_throws ErrorException getexperimentpermission(mlf, parse(Int, experiment_id), user.username) + end + + @testset "with Experiment" begin + experiment = getexperiment(mlf, experiment_id) + createexperimentpermission(mlf, experiment, user.username, permission) + deleteexperimentpermission(mlf, experiment, user.username) + @test_throws ErrorException getexperimentpermission(mlf, experiment, user.username) + end + + deleteuser(mlf, user.username) + deleteexperiment(mlf, experiment_id) +end From 3628b3da7496f7d8c643eb20f2491f3bf2a4af58 Mon Sep 17 00:00:00 2001 From: Jose Esparza Date: Mon, 10 Feb 2025 14:16:02 -0500 Subject: [PATCH 31/31] Implementing `RegisteredModel` operations --- docs/src/reference/registered_model.md | 4 ++ src/MLFlowClient.jl | 3 +- src/services/registered_model.jl | 76 ++++++++++++++++++++++++++ src/types/registered_model.jl | 2 +- test/services/registered_model.jl | 67 +++++++++++++++++++++++ 5 files changed, 150 insertions(+), 2 deletions(-) diff --git 
a/docs/src/reference/registered_model.md b/docs/src/reference/registered_model.md index 4d93da6..a062ed4 100644 --- a/docs/src/reference/registered_model.md +++ b/docs/src/reference/registered_model.md @@ -10,4 +10,8 @@ setregisteredmodeltag deleteregisteredmodeltag deleteregisteredmodelalias setregisteredmodelalias +createregisteredmodelpermission +getregisteredmodelpermission +updateregisteredmodelpermission +deleteregisteredmodelpermission ``` diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 972547f..2a46265 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -75,7 +75,8 @@ include("services/registered_model.jl") export getregisteredmodel, createregisteredmodel, deleteregisteredmodel, renameregisteredmodel, updateregisteredmodel, searchregisteredmodels, setregisteredmodeltag, deleteregisteredmodeltag, deleteregisteredmodelalias, - setregisteredmodelalias + setregisteredmodelalias, createregisteredmodelpermission, getregisteredmodelpermission, + updateregisteredmodelpermission, deleteregisteredmodelpermission include("services/model_version.jl") export getlatestmodelversions, getmodelversion, createmodelversion, deletemodelversion, diff --git a/src/services/registered_model.jl b/src/services/registered_model.jl index f178a85..e7b99df 100644 --- a/src/services/registered_model.jl +++ b/src/services/registered_model.jl @@ -190,3 +190,79 @@ function setregisteredmodelalias(instance::MLFlow, name::String, alias::String, mlfpost(instance, "registered-models/alias"; name=name, alias=alias, version=version) return true end + +""" + createregisteredmodelpermission(instance::MLFlow, name::String, username::String, + permission::Permission) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` [`RegisteredModel`](@ref) name. +- `username:` [`User`](@ref) username. +- `permission:` [`Permission`](@ref) to grant. + +# Returns +An instance of type [`RegisteredModelPermission`](@ref). 
+""" +function createregisteredmodelpermission(instance::MLFlow, name::String, username::String, + permission::Permission)::RegisteredModelPermission + result = mlfpost(instance, "registered-models/permissions/create"; name=name, + username=username, permission=permission) + return result["registered_model_permission"] |> RegisteredModelPermission +end + +""" + getregisteredmodelpermission(instance::MLFlow, name::String, username::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` [`RegisteredModel`](@ref) name. +- `username:` [`User`](@ref) username. + +# Returns +An instance of type [`RegisteredModelPermission`](@ref). +""" +function getregisteredmodelpermission(instance::MLFlow, name::String, + username::String)::RegisteredModelPermission + result = mlfget(instance, "registered-models/permissions/get"; name=name, + username=username) + return result["registered_model_permission"] |> RegisteredModelPermission +end + +""" + updateregisteredmodelpermission(instance::MLFlow, name::String, username::String, + permission::Permission) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` [`RegisteredModel`](@ref) name. +- `username:` [`User`](@ref) username. +- `permission:` New [`Permission`](@ref) to grant. + +# Returns +`true` if successful. Otherwise, raises exception. +""" +function updateregisteredmodelpermission(instance::MLFlow, name::String, username::String, + permission::Permission)::Bool + mlfpatch(instance, "registered-models/permissions/update"; name=name, username=username, + permission=permission) + return true +end + +""" + deleteregisteredmodelpermission(instance::MLFlow, name::String, username::String) + +# Arguments +- `instance:` [`MLFlow`](@ref) configuration. +- `name:` [`RegisteredModel`](@ref) name. +- `username:` [`User`](@ref) username. + +# Returns +`true` if successful. Otherwise, raises exception. 
+""" +function deleteregisteredmodelpermission(instance::MLFlow, name::String, + username::String)::Bool + mlfdelete(instance, "registered-models/permissions/delete"; name=name, + username=username) + return true +end diff --git a/src/types/registered_model.jl b/src/types/registered_model.jl index fbfc0ad..cb20ef8 100644 --- a/src/types/registered_model.jl +++ b/src/types/registered_model.jl @@ -63,5 +63,5 @@ struct RegisteredModelPermission permission::Permission end RegisteredModelPermission(data::Dict{String,Any}) = RegisteredModelPermission(data["name"], - data["user_id"], Permission(data["permission"])) + data["user_id"] |> string, Permission(data["permission"])) Base.show(io::IO, t::RegisteredModelPermission) = show(io, ShowCase(t, new_lines=true)) diff --git a/test/services/registered_model.jl b/test/services/registered_model.jl index a5fa9af..3140099 100644 --- a/test/services/registered_model.jl +++ b/test/services/registered_model.jl @@ -176,3 +176,70 @@ end deleteregisteredmodel(mlf, "missy") deleteexperiment(mlf, experiment) end + +@testset verbose = true "create registered model permission" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + user = createuser(mlf, "missy", "gala") + permission = createregisteredmodelpermission(mlf, registered_model.name, user.username, Permission("READ")) + + @test permission isa RegisteredModelPermission + @test permission.name == registered_model.name + @test permission.user_id == user.id + @test permission.permission == Permission("READ") + + deleteregisteredmodelpermission(mlf, registered_model.name, user.username) + deleteuser(mlf, user.username) + deleteregisteredmodel(mlf, "missy") +end + +@testset verbose = true "get registered model permission" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + user = createuser(mlf, "missy", "gala") + permission = createregisteredmodelpermission(mlf, registered_model.name, 
user.username, Permission("READ")) + retrieved_permission = getregisteredmodelpermission(mlf, registered_model.name, user.username) + + @test retrieved_permission isa RegisteredModelPermission + @test retrieved_permission.name == registered_model.name + @test retrieved_permission.user_id == user.id + @test retrieved_permission.permission == Permission("READ") + + deleteregisteredmodelpermission(mlf, registered_model.name, user.username) + deleteuser(mlf, user.username) + deleteregisteredmodel(mlf, "missy") +end + +@testset verbose = true "update registered model permission" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + user = createuser(mlf, "missy", "gala") + permission = createregisteredmodelpermission(mlf, registered_model.name, user.username, Permission("READ")) + updateregisteredmodelpermission(mlf, registered_model.name, user.username, Permission("MANAGE")) + retrieved_permission = getregisteredmodelpermission(mlf, registered_model.name, user.username) + + @test retrieved_permission isa RegisteredModelPermission + @test retrieved_permission.name == registered_model.name + @test retrieved_permission.user_id == user.id + @test retrieved_permission.permission == Permission("MANAGE") + + deleteregisteredmodelpermission(mlf, registered_model.name, user.username) + deleteuser(mlf, user.username) + deleteregisteredmodel(mlf, "missy") +end +# +@testset verbose = true "delete registered model permission" begin + @ensuremlf + + registered_model = createregisteredmodel(mlf, "missy"; description="gala") + user = createuser(mlf, "missy", "gala") + permission = createregisteredmodelpermission(mlf, registered_model.name, user.username, Permission("READ")) + deleteregisteredmodelpermission(mlf, registered_model.name, user.username) + + @test_throws ErrorException getregisteredmodelpermission(mlf, registered_model.name, user.username) + deleteuser(mlf, user.username) + deleteregisteredmodel(mlf, "missy") +end