From 6e33fc6d2e8856468b6b7ff397025f9d34c6e488 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 9 Dec 2025 11:11:48 -0800 Subject: [PATCH 01/20] bump Ray version --- .ci/docker/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index 086633cf043..d9e7b338cfd 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -32,7 +32,7 @@ bs4 awscliv2==2.1.1 flask spacy==3.4.1 -ray[tune]==2.7.2 +ray[tune]==2.52.1 tensorboard jinja2==3.1.3 pytorch-lightning From bededb95ec9700067cd9805f0174714c85ded5ac Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 9 Dec 2025 15:01:34 -0800 Subject: [PATCH 02/20] Ignore more data stuff --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3f1f927ee33..ea478ca180d 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,8 @@ beginner_source/hymenoptera_data/ intermediate_source/data/ *.zip MNIST/ +data/cifar-10-batches-py/* +*.tar.gz #builds _build/ @@ -132,3 +134,4 @@ dictionary.dic # linters /.lintbin + From 352ad9b3ddae3dd142dbf5fb09158c601c4d3403 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 9 Dec 2025 15:20:25 -0800 Subject: [PATCH 03/20] Update Ray Tune tutorial to use new API and improve formatting --- .../hyperparameter_tuning_tutorial.py | 448 ++++++++++-------- 1 file changed, 240 insertions(+), 208 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index dd3fe65699e..e3f1b15f1a9 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -1,44 +1,51 @@ -# -*- coding: utf-8 -*- """ Hyperparameter tuning with Ray Tune =================================== -Hyperparameter tuning can make the difference between an average model and a highly -accurate one. Often simple things like choosing a different learning rate or changing -a network layer size can have a dramatic impact on your model performance. +Hyperparameter tuning can make the difference between an average model +and a highly accurate one. Often simple things like choosing a different +learning rate or changing a network layer size can have a dramatic +impact on your model performance. -Fortunately, there are tools that help with finding the best combination of parameters. -`Ray Tune `_ is an industry standard tool for -distributed hyperparameter tuning. Ray Tune includes the latest hyperparameter search -algorithms, integrates with various analysis libraries, and natively -supports distributed training through `Ray's distributed machine learning engine -`_. +Fortunately, there are tools that help with finding the best combination +of parameters. `Ray Tune `__ is +an industry standard tool for distributed hyperparameter tuning. Ray +Tune includes the latest hyperparameter search algorithms, integrates +with various analysis libraries, and natively supports distributed +training through `Ray’s distributed machine learning +engine `__. -In this tutorial, we will show you how to integrate Ray Tune into your PyTorch -training workflow. We will extend `this tutorial from the PyTorch documentation -`_ for training -a CIFAR10 image classifier. +In this tutorial, we will show you how to integrate Ray Tune into your +PyTorch training workflow. We will extend `this tutorial from the +PyTorch +documentation `__ +for training a CIFAR10 image classifier. 
-As you will see, we only need to add some slight modifications. In particular, we -need to +As you will see, we only need to add some slight modifications. In +particular, we need to 1. wrap data loading and training in functions, 2. make some network parameters configurable, 3. add checkpointing (optional), 4. and define the search space for the model tuning -| +| To run this tutorial, please make sure the following packages are installed: -- ``ray[tune]``: Distributed hyperparameter tuning library -- ``torchvision``: For the data transformers +- ``ray[tune]``: Distributed hyperparameter tuning library +- ``torchvision``: For the data transformers Setup / Imports --------------- -Let's start with the imports: + +Let’s start with the imports: + """ + +# %matplotlib inline + from functools import partial import os import tempfile @@ -51,28 +58,29 @@ import torchvision import torchvision.transforms as transforms # sphinx_gallery_start_ignore -# Fixes ``AttributeError: '_LoggingTee' object has no attribute 'fileno'``. +# Fixes `AttributeError: '_LoggingTee' object has no attribute 'fileno'`. # This is only needed to run with sphinx-build. import sys if not hasattr(sys.stdout, "encoding"): sys.stdout.encoding = "latin1" sys.stdout.fileno = lambda: 0 # sphinx_gallery_end_ignore +import ray from ray import tune -from ray import train -from ray.train import Checkpoint, get_checkpoint +from ray.tune import Checkpoint from ray.tune.schedulers import ASHAScheduler import ray.cloudpickle as pickle ###################################################################### -# Most of the imports are needed for building the PyTorch model. Only the last -# imports are for Ray Tune. +# Most of the imports are needed for building the PyTorch model. Only the +# last imports are for Ray Tune. # # Data loaders # ------------ -# We wrap the data loaders in their own function and pass a global data directory. -# This way we can share a data directory between different trials. - +# +# We wrap the data loaders in their own function and pass a global data +# directory. This way we can share a data directory between different +# trials. def load_data(data_dir="./data"): transform = transforms.Compose( @@ -89,14 +97,12 @@ def load_data(data_dir="./data"): return trainset, testset - ###################################################################### # Configurable neural network # --------------------------- -# We can only tune those parameters that are configurable. -# In this example, we can specify -# the layer sizes of the fully connected layers: - +# +# We can only tune those parameters that are configurable. In this +# example, we can specify the layer sizes of the fully connected layers: class Net(nn.Module): def __init__(self, l1=120, l2=84): @@ -117,76 +123,82 @@ def forward(self, x): x = self.fc3(x) return x - ###################################################################### # The train function # ------------------ -# Now it gets interesting, because we introduce some changes to the example `from the PyTorch -# documentation `_. -# -# We wrap the training script in a function ``train_cifar(config, data_dir=None)``. -# The ``config`` parameter will receive the hyperparameters we would like to -# train with. The ``data_dir`` specifies the directory where we load and store the data, -# so that multiple runs can share the same data source. -# We also load the model and optimizer state at the start of the run, if a checkpoint -# is provided. 
Further down in this tutorial you will find information on how +# +# Now it gets interesting, because we introduce some changes to the +# example `from the PyTorch +# documentation `__. +# +# We wrap the training script in a function +# ``train_cifar(config, data_dir=None)``. The ``config`` parameter will +# receive the hyperparameters we would like to train with. The +# ``data_dir`` specifies the directory where we load and store the data, +# so that multiple runs can share the same data source. We also load the +# model and optimizer state at the start of the run, if a checkpoint is +# provided. Further down in this tutorial you will find information on how # to save the checkpoint and what it is used for. # # .. code-block:: python # -# net = Net(config["l1"], config["l2"]) +# net = Net(config["l1"], config["l2"]) # -# checkpoint = get_checkpoint() -# if checkpoint: -# with checkpoint.as_directory() as checkpoint_dir: -# data_path = Path(checkpoint_dir) / "data.pkl" -# with open(data_path, "rb") as fp: -# checkpoint_state = pickle.load(fp) -# start_epoch = checkpoint_state["epoch"] -# net.load_state_dict(checkpoint_state["net_state_dict"]) -# optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) -# else: -# start_epoch = 0 +# checkpoint = tune.get_checkpoint() +# if checkpoint: +# with checkpoint.as_directory() as checkpoint_dir: +# data_path = Path(checkpoint_dir) / "data.pkl" +# with open(data_path, "rb") as fp: +# checkpoint_state = pickle.load(fp) +# start_epoch = checkpoint_state["epoch"] +# net.load_state_dict(checkpoint_state["net_state_dict"]) +# optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) +# else: +# start_epoch = 0 # # The learning rate of the optimizer is made configurable, too: # # .. code-block:: python # -# optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) +# optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) # -# We also split the training data into a training and validation subset. We thus train on -# 80% of the data and calculate the validation loss on the remaining 20%. The batch sizes -# with which we iterate through the training and test sets are configurable as well. +# We also split the training data into a training and validation subset. +# We thus train on 80% of the data and calculate the validation loss on +# the remaining 20%. The batch sizes with which we iterate through the +# training and test sets are configurable as well. # # Adding (multi) GPU support with DataParallel # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Image classification benefits largely from GPUs. Luckily, we can continue to use -# PyTorch's abstractions in Ray Tune. Thus, we can wrap our model in ``nn.DataParallel`` -# to support data parallel training on multiple GPUs: +# +# Image classification benefits largely from GPUs. Luckily, we can +# continue to use PyTorch’s abstractions in Ray Tune. Thus, we can wrap +# our model in ``nn.DataParallel`` to support data parallel training on +# multiple GPUs: # # .. code-block:: python # -# device = "cpu" -# if torch.cuda.is_available(): -# device = "cuda:0" -# if torch.cuda.device_count() > 1: -# net = nn.DataParallel(net) -# net.to(device) +# device = "cpu" +# if torch.cuda.is_available(): +# device = "cuda:0" +# if torch.cuda.device_count() > 1: +# net = nn.DataParallel(net) +# net.to(device) # -# By using a ``device`` variable we make sure that training also works when we have -# no GPUs available. 
PyTorch requires us to send our data to the GPU memory explicitly, -# like this: +# By using a ``device`` variable we make sure that training also works +# when we have no GPUs available. PyTorch requires us to send our data to +# the GPU memory explicitly, like this: # # .. code-block:: python # -# for i, data in enumerate(trainloader, 0): -# inputs, labels = data -# inputs, labels = inputs.to(device), labels.to(device) +# for i, data in enumerate(trainloader, 0): +# inputs, labels = data +# inputs, labels = inputs.to(device), labels.to(device) # -# The code now supports training on CPUs, on a single GPU, and on multiple GPUs. Notably, Ray -# also supports `fractional GPUs `_ -# so we can share GPUs among trials, as long as the model still fits on the GPU memory. We'll come back -# to that later. +# The code now supports training on CPUs, on a single GPU, and on multiple +# GPUs. Notably, Ray also supports `fractional +# GPUs `__ +# so we can share GPUs among trials, as long as the model still fits on +# the GPU memory. We’ll come back to that later. # # Communicating with Ray Tune # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -195,41 +207,42 @@ def forward(self, x): # # .. code-block:: python # -# checkpoint_data = { -# "epoch": epoch, -# "net_state_dict": net.state_dict(), -# "optimizer_state_dict": optimizer.state_dict(), -# } -# with tempfile.TemporaryDirectory() as checkpoint_dir: -# data_path = Path(checkpoint_dir) / "data.pkl" -# with open(data_path, "wb") as fp: -# pickle.dump(checkpoint_data, fp) -# -# checkpoint = Checkpoint.from_directory(checkpoint_dir) -# train.report( -# {"loss": val_loss / val_steps, "accuracy": correct / total}, -# checkpoint=checkpoint, -# ) -# -# Here we first save a checkpoint and then report some metrics back to Ray Tune. Specifically, -# we send the validation loss and accuracy back to Ray Tune. Ray Tune can then use these metrics -# to decide which hyperparameter configuration lead to the best results. These metrics -# can also be used to stop bad performing trials early in order to avoid wasting -# resources on those trials. -# -# The checkpoint saving is optional, however, it is necessary if we wanted to use advanced -# schedulers like -# `Population Based Training `_. -# Also, by saving the checkpoint we can later load the trained models and validate them -# on a test set. Lastly, saving checkpoints is useful for fault tolerance, and it allows -# us to interrupt training and continue training later. +# checkpoint_data = { +# "epoch": epoch, +# "net_state_dict": net.state_dict(), +# "optimizer_state_dict": optimizer.state_dict(), +# } +# with tempfile.TemporaryDirectory() as checkpoint_dir: +# data_path = Path(checkpoint_dir) / "data.pkl" +# with open(data_path, "wb") as fp: +# pickle.dump(checkpoint_data, fp) +# +# checkpoint = Checkpoint.from_directory(checkpoint_dir) +# tune.report( +# {"loss": val_loss / val_steps, "accuracy": correct / total}, +# checkpoint=checkpoint, +# ) +# +# Here we first save a checkpoint and then report some metrics back to Ray +# Tune. Specifically, we send the validation loss and accuracy back to Ray +# Tune. Ray Tune can then use these metrics to decide which hyperparameter +# configuration lead to the best results. These metrics can also be used +# to stop bad performing trials early in order to avoid wasting resources +# on those trials. +# +# The checkpoint saving is optional, however, it is necessary if we wanted +# to use advanced schedulers like `Population Based +# Training `__. 
+# Also, by saving the checkpoint we can later load the trained models and +# validate them on a test set. Lastly, saving checkpoints is useful for +# fault tolerance, and it allows us to interrupt training and continue +# training later. # # Full training function # ~~~~~~~~~~~~~~~~~~~~~~ # # The full code example looks like this: - def train_cifar(config, data_dir=None): net = Net(config["l1"], config["l2"]) @@ -243,7 +256,7 @@ def train_cifar(config, data_dir=None): criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) - checkpoint = get_checkpoint() + checkpoint = tune.get_checkpoint() if checkpoint: with checkpoint.as_directory() as checkpoint_dir: data_path = Path(checkpoint_dir) / "data.pkl" @@ -263,10 +276,10 @@ def train_cifar(config, data_dir=None): ) trainloader = torch.utils.data.DataLoader( - train_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=8 + train_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=2 ) valloader = torch.utils.data.DataLoader( - val_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=8 + val_subset, batch_size=int(config["batch_size"]), shuffle=True, num_workers=2 ) for epoch in range(start_epoch, 10): # loop over the dataset multiple times @@ -326,23 +339,23 @@ def train_cifar(config, data_dir=None): pickle.dump(checkpoint_data, fp) checkpoint = Checkpoint.from_directory(checkpoint_dir) - train.report( + tune.report( {"loss": val_loss / val_steps, "accuracy": correct / total}, checkpoint=checkpoint, ) print("Finished Training") - ###################################################################### -# As you can see, most of the code is adapted directly from the original example. +# As you can see, most of the code is adapted directly from the original +# example. # # Test set accuracy # ----------------- -# Commonly the performance of a machine learning model is tested on a hold-out test -# set with data that has not been used for training the model. We also wrap this in a -# function: - +# +# Commonly the performance of a machine learning model is tested on a +# hold-out test set with data that has not been used for training the +# model. We also wrap this in a function: def test_accuracy(net, device="cpu"): trainset, testset = load_data() @@ -364,69 +377,83 @@ def test_accuracy(net, device="cpu"): return correct / total - ###################################################################### -# The function also expects a ``device`` parameter, so we can do the -# test set validation on a GPU. +# The function also expects a ``device`` parameter, so we can do the test +# set validation on a GPU. # # Configuring the search space # ---------------------------- -# Lastly, we need to define Ray Tune's search space. Here is an example: -# -# .. code-block:: python # -# config = { -# "l1": tune.choice([2 ** i for i in range(9)]), -# "l2": tune.choice([2 ** i for i in range(9)]), -# "lr": tune.loguniform(1e-4, 1e-1), -# "batch_size": tune.choice([2, 4, 8, 16]) -# } +# Lastly, we need to define Ray Tune’s search space. Here is an example: # -# The ``tune.choice()`` accepts a list of values that are uniformly sampled from. -# In this example, the ``l1`` and ``l2`` parameters -# should be powers of 2 between 4 and 256, so either 4, 8, 16, 32, 64, 128, or 256. -# The ``lr`` (learning rate) should be uniformly sampled between 0.0001 and 0.1. Lastly, -# the batch size is a choice between 2, 4, 8, and 16. 
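#
# Note that Ray Tune also provides other sampling primitives besides
# ``tune.choice()`` and ``tune.loguniform()``, such as ``tune.uniform()``,
# ``tune.randint()``, and ``tune.grid_search()``. The following sketch is
# illustrative only and is not used in the rest of this tutorial; the
# ``num_epochs`` key is a hypothetical parameter included just to show an
# integer range:
#
# .. code-block:: python
#
#    alt_config = {
#        "l1": tune.grid_search([64, 128, 256]),  # try every listed value
#        "lr": tune.uniform(1e-4, 1e-1),          # uniform instead of log-uniform
#        "num_epochs": tune.randint(1, 10),       # hypothetical integer parameter
#        "batch_size": tune.choice([2, 4, 8, 16]),
#    }
#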
-# -# At each trial, Ray Tune will now randomly sample a combination of parameters from these -# search spaces. It will then train a number of models in parallel and find the best -# performing one among these. We also use the ``ASHAScheduler`` which will terminate bad -# performing trials early. +# .. code-block:: python # -# We wrap the ``train_cifar`` function with ``functools.partial`` to set the constant -# ``data_dir`` parameter. We can also tell Ray Tune what resources should be -# available for each trial: +# config = { +# "l1": tune.choice([2 ** i for i in range(9)]), +# "l2": tune.choice([2 ** i for i in range(9)]), +# "lr": tune.loguniform(1e-4, 1e-1), +# "batch_size": tune.choice([2, 4, 8, 16]) +# } +# +# The ``tune.choice()`` accepts a list of values that are uniformly +# sampled from. In this example, the ``l1`` and ``l2`` parameters should +# be powers of 2 between 4 and 256, so either 4, 8, 16, 32, 64, 128, or +# 256. The ``lr`` (learning rate) should be uniformly sampled between +# 0.0001 and 0.1. Lastly, the batch size is a choice between 2, 4, 8, and +# 16. +# +# At each trial, Ray Tune will now randomly sample a combination of +# parameters from these search spaces. It will then train a number of +# models in parallel and find the best performing one among these. We also +# use the ``ASHAScheduler`` which will terminate bad performing trials +# early. +# +# We wrap the ``train_cifar`` function with ``functools.partial`` to set +# the constant ``data_dir`` parameter. We can also tell Ray Tune what +# resources should be available for each trial using +# ``tune.with_resources``: # # .. code-block:: python # -# gpus_per_trial = 2 -# # ... -# result = tune.run( -# partial(train_cifar, data_dir=data_dir), -# resources_per_trial={"cpu": 8, "gpu": gpus_per_trial}, -# config=config, -# num_samples=num_samples, -# scheduler=scheduler, -# checkpoint_at_end=True) -# -# You can specify the number of CPUs, which are then available e.g. -# to increase the ``num_workers`` of the PyTorch ``DataLoader`` instances. The selected -# number of GPUs are made visible to PyTorch in each trial. Trials do not have access to -# GPUs that haven't been requested for them - so you don't have to care about two trials -# using the same set of resources. -# -# Here we can also specify fractional GPUs, so something like ``gpus_per_trial=0.5`` is -# completely valid. The trials will then share GPUs among each other. -# You just have to make sure that the models still fit in the GPU memory. -# -# After training the models, we will find the best performing one and load the trained -# network from the checkpoint file. We then obtain the test set accuracy and report -# everything by printing. +# gpus_per_trial = 2 +# # ... +# tuner = tune.Tuner( +# tune.with_resources( +# partial(train_cifar, data_dir=data_dir), +# resources={"cpu": 8, "gpu": gpus_per_trial} +# ), +# tune_config=tune.TuneConfig( +# metric="loss", +# mode="min", +# scheduler=scheduler, +# num_samples=num_samples, +# ), +# param_space=config, +# ) +# results = tuner.fit() +# +# You can specify the number of CPUs, which are then available e.g. to +# increase the ``num_workers`` of the PyTorch ``DataLoader`` instances. +# The selected number of GPUs are made visible to PyTorch in each trial. +# Trials do not have access to GPUs that haven’t been requested for them - +# so you don’t have to care about two trials using the same set of +# resources. 
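#
# For illustration, a resource request that packs two trials onto one GPU
# could look like the sketch below, reusing the ``train_cifar`` and
# ``data_dir`` names from this tutorial; treat the exact numbers as an
# example rather than a recommendation:
#
# .. code-block:: python
#
#    trainable_with_resources = tune.with_resources(
#        partial(train_cifar, data_dir=data_dir),
#        resources={"cpu": 2, "gpu": 0.5},  # two trials can share one GPU
#    )
#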
+# +# Here we can also specify fractional GPUs, so something like +# ``gpus_per_trial=0.5`` is completely valid. The trials will then share +# GPUs among each other. You just have to make sure that the models still +# fit in the GPU memory. +# +# After training the models, we will find the best performing one and load +# the trained network from the checkpoint file. We then obtain the test +# set accuracy and report everything by printing. # # The full main function looks like this: - -def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): +def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): + print("Starting hyperparameter tuning.") + ray.init(include_dashboard=False, runtime_env={"RAY_enable_open_telemetry": "0"}) + data_dir = os.path.abspath("./data") load_data(data_dir) config = { @@ -436,26 +463,32 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): "batch_size": tune.choice([2, 4, 8, 16]), } scheduler = ASHAScheduler( - metric="loss", - mode="min", max_t=max_num_epochs, grace_period=1, reduction_factor=2, ) - result = tune.run( - partial(train_cifar, data_dir=data_dir), - resources_per_trial={"cpu": 2, "gpu": gpus_per_trial}, - config=config, - num_samples=num_samples, - scheduler=scheduler, + + tuner = tune.Tuner( + tune.with_resources( + partial(train_cifar, data_dir=data_dir), + resources={"cpu": 2, "gpu": gpus_per_trial} + ), + tune_config=tune.TuneConfig( + metric="loss", + mode="min", + scheduler=scheduler, + num_samples=num_trials, + ), + param_space=config, ) + results = tuner.fit() - best_trial = result.get_best_trial("loss", "min", "last") - print(f"Best trial config: {best_trial.config}") - print(f"Best trial final validation loss: {best_trial.last_result['loss']}") - print(f"Best trial final validation accuracy: {best_trial.last_result['accuracy']}") + best_result = results.get_best_result("loss", "min") + print(f"Best trial config: {best_result.config}") + print(f"Best trial final validation loss: {best_result.metrics['loss']}") + print(f"Best trial final validation accuracy: {best_result.metrics['accuracy']}") - best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"]) + best_trained_model = Net(best_result.config["l1"], best_result.config["l2"]) device = "cpu" if torch.cuda.is_available(): device = "cuda:0" @@ -463,7 +496,7 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): best_trained_model = nn.DataParallel(best_trained_model) best_trained_model.to(device) - best_checkpoint = result.get_best_checkpoint(trial=best_trial, metric="accuracy", mode="max") + best_checkpoint = best_result.checkpoint with best_checkpoint.as_directory() as checkpoint_dir: data_path = Path(checkpoint_dir) / "data.pkl" with open(data_path, "rb") as fp: @@ -476,37 +509,36 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): if __name__ == "__main__": # You can change the number of GPUs per trial here: - main(num_samples=10, max_num_epochs=10, gpus_per_trial=0) - + main(num_trials=1, max_num_epochs=1, gpus_per_trial=0) ###################################################################### # If you run the code, an example output could look like this: # -# .. code-block:: sh -# -# Number of trials: 10/10 (10 TERMINATED) -# +-----+--------------+------+------+-------------+--------+---------+------------+ -# | ... | batch_size | l1 | l2 | lr | iter | loss | accuracy | -# |-----+--------------+------+------+-------------+--------+---------+------------| -# | ... | 2 | 1 | 256 | 0.000668163 | 1 | 2.31479 | 0.0977 | -# | ... 
| 4 | 64 | 8 | 0.0331514 | 1 | 2.31605 | 0.0983 | -# | ... | 4 | 2 | 1 | 0.000150295 | 1 | 2.30755 | 0.1023 | -# | ... | 16 | 32 | 32 | 0.0128248 | 10 | 1.66912 | 0.4391 | -# | ... | 4 | 8 | 128 | 0.00464561 | 2 | 1.7316 | 0.3463 | -# | ... | 8 | 256 | 8 | 0.00031556 | 1 | 2.19409 | 0.1736 | -# | ... | 4 | 16 | 256 | 0.00574329 | 2 | 1.85679 | 0.3368 | -# | ... | 8 | 2 | 2 | 0.00325652 | 1 | 2.30272 | 0.0984 | -# | ... | 2 | 2 | 2 | 0.000342987 | 2 | 1.76044 | 0.292 | -# | ... | 4 | 64 | 32 | 0.003734 | 8 | 1.53101 | 0.4761 | -# +-----+--------------+------+------+-------------+--------+---------+------------+ -# -# Best trial config: {'l1': 64, 'l2': 32, 'lr': 0.0037339984519545164, 'batch_size': 4} -# Best trial final validation loss: 1.5310075663924216 -# Best trial final validation accuracy: 0.4761 -# Best trial test set accuracy: 0.4737 +# .. code-block:: text +# +# Number of trials: 10/10 (10 TERMINATED) +# +-----+--------------+------+------+-------------+--------+---------+------------+ +# | ... | batch_size | l1 | l2 | lr | iter | loss | accuracy | +# |-----+--------------+------+------+-------------+--------+---------+------------| +# | ... | 2 | 1 | 256 | 0.000668163 | 1 | 2.31479 | 0.0977 | +# | ... | 4 | 64 | 8 | 0.0331514 | 1 | 2.31605 | 0.0983 | +# | ... | 4 | 2 | 1 | 0.000150295 | 1 | 2.30755 | 0.1023 | +# | ... | 16 | 32 | 32 | 0.0128248 | 10 | 1.66912 | 0.4391 | +# | ... | 4 | 8 | 128 | 0.00464561 | 2 | 1.7316 | 0.3463 | +# | ... | 8 | 256 | 8 | 0.00031556 | 1 | 2.19409 | 0.1736 | +# | ... | 4 | 16 | 256 | 0.00574329 | 2 | 1.85679 | 0.3368 | +# | ... | 8 | 2 | 2 | 0.00325652 | 1 | 2.30272 | 0.0984 | +# | ... | 2 | 2 | 2 | 0.000342987 | 2 | 1.76044 | 0.292 | +# | ... | 4 | 64 | 32 | 0.003734 | 8 | 1.53101 | 0.4761 | +# +-----+--------------+------+------+-------------+--------+---------+------------+ +# +# Best trial config: {'l1': 64, 'l2': 32, 'lr': 0.0037339984519545164, 'batch_size': 4} +# Best trial final validation loss: 1.5310075663924216 +# Best trial final validation accuracy: 0.4761 +# Best trial test set accuracy: 0.4737 # # Most trials have been stopped early in order to avoid wasting resources. -# The best performing trial achieved a validation accuracy of about 47%, which could -# be confirmed on the test set. +# The best performing trial achieved a validation accuracy of about 47%, +# which could be confirmed on the test set. # -# So that's it! You can now tune the parameters of your PyTorch models. +# So that’s it! You can now tune the parameters of your PyTorch models. From 10566e349ef1fb17986e6f5c495a3e9cdd276fcd Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 9 Dec 2025 15:29:14 -0800 Subject: [PATCH 04/20] Remove Ray initialization from hyperparameter tuning tutorial and update code block formatting from text to bash. 
--- beginner_source/hyperparameter_tuning_tutorial.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index e3f1b15f1a9..d39e12b9370 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -452,7 +452,6 @@ def test_accuracy(net, device="cpu"): def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): print("Starting hyperparameter tuning.") - ray.init(include_dashboard=False, runtime_env={"RAY_enable_open_telemetry": "0"}) data_dir = os.path.abspath("./data") load_data(data_dir) @@ -514,7 +513,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): ###################################################################### # If you run the code, an example output could look like this: # -# .. code-block:: text +# .. code-block:: bash # # Number of trials: 10/10 (10 TERMINATED) # +-----+--------------+------+------+-------------+--------+---------+------------+ From ad79672502257c58cb7c59bff811887f612198bb Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 9 Dec 2025 15:52:32 -0800 Subject: [PATCH 05/20] Clean up hparam tuning tutorial, modernize checkpointing --- .../hyperparameter_tuning_tutorial.py | 48 +++++++------------ 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index d39e12b9370..ffc5f361db1 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -44,8 +44,6 @@ """ -# %matplotlib inline - from functools import partial import os import tempfile @@ -57,19 +55,10 @@ from torch.utils.data import random_split import torchvision import torchvision.transforms as transforms -# sphinx_gallery_start_ignore -# Fixes `AttributeError: '_LoggingTee' object has no attribute 'fileno'`. -# This is only needed to run with sphinx-build. -import sys -if not hasattr(sys.stdout, "encoding"): - sys.stdout.encoding = "latin1" - sys.stdout.fileno = lambda: 0 -# sphinx_gallery_end_ignore import ray from ray import tune from ray.tune import Checkpoint from ray.tune.schedulers import ASHAScheduler -import ray.cloudpickle as pickle ###################################################################### # Most of the imports are needed for building the PyTorch model. Only the @@ -135,10 +124,13 @@ def forward(self, x): # ``train_cifar(config, data_dir=None)``. The ``config`` parameter will # receive the hyperparameters we would like to train with. The # ``data_dir`` specifies the directory where we load and store the data, -# so that multiple runs can share the same data source. We also load the -# model and optimizer state at the start of the run, if a checkpoint is -# provided. Further down in this tutorial you will find information on how -# to save the checkpoint and what it is used for. +# so that multiple runs can share the same data source. This is especially +# useful in cluster environments, where you can mount a shared storage +# (e.g. NFS) to this directory so that the data is not downloaded to each +# node separately. We also load the model and optimizer state at the start +# of the run, if a checkpoint is provided. Further down in this tutorial +# you will find information on how to save the checkpoint and what it is +# used for. # # .. 
code-block:: python # @@ -147,9 +139,8 @@ def forward(self, x): # checkpoint = tune.get_checkpoint() # if checkpoint: # with checkpoint.as_directory() as checkpoint_dir: -# data_path = Path(checkpoint_dir) / "data.pkl" -# with open(data_path, "rb") as fp: -# checkpoint_state = pickle.load(fp) +# checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" +# checkpoint_state = torch.load(checkpoint_path) # start_epoch = checkpoint_state["epoch"] # net.load_state_dict(checkpoint_state["net_state_dict"]) # optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) @@ -213,9 +204,8 @@ def forward(self, x): # "optimizer_state_dict": optimizer.state_dict(), # } # with tempfile.TemporaryDirectory() as checkpoint_dir: -# data_path = Path(checkpoint_dir) / "data.pkl" -# with open(data_path, "wb") as fp: -# pickle.dump(checkpoint_data, fp) +# checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" +# torch.save(checkpoint_data, checkpoint_path) # # checkpoint = Checkpoint.from_directory(checkpoint_dir) # tune.report( @@ -259,9 +249,8 @@ def train_cifar(config, data_dir=None): checkpoint = tune.get_checkpoint() if checkpoint: with checkpoint.as_directory() as checkpoint_dir: - data_path = Path(checkpoint_dir) / "data.pkl" - with open(data_path, "rb") as fp: - checkpoint_state = pickle.load(fp) + checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" + checkpoint_state = torch.load(checkpoint_path) start_epoch = checkpoint_state["epoch"] net.load_state_dict(checkpoint_state["net_state_dict"]) optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) @@ -334,9 +323,8 @@ def train_cifar(config, data_dir=None): "optimizer_state_dict": optimizer.state_dict(), } with tempfile.TemporaryDirectory() as checkpoint_dir: - data_path = Path(checkpoint_dir) / "data.pkl" - with open(data_path, "wb") as fp: - pickle.dump(checkpoint_data, fp) + checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" + torch.save(checkpoint_data, checkpoint_path) checkpoint = Checkpoint.from_directory(checkpoint_dir) tune.report( @@ -452,6 +440,7 @@ def test_accuracy(net, device="cpu"): def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): print("Starting hyperparameter tuning.") + ray.init() data_dir = os.path.abspath("./data") load_data(data_dir) @@ -497,9 +486,8 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): best_checkpoint = best_result.checkpoint with best_checkpoint.as_directory() as checkpoint_dir: - data_path = Path(checkpoint_dir) / "data.pkl" - with open(data_path, "rb") as fp: - best_checkpoint_data = pickle.load(fp) + checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" + best_checkpoint_data = torch.load(checkpoint_path) best_trained_model.load_state_dict(best_checkpoint_data["net_state_dict"]) test_acc = test_accuracy(best_trained_model, device) From a941761df4b750a952b00afcf485d52cb56a382b Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 9 Dec 2025 16:49:24 -0800 Subject: [PATCH 06/20] Polish hparam tutorial a bit --- .../hyperparameter_tuning_tutorial.py | 120 ++++++++++-------- 1 file changed, 70 insertions(+), 50 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index ffc5f361db1..c8dac6ef9c3 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -3,9 +3,9 @@ =================================== Hyperparameter tuning can make the difference between an average model -and a highly accurate one. 
Often simple things like choosing a different -learning rate or changing a network layer size can have a dramatic -impact on your model performance. +and a highly accurate one. Often, simple decisions like choosing a +different learning rate or changing a network layer size can +dramatically impact model performance. Fortunately, there are tools that help with finding the best combination of parameters. `Ray Tune `__ is @@ -21,15 +21,12 @@ documentation `__ for training a CIFAR10 image classifier. -As you will see, we only need to add some slight modifications. In -particular, we need to +We only need to make minor modifications: 1. wrap data loading and training in functions, 2. make some network parameters configurable, 3. add checkpointing (optional), -4. and define the search space for the model tuning - -| +4. define the search space for the model tuning To run this tutorial, please make sure the following packages are installed: @@ -62,14 +59,13 @@ ###################################################################### # Most of the imports are needed for building the PyTorch model. Only the -# last imports are for Ray Tune. +# last few are specific to Ray Tune. # # Data loaders # ------------ # -# We wrap the data loaders in their own function and pass a global data -# directory. This way we can share a data directory between different -# trials. +# We wrap the data loaders in a function and pass a global data directory. +# This allows us to share a data directory across different trials. def load_data(data_dir="./data"): transform = transforms.Compose( @@ -90,8 +86,8 @@ def load_data(data_dir="./data"): # Configurable neural network # --------------------------- # -# We can only tune those parameters that are configurable. In this -# example, we can specify the layer sizes of the fully connected layers: +# We can only tune parameters that are configurable. In this example, we +# specify the layer sizes of the fully connected layers: class Net(nn.Module): def __init__(self, l1=120, l2=84): @@ -121,14 +117,14 @@ def forward(self, x): # documentation `__. # # We wrap the training script in a function -# ``train_cifar(config, data_dir=None)``. The ``config`` parameter will -# receive the hyperparameters we would like to train with. The -# ``data_dir`` specifies the directory where we load and store the data, -# so that multiple runs can share the same data source. This is especially -# useful in cluster environments, where you can mount a shared storage -# (e.g. NFS) to this directory so that the data is not downloaded to each +# ``train_cifar(config, data_dir=None)``. The ``config`` parameter +# receives the hyperparameters we want to train with. The ``data_dir`` +# specifies the directory where we load and store the data, allowing +# multiple runs to share the same data source. This is especially useful +# in cluster environments where you can mount a shared storage (e.g. NFS) +# to this directory, preventing the data from being downloaded to each # node separately. We also load the model and optimizer state at the start -# of the run, if a checkpoint is provided. Further down in this tutorial +# of the run if a checkpoint is provided. Further down in this tutorial, # you will find information on how to save the checkpoint and what it is # used for. # @@ -175,9 +171,9 @@ def forward(self, x): # net = nn.DataParallel(net) # net.to(device) # -# By using a ``device`` variable we make sure that training also works -# when we have no GPUs available. 
PyTorch requires us to send our data to -# the GPU memory explicitly, like this: +# By using a ``device`` variable, we ensure that training works even +# without a GPU. PyTorch requires us to send our data to the GPU memory +# explicitly: # # .. code-block:: python # @@ -194,7 +190,9 @@ def forward(self, x): # Communicating with Ray Tune # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# The most interesting part is the communication with Ray Tune: +# The most interesting part is the communication with Ray Tune. As you’ll +# see, integrating Ray Tune into your training code requires only a few +# additional lines: # # .. code-block:: python # @@ -215,18 +213,27 @@ def forward(self, x): # # Here we first save a checkpoint and then report some metrics back to Ray # Tune. Specifically, we send the validation loss and accuracy back to Ray -# Tune. Ray Tune can then use these metrics to decide which hyperparameter -# configuration lead to the best results. These metrics can also be used -# to stop bad performing trials early in order to avoid wasting resources -# on those trials. +# Tune. Ray Tune uses these metrics to determine the best hyperparameter +# configuration and to stop underperforming trials early, saving +# resources. # # The checkpoint saving is optional, however, it is necessary if we wanted # to use advanced schedulers like `Population Based # Training `__. -# Also, by saving the checkpoint we can later load the trained models and -# validate them on a test set. Lastly, saving checkpoints is useful for -# fault tolerance, and it allows us to interrupt training and continue -# training later. +# Saving the checkpoint also allows us to later load the trained models +# for validation on a test set. Lastly, it provides fault tolerance, +# enabling us to pause and resume training. +# +# To summarize, integrating Ray Tune into your PyTorch training requires +# just a few key additions: +# +# - ``tune.report()`` to report metrics (and optionally checkpoints) to +# Ray Tune +# - ``tune.get_checkpoint()`` to load a model from a checkpoint +# - ``Checkpoint.from_directory()`` to create a checkpoint object from +# saved state +# +# The rest of your training code remains standard PyTorch! # # Full training function # ~~~~~~~~~~~~~~~~~~~~~~ @@ -246,6 +253,7 @@ def train_cifar(config, data_dir=None): criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) + # Load checkpoint if resuming training checkpoint = tune.get_checkpoint() if checkpoint: with checkpoint.as_directory() as checkpoint_dir: @@ -317,6 +325,7 @@ def train_cifar(config, data_dir=None): val_loss += loss.cpu().numpy() val_steps += 1 + # Save checkpoint and report metrics checkpoint_data = { "epoch": epoch, "net_state_dict": net.state_dict(), @@ -331,7 +340,7 @@ def train_cifar(config, data_dir=None): {"loss": val_loss / val_steps, "accuracy": correct / total}, checkpoint=checkpoint, ) - + print("Finished Training") ###################################################################### @@ -390,11 +399,21 @@ def test_accuracy(net, device="cpu"): # 0.0001 and 0.1. Lastly, the batch size is a choice between 2, 4, 8, and # 16. # -# At each trial, Ray Tune will now randomly sample a combination of -# parameters from these search spaces. It will then train a number of -# models in parallel and find the best performing one among these. We also -# use the ``ASHAScheduler`` which will terminate bad performing trials -# early. 
+# For each trial, Ray Tune samples a combination of parameters from these +# search spaces according to the search space configuration and search +# strategy. It then trains multiple models in parallel to identify the +# best performing one. +# +# By default, Ray Tune uses random search to pick the next hyperparameter +# configuration to try. However, Ray Tune also provides more sophisticated +# search algorithms that can more efficiently navigate the search space, +# such as +# `Optuna `__, +# `HyperOpt `__, +# and `Bayesian +# Optimization `__. +# +# We use the ``ASHAScheduler`` to terminate underperforming trials early. # # We wrap the ``train_cifar`` function with ``functools.partial`` to set # the constant ``data_dir`` parameter. We can also tell Ray Tune what @@ -423,20 +442,21 @@ def test_accuracy(net, device="cpu"): # You can specify the number of CPUs, which are then available e.g. to # increase the ``num_workers`` of the PyTorch ``DataLoader`` instances. # The selected number of GPUs are made visible to PyTorch in each trial. -# Trials do not have access to GPUs that haven’t been requested for them - -# so you don’t have to care about two trials using the same set of -# resources. +# Trials do not have access to GPUs that haven’t been requested, so you +# don’t need to worry about resource contention. # -# Here we can also specify fractional GPUs, so something like -# ``gpus_per_trial=0.5`` is completely valid. The trials will then share -# GPUs among each other. You just have to make sure that the models still -# fit in the GPU memory. +# You can also specify fractional GPUs (e.g., ``gpus_per_trial=0.5``), +# which allows trials to share a GPU. Just ensure that the models fit +# within the GPU memory. # # After training the models, we will find the best performing one and load # the trained network from the checkpoint file. We then obtain the test # set accuracy and report everything by printing. # -# The full main function looks like this: +# The full main function looks like this. Note that the +# ``if __name__ == "__main__":`` block is configured for a quick run (1 +# trial, 1 epoch, CPU only) to verify that everything works. You should +# increase these values to perform an actual hyperparameter tuning search. def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): print("Starting hyperparameter tuning.") @@ -495,7 +515,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): if __name__ == "__main__": - # You can change the number of GPUs per trial here: + # Set the number of trials, epochs, and GPUs per trial here: main(num_trials=1, max_num_epochs=1, gpus_per_trial=0) ###################################################################### @@ -524,8 +544,8 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # Best trial final validation accuracy: 0.4761 # Best trial test set accuracy: 0.4737 # -# Most trials have been stopped early in order to avoid wasting resources. -# The best performing trial achieved a validation accuracy of about 47%, +# Most trials were stopped early to conserve resources. The best +# performing trial achieved a validation accuracy of approximately 47%, # which could be confirmed on the test set. # # So that’s it! You can now tune the parameters of your PyTorch models. 
From 64bc12eb09f3f50da9160eeafa460c1685276174 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 9 Dec 2025 16:50:30 -0800 Subject: [PATCH 07/20] Use the actual CIFAR10 normalization values --- beginner_source/hyperparameter_tuning_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index c8dac6ef9c3..944a460c36e 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -69,7 +69,7 @@ def load_data(data_dir="./data"): transform = transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + [transforms.ToTensor(), transforms.Normalize((0.4914, 0.48216, 0.44653), (0.2022, 0.19932, 0.20086))] ) trainset = torchvision.datasets.CIFAR10( From 1f11769e18186b20afb5d933dfb2e9ed31c6cc31 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 9 Dec 2025 20:36:43 -0800 Subject: [PATCH 08/20] polish --- .../hyperparameter_tuning_tutorial.py | 110 ++++++++---------- 1 file changed, 47 insertions(+), 63 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 944a460c36e..203cd6d7dd2 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -7,35 +7,25 @@ different learning rate or changing a network layer size can dramatically impact model performance. -Fortunately, there are tools that help with finding the best combination -of parameters. `Ray Tune `__ is -an industry standard tool for distributed hyperparameter tuning. Ray -Tune includes the latest hyperparameter search algorithms, integrates -with various analysis libraries, and natively supports distributed -training through `Ray’s distributed machine learning -engine `__. +This page shows how to integrate `Ray +Tune `__ into your PyTorch +training workflow for distributed hyperparameter tuning. It extends the +PyTorch tutorial for training a CIFAR10 image classifier in the `CIFAR10 +tutorial (PyTorch +documentation) `__. -In this tutorial, we will show you how to integrate Ray Tune into your -PyTorch training workflow. We will extend `this tutorial from the -PyTorch -documentation `__ -for training a CIFAR10 image classifier. +Only minor modifications are needed. Specifically, this example wraps +data loading and training in functions, makes some network parameters +configurable, adds optional checkpointing, and defines the search space +for model tuning. -We only need to make minor modifications: +To run this tutorial, install the following prerequisites: -1. wrap data loading and training in functions, -2. make some network parameters configurable, -3. add checkpointing (optional), -4. define the search space for the model tuning +- ``ray[tune]`` – Distributed hyperparameter tuning library +- ``torchvision`` – Data transforms for computer vision datasets -To run this tutorial, please make sure the following packages are -installed: - -- ``ray[tune]``: Distributed hyperparameter tuning library -- ``torchvision``: For the data transformers - -Setup / Imports ---------------- +Setup and imports +----------------- Let’s start with the imports: @@ -86,8 +76,8 @@ def load_data(data_dir="./data"): # Configurable neural network # --------------------------- # -# We can only tune parameters that are configurable. 
In this example, we -# specify the layer sizes of the fully connected layers: +# In this example, we specify the layer sizes of the fully connected +# layers. class Net(nn.Module): def __init__(self, l1=120, l2=84): @@ -109,24 +99,23 @@ def forward(self, x): return x ###################################################################### -# The train function -# ------------------ +# Train function +# -------------- # # Now it gets interesting, because we introduce some changes to the -# example `from the PyTorch -# documentation `__. +# example from the `CIFAR10 tutorial (PyTorch +# documentation) `__. # # We wrap the training script in a function # ``train_cifar(config, data_dir=None)``. The ``config`` parameter # receives the hyperparameters we want to train with. The ``data_dir`` # specifies the directory where we load and store the data, allowing # multiple runs to share the same data source. This is especially useful -# in cluster environments where you can mount a shared storage (e.g. NFS) -# to this directory, preventing the data from being downloaded to each -# node separately. We also load the model and optimizer state at the start -# of the run if a checkpoint is provided. Further down in this tutorial, -# you will find information on how to save the checkpoint and what it is -# used for. +# in cluster environments where you can mount shared storage (for example +# NFS), preventing the data from being downloaded to each node separately. +# We also load the model and optimizer state at the start of the run if a +# checkpoint is provided. Further down in this tutorial, you will find +# information on how to save the checkpoint and what it is used for. # # .. code-block:: python # @@ -158,9 +147,9 @@ def forward(self, x): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # Image classification benefits largely from GPUs. Luckily, we can -# continue to use PyTorch’s abstractions in Ray Tune. Thus, we can wrap -# our model in ``nn.DataParallel`` to support data parallel training on -# multiple GPUs: +# continue to use PyTorch’s tools in Ray Tune. Thus, we can wrap our model +# in ``nn.DataParallel`` to support data parallel training on multiple +# GPUs: # # .. code-block:: python # @@ -185,7 +174,7 @@ def forward(self, x): # GPUs. Notably, Ray also supports `fractional # GPUs `__ # so we can share GPUs among trials, as long as the model still fits on -# the GPU memory. We’ll come back to that later. +# the GPU memory. We will return to that later. # # Communicating with Ray Tune # ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -225,15 +214,11 @@ def forward(self, x): # enabling us to pause and resume training. # # To summarize, integrating Ray Tune into your PyTorch training requires -# just a few key additions: -# -# - ``tune.report()`` to report metrics (and optionally checkpoints) to -# Ray Tune -# - ``tune.get_checkpoint()`` to load a model from a checkpoint -# - ``Checkpoint.from_directory()`` to create a checkpoint object from -# saved state -# -# The rest of your training code remains standard PyTorch! +# just a few key additions: use ``tune.report()`` to report metrics (and +# optionally checkpoints) to Ray Tune, ``tune.get_checkpoint()`` to load a +# model from a checkpoint, and ``Checkpoint.from_directory()`` to create a +# checkpoint object from saved state. The rest of your training code +# remains standard PyTorch! 
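#
# If a particular experiment does not need checkpointing, the reporting call
# can be reduced to metrics only, since the ``checkpoint`` argument is
# optional. A minimal sketch, assuming the same validation variables as in
# the training function above:
#
# .. code-block:: python
#
#    # Metrics-only reporting; no checkpoint is attached to this result.
#    tune.report({"loss": val_loss / val_steps, "accuracy": correct / total})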
# # Full training function # ~~~~~~~~~~~~~~~~~~~~~~ @@ -351,7 +336,7 @@ def train_cifar(config, data_dir=None): # ----------------- # # Commonly the performance of a machine learning model is tested on a -# hold-out test set with data that has not been used for training the +# held-out test set with data that has not been used for training the # model. We also wrap this in a function: def test_accuracy(net, device="cpu"): @@ -375,11 +360,11 @@ def test_accuracy(net, device="cpu"): return correct / total ###################################################################### -# The function also expects a ``device`` parameter, so we can do the test +# The function also expects a ``device`` parameter so we can do the test # set validation on a GPU. # -# Configuring the search space -# ---------------------------- +# Search space configuration +# -------------------------- # # Lastly, we need to define Ray Tune’s search space. Here is an example: # @@ -394,10 +379,9 @@ def test_accuracy(net, device="cpu"): # # The ``tune.choice()`` accepts a list of values that are uniformly # sampled from. In this example, the ``l1`` and ``l2`` parameters should -# be powers of 2 between 4 and 256, so either 4, 8, 16, 32, 64, 128, or -# 256. The ``lr`` (learning rate) should be uniformly sampled between -# 0.0001 and 0.1. Lastly, the batch size is a choice between 2, 4, 8, and -# 16. +# be powers of 2 between 1 and 256: 1, 2, 4, 8, 16, 32, 64, 128, or 256. +# The ``lr`` (learning rate) should be uniformly sampled between 0.0001 +# and 0.1. Lastly, the batch size is a choice between 2, 4, 8, and 16. # # For each trial, Ray Tune samples a combination of parameters from these # search spaces according to the search space configuration and search @@ -439,13 +423,13 @@ def test_accuracy(net, device="cpu"): # ) # results = tuner.fit() # -# You can specify the number of CPUs, which are then available e.g. to +# Specify the number of CPUs, which are then available, for example to # increase the ``num_workers`` of the PyTorch ``DataLoader`` instances. # The selected number of GPUs are made visible to PyTorch in each trial. -# Trials do not have access to GPUs that haven’t been requested, so you +# Trials do not have access to GPUs that have not been requested, so you # don’t need to worry about resource contention. # -# You can also specify fractional GPUs (e.g., ``gpus_per_trial=0.5``), +# You can specify fractional GPUs (for example, ``gpus_per_trial=0.5``), # which allows trials to share a GPU. Just ensure that the models fit # within the GPU memory. # @@ -519,7 +503,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): main(num_trials=1, max_num_epochs=1, gpus_per_trial=0) ###################################################################### -# If you run the code, an example output could look like this: +# Your output will look something like this: # # .. code-block:: bash # @@ -548,4 +532,4 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # performing trial achieved a validation accuracy of approximately 47%, # which could be confirmed on the test set. # -# So that’s it! You can now tune the parameters of your PyTorch models. +# You can now tune the parameters of your PyTorch models. 
From e3604484eebd9c32e64e50936bebb7d64b05b0bc Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Wed, 10 Dec 2025 18:58:44 -0800 Subject: [PATCH 09/20] finalize the hyperparameter tuning tutorial --- .../hyperparameter_tuning_tutorial.py | 255 +++++++++++------- ecosystem.rst | 2 +- index.rst | 2 +- 3 files changed, 160 insertions(+), 99 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 203cd6d7dd2..19c939020b8 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -2,34 +2,38 @@ Hyperparameter tuning with Ray Tune =================================== -Hyperparameter tuning can make the difference between an average model -and a highly accurate one. Often, simple decisions like choosing a -different learning rate or changing a network layer size can -dramatically impact model performance. - -This page shows how to integrate `Ray -Tune `__ into your PyTorch -training workflow for distributed hyperparameter tuning. It extends the -PyTorch tutorial for training a CIFAR10 image classifier in the `CIFAR10 -tutorial (PyTorch +This tutorial shows how to integrate Ray Tune into your PyTorch training +workflow to perform scalable and efficient hyperparameter tuning. + +`Ray `__, a project of the +PyTorch Foundation, is an open-source unified framework for scaling AI +and Python applications. It helps run distributed workloads by handling +the complexity of distributed computing. `Ray +Tune `__ is a library +built on Ray for hyperparameter tuning that enables you to scale a +hyperparameter sweep from your machine to a large cluster with no code +changes. + +This tutorial extends the PyTorch tutorial for training a CIFAR10 image +classifier in the `CIFAR10 tutorial (PyTorch documentation) `__. +Only minor modifications are needed to adapt the PyTorch tutorial for +Ray Tune. Specifically, this tutorial wraps the data loading and +training in functions, makes some network parameters configurable, adds +optional checkpointing, and defines the search space for model tuning. -Only minor modifications are needed. Specifically, this example wraps -data loading and training in functions, makes some network parameters -configurable, adds optional checkpointing, and defines the search space -for model tuning. +Setup +----- -To run this tutorial, install the following prerequisites: +To run this tutorial, install the dependencies: -- ``ray[tune]`` – Distributed hyperparameter tuning library -- ``torchvision`` – Data transforms for computer vision datasets - -Setup and imports ------------------ +""" -Let’s start with the imports: +# %%bash +# pip install "ray[tune]" torchvision -""" +###################################################################### +# Then start with the imports: from functools import partial import os @@ -42,20 +46,18 @@ from torch.utils.data import random_split import torchvision import torchvision.transforms as transforms +# New: imports for Ray Tune import ray from ray import tune from ray.tune import Checkpoint from ray.tune.schedulers import ASHAScheduler ###################################################################### -# Most of the imports are needed for building the PyTorch model. Only the -# last few are specific to Ray Tune. 
-# -# Data loaders -# ------------ +# How to use PyTorch data loaders with Ray Tune +# --------------------------------------------- # -# We wrap the data loaders in a function and pass a global data directory. -# This allows us to share a data directory across different trials. +# Wrap the data loaders in a constructor function. Pass a global data +# directory here to reuse the dataset across different trials. def load_data(data_dir="./data"): transform = transforms.Compose( @@ -73,15 +75,15 @@ def load_data(data_dir="./data"): return trainset, testset ###################################################################### -# Configurable neural network -# --------------------------- +# Configure the hyperparameters +# ----------------------------- # # In this example, we specify the layer sizes of the fully connected # layers. class Net(nn.Module): def __init__(self, l1=120, l2=84): - super(Net, self).__init__() + super().__init__() self.conv1 = nn.Conv2d(3, 6, 5) self.pool = nn.MaxPool2d(2, 2) self.conv2 = nn.Conv2d(6, 16, 5) @@ -99,12 +101,12 @@ def forward(self, x): return x ###################################################################### -# Train function -# -------------- +# Use a train function with Ray Tune +# ---------------------------------- # # Now it gets interesting, because we introduce some changes to the -# example from the `CIFAR10 tutorial (PyTorch -# documentation) `__. +# example `from the PyTorch +# documentation `__. # # We wrap the training script in a function # ``train_cifar(config, data_dir=None)``. The ``config`` parameter @@ -112,10 +114,10 @@ def forward(self, x): # specifies the directory where we load and store the data, allowing # multiple runs to share the same data source. This is especially useful # in cluster environments where you can mount shared storage (for example -# NFS), preventing the data from being downloaded to each node separately. +# NFS) to prevent the data from being downloaded to each node separately. # We also load the model and optimizer state at the start of the run if a # checkpoint is provided. Further down in this tutorial, you will find -# information on how to save the checkpoint and what it is used for. +# information on how to save the checkpoint and how it is used. # # .. code-block:: python # @@ -143,12 +145,12 @@ def forward(self, x): # the remaining 20%. The batch sizes with which we iterate through the # training and test sets are configurable as well. # -# Adding (multi) GPU support with DataParallel -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Add multi-GPU support with DataParallel +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# Image classification benefits largely from GPUs. Luckily, we can -# continue to use PyTorch’s tools in Ray Tune. Thus, we can wrap our model -# in ``nn.DataParallel`` to support data parallel training on multiple +# Image classification benefits largely from GPUs. Luckily, you can +# continue to use PyTorch tools in Ray Tune. Thus, you can wrap the model +# in ``nn.DataParallel`` to support data-parallel training on multiple # GPUs: # # .. code-block:: python @@ -206,7 +208,7 @@ def forward(self, x): # configuration and to stop underperforming trials early, saving # resources. # -# The checkpoint saving is optional, however, it is necessary if we wanted +# The checkpoint saving is optional. However, it is necessary if we wanted # to use advanced schedulers like `Population Based # Training `__. 
# Saving the checkpoint also allows us to later load the trained models @@ -218,7 +220,7 @@ def forward(self, x): # optionally checkpoints) to Ray Tune, ``tune.get_checkpoint()`` to load a # model from a checkpoint, and ``Checkpoint.from_directory()`` to create a # checkpoint object from saved state. The rest of your training code -# remains standard PyTorch! +# remains standard PyTorch. # # Full training function # ~~~~~~~~~~~~~~~~~~~~~~ @@ -332,8 +334,8 @@ def train_cifar(config, data_dir=None): # As you can see, most of the code is adapted directly from the original # example. # -# Test set accuracy -# ----------------- +# Compute test set accuracy +# ------------------------- # # Commonly the performance of a machine learning model is tested on a # held-out test set with data that has not been used for training the @@ -360,58 +362,95 @@ def test_accuracy(net, device="cpu"): return correct / total ###################################################################### -# The function also expects a ``device`` parameter so we can do the test +# The function also expects a ``device`` parameter so you can run the test # set validation on a GPU. # -# Search space configuration +# Configure the search space # -------------------------- # -# Lastly, we need to define Ray Tune’s search space. Here is an example: +# Lastly, we need to define Ray Tune’s search space. Ray Tune offers a +# variety of `search space +# distributions `__ +# to suit different parameter types: ``loguniform``, ``uniform``, +# ``choice``, ``randint``, ``grid``, and more. It also lets you express +# complex dependencies between parameters with `conditional search +# spaces `__. +# +# Here is an example: # # .. code-block:: python # # config = { -# "l1": tune.choice([2 ** i for i in range(9)]), -# "l2": tune.choice([2 ** i for i in range(9)]), +# "l1": tune.choice([2**i for i in range(9)]), +# "l2": tune.choice([2**i for i in range(9)]), # "lr": tune.loguniform(1e-4, 1e-1), -# "batch_size": tune.choice([2, 4, 8, 16]) +# "batch_size": tune.choice([2, 4, 8, 16]), # } # # The ``tune.choice()`` accepts a list of values that are uniformly -# sampled from. In this example, the ``l1`` and ``l2`` parameters should -# be powers of 2 between 1 and 256: 1, 2, 4, 8, 16, 32, 64, 128, or 256. -# The ``lr`` (learning rate) should be uniformly sampled between 0.0001 -# and 0.1. Lastly, the batch size is a choice between 2, 4, 8, and 16. -# -# For each trial, Ray Tune samples a combination of parameters from these -# search spaces according to the search space configuration and search -# strategy. It then trains multiple models in parallel to identify the -# best performing one. -# -# By default, Ray Tune uses random search to pick the next hyperparameter -# configuration to try. However, Ray Tune also provides more sophisticated -# search algorithms that can more efficiently navigate the search space, -# such as -# `Optuna `__, -# `HyperOpt `__, -# and `Bayesian -# Optimization `__. -# -# We use the ``ASHAScheduler`` to terminate underperforming trials early. -# -# We wrap the ``train_cifar`` function with ``functools.partial`` to set -# the constant ``data_dir`` parameter. We can also tell Ray Tune what -# resources should be available for each trial using +# sampled from. In this example, the ``l1`` and ``l2`` parameter values +# will be powers of 2 between 1 and 256. The learning rate is sampled on a +# log scale between 0.0001 and 0.1. 
Sampling on a log scale ensures that +# the search space is explored efficiently across different magnitudes. +# +# Smarter sampling and scheduling +# ------------------------------- +# +# To make the hyperparameter search process efficient, Ray Tune provides +# two main controls: +# +# 1. It can intelligently pick the next set of hyperparameters to test +# based on previous results using `advanced search +# algorithms `__ +# such as +# `Optuna `__ +# or +# ```bayesopt`` `__, +# instead of relying only on random or grid search. +# 2. It can detect underperforming trials and stop them early using +# `schedulers `__, +# enabling you to explore the parameter space more on the same compute +# budget. +# +# In this tutorial, we use the ``ASHAScheduler``, which aggressively +# terminates low-performing trials to save computational resources. +# +# Configure the resources +# ----------------------- +# +# Tell Ray Tune what resources should be available for each trial using # ``tune.with_resources``: # # .. code-block:: python # -# gpus_per_trial = 2 -# # ... +# tune.with_resources( +# partial(train_cifar, data_dir=data_dir), +# resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} +# ) +# +# This tells Ray Tune to allocate ``cpus_per_trial`` CPUs and +# ``gpus_per_trial`` GPUs for each trial. Ray Tune automatically manages +# the placement of these trials and ensures they are isolated, so you +# don’t need to manually assign GPUs to processes. +# +# For example, if you are running this experiment on a cluster of 20 +# machines, each with 8 GPUs, you can set ``gpus_per_trial = 0.5`` to +# schedule 2 concurrent trials per GPU. This configuration runs 320 trials +# in parallel across the cluster. +# +# Putting it together +# ------------------- +# +# The Ray Tune API is designed to be modular and composable: you pass your +# configurations to the ``tune.Tuner`` class to create a tuner object, +# then execute ``tuner.fit()`` to start training: +# +# .. code-block:: python +# # tuner = tune.Tuner( # tune.with_resources( # partial(train_cifar, data_dir=data_dir), -# resources={"cpu": 8, "gpu": gpus_per_trial} +# resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} # ), # tune_config=tune.TuneConfig( # metric="loss", @@ -423,24 +462,9 @@ def test_accuracy(net, device="cpu"): # ) # results = tuner.fit() # -# Specify the number of CPUs, which are then available, for example to -# increase the ``num_workers`` of the PyTorch ``DataLoader`` instances. -# The selected number of GPUs are made visible to PyTorch in each trial. -# Trials do not have access to GPUs that have not been requested, so you -# don’t need to worry about resource contention. -# -# You can specify fractional GPUs (for example, ``gpus_per_trial=0.5``), -# which allows trials to share a GPU. Just ensure that the models fit -# within the GPU memory. -# # After training the models, we will find the best performing one and load # the trained network from the checkpoint file. We then obtain the test -# set accuracy and report everything by printing. -# -# The full main function looks like this. Note that the -# ``if __name__ == "__main__":`` block is configured for a quick run (1 -# trial, 1 epoch, CPU only) to verify that everything works. You should -# increase these values to perform an actual hyperparameter tuning search. +# set accuracy and report the results. 
def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): print("Starting hyperparameter tuning.") @@ -500,10 +524,11 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): if __name__ == "__main__": # Set the number of trials, epochs, and GPUs per trial here: + # The following configuration is for a quick run (1 trial, 1 epoch, CPU only) for demonstration purposes. main(num_trials=1, max_num_epochs=1, gpus_per_trial=0) ###################################################################### -# Your output will look something like this: +# Your Ray Tune trial summary output will look something like this: # # .. code-block:: bash # @@ -533,3 +558,39 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # which could be confirmed on the test set. # # You can now tune the parameters of your PyTorch models. +# +# Observability +# ------------- +# +# When running large-scale experiments, monitoring is crucial. Ray +# provides a +# `Dashboard `__ +# that lets you view the status of your trials, check cluster resource +# utilization, and inspect logs in real-time. +# +# For debugging, Ray also offers `Distributed +# Debugging `__ +# tools that let you attach a debugger to running trials across the +# cluster. +# +# Conclusion +# ---------- +# +# In this tutorial, you learned how to tune the hyperparameters of a +# PyTorch model using Ray Tune. You saw how to integrate Ray Tune into +# your PyTorch training loop, define a search space for your +# hyperparameters, use an efficient scheduler like ASHA to terminate bad +# trials early, save checkpoints and report metrics to Ray Tune, and run +# the hyperparameter search and analyze the results. +# +# Ray Tune makes it easy to scale your experiments from a single machine +# to a large cluster, helping you find the best model configuration +# efficiently. +# +# Further reading +# --------------- +# +# - `Ray Tune +# documentation `__ +# - `Ray Tune +# examples `__ diff --git a/ecosystem.rst b/ecosystem.rst index da2a926851a..ddd6c505561 100644 --- a/ecosystem.rst +++ b/ecosystem.rst @@ -33,7 +33,7 @@ to production deployment. :card_description: Learn how to use Ray Tune to find the best performing set of hyperparameters for your model. :image: _static/img/ray-tune.png :link: beginner/hyperparameter_tuning_tutorial.html - :tags: Model-Optimization,Best-Practice,Ecosystem + :tags: Model-Optimization,Best-Practice,Ecosystem,Ray-Distributed,Parallel-and-Distributed-Training .. customcarditem:: :header: Multi-Objective Neural Architecture Search with Ax diff --git a/index.rst b/index.rst index 5a5e80abfbb..f9a76296750 100644 --- a/index.rst +++ b/index.rst @@ -493,7 +493,7 @@ Welcome to PyTorch Tutorials :card_description: Learn how to use Ray Tune to find the best performing set of hyperparameters for your model. :image: _static/img/ray-tune.png :link: beginner/hyperparameter_tuning_tutorial.html - :tags: Model-Optimization,Best-Practice + :tags: Model-Optimization,Best-Practice,Ray-Distributed,Parallel-and-Distributed-Training .. 
customcarditem:: :header: Parametrizations Tutorial From 90de3a93e41ad105343e7998d3c5bc2b09f52696 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Thu, 11 Dec 2025 19:21:56 -0800 Subject: [PATCH 10/20] add author --- beginner_source/hyperparameter_tuning_tutorial.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 19c939020b8..c1798cca8ac 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -2,6 +2,8 @@ Hyperparameter tuning with Ray Tune =================================== +**Author:** `Ricardo Decal `_ + This tutorial shows how to integrate Ray Tune into your PyTorch training workflow to perform scalable and efficient hyperparameter tuning. From 0ab1f75ecc975e03a180c6f798f3c1fc50ee5fe5 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Mon, 15 Dec 2025 14:42:01 -0800 Subject: [PATCH 11/20] Ignore more IDE stuff --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ea478ca180d..9722d93505a 100644 --- a/.gitignore +++ b/.gitignore @@ -126,8 +126,10 @@ cleanup.sh # PyTorch things *.pt -# VSCode +# IDEs *.vscode +.devtools/ +.cursor # pyspelling dictionary.dic From ce0bc81e5fd8acb5c10f49bae1aaf7a0de094010 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Mon, 15 Dec 2025 14:46:55 -0800 Subject: [PATCH 12/20] make linter happy --- .gitignore | 1 - beginner_source/hyperparameter_tuning_tutorial.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 9722d93505a..67d6fefc303 100644 --- a/.gitignore +++ b/.gitignore @@ -136,4 +136,3 @@ dictionary.dic # linters /.lintbin - diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index c1798cca8ac..dd76a3258bb 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -571,7 +571,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # utilization, and inspect logs in real-time. # # For debugging, Ray also offers `Distributed -# Debugging `__ +# Debugging `__ # tools that let you attach a debugger to running trials across the # cluster. # From 8c18e688835f495d0d31de81add0c575b487cb77 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Mon, 15 Dec 2025 14:58:03 -0800 Subject: [PATCH 13/20] PR feedback --- .../hyperparameter_tuning_tutorial.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index dd76a3258bb..ada184f42cd 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -62,6 +62,7 @@ # directory here to reuse the dataset across different trials. def load_data(data_dir="./data"): + # Mean and standard deviation of the CIFAR10 training subset. transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.4914, 0.48216, 0.44653), (0.2022, 0.19932, 0.20086))] ) @@ -157,12 +158,11 @@ def forward(self, x): # # .. 
code-block:: python # -# device = "cpu" # if torch.cuda.is_available(): -# device = "cuda:0" +# # Must move the model to CUDA before wrapping it with ``DataParallel`` +# net = net.to("cuda") # if torch.cuda.device_count() > 1: # net = nn.DataParallel(net) -# net.to(device) # # By using a ``device`` variable, we ensure that training works even # without a GPU. PyTorch requires us to send our data to the GPU memory @@ -232,12 +232,9 @@ def forward(self, x): def train_cifar(config, data_dir=None): net = Net(config["l1"], config["l2"]) - device = "cpu" - if torch.cuda.is_available(): - device = "cuda:0" - if torch.cuda.device_count() > 1: - net = nn.DataParallel(net) - net.to(device) + net = net.to(config["device"]) + if torch.cuda.device_count() > 1: + net = nn.DataParallel(net) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) @@ -474,11 +471,13 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): data_dir = os.path.abspath("./data") load_data(data_dir) + device = "cuda" if torch.cuda.is_available() else "cpu" config = { "l1": tune.choice([2**i for i in range(9)]), "l2": tune.choice([2**i for i in range(9)]), "lr": tune.loguniform(1e-4, 1e-1), "batch_size": tune.choice([2, 4, 8, 16]), + "device": device, } scheduler = ASHAScheduler( max_t=max_num_epochs, @@ -507,12 +506,9 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): print(f"Best trial final validation accuracy: {best_result.metrics['accuracy']}") best_trained_model = Net(best_result.config["l1"], best_result.config["l2"]) - device = "cpu" - if torch.cuda.is_available(): - device = "cuda:0" - if gpus_per_trial > 1: - best_trained_model = nn.DataParallel(best_trained_model) - best_trained_model.to(device) + best_trained_model = best_trained_model.to(device) + if gpus_per_trial > 1: + best_trained_model = nn.DataParallel(best_trained_model) best_checkpoint = best_result.checkpoint with best_checkpoint.as_directory() as checkpoint_dir: From e1e0ea16ad0d989af4fd41bfcf91d6fcd7419d75 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Mon, 15 Dec 2025 15:00:44 -0800 Subject: [PATCH 14/20] PR feedback --- beginner_source/hyperparameter_tuning_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index ada184f42cd..062a8adda8d 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -143,10 +143,10 @@ def forward(self, x): # # optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) # -# We also split the training data into a training and validation subset. +# We also split the dataset into training and validation subsets. # We thus train on 80% of the data and calculate the validation loss on # the remaining 20%. The batch sizes with which we iterate through the -# training and test sets are configurable as well. +# training and test sets are configurable by Ray Tune. 
# # Add multi-GPU support with DataParallel # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From ffbb8cb00824d0b1c58c5331a370672c000b5609 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Mon, 15 Dec 2025 17:47:59 -0800 Subject: [PATCH 15/20] fix device loading --- beginner_source/hyperparameter_tuning_tutorial.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 062a8adda8d..c97bda8272f 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -231,8 +231,9 @@ def forward(self, x): def train_cifar(config, data_dir=None): net = Net(config["l1"], config["l2"]) + device = config["device"] - net = net.to(config["device"]) + net = net.to(device) if torch.cuda.device_count() > 1: net = nn.DataParallel(net) @@ -251,7 +252,7 @@ def train_cifar(config, data_dir=None): else: start_epoch = 0 - trainset, testset = load_data(data_dir) + trainset, _testset = load_data(data_dir) test_abs = int(len(trainset) * 0.8) train_subset, val_subset = random_split( @@ -341,7 +342,7 @@ def train_cifar(config, data_dir=None): # model. We also wrap this in a function: def test_accuracy(net, device="cpu"): - trainset, testset = load_data() + _trainset, testset = load_data() testloader = torch.utils.data.DataLoader( testset, batch_size=4, shuffle=False, num_workers=2 @@ -470,7 +471,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): ray.init() data_dir = os.path.abspath("./data") - load_data(data_dir) + load_data(data_dir) # Pre-download the dataset device = "cuda" if torch.cuda.is_available() else "cpu" config = { "l1": tune.choice([2**i for i in range(9)]), From 326952ec8201d29452d81077b713b092d18e0af3 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Mon, 15 Dec 2025 18:32:46 -0800 Subject: [PATCH 16/20] turn the pip code block to markdown section --- beginner_source/hyperparameter_tuning_tutorial.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index c97bda8272f..cc29461520f 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -29,10 +29,11 @@ To run this tutorial, install the dependencies: -""" +.. code-block:: bash + + pip install "ray[tune]" torchvision -# %%bash -# pip install "ray[tune]" torchvision +""" ###################################################################### # Then start with the imports: From a6b27bea7a6299395d0532777730b1a21ca8067a Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Mon, 15 Dec 2025 19:42:02 -0800 Subject: [PATCH 17/20] pr feedback and linting --- .../hyperparameter_tuning_tutorial.py | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index cc29461520f..89c8c6c6640 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -2,7 +2,7 @@ Hyperparameter tuning with Ray Tune =================================== -**Author:** `Ricardo Decal `_ +**Author:** `Ricardo Decal `__ This tutorial shows how to integrate Ray Tune into your PyTorch training workflow to perform scalable and efficient hyperparameter tuning. 
@@ -57,7 +57,7 @@ ###################################################################### # How to use PyTorch data loaders with Ray Tune -# --------------------------------------------- +# ============================================= # # Wrap the data loaders in a constructor function. Pass a global data # directory here to reuse the dataset across different trials. @@ -80,10 +80,11 @@ def load_data(data_dir="./data"): ###################################################################### # Configure the hyperparameters -# ----------------------------- +# ============================= # -# In this example, we specify the layer sizes of the fully connected -# layers. +# In this tutorial, we will tune the sizes of the fully connected layers +# and the learning rate. In order to do so, we need to expose the layer +# sizes and the learning rate as configurable parameters. class Net(nn.Module): def __init__(self, l1=120, l2=84): @@ -106,7 +107,7 @@ def forward(self, x): ###################################################################### # Use a train function with Ray Tune -# ---------------------------------- +# ================================== # # Now it gets interesting, because we introduce some changes to the # example `from the PyTorch @@ -144,13 +145,13 @@ def forward(self, x): # # optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) # -# We also split the dataset into training and validation subsets. -# We thus train on 80% of the data and calculate the validation loss on -# the remaining 20%. The batch sizes with which we iterate through the +# We also split the dataset into training and validation subsets. We thus +# train on 80% of the data and calculate the validation loss on the +# remaining 20%. The batch sizes with which we iterate through the # training and test sets are configurable by Ray Tune. # # Add multi-GPU support with DataParallel -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# --------------------------------------- # # Image classification benefits largely from GPUs. Luckily, you can # continue to use PyTorch tools in Ray Tune. Thus, you can wrap the model @@ -182,7 +183,7 @@ def forward(self, x): # the GPU memory. We will return to that later. # # Communicating with Ray Tune -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# --------------------------- # # The most interesting part is the communication with Ray Tune. As you’ll # see, integrating Ray Tune into your training code requires only a few @@ -226,7 +227,7 @@ def forward(self, x): # remains standard PyTorch. # # Full training function -# ~~~~~~~~~~~~~~~~~~~~~~ +# ---------------------- # # The full code example looks like this: @@ -336,7 +337,7 @@ def train_cifar(config, data_dir=None): # example. # # Compute test set accuracy -# ------------------------- +# ========================= # # Commonly the performance of a machine learning model is tested on a # held-out test set with data that has not been used for training the @@ -367,7 +368,7 @@ def test_accuracy(net, device="cpu"): # set validation on a GPU. # # Configure the search space -# -------------------------- +# ========================== # # Lastly, we need to define Ray Tune’s search space. Ray Tune offers a # variety of `search space @@ -395,7 +396,7 @@ def test_accuracy(net, device="cpu"): # the search space is explored efficiently across different magnitudes. 
# # Smarter sampling and scheduling -# ------------------------------- +# =============================== # # To make the hyperparameter search process efficient, Ray Tune provides # two main controls: @@ -406,7 +407,7 @@ def test_accuracy(net, device="cpu"): # such as # `Optuna `__ # or -# ```bayesopt`` `__, +# `BayesOpt `__, # instead of relying only on random or grid search. # 2. It can detect underperforming trials and stop them early using # `schedulers `__, @@ -417,7 +418,7 @@ def test_accuracy(net, device="cpu"): # terminates low-performing trials to save computational resources. # # Configure the resources -# ----------------------- +# ======================= # # Tell Ray Tune what resources should be available for each trial using # ``tune.with_resources``: @@ -436,11 +437,11 @@ def test_accuracy(net, device="cpu"): # # For example, if you are running this experiment on a cluster of 20 # machines, each with 8 GPUs, you can set ``gpus_per_trial = 0.5`` to -# schedule 2 concurrent trials per GPU. This configuration runs 320 trials -# in parallel across the cluster. +# schedule two concurrent trials per GPU. This configuration runs 320 +# trials in parallel across the cluster. # # Putting it together -# ------------------- +# =================== # # The Ray Tune API is designed to be modular and composable: you pass your # configurations to the ``tune.Tuner`` class to create a tuner object, @@ -560,7 +561,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # You can now tune the parameters of your PyTorch models. # # Observability -# ------------- +# ============= # # When running large-scale experiments, monitoring is crucial. Ray # provides a @@ -568,13 +569,12 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # that lets you view the status of your trials, check cluster resource # utilization, and inspect logs in real-time. # -# For debugging, Ray also offers `Distributed -# Debugging `__ -# tools that let you attach a debugger to running trials across the -# cluster. +# For debugging, Ray also offers `distributed debugging +# tools `__ +# that let you attach a debugger to running trials across the cluster. # # Conclusion -# ---------- +# ========== # # In this tutorial, you learned how to tune the hyperparameters of a # PyTorch model using Ray Tune. You saw how to integrate Ray Tune into @@ -588,7 +588,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # efficiently. # # Further reading -# --------------- +# =============== # # - `Ray Tune # documentation `__ From 60d7e3239c37f0793a06d9d14e9b8227aef66b42 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 16 Dec 2025 13:29:37 -0800 Subject: [PATCH 18/20] Restructure the hyperparameter tuning tutorial --- .../hyperparameter_tuning_tutorial.py | 463 +++++++++--------- 1 file changed, 232 insertions(+), 231 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 89c8c6c6640..3bae0fbc4bb 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -1,6 +1,6 @@ """ -Hyperparameter tuning with Ray Tune -=================================== +Hyperparameter tuning using Ray Tune +==================================== **Author:** `Ricardo Decal `__ @@ -8,21 +8,21 @@ workflow to perform scalable and efficient hyperparameter tuning. `Ray `__, a project of the -PyTorch Foundation, is an open-source unified framework for scaling AI -and Python applications. 
It helps run distributed workloads by handling -the complexity of distributed computing. `Ray +PyTorch Foundation, is an open source unified framework for scaling AI +and Python applications. It helps run distributed jobs by handling the +complexity of distributed computing. `Ray Tune `__ is a library built on Ray for hyperparameter tuning that enables you to scale a hyperparameter sweep from your machine to a large cluster with no code changes. -This tutorial extends the PyTorch tutorial for training a CIFAR10 image -classifier in the `CIFAR10 tutorial (PyTorch -documentation) `__. -Only minor modifications are needed to adapt the PyTorch tutorial for -Ray Tune. Specifically, this tutorial wraps the data loading and -training in functions, makes some network parameters configurable, adds -optional checkpointing, and defines the search space for model tuning. +This tutorial makes minor modifications to the `PyTorch tutorial for +training a CIFAR10 +classifier `__ +to adapt it for Ray Tune. Specifically, this tutorial wraps the data +loading and training in functions, defines a search space for model +tuning, exposes some parameters to make them configurable, adds optional +checkpointing, and supports multi-GPU training. Setup ----- @@ -56,11 +56,14 @@ from ray.tune.schedulers import ASHAScheduler ###################################################################### -# How to use PyTorch data loaders with Ray Tune -# ============================================= +# Data loading +# ============ # -# Wrap the data loaders in a constructor function. Pass a global data -# directory here to reuse the dataset across different trials. +# Wrap the data loaders in a constructor function. In this tutorial, a +# global data directory is passed to the function to enable reusing the +# dataset across different trials. In a cluster environment, you can use +# shared storage, such as network file systems, to prevent each node from +# downloading the data separately. def load_data(data_dir="./data"): # Mean and standard deviation of the CIFAR10 training subset. @@ -79,12 +82,13 @@ def load_data(data_dir="./data"): return trainset, testset ###################################################################### -# Configure the hyperparameters -# ============================= +# Model architecture +# ================== # -# In this tutorial, we will tune the sizes of the fully connected layers -# and the learning rate. In order to do so, we need to expose the layer -# sizes and the learning rate as configurable parameters. +# This tutorial searches for the best sizes for the fully connected layers +# and the learning rate. To enable this, the ``Net`` class exposes the +# layer sizes ``l1`` and ``l2`` as configurable parameters that Ray Tune +# can search over: class Net(nn.Module): def __init__(self, l1=120, l2=84): @@ -106,130 +110,45 @@ def forward(self, x): return x ###################################################################### -# Use a train function with Ray Tune -# ================================== -# -# Now it gets interesting, because we introduce some changes to the -# example `from the PyTorch -# documentation `__. -# -# We wrap the training script in a function -# ``train_cifar(config, data_dir=None)``. The ``config`` parameter -# receives the hyperparameters we want to train with. The ``data_dir`` -# specifies the directory where we load and store the data, allowing -# multiple runs to share the same data source. 
This is especially useful -# in cluster environments where you can mount shared storage (for example -# NFS) to prevent the data from being downloaded to each node separately. -# We also load the model and optimizer state at the start of the run if a -# checkpoint is provided. Further down in this tutorial, you will find -# information on how to save the checkpoint and how it is used. -# -# .. code-block:: python -# -# net = Net(config["l1"], config["l2"]) -# -# checkpoint = tune.get_checkpoint() -# if checkpoint: -# with checkpoint.as_directory() as checkpoint_dir: -# checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" -# checkpoint_state = torch.load(checkpoint_path) -# start_epoch = checkpoint_state["epoch"] -# net.load_state_dict(checkpoint_state["net_state_dict"]) -# optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) -# else: -# start_epoch = 0 -# -# The learning rate of the optimizer is made configurable, too: -# -# .. code-block:: python -# -# optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) -# -# We also split the dataset into training and validation subsets. We thus -# train on 80% of the data and calculate the validation loss on the -# remaining 20%. The batch sizes with which we iterate through the -# training and test sets are configurable by Ray Tune. -# -# Add multi-GPU support with DataParallel -# --------------------------------------- -# -# Image classification benefits largely from GPUs. Luckily, you can -# continue to use PyTorch tools in Ray Tune. Thus, you can wrap the model -# in ``nn.DataParallel`` to support data-parallel training on multiple -# GPUs: -# -# .. code-block:: python -# -# if torch.cuda.is_available(): -# # Must move the model to CUDA before wrapping it with ``DataParallel`` -# net = net.to("cuda") -# if torch.cuda.device_count() > 1: -# net = nn.DataParallel(net) -# -# By using a ``device`` variable, we ensure that training works even -# without a GPU. PyTorch requires us to send our data to the GPU memory -# explicitly: -# -# .. code-block:: python -# -# for i, data in enumerate(trainloader, 0): -# inputs, labels = data -# inputs, labels = inputs.to(device), labels.to(device) -# -# The code now supports training on CPUs, on a single GPU, and on multiple -# GPUs. Notably, Ray also supports `fractional -# GPUs `__ -# so we can share GPUs among trials, as long as the model still fits on -# the GPU memory. We will return to that later. +# Define the search space +# ======================= # -# Communicating with Ray Tune -# --------------------------- +# Next, define the hyperparameters to tune and how Ray Tune samples them. +# Ray Tune offers a variety of `search space +# distributions `__ +# to suit different parameter types: ``loguniform``, ``uniform``, +# ``choice``, ``randint``, ``grid``, and more. You can also express +# complex dependencies between parameters with `conditional search +# spaces `__ +# or sample from arbitrary functions. # -# The most interesting part is the communication with Ray Tune. As you’ll -# see, integrating Ray Tune into your training code requires only a few -# additional lines: +# Here is the search space for this tutorial: # # .. 
code-block:: python # -# checkpoint_data = { -# "epoch": epoch, -# "net_state_dict": net.state_dict(), -# "optimizer_state_dict": optimizer.state_dict(), +# config = { +# "l1": tune.choice([2**i for i in range(9)]), +# "l2": tune.choice([2**i for i in range(9)]), +# "lr": tune.loguniform(1e-4, 1e-1), +# "batch_size": tune.choice([2, 4, 8, 16]), # } -# with tempfile.TemporaryDirectory() as checkpoint_dir: -# checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" -# torch.save(checkpoint_data, checkpoint_path) # -# checkpoint = Checkpoint.from_directory(checkpoint_dir) -# tune.report( -# {"loss": val_loss / val_steps, "accuracy": correct / total}, -# checkpoint=checkpoint, -# ) +# The ``tune.choice()`` accepts a list of values that are uniformly +# sampled from. In this example, the ``l1`` and ``l2`` parameter values +# are powers of 2 between 1 and 256, and the learning rate samples on a +# log scale between 0.0001 and 0.1. Sampling on a log scale enables +# exploration across a range of magnitudes on a relative scale, rather +# than an absolute scale. # -# Here we first save a checkpoint and then report some metrics back to Ray -# Tune. Specifically, we send the validation loss and accuracy back to Ray -# Tune. Ray Tune uses these metrics to determine the best hyperparameter -# configuration and to stop underperforming trials early, saving -# resources. +# Training function +# ================= # -# The checkpoint saving is optional. However, it is necessary if we wanted -# to use advanced schedulers like `Population Based -# Training `__. -# Saving the checkpoint also allows us to later load the trained models -# for validation on a test set. Lastly, it provides fault tolerance, -# enabling us to pause and resume training. -# -# To summarize, integrating Ray Tune into your PyTorch training requires -# just a few key additions: use ``tune.report()`` to report metrics (and -# optionally checkpoints) to Ray Tune, ``tune.get_checkpoint()`` to load a -# model from a checkpoint, and ``Checkpoint.from_directory()`` to create a -# checkpoint object from saved state. The rest of your training code -# remains standard PyTorch. -# -# Full training function -# ---------------------- +# Ray Tune requires a training function that accepts a configuration +# dictionary and runs the main training loop. As Ray Tune runs different +# trials, it updates the configuration dictionary for each trial. # -# The full code example looks like this: +# Here is the full training function, followed by explanations of the key +# Ray Tune integration points: def train_cifar(config, data_dir=None): net = Net(config["l1"], config["l2"]) @@ -333,18 +252,110 @@ def train_cifar(config, data_dir=None): print("Finished Training") ###################################################################### -# As you can see, most of the code is adapted directly from the original -# example. +# Key integration points +# ---------------------- +# +# Using hyperparameters from the configuration dictionary +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# Ray Tune updates the ``config`` dictionary with the hyperparameters for +# each trial. In this example, the model architecture and optimizer +# receive the hyperparameters from the ``config`` dictionary: # -# Compute test set accuracy -# ========================= +# .. 
code-block:: python +# +# net = Net(config["l1"], config["l2"]) +# optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) +# +# Reporting metrics and saving checkpoints +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# The most important integration is communicating with Ray Tune. Ray Tune +# uses the validation metrics to determine the best hyperparameter +# configuration and to stop underperforming trials early, saving +# resources. # -# Commonly the performance of a machine learning model is tested on a -# held-out test set with data that has not been used for training the -# model. We also wrap this in a function: +# Checkpointing enables you to later load the trained models, resume +# hyperparameter searches, and provides fault tolerance. It’s also +# required for some Ray Tune schedulers like `Population Based +# Training `__ +# that pause and resume trials during the search. +# +# This code from the training function loads model and optimizer state at +# the start if a checkpoint exists: +# +# .. code-block:: python +# +# checkpoint = tune.get_checkpoint() +# if checkpoint: +# with checkpoint.as_directory() as checkpoint_dir: +# checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" +# checkpoint_state = torch.load(checkpoint_path) +# start_epoch = checkpoint_state["epoch"] +# net.load_state_dict(checkpoint_state["net_state_dict"]) +# optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"]) +# +# At the end of each epoch, save a checkpoint and report the validation +# metrics: +# +# .. code-block:: python +# +# checkpoint_data = { +# "epoch": epoch, +# "net_state_dict": net.state_dict(), +# "optimizer_state_dict": optimizer.state_dict(), +# } +# with tempfile.TemporaryDirectory() as checkpoint_dir: +# checkpoint_path = Path(checkpoint_dir) / "checkpoint.pt" +# torch.save(checkpoint_data, checkpoint_path) +# +# checkpoint = Checkpoint.from_directory(checkpoint_dir) +# tune.report( +# {"loss": val_loss / val_steps, "accuracy": correct / total}, +# checkpoint=checkpoint, +# ) +# +# Ray Tune checkpointing supports local file systems, cloud storage, and +# distributed file systems. For more information, see the `Ray Tune +# storage +# documentation `__. +# +# Multi-GPU support +# ~~~~~~~~~~~~~~~~~ +# +# Image classification models can be greatly accelerated by using GPUs. +# The training function supports multi-GPU training by wrapping the model +# in ``nn.DataParallel``: +# +# .. code-block:: python +# +# if torch.cuda.device_count() > 1: +# net = nn.DataParallel(net) +# +# This training function supports training on CPUs, a single GPU, or +# multiple GPUs without code changes. Ray Tune also supports `fractional +# GPUs `__ +# so that one GPU can be shared among multiple trials, provided that the +# models, optimizers, and data batches fit into the GPU memory. +# +# Validation split +# ~~~~~~~~~~~~~~~~ +# +# The original CIFAR10 dataset only has train and test subsets. This is +# sufficient for training a single model, however for hyperparameter +# tuning a validation subset is required. The training function creates a +# validation subset by reserving 20% of the training subset. The test +# subset is used to evaluate the best model’s generalization error after +# the search completes. 
+# +# Evaluation function +# =================== +# +# After finding the optimal hyperparameters, test the model on a held-out +# test set to estimate the generalization error: -def test_accuracy(net, device="cpu"): - _trainset, testset = load_data() +def test_accuracy(net, device="cpu", data_dir=None): + _trainset, testset = load_data(data_dir) testloader = torch.utils.data.DataLoader( testset, batch_size=4, shuffle=False, num_workers=2 @@ -354,9 +365,9 @@ def test_accuracy(net, device="cpu"): total = 0 with torch.no_grad(): for data in testloader: - images, labels = data - images, labels = images.to(device), labels.to(device) - outputs = net(images) + image_batch, labels = data + image_batch, labels = image_batch.to(device), labels.to(device) + outputs = net(image_batch) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() @@ -364,109 +375,95 @@ def test_accuracy(net, device="cpu"): return correct / total ###################################################################### -# The function also expects a ``device`` parameter so you can run the test -# set validation on a GPU. -# -# Configure the search space +# Configure and run Ray Tune # ========================== # -# Lastly, we need to define Ray Tune’s search space. Ray Tune offers a -# variety of `search space -# distributions `__ -# to suit different parameter types: ``loguniform``, ``uniform``, -# ``choice``, ``randint``, ``grid``, and more. It also lets you express -# complex dependencies between parameters with `conditional search -# spaces `__. +# With the training and evaluation functions defined, configure Ray Tune +# to run the hyperparameter search. +# +# Scheduler for early stopping +# ---------------------------- # -# Here is an example: +# Ray Tune provides schedulers to improve the efficiency of the +# hyperparameter search by detecting underperforming trials and stopping +# them early. The ``ASHAScheduler`` uses the Asynchronous Successive +# Halving Algorithm (ASHA) to aggressively terminate low-performing +# trials: # # .. code-block:: python # -# config = { -# "l1": tune.choice([2**i for i in range(9)]), -# "l2": tune.choice([2**i for i in range(9)]), -# "lr": tune.loguniform(1e-4, 1e-1), -# "batch_size": tune.choice([2, 4, 8, 16]), -# } +# scheduler = ASHAScheduler( +# max_t=max_num_epochs, +# grace_period=1, +# reduction_factor=2, +# ) # -# The ``tune.choice()`` accepts a list of values that are uniformly -# sampled from. In this example, the ``l1`` and ``l2`` parameter values -# will be powers of 2 between 1 and 256. The learning rate is sampled on a -# log scale between 0.0001 and 0.1. Sampling on a log scale ensures that -# the search space is explored efficiently across different magnitudes. -# -# Smarter sampling and scheduling -# =============================== -# -# To make the hyperparameter search process efficient, Ray Tune provides -# two main controls: -# -# 1. It can intelligently pick the next set of hyperparameters to test -# based on previous results using `advanced search -# algorithms `__ -# such as -# `Optuna `__ -# or -# `BayesOpt `__, -# instead of relying only on random or grid search. -# 2. It can detect underperforming trials and stop them early using -# `schedulers `__, -# enabling you to explore the parameter space more on the same compute -# budget. -# -# In this tutorial, we use the ``ASHAScheduler``, which aggressively -# terminates low-performing trials to save computational resources. 
-# -# Configure the resources -# ======================= +# Ray Tune also provides `advanced search +# algorithms `__ +# to smartly pick the next set of hyperparameters based on previous +# results, instead of relying only on random or grid search. Examples +# include +# `Optuna `__ +# and +# `BayesOpt `__. # -# Tell Ray Tune what resources should be available for each trial using -# ``tune.with_resources``: +# Resource allocation +# ------------------- +# +# Tell Ray Tune what resources to allocate for each trial by passing a +# ``resources`` dictionary to ``tune.with_resources``: # # .. code-block:: python # # tune.with_resources( # partial(train_cifar, data_dir=data_dir), -# resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} +# resources={"cpu": 2, "gpu": gpus_per_trial} # ) # -# This tells Ray Tune to allocate ``cpus_per_trial`` CPUs and -# ``gpus_per_trial`` GPUs for each trial. Ray Tune automatically manages -# the placement of these trials and ensures they are isolated, so you -# don’t need to manually assign GPUs to processes. +# Ray Tune automatically manages the placement of these trials and ensures +# that the trials run in isolation, so you don’t need to manually assign +# GPUs to processes. # # For example, if you are running this experiment on a cluster of 20 # machines, each with 8 GPUs, you can set ``gpus_per_trial = 0.5`` to # schedule two concurrent trials per GPU. This configuration runs 320 # trials in parallel across the cluster. # -# Putting it together -# =================== +# **Note**: To run this tutorial without GPUs, set ``gpus_per_trial=0`` +# and expect significantly longer runtimes. +# +# To avoid long runtimes during development, start with a small number +# of trials and epochs. +# +# Creating the Tuner +# ------------------ # -# The Ray Tune API is designed to be modular and composable: you pass your -# configurations to the ``tune.Tuner`` class to create a tuner object, -# then execute ``tuner.fit()`` to start training: +# The Ray Tune API is modular and composable. Pass your configuration to +# the ``tune.Tuner`` class to create a tuner object, then run +# ``tuner.fit()`` to start training: # # .. code-block:: python # # tuner = tune.Tuner( # tune.with_resources( # partial(train_cifar, data_dir=data_dir), -# resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} +# resources={"cpu": 2, "gpu": gpus_per_trial} # ), # tune_config=tune.TuneConfig( # metric="loss", # mode="min", # scheduler=scheduler, -# num_samples=num_samples, +# num_samples=num_trials, # ), # param_space=config, # ) # results = tuner.fit() # -# After training the models, we will find the best performing one and load -# the trained network from the checkpoint file. We then obtain the test -# set accuracy and report the results. +# After training completes, retrieve the best performing trial, load its +# checkpoint, and evaluate on the test set. 
+# +# Putting it all together +# ----------------------- def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): print("Starting hyperparameter tuning.") @@ -519,17 +516,22 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): best_checkpoint_data = torch.load(checkpoint_path) best_trained_model.load_state_dict(best_checkpoint_data["net_state_dict"]) - test_acc = test_accuracy(best_trained_model, device) - print("Best trial test set accuracy: {}".format(test_acc)) + test_acc = test_accuracy(best_trained_model, device, data_dir) + print(f"Best trial test set accuracy: {test_acc}") if __name__ == "__main__": # Set the number of trials, epochs, and GPUs per trial here: - # The following configuration is for a quick run (1 trial, 1 epoch, CPU only) for demonstration purposes. + # The following configuration uses 1 trial, 1 epoch, and CPU only for demonstration purposes. main(num_trials=1, max_num_epochs=1, gpus_per_trial=0) ###################################################################### -# Your Ray Tune trial summary output will look something like this: +# Results +# ======= +# +# Your Ray Tune trial summary output looks something like this. The text +# table summarizes the validation performance of the trials and highlights +# the best hyperparameter configuration: # # .. code-block:: bash # @@ -554,20 +556,18 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # Best trial final validation accuracy: 0.4761 # Best trial test set accuracy: 0.4737 # -# Most trials were stopped early to conserve resources. The best -# performing trial achieved a validation accuracy of approximately 47%, -# which could be confirmed on the test set. -# -# You can now tune the parameters of your PyTorch models. +# Most trials stopped early to conserve resources. The best performing +# trial achieved a validation accuracy of approximately 47%, which the +# test set confirms. # # Observability # ============= # -# When running large-scale experiments, monitoring is crucial. Ray +# Monitoring is critical when running large-scale experiments. Ray # provides a -# `Dashboard `__ +# `dashboard `__ # that lets you view the status of your trials, check cluster resource -# utilization, and inspect logs in real-time. +# use, and inspect logs in real time. # # For debugging, Ray also offers `distributed debugging # tools `__ @@ -579,13 +579,14 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): # In this tutorial, you learned how to tune the hyperparameters of a # PyTorch model using Ray Tune. You saw how to integrate Ray Tune into # your PyTorch training loop, define a search space for your -# hyperparameters, use an efficient scheduler like ASHA to terminate bad -# trials early, save checkpoints and report metrics to Ray Tune, and run -# the hyperparameter search and analyze the results. -# -# Ray Tune makes it easy to scale your experiments from a single machine -# to a large cluster, helping you find the best model configuration -# efficiently. +# hyperparameters, use an efficient scheduler like ASHAScheduler to +# terminate low-performing trials early, save checkpoints and report +# metrics to Ray Tune, and run the hyperparameter search and analyze the +# results. +# +# Ray Tune makes it straightforward to scale your experiments from a +# single machine to a large cluster, helping you find the best model +# configuration efficiently. 
# # Further reading # =============== From 025c44b44cc63dd46454082aef12f128f4b84428 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 16 Dec 2025 14:47:34 -0800 Subject: [PATCH 19/20] add GPUs back to default config --- beginner_source/hyperparameter_tuning_tutorial.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 3bae0fbc4bb..bfb44ee521b 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -465,7 +465,7 @@ def test_accuracy(net, device="cpu", data_dir=None): # Putting it all together # ----------------------- -def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): +def main(num_trials=10, max_num_epochs=10, gpus_per_trial=0): print("Starting hyperparameter tuning.") ray.init() @@ -522,8 +522,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=2): if __name__ == "__main__": # Set the number of trials, epochs, and GPUs per trial here: - # The following configuration uses 1 trial, 1 epoch, and CPU only for demonstration purposes. - main(num_trials=1, max_num_epochs=1, gpus_per_trial=0) + main(num_trials=10, max_num_epochs=10, gpus_per_trial=1) ###################################################################### # Results From 94faa5397d6b0537cc4a24078a1fc02a51dd8a85 Mon Sep 17 00:00:00 2001 From: Ricardo Decal Date: Tue, 16 Dec 2025 15:23:30 -0800 Subject: [PATCH 20/20] Expose cpus_per_trial for configuration --- beginner_source/hyperparameter_tuning_tutorial.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index bfb44ee521b..fbc214db92c 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -417,7 +417,7 @@ def test_accuracy(net, device="cpu", data_dir=None): # # tune.with_resources( # partial(train_cifar, data_dir=data_dir), -# resources={"cpu": 2, "gpu": gpus_per_trial} +# resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} # ) # # Ray Tune automatically manages the placement of these trials and ensures @@ -447,7 +447,7 @@ def test_accuracy(net, device="cpu", data_dir=None): # tuner = tune.Tuner( # tune.with_resources( # partial(train_cifar, data_dir=data_dir), -# resources={"cpu": 2, "gpu": gpus_per_trial} +# resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} # ), # tune_config=tune.TuneConfig( # metric="loss", @@ -465,7 +465,7 @@ def test_accuracy(net, device="cpu", data_dir=None): # Putting it all together # ----------------------- -def main(num_trials=10, max_num_epochs=10, gpus_per_trial=0): +def main(num_trials=10, max_num_epochs=10, gpus_per_trial=0, cpus_per_trial=2): print("Starting hyperparameter tuning.") ray.init() @@ -488,7 +488,7 @@ def main(num_trials=10, max_num_epochs=10, gpus_per_trial=0): tuner = tune.Tuner( tune.with_resources( partial(train_cifar, data_dir=data_dir), - resources={"cpu": 2, "gpu": gpus_per_trial} + resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial} ), tune_config=tune.TuneConfig( metric="loss",