From 08cc727cb4e51d1dd09a9381fa7c34de3c73548d Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu <yifu.wu@gmail.com>
Date: Tue, 23 Dec 2025 16:58:02 -0800
Subject: [PATCH 1/6] Add doc for nano-v3

Signed-off-by: Yi-Fu Wu <yifu.wu@gmail.com>
---
 docs/guides/nemotron-3-nano.md | 71 ++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 docs/guides/nemotron-3-nano.md

diff --git a/docs/guides/nemotron-3-nano.md b/docs/guides/nemotron-3-nano.md
new file mode 100644
index 0000000000..923afb26e3
--- /dev/null
+++ b/docs/guides/nemotron-3-nano.md
@@ -0,0 +1,71 @@
+# Nemtron 3 Nano
+
+This guide explains how to post-train the [Nemotron 3 Nano model](https://research.nvidia.com/labs/nemotron/files/NVIDIA-Nemotron-3-Nano-Technical-Report.pdf) using NeMo RL.
+
+## Download and prepare the data
+
+```bash
+# Install Hugging Face CLI
+pip install huggingface-hub
+
+# Download RL data blend
+hf download nvidia/Nemotron-3-Nano-RL-Training-Blend --repo-type dataset --local-dir=data
+
+# Fill in placeholders in dataset
+chmod +x data/create_nanov3_jsonl.py
+./data/create_nanov3_jsonl.py --input data/train.jsonl --output data/train-full.jsonl
+
+# Use the last 1000 rows for validation
+head -n -1000 data/train-full.jsonl > data/train-split.jsonl
+tail -n 1000 data/train-full.jsonl > data/val-split.jsonl
+```
+
+## Prepare the code
+Training Nemotron 3 Nano currently requires the `nano-v3` branch of NeMo RL.
+```bash
+# Checkout NeMo RL
+git clone -b nano-v3 https://github.com/NVIDIA-NeMo/RL.git
+cd RL
+
+# Initialize the submodules
+git submodule update --init --recursive
+```
+
+## Create a launch script
+
+Create a file named `launch.sh` with the following contents. Be sure to fill in `DATA_DIR`, `MODEL_CHECKPOINT`, `WANDB_API_KEY`, `SLURM_ACCOUNT`, `SLURM_PARTITION`, and `MOUNTS`. Note that the default recipe (`examples/nemo_gym/grpo_nanov3.yaml`) uses 32 nodes.
+
+```bash
+CODE_DIR=$PWD
+SLURM_JOB_NAME=nano-v3-rl-training
+
+# Fill these in
+DATA_DIR=...
+MODEL_CHECKPOINT=...
+WANDB_API_KEY=...
+SLURM_ACCOUNT=...
+SLURM_PARTITION=...
+MOUNTS=... # SRC:DST[,SRC:DST...] e.g., MOUNTS="/lustre:/lustre,/data:/data"
+
+CONTAINER="nvcr.io/nvidia/nemo-rl:v0.4.0.nemotron_3_nano"
+COMMAND="uv run examples/nemo_gym/run_grpo_nemo_gym.py --config examples/nemo_gym/grpo_nanov3.yaml data.train_jsonl_fpath=$DATA_DIR/train-split.jsonl data.validation_jsonl_fpath=$DATA_DIR/val-split.jsonl policy.model_name=$MODEL_CHECKPOINT logger.wandb_enabled=True"
+
+COMMAND="${COMMAND}" \
+CONTAINER="${CONTAINER}" \
+MOUNTS="${MOUNTS}" \
+WANDB_API_KEY=${WANDB_API_KEY} \
+sbatch \
+    --nodes=32 \
+    --account="${SLURM_ACCOUNT}" \
+    --job-name="${SLURM_JOB_NAME}" \
+    --partition="${SLURM_PARTITION}" \
+    --time=4:0:0 \
+    --gres=gpu:8 \
+    ray.sub
+```
+
+
+## Launch training
+```bash
+bash launch.sh
+```

From ad96459d7807371658c7ba39e9a57908d24e4955 Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu <yifu.wu@gmail.com>
Date: Tue, 23 Dec 2025 17:05:35 -0800
Subject: [PATCH 2/6] Add to index

Signed-off-by: Yi-Fu Wu <yifu.wu@gmail.com>
---
 docs/index.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/index.md b/docs/index.md
index 051893d618..0a6bf26623 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -216,6 +216,7 @@ guides/eval.md
 guides/deepseek.md
 model-quirks.md
 guides/async-grpo.md
+guides/nemotron-3-nano.md
 ```
 
 ```{toctree}

From a040a8f81b04ad3ad6cbd9b35096885c1d8ed0e7 Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu <yifu.wu@gmail.com>
Date: Tue, 23 Dec 2025 17:24:33 -0800
Subject: [PATCH 3/6] Address comments

Signed-off-by: Yi-Fu Wu <yifu.wu@gmail.com>
---
 docs/guides/nemotron-3-nano.md | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/docs/guides/nemotron-3-nano.md b/docs/guides/nemotron-3-nano.md
index 923afb26e3..23d493bb40 100644
--- a/docs/guides/nemotron-3-nano.md
+++ b/docs/guides/nemotron-3-nano.md
@@ -1,15 +1,12 @@
-# Nemtron 3 Nano
+# Nemotron 3 Nano
 
 This guide explains how to post-train the [Nemotron 3 Nano model](https://research.nvidia.com/labs/nemotron/files/NVIDIA-Nemotron-3-Nano-Technical-Report.pdf) using NeMo RL.
 
 ## Download and prepare the data
 
 ```bash
-# Install Hugging Face CLI
-pip install huggingface-hub
-
 # Download RL data blend
-hf download nvidia/Nemotron-3-Nano-RL-Training-Blend --repo-type dataset --local-dir=data
+uvx --from huggingface-hub hf download nvidia/Nemotron-3-Nano-RL-Training-Blend --repo-type dataset --local-dir=data
 
 # Fill in placeholders in dataset
 chmod +x data/create_nanov3_jsonl.py

From 242d9bb4de945eb198f293240e2b5fff2667e306 Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu <yifu.wu@gmail.com>
Date: Tue, 23 Dec 2025 17:28:50 -0800
Subject: [PATCH 4/6] Move nano guide higher in index

Signed-off-by: Yi-Fu Wu <yifu.wu@gmail.com>
---
 docs/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/index.md b/docs/index.md
index 0a6bf26623..18fd643104 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -203,6 +203,7 @@ guides/sft-openmathinstruct2.md
 :caption: Guides
 :hidden:
 
+guides/nemotron-3-nano.md
 adding-new-models.md
 guides/sft.md
 guides/dpo.md
@@ -216,7 +217,6 @@ guides/eval.md
 guides/deepseek.md
 model-quirks.md
 guides/async-grpo.md
-guides/nemotron-3-nano.md
 ```
 
 ```{toctree}

From 3d76bd080c81c9da1c999ad4a5b31c5b0183290e Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu <yifu.wu@gmail.com>
Date: Tue, 23 Dec 2025 17:33:12 -0800
Subject: [PATCH 5/6] Point to guide in README

Signed-off-by: Yi-Fu Wu <yifu.wu@gmail.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f68db216e9..169f8d0efa 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 </div>
 
 ## 📣 News
-* [12/15/2025] NeMo-RL is the framework that trained [NVIDIA-NeMotron-3-Nano-30B-A3B-FP8](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8)! [Reproducible code here](https://github.com/NVIDIA-NeMo/RL/tree/nano-v3)
+* [12/15/2025] NeMo-RL is the framework that trained [NVIDIA-NeMotron-3-Nano-30B-A3B-FP8](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8)! [Guide here](docs/guides/nemotron-3-nano.md)
 * [12/1/2025] [Release v0.4.0!](https://github.com/NVIDIA-NeMo/RL/releases/tag/v0.4.0)
     * First release with official NGC Container [nvcr.io/nvidia/nemo-rl:v0.4.0](https://registry.ngc.nvidia.com/orgs/nvidia/containers/nemo-rl/tags).
     * 📊 View the release run metrics on [Google Colab](https://colab.research.google.com/drive/1u5lmjHOsYpJqXaeYstjw7Qbzvbo67U0v?usp=sharing) to get a head start on your experimentation.

From 33e2d5fe7e648030bd5a710d7e860248bb19e029 Mon Sep 17 00:00:00 2001
From: Yi-Fu Wu <yifu.wu@gmail.com>
Date: Tue, 23 Dec 2025 17:38:45 -0800
Subject: [PATCH 6/6] Update wording

Signed-off-by: Yi-Fu Wu <yifu.wu@gmail.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 169f8d0efa..a933709256 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 </div>
 
 ## 📣 News
-* [12/15/2025] NeMo-RL is the framework that trained [NVIDIA-NeMotron-3-Nano-30B-A3B-FP8](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8)! [Guide here](docs/guides/nemotron-3-nano.md)
+* [12/15/2025] NeMo-RL is the framework that trained [NVIDIA-Nemotron-3-Nano-30B-A3B-FP8](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8)! [This guide](docs/guides/nemotron-3-nano.md) provides instructions for reproducing the post-training process.
 * [12/1/2025] [Release v0.4.0!](https://github.com/NVIDIA-NeMo/RL/releases/tag/v0.4.0)
     * First release with official NGC Container [nvcr.io/nvidia/nemo-rl:v0.4.0](https://registry.ngc.nvidia.com/orgs/nvidia/containers/nemo-rl/tags).
     * 📊 View the release run metrics on [Google Colab](https://colab.research.google.com/drive/1u5lmjHOsYpJqXaeYstjw7Qbzvbo67U0v?usp=sharing) to get a head start on your experimentation.