From 1bdcf82d4561377842db2879d0e017bc3db63a12 Mon Sep 17 00:00:00 2001
From: Adnan Qureshi <adnanquresh262@gmail.com>
Date: Fri, 16 Jan 2026 17:30:37 +0530
Subject: [PATCH 1/4] feature(workflows): added an agentic workflow for sudoku

---
 trinity/common/workflows/__init__.py        |  2 +
 trinity/common/workflows/sudoku_workflow.py | 78 +++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 trinity/common/workflows/sudoku_workflow.py

diff --git a/trinity/common/workflows/__init__.py b/trinity/common/workflows/__init__.py
index ea7390b4a4..6086dba4c9 100644
--- a/trinity/common/workflows/__init__.py
+++ b/trinity/common/workflows/__init__.py
@@ -48,6 +48,8 @@
         # on-policy distillation workflows
         "on_policy_distill_workflow": "trinity.common.workflows.on_policy_distill_workflow.OnPolicyDistillWorkflow",
         "on_policy_distill_math_workflow": "trinity.common.workflows.on_policy_distill_workflow.OnPolicyDistillMathWorkflow",
+        # customed workflows
+        "sudoku_workflow": "trinity.common.workflows.sudoku_workflow.SudokuWorkflow",
     },
 )
 
diff --git a/trinity/common/workflows/sudoku_workflow.py b/trinity/common/workflows/sudoku_workflow.py
new file mode 100644
index 0000000000..d5605a8617
--- /dev/null
+++ b/trinity/common/workflows/sudoku_workflow.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+"""
+Sudoku Workflow for Trinity-RFT
+This workflow demonstrates a simple single-turn task where the model solves a Sudoku puzzle.
+"""
+
+from typing import List
+
+from trinity.common.experience import Experience
+from trinity.common.models.model import ModelWrapper
+from trinity.common.workflows import Task
+from trinity.common.workflows.workflow import Workflow
+
+
+class SudokuWorkflow(Workflow):
+    """
+    Workflow: SudokuWorkflow
+    Purpose: Ask the model to solve a Sudoku puzzle and give reward based on correctness.
+    """
+
+    # Workflow does not support reset or repeated runs for now.
+    can_reset: bool = False
+    can_repeat: bool = False
+
+    def __init__(self, task: Task, model: ModelWrapper, auxiliary_models=None):
+        """
+        Initialize workflow with:
+        - task.raw_task["puzzle"]
+        - task.raw_task["solution"]g
+        """
+
+        super().__init__(task=task, model=model, auxiliary_models=auxiliary_models)
+
+        # Extract puzzle input and ground truth
+        self.puzzle = task.raw_task.get("puzzle")
+        self.solution = task.raw_task.get("solution")
+
+        # Rollout arguments (e.g., temperature, n)
+        self.rollout_args = task.rollout_args
+
+    def calculate_reward(self, predicted: str) -> float:
+        """
+        Reward function:
+        Returns 1.0 if predicted output matches solution exactly, else 0.0.
+        """
+        return 1.0 if predicted.strip() == self.solution.strip() else 0.0
+
+    def run(self) -> List[Experience]:
+        """
+        Primary execution step of the workflow:
+        1. Send puzzle to model
+        2. Collect response
+        3. Evaluate with reward
+        4. Package into Experience list
+        """
+
+        responses = self.model.chat(
+            [
+                {
+                    "role": "user",
+                    "content": f"Solve this Sudoku puzzle:\n{self.puzzle}",
+                }
+            ],
+            temperature=self.rollout_args.temperature,
+        )
+
+        resp = responses[0]  # Single response
+        reward = self.calculate_reward(resp.response_text)
+
+        # Return experience in expected format
+        return [
+            Experience(
+                tokens=resp.tokens,
+                prompt_length=resp.prompt_length,
+                reward=reward,
+                logprobs=resp.logprobs,
+            )
+        ]

From fe7131641c307471f6689aaa49bc8ee4b3c0d708 Mon Sep 17 00:00:00 2001
From: Adnan Qureshi <adnanquresh262@gmail.com>
Date: Fri, 16 Jan 2026 19:35:21 +0530
Subject: [PATCH 2/4] feature: added Sudoku generator and judge

---
 trinity/common/workflows/sudoku_generator.py |  33 ++++
 trinity/common/workflows/sudoku_judge.py     |  43 +++++
 trinity/common/workflows/sudoku_workflow.py  | 166 +++++++++++--------
 3 files changed, 176 insertions(+), 66 deletions(-)
 create mode 100644 trinity/common/workflows/sudoku_generator.py
 create mode 100644 trinity/common/workflows/sudoku_judge.py

diff --git a/trinity/common/workflows/sudoku_generator.py b/trinity/common/workflows/sudoku_generator.py
new file mode 100644
index 0000000000..bd4bfd1b62
--- /dev/null
+++ b/trinity/common/workflows/sudoku_generator.py
@@ -0,0 +1,33 @@
+import random
+
+
+class SudokuGenerator:
+    """
+    Very simple Sudoku generator.
+    - Uses a fixed solved grid
+    - Removes 'holes' positions to create a puzzle
+    """
+
+    BASE_SOLUTION = [
+        [5, 3, 4, 6, 7, 8, 9, 1, 2],
+        [6, 7, 2, 1, 9, 5, 3, 4, 8],
+        [1, 9, 8, 3, 4, 2, 5, 6, 7],
+        [8, 5, 9, 7, 6, 1, 4, 2, 3],
+        [4, 2, 6, 8, 5, 3, 7, 9, 1],
+        [7, 1, 3, 9, 2, 4, 8, 5, 6],
+        [9, 6, 1, 5, 3, 7, 2, 8, 4],
+        [2, 8, 7, 4, 1, 9, 6, 3, 5],
+        [3, 4, 5, 2, 8, 6, 1, 7, 9],
+    ]
+
+    def generate(self, holes=40):
+        """Return (puzzle, solution) tuple."""
+        solution = [row[:] for row in self.BASE_SOLUTION]
+        puzzle = [row[:] for row in solution]
+
+        for _ in range(holes):
+            r = random.randint(0, 8)
+            c = random.randint(0, 8)
+            puzzle[r][c] = 0
+
+        return puzzle, solution
diff --git a/trinity/common/workflows/sudoku_judge.py b/trinity/common/workflows/sudoku_judge.py
new file mode 100644
index 0000000000..9fee423710
--- /dev/null
+++ b/trinity/common/workflows/sudoku_judge.py
@@ -0,0 +1,43 @@
+class SudokuJudge:
+    """
+    Stateless checker for a 9x9 Sudoku board indexed as board[row][col].
+    - is_valid: no non-zero value repeats within any row, column or 3x3 block;
+      0 cells are skipped, so a partially filled board can still be valid.
+    - is_solved: the board compares equal (==) to the reference solution.
+    """
+
+    @staticmethod
+    def is_valid(board):
+        # Rows: every non-zero value may appear at most once per row
+        for row in board:
+            nums = [v for v in row if v != 0]
+            if len(nums) != len(set(nums)):
+                return False
+
+        # Columns: same duplicate test over board[0..8][col]
+        for col in range(9):
+            nums = []
+            for row in range(9):
+                v = board[row][col]
+                if v != 0:
+                    nums.append(v)
+            if len(nums) != len(set(nums)):
+                return False
+
+        # 3x3 blocks: (br, bc) is the top-left cell of each of the nine blocks
+        for br in range(0, 9, 3):
+            for bc in range(0, 9, 3):
+                nums = []
+                for r in range(br, br + 3):
+                    for c in range(bc, bc + 3):
+                        v = board[r][c]
+                        if v != 0:
+                            nums.append(v)
+                if len(nums) != len(set(nums)):
+                    return False
+
+        return True  # no duplicate found in any row, column or block
+
+    @staticmethod
+    def is_solved(board, solution):
+        return board == solution  # plain equality with the reference; no rule checking here
diff --git a/trinity/common/workflows/sudoku_workflow.py b/trinity/common/workflows/sudoku_workflow.py
index d5605a8617..5b99a4bb88 100644
--- a/trinity/common/workflows/sudoku_workflow.py
+++ b/trinity/common/workflows/sudoku_workflow.py
@@ -1,78 +1,112 @@
-# -*- coding: utf-8 -*-
-"""
-Sudoku Workflow for Trinity-RFT
-This workflow demonstrates a simple single-turn task where the model solves a Sudoku puzzle.
-"""
-
-from typing import List
-
 from trinity.common.experience import Experience
-from trinity.common.models.model import ModelWrapper
-from trinity.common.workflows import Task
 from trinity.common.workflows.workflow import Workflow
 
+from .sudoku_generator import SudokuGenerator
+from .sudoku_judge import SudokuJudge
+
 
 class SudokuWorkflow(Workflow):
     """
-    Workflow: SudokuWorkflow
-    Purpose: Ask the model to solve a Sudoku puzzle and give reward based on correctness.
+    Multi-step Sudoku solving workflow.
+    - Shows current puzzle board to model
+    - Model returns a move: "r c v"
+    - Workflow applies move
+    - Judge checks validity
+    - Continues for max_steps
     """
 
-    # Workflow does not support reset or repeated runs for now.
-    can_reset: bool = False
-    can_repeat: bool = False
-
-    def __init__(self, task: Task, model: ModelWrapper, auxiliary_models=None):
-        """
-        Initialize workflow with:
-        - task.raw_task["puzzle"]
-        - task.raw_task["solution"]g
-        """
+    can_reset = True
 
+    def __init__(self, task, model, auxiliary_models=None):
         super().__init__(task=task, model=model, auxiliary_models=auxiliary_models)
 
-        # Extract puzzle input and ground truth
-        self.puzzle = task.raw_task.get("puzzle")
-        self.solution = task.raw_task.get("solution")
-
-        # Rollout arguments (e.g., temperature, n)
-        self.rollout_args = task.rollout_args
-
-    def calculate_reward(self, predicted: str) -> float:
-        """
-        Reward function:
-        Returns 1.0 if predicted output matches solution exactly, else 0.0.
-        """
-        return 1.0 if predicted.strip() == self.solution.strip() else 0.0
-
-    def run(self) -> List[Experience]:
-        """
-        Primary execution step of the workflow:
-        1. Send puzzle to model
-        2. Collect response
-        3. Evaluate with reward
-        4. Package into Experience list
-        """
-
-        responses = self.model.chat(
-            [
-                {
-                    "role": "user",
-                    "content": f"Solve this Sudoku puzzle:\n{self.puzzle}",
-                }
-            ],
-            temperature=self.rollout_args.temperature,
-        )
-
-        resp = responses[0]  # Single response
-        reward = self.calculate_reward(resp.response_text)
-
-        # Return experience in expected format
-        return [
-            Experience(
-                tokens=resp.tokens,
-                prompt_length=resp.prompt_length,
-                reward=reward,
-                logprobs=resp.logprobs,
+        # If no dataset provided, generate puzzle
+        if "puzzle" in task.raw_task:
+            self.board = [row[:] for row in task.raw_task["puzzle"]]
+            self.solution = [row[:] for row in task.raw_task["solution"]]
+        else:
+            generator = SudokuGenerator()
+            self.board, self.solution = generator.generate()
+
+        self.judge = SudokuJudge()
+        self.max_steps = 20
+
+    def reset(self, task):
+        """Reset puzzle for new task."""
+        self.board = [row[:] for row in task.raw_task["puzzle"]]
+        self.solution = [row[:] for row in task.raw_task["solution"]]
+
+    def parse_action(self, text):
+        """Expected model output: 'row col value'"""
+        try:
+            parts = text.strip().split()
+            if len(parts) != 3:
+                return None
+            r, c, v = map(int, parts)
+            if not (0 <= r <= 8 and 0 <= c <= 8 and 1 <= v <= 9):
+                return None
+            return r, c, v
+        except Exception:
+            return None
+
+    def apply_move(self, r, c, v):
+        if self.board[r][c] == 0:
+            self.board[r][c] = v
+
+    def run(self):
+        experiences = []
+
+        for step in range(self.max_steps):
+            prompt = f"""
+Solve Sudoku by giving moves one at a time.
+Current board (0 = empty):
+
+{self.board}
+
+Respond ONLY with: row col value
+"""
+
+            # Call model
+            responses = self.model.chat([{"role": "user", "content": prompt}])
+            resp = responses[0]
+
+            action = self.parse_action(resp.response_text)
+            if action is None:
+                reward = -1.0
+                break
+
+            r, c, v = action
+            self.apply_move(r, c, v)
+
+            # Check validity
+            if not self.judge.is_valid(self.board):
+                reward = -1.0
+                break
+
+            # Check solved
+            if self.judge.is_solved(self.board, self.solution):
+                reward = 1.0
+                experiences.append(
+                    Experience(
+                        tokens=resp.tokens,
+                        prompt_length=resp.prompt_length,
+                        reward=reward,
+                        logprobs=resp.logprobs,
+                    )
+                )
+                break
+
+            # Neutral step reward
+            reward = 0.0
+
+            # Add experience
+            experiences.append(
+                Experience(
+                    tokens=resp.tokens,
+                    prompt_length=resp.prompt_length,
+                    reward=reward,
+                    logprobs=resp.logprobs,
+                )
             )
-        ]
+
+        return experiences

From 3b8796d8a3f750b78f80b8c4f2b71c8b764c2a05 Mon Sep 17 00:00:00 2001
From: Adnan Qureshi <adnanquresh262@gmail.com>
Date: Sat, 17 Jan 2026 19:00:34 +0530
Subject: [PATCH 3/4] feature(workflows): improve SudokuWorkflow prompt and
 generator

---
 trinity/common/workflows/__init__.py         |   2 +-
 trinity/common/workflows/sudoku_generator.py |  51 +++++++-
 trinity/common/workflows/sudoku_workflow.py  | 124 +++++++++++++++----
 3 files changed, 144 insertions(+), 33 deletions(-)

diff --git a/trinity/common/workflows/__init__.py b/trinity/common/workflows/__init__.py
index 6086dba4c9..ad38cb935c 100644
--- a/trinity/common/workflows/__init__.py
+++ b/trinity/common/workflows/__init__.py
@@ -48,7 +48,7 @@
         # on-policy distillation workflows
         "on_policy_distill_workflow": "trinity.common.workflows.on_policy_distill_workflow.OnPolicyDistillWorkflow",
         "on_policy_distill_math_workflow": "trinity.common.workflows.on_policy_distill_workflow.OnPolicyDistillMathWorkflow",
-        # customed workflows
+        # custom workflows
         "sudoku_workflow": "trinity.common.workflows.sudoku_workflow.SudokuWorkflow",
     },
 )
diff --git a/trinity/common/workflows/sudoku_generator.py b/trinity/common/workflows/sudoku_generator.py
index bd4bfd1b62..761268d6dc 100644
--- a/trinity/common/workflows/sudoku_generator.py
+++ b/trinity/common/workflows/sudoku_generator.py
@@ -3,9 +3,13 @@
 
 class SudokuGenerator:
     """
-    Very simple Sudoku generator.
-    - Uses a fixed solved grid
-    - Removes 'holes' positions to create a puzzle
+    Lightweight Sudoku generator.
+
+    This generator avoids relying on a single canonical solution by applying
+    randomized transformations to a solved grid before removing values to
+    create a puzzle. The difficulty is controlled by the number of removed
+    cells (holes).
+
     """
 
     BASE_SOLUTION = [
@@ -20,9 +24,46 @@ class SudokuGenerator:
         [3, 4, 5, 2, 8, 6, 1, 7, 9],
     ]
 
+    def _shuffle_solution(self, board):
+        """
+        Randomize a solved Sudoku grid while preserving validity.
+
+        This follows common Sudoku generation techniques:
+        - permuting numbers
+        - shuffling rows
+        - shuffling columns
+        """
+        board = [row[:] for row in board]
+
+        # Shuffle numbers 1–9
+        numbers = list(range(1, 10))
+        shuffled_numbers = numbers[:]
+        random.shuffle(shuffled_numbers)
+        mapping = dict(zip(numbers, shuffled_numbers))
+        board = [[mapping[v] for v in row] for row in board]
+
+        # Shuffle rows
+        random.shuffle(board)
+
+        # Shuffle columns
+        board = list(map(list, zip(*board)))
+        random.shuffle(board)
+        board = list(map(list, zip(*board)))
+
+        return board
+
     def generate(self, holes=40):
-        """Return (puzzle, solution) tuple."""
-        solution = [row[:] for row in self.BASE_SOLUTION]
+        """
+        Generate a Sudoku puzzle.
+
+        Args:
+            holes (int): Number of empty cells (0s) in the puzzle.
+                         Larger values correspond to higher difficulty.
+
+        Returns:
+            tuple: (puzzle, solution)
+        """
+        solution = self._shuffle_solution(self.BASE_SOLUTION)
         puzzle = [row[:] for row in solution]
 
         for _ in range(holes):
diff --git a/trinity/common/workflows/sudoku_workflow.py b/trinity/common/workflows/sudoku_workflow.py
index 5b99a4bb88..dcc1df28dd 100644
--- a/trinity/common/workflows/sudoku_workflow.py
+++ b/trinity/common/workflows/sudoku_workflow.py
@@ -1,3 +1,5 @@
+import re
+
 from trinity.common.experience import Experience
 from trinity.common.workflows.workflow import Workflow
 
@@ -8,11 +10,13 @@
 class SudokuWorkflow(Workflow):
     """
     Multi-step Sudoku solving workflow.
-    - Shows current puzzle board to model
-    - Model returns a move: "r c v"
-    - Workflow applies move
-    - Judge checks validity
-    - Continues for max_steps
+
+    This workflow follows a FrozenLake-style agentic interaction pattern:
+    - Maintains an internal environment state (Sudoku board)
+    - Interacts with the model step by step
+    - Provides explicit rules, task description, and strict output format
+    - Gives feedback on invalid or ineffective actions
+    - Terminates on success or failure
     """
 
     can_reset = True
@@ -20,8 +24,8 @@ class SudokuWorkflow(Workflow):
     def __init__(self, task, model, auxiliary_models=None):
         super().__init__(task=task, model=model, auxiliary_models=auxiliary_models)
 
-        # If no dataset provided, generate puzzle
-        if "puzzle" in task.raw_task:
+        # Initialize puzzle
+        if "puzzle" in task.raw_task and "solution" in task.raw_task:
             self.board = [row[:] for row in task.raw_task["puzzle"]]
             self.solution = [row[:] for row in task.raw_task["solution"]]
         else:
@@ -31,59 +35,124 @@ def __init__(self, task, model, auxiliary_models=None):
         self.judge = SudokuJudge()
         self.max_steps = 20
 
+        # State tracking (FrozenLake-style)
+        self.current_step = 0
+        self.last_board = None
+        self.last_action = None
+
     def reset(self, task):
-        """Reset puzzle for new task."""
+        """Reset the workflow state for a new task."""
         self.board = [row[:] for row in task.raw_task["puzzle"]]
         self.solution = [row[:] for row in task.raw_task["solution"]]
+        self.current_step = 0
+        self.last_board = None
+        self.last_action = None
+
+    def _build_prompt(self):
+        """
+        Build a detailed, step-aware prompt inspired by the Frozen Lake example.
+        """
+        prompt = (
+            "You are playing a Sudoku game.\n\n"
+            "Game Rules:\n"
+            "- The board is a 9x9 grid.\n"
+            "- A value of 0 represents an empty cell.\n"
+            "- Each row must contain the numbers 1 through 9 exactly once.\n"
+            "- Each column must contain the numbers 1 through 9 exactly once.\n"
+            "- Each 3x3 sub-grid must contain the numbers 1 through 9 exactly once.\n"
+            "- You may only place numbers in empty cells.\n\n"
+            "Task:\n"
+            "- At each step, output ONE valid move to progress toward solving the puzzle.\n\n"
+            "Output Format (STRICT):\n"
+            "```row col value```\n\n"
+            "Example:\n"
+            "```0 2 4```\n\n"
+            f"Current Step: {self.current_step}\n"
+            f"Remaining Steps: {self.max_steps - self.current_step}\n\n"
+            f"Current Board:\n{self.board}\n"
+        )
+
+        if self.last_board is not None and self.board == self.last_board:
+            prompt += (
+                "\nYour last response was invalid or had no effect. "
+                "Please recheck the Sudoku rules and the required output format."
+            )
+
+        return prompt
 
     def parse_action(self, text):
-        """Expected model output: 'row col value'"""
+        """
+        Parse model output.
+
+        Expected format:
+        ```row col value```
+        """
+        matches = re.findall(r"```(.*?)```", text, re.DOTALL)
+        if not matches:
+            return None
+
         try:
-            parts = text.strip().split()
+            parts = matches[-1].strip().split()
             if len(parts) != 3:
                 return None
+
             r, c, v = map(int, parts)
             if not (0 <= r <= 8 and 0 <= c <= 8 and 1 <= v <= 9):
                 return None
+
             return r, c, v
-        except Exception:
+        except ValueError:
             return None
 
     def apply_move(self, r, c, v):
+        """Apply a move to the board if the cell is empty."""
         if self.board[r][c] == 0:
             self.board[r][c] = v
 
     def run(self):
+        """
+        Execute the Sudoku workflow step by step.
+        """
         experiences = []
 
-        for step in range(self.max_steps):
-            prompt = f"""
-Solve Sudoku by giving moves one at a time.
-Current board (0 = empty):
-
-{self.board}
+        for _ in range(self.max_steps):
+            prompt = self._build_prompt()
 
-Respond ONLY with: row col value
-"""
-
-            # Call model
             responses = self.model.chat([{"role": "user", "content": prompt}])
             resp = responses[0]
 
+            self.last_board = [row[:] for row in self.board]
+
             action = self.parse_action(resp.response_text)
             if action is None:
                 reward = -1.0
+                experiences.append(
+                    Experience(
+                        tokens=resp.tokens,
+                        prompt_length=resp.prompt_length,
+                        reward=reward,
+                        logprobs=resp.logprobs,
+                    )
+                )
                 break
 
             r, c, v = action
             self.apply_move(r, c, v)
 
-            # Check validity
-            if not self.judge.is_valid(self.board):
+            # Invalid or ineffective action
+            if self.board == self.last_board or not self.judge.is_valid(self.board):
                 reward = -1.0
+                experiences.append(
+                    Experience(
+                        tokens=resp.tokens,
+                        prompt_length=resp.prompt_length,
+                        reward=reward,
+                        logprobs=resp.logprobs,
+                    )
+                )
                 break
 
-            # Check solved
+            # Solved
             if self.judge.is_solved(self.board, self.solution):
                 reward = 1.0
                 experiences.append(
@@ -96,10 +165,8 @@ def run(self):
                 )
                 break
 
-            # Neutral step reward
+            # Intermediate step
             reward = 0.0
-
-            # Add experience
             experiences.append(
                 Experience(
                     tokens=resp.tokens,
@@ -109,4 +176,7 @@ def run(self):
                 )
             )
 
+            self.last_action = action
+            self.current_step += 1
+
         return experiences

From bf1156ab69daa151e51ff74547f4180c5a44f2c5 Mon Sep 17 00:00:00 2001
From: Adnan Qureshi <adnanquresh262@gmail.com>
Date: Wed, 21 Jan 2026 02:33:18 +0530
Subject: [PATCH 4/4] improved sudoku workflow and generator

---
 trinity/common/workflows/sudoku_generator.py | 133 ++++++++++---------
 trinity/common/workflows/sudoku_workflow.py  | 129 ++++++++----------
 2 files changed, 120 insertions(+), 142 deletions(-)

diff --git a/trinity/common/workflows/sudoku_generator.py b/trinity/common/workflows/sudoku_generator.py
index 761268d6dc..bcca7e12cc 100644
--- a/trinity/common/workflows/sudoku_generator.py
+++ b/trinity/common/workflows/sudoku_generator.py
@@ -3,72 +3,73 @@
 
 class SudokuGenerator:
     """
-    Lightweight Sudoku generator.
-
-    This generator avoids relying on a single canonical solution by applying
-    randomized transformations to a solved grid before removing values to
-    create a puzzle. The difficulty is controlled by the number of removed
-    cells (holes).
+    Sudoku puzzle generator inspired by standard backtracking-based generators.
 
+    - Generates a fresh solved Sudoku board using backtracking
+    - Removes cells based on difficulty (number of empty cells)
+    - Avoids relying on a single canonical solution
     """
 
-    BASE_SOLUTION = [
-        [5, 3, 4, 6, 7, 8, 9, 1, 2],
-        [6, 7, 2, 1, 9, 5, 3, 4, 8],
-        [1, 9, 8, 3, 4, 2, 5, 6, 7],
-        [8, 5, 9, 7, 6, 1, 4, 2, 3],
-        [4, 2, 6, 8, 5, 3, 7, 9, 1],
-        [7, 1, 3, 9, 2, 4, 8, 5, 6],
-        [9, 6, 1, 5, 3, 7, 2, 8, 4],
-        [2, 8, 7, 4, 1, 9, 6, 3, 5],
-        [3, 4, 5, 2, 8, 6, 1, 7, 9],
-    ]
-
-    def _shuffle_solution(self, board):
-        """
-        Randomize a solved Sudoku grid while preserving validity.
-
-        This follows common Sudoku generation techniques:
-        - permuting numbers
-        - shuffling rows
-        - shuffling columns
-        """
-        board = [row[:] for row in board]
-
-        # Shuffle numbers 1–9
-        numbers = list(range(1, 10))
-        shuffled_numbers = numbers[:]
-        random.shuffle(shuffled_numbers)
-        mapping = dict(zip(numbers, shuffled_numbers))
-        board = [[mapping[v] for v in row] for row in board]
-
-        # Shuffle rows
-        random.shuffle(board)
-
-        # Shuffle columns
-        board = list(map(list, zip(*board)))
-        random.shuffle(board)
-        board = list(map(list, zip(*board)))
-
-        return board
-
-    def generate(self, holes=40):
-        """
-        Generate a Sudoku puzzle.
-
-        Args:
-            holes (int): Number of empty cells (0s) in the puzzle.
-                         Larger values correspond to higher difficulty.
-
-        Returns:
-            tuple: (puzzle, solution)
-        """
-        solution = self._shuffle_solution(self.BASE_SOLUTION)
-        puzzle = [row[:] for row in solution]
-
-        for _ in range(holes):
-            r = random.randint(0, 8)
-            c = random.randint(0, 8)
-            puzzle[r][c] = 0
-
-        return puzzle, solution
+    def generate(self, difficulty="medium"):  # returns (puzzle, solution), both 9x9 lists of ints
+        holes_map = {  # difficulty name -> number of cells blanked out
+            "easy": 30,
+            "medium": 40,
+            "hard": 50,
+        }
+        holes = holes_map.get(difficulty, 40)  # unrecognised difficulty falls back to 40 holes
+
+        board = [[0 for _ in range(9)] for _ in range(9)]  # all-empty grid (0 = empty)
+        self._fill_board(board)  # fills the grid in place via randomized backtracking
+
+        solution = [row[:] for row in board]  # copy the rows before blanking any cells
+        self._remove_cells(board, holes)  # zeroes cells of board in place
+
+        return board, solution  # board is now the puzzle
+
+    def _fill_board(self, board):  # recursive backtracking fill; mutates board, True on success
+        empty = self._find_empty(board)
+        if not empty:  # None -> no empty cell left, the board is complete
+            return True
+
+        r, c = empty
+        nums = list(range(1, 10))
+        random.shuffle(nums)  # random candidate order, so calls can produce different grids
+
+        for v in nums:
+            if self._is_valid(board, r, c, v):
+                board[r][c] = v
+                if self._fill_board(board):
+                    return True
+                board[r][c] = 0  # dead end: undo and try the next candidate
+
+        return False  # no candidate fits this cell; the caller has to backtrack
+
+    def _find_empty(self, board):  # first 0 cell in row-major order as (row, col), else None
+        for i in range(9):
+            for j in range(9):
+                if board[i][j] == 0:
+                    return i, j
+        return None  # the board has no 0 cells
+
+    def _is_valid(self, board, r, c, v):  # True if v at (r, c) duplicates nothing already placed
+        if v in board[r]:  # v already present in row r
+            return False
+
+        if v in [board[i][c] for i in range(9)]:  # v already present in column c
+            return False
+
+        br, bc = (r // 3) * 3, (c // 3) * 3  # top-left corner of the 3x3 block holding (r, c)
+        for i in range(br, br + 3):
+            for j in range(bc, bc + 3):
+                if board[i][j] == v:
+                    return False
+
+        return True
+
+    def _remove_cells(self, board, holes):  # zero out `holes` distinct random cells in place
+        cells = [(i, j) for i in range(9) for j in range(9)]  # all 81 coordinates
+        random.shuffle(cells)  # shuffled list => each cell is picked at most once
+
+        for i in range(min(holes, 81)):  # capped at 81, the number of cells on the board
+            r, c = cells[i]
+            board[r][c] = 0  # NOTE(review): no uniqueness check; puzzle may admit other solutions
diff --git a/trinity/common/workflows/sudoku_workflow.py b/trinity/common/workflows/sudoku_workflow.py
index dcc1df28dd..e65604ca30 100644
--- a/trinity/common/workflows/sudoku_workflow.py
+++ b/trinity/common/workflows/sudoku_workflow.py
@@ -1,5 +1,3 @@
-import re
-
 from trinity.common.experience import Experience
 from trinity.common.workflows.workflow import Workflow
 
@@ -8,23 +6,11 @@
 
 
 class SudokuWorkflow(Workflow):
-    """
-    Multi-step Sudoku solving workflow.
-
-    This workflow follows a FrozenLake-style agentic interaction pattern:
-    - Maintains an internal environment state (Sudoku board)
-    - Interacts with the model step by step
-    - Provides explicit rules, task description, and strict output format
-    - Gives feedback on invalid or ineffective actions
-    - Terminates on success or failure
-    """
-
     can_reset = True
 
     def __init__(self, task, model, auxiliary_models=None):
         super().__init__(task=task, model=model, auxiliary_models=auxiliary_models)
 
-        # Initialize puzzle
         if "puzzle" in task.raw_task and "solution" in task.raw_task:
             self.board = [row[:] for row in task.raw_task["puzzle"]]
             self.solution = [row[:] for row in task.raw_task["solution"]]
@@ -34,149 +20,140 @@ def __init__(self, task, model, auxiliary_models=None):
 
         self.judge = SudokuJudge()
         self.max_steps = 20
+        self.max_moves_per_step = 5
 
-        # State tracking (FrozenLake-style)
         self.current_step = 0
         self.last_board = None
         self.last_action = None
 
     def reset(self, task):
-        """Reset the workflow state for a new task."""
         self.board = [row[:] for row in task.raw_task["puzzle"]]
         self.solution = [row[:] for row in task.raw_task["solution"]]
         self.current_step = 0
         self.last_board = None
         self.last_action = None
 
+    def render_board(self):  # one text line per board row, cells separated by single spaces
+        return "\n".join(" ".join(map(str, row)) for row in self.board)
+
     def _build_prompt(self):
-        """
-        Build a detailed, step-aware prompt inspired by the Frozen Lake example.
-        """
         prompt = (
             "You are playing a Sudoku game.\n\n"
-            "Game Rules:\n"
-            "- The board is a 9x9 grid.\n"
-            "- A value of 0 represents an empty cell.\n"
-            "- Each row must contain the numbers 1 through 9 exactly once.\n"
-            "- Each column must contain the numbers 1 through 9 exactly once.\n"
-            "- Each 3x3 sub-grid must contain the numbers 1 through 9 exactly once.\n"
-            "- You may only place numbers in empty cells.\n\n"
+            "Rules:\n"  # static rule text; only the move cap, step counters and board vary
+            "- The board is 9x9.\n"
+            "- 0 means empty.\n"
+            "- Numbers 1–9 must appear exactly once in every row, column, and 3x3 block.\n"
+            "- You may only fill empty cells.\n\n"
             "Task:\n"
-            "- At each step, output ONE valid move to progress toward solving the puzzle.\n\n"
-            "Output Format (STRICT):\n"
-            "```row col value```\n\n"
+            "- In each step, output ONE OR MORE valid moves.\n"  # cap below is enforced by parse_action
+            f"- You may output up to {self.max_moves_per_step} moves per step.\n\n"
+            "Output format (STRICT):\n"  # NOTE(review): never states that row/col are 0-based (0..8)
+            "row col value\n"
+            "row col value\n\n"
             "Example:\n"
-            "```0 2 4```\n\n"
-            f"Current Step: {self.current_step}\n"
-            f"Remaining Steps: {self.max_steps - self.current_step}\n\n"
-            f"Current Board:\n{self.board}\n"
+            "0 2 4\n"
+            "1 3 5\n\n"
+            f"Current step: {self.current_step}\n"
+            f"Remaining steps: {self.max_steps - self.current_step}\n\n"
+            f"Current board:\n{self.render_board()}\n"  # one space-separated line per row
         )
 
         if self.last_board is not None and self.board == self.last_board:
             prompt += (
-                "\nYour last response was invalid or had no effect. "
-                "Please recheck the Sudoku rules and the required output format."
+                "\nYour previous response was invalid or had no effect. "
+                "Please follow the rules and output format strictly."  # only when board == last_board
             )
 
         return prompt
 
     def parse_action(self, text):
-        """
-        Parse model output.
-
-        Expected format:
-        ```row col value```
-        """
-        matches = re.findall(r"```(.*?)```", text, re.DOTALL)
-        if not matches:
-            return None
-
-        try:
-            parts = matches[-1].strip().split()
+        lines = text.strip().splitlines()  # one candidate "row col value" move per line
+        actions = []  # validated (r, c, v) tuples, in reply order
+
+        for line in lines:
+            line = line.strip()
+            if not line:  # blank lines are skipped, not treated as errors
+                continue
+            parts = line.split()  # NOTE(review): any non-move line voids the whole reply
             if len(parts) != 3:
                 return None
-
-            r, c, v = map(int, parts)
+            try:  # non-integer tokens make the whole reply invalid
+                r, c, v = map(int, parts)
+            except ValueError:
+                return None
             if not (0 <= r <= 8 and 0 <= c <= 8 and 1 <= v <= 9):
                 return None
+            actions.append((r, c, v))
 
-            return r, c, v
-        except ValueError:
+        if not actions or len(actions) > self.max_moves_per_step:  # empty or over the cap
             return None
 
-    def apply_move(self, r, c, v):
-        """Apply a move to the board if the cell is empty."""
-        if self.board[r][c] == 0:
-            self.board[r][c] = v
+        return actions
 
     def run(self):
-        """
-        Execute the Sudoku workflow step by step.
-        """
         experiences = []
 
         for _ in range(self.max_steps):
             prompt = self._build_prompt()
-
             responses = self.model.chat([{"role": "user", "content": prompt}])
             resp = responses[0]
 
             self.last_board = [row[:] for row in self.board]
 
-            action = self.parse_action(resp.response_text)
-            if action is None:
-                reward = -1.0
+            actions = self.parse_action(resp.response_text)  # list of (r, c, v), or None if malformed
+            if actions is None:
                 experiences.append(
                     Experience(
                         tokens=resp.tokens,
                         prompt_length=resp.prompt_length,
-                        reward=reward,
+                        reward=-1.0,  # unparsable reply: penalize and end the episode
                         logprobs=resp.logprobs,
                     )
                 )
                 break
 
-            r, c, v = action
-            self.apply_move(r, c, v)
+            board_changed = False  # set once at least one move has been written
+            invalid_move = False  # set when a move targets an already-filled cell
+
+            for r, c, v in actions:  # moves are applied in order, directly onto self.board
+                if self.board[r][c] != 0:
+                    invalid_move = True
+                    break  # earlier moves of this batch stay applied; the episode ends below
+                self.board[r][c] = v
+                board_changed = True
 
-            # Invalid or ineffective action
-            if self.board == self.last_board or not self.judge.is_valid(self.board):
-                reward = -1.0
+            if invalid_move or not board_changed or not self.judge.is_valid(self.board):
                 experiences.append(
                     Experience(
                         tokens=resp.tokens,
                         prompt_length=resp.prompt_length,
-                        reward=reward,
+                        reward=-1.0,  # occupied target cell or judge-rejected board
                         logprobs=resp.logprobs,
                     )
                 )
                 break
 
-            # Solved
             if self.judge.is_solved(self.board, self.solution):
-                reward = 1.0
                 experiences.append(
                     Experience(
                         tokens=resp.tokens,
                         prompt_length=resp.prompt_length,
-                        reward=reward,
+                        reward=1.0,  # judge reports the board solved: end the episode
                         logprobs=resp.logprobs,
                     )
                 )
                 break
 
-            # Intermediate step
-            reward = 0.0
             experiences.append(
                 Experience(
                     tokens=resp.tokens,
                     prompt_length=resp.prompt_length,
-                    reward=reward,
+                    reward=0.0,  # accepted but unfinished step
                     logprobs=resp.logprobs,
                 )
             )
 
-            self.last_action = action
+            self.last_action = actions  # the accepted move batch of this step
             self.current_step += 1
 
         return experiences