AgentOpt · chinganc · Dec 3, 2025 · Dec 3, 2025 · Dec 3, 2025 · Dec 4, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -49,7 +49,7 @@ jobs:
 
     # 6) Set up Python & install dependencies
     - uses: actions/setup-python@v5
-      with: { python-version: "3.10" }
+      with: { python-version: "3.13" }
     - name: Install Python deps
       run: |
         pip install -e .

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -19,10 +19,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
-    - name: Set up Python 3.10
+    - name: Set up Python 3.13
       uses: actions/setup-python@v3
       with:
-        python-version: "3.10"
+        python-version: "3.13"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip

diff --git a/opto/features/priority_search/priority_search.py b/opto/features/priority_search/priority_search.py
@@ -61,9 +61,11 @@ def __eq__(self, other):
     def __lt__(self, other):
         """ Compare two candidates based on their update_dict. """
         assert isinstance(other, ModuleCandidate), "other must be an instance of ModuleCandidate."
-        return self.created_time > other.created_time
+        # return self.created_time > other.created_time
         # self < other if, self is created later than other
         # Since we will use minheap, and this would give priority to later created candidates in the heap memory.
+        return self.num_rollouts < other.num_rollouts
+        # Tie-breaking rule for the min-heap memory: the candidate with fewer rollouts compares as smaller, so it gets priority.
 
     def __hash__(self):
         """ Hash the candidate based on its update_dict. """

diff --git a/opto/features/priority_search/search_template.py b/opto/features/priority_search/search_template.py
@@ -230,13 +230,13 @@ def train(self,
 
             train_scores.append(info_sample['mean_score'])  # so that mean can be computed
             train_num_samples.append(info_sample['num_samples'])
+            self.n_samples += len(samples)  # update the number of samples processed
 
             if self.n_iters % log_frequency == 0:
                 avg_train_score = np.sum(np.array(train_scores) * np.array(train_num_samples)) / np.sum(train_num_samples)
                 self.logger.log('Algo/Average train score', avg_train_score, self.n_iters, color='blue')
                 self.log(info_update, prefix="Update/")
                 self.log(info_sample, prefix="Sample/")
-                self.n_samples += len(samples)  # update the number of samples processed
                 self.logger.log('Algo/Number of training samples', self.n_samples, self.n_iters, color='blue')
                 # Log parameters
                 for p in self.agent.parameters():