diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7889b69d..622c9626 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -49,7 +49,7 @@ jobs:
       # 6) Set up Python & install dependencies
       - uses: actions/setup-python@v5
-        with: { python-version: "3.10" }
+        with: { python-version: "3.13" }
       - name: Install Python deps
         run: |
           pip install -e .
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 8074be85..a111e34f 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -19,10 +19,10 @@ jobs:
     steps:
     - uses: actions/checkout@v4
 
-    - name: Set up Python 3.10
+    - name: Set up Python 3.13
      uses: actions/setup-python@v3
      with:
-        python-version: "3.10"
+        python-version: "3.13"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/opto/features/priority_search/priority_search.py b/opto/features/priority_search/priority_search.py
index 86bcbd60..ae72b100 100644
--- a/opto/features/priority_search/priority_search.py
+++ b/opto/features/priority_search/priority_search.py
@@ -61,9 +61,11 @@ def __eq__(self, other):
     def __lt__(self, other):
         """ Compare two candidates based on their update_dict. """
         assert isinstance(other, ModuleCandidate), "other must be an instance of ModuleCandidate."
-        return self.created_time > other.created_time
+        # return self.created_time > other.created_time
         # self < other if, self is created later than other
         # Since we will use minheap, and this would give priority to later created candidates in the heap memory.
+        return self.num_rollouts < other.num_rollouts
+        # This would give priority to candidates with fewer rollouts in the heap memory for tie-breaking.
     def __hash__(self):
         """ Hash the candidate based on its update_dict.
         """
diff --git a/opto/features/priority_search/search_template.py b/opto/features/priority_search/search_template.py
index ec244f74..616dd1ff 100644
--- a/opto/features/priority_search/search_template.py
+++ b/opto/features/priority_search/search_template.py
@@ -230,13 +230,13 @@ def train(self,
             train_scores.append(info_sample['mean_score'])  # so that mean can be computed
             train_num_samples.append(info_sample['num_samples'])
+            self.n_samples += len(samples)  # update the number of samples processed
 
             if self.n_iters % log_frequency == 0:
                 avg_train_score = np.sum(np.array(train_scores) * np.array(train_num_samples)) / np.sum(train_num_samples)
                 self.logger.log('Algo/Average train score', avg_train_score, self.n_iters, color='blue')
                 self.log(info_update, prefix="Update/")
                 self.log(info_sample, prefix="Sample/")
-                self.n_samples += len(samples)  # update the number of samples processed
                 self.logger.log('Algo/Number of training samples', self.n_samples, self.n_iters, color='blue')
 
             # Log parameters
             for p in self.agent.parameters():