From 8f1ff3687562759e4e9c639cea052ec8aebbe60c Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 20 Jan 2026 15:30:06 -0800 Subject: [PATCH 1/2] Fix quiet dropping of S3 granules --- CHANGELOG.md | 1 + .../collection_manager/services/CollectionWatcher.py | 5 +++++ collection_manager/collection_manager/services/S3Observer.py | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65ca82d..28b55c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Deprecated ### Removed ### Fixed +- SDAP-539: Fixed issue where similar S3 paths could lead to granules being incorrectly and quietly ignored in very specific scenarios - Fixed issue with Collection Manager Docker build failing due to setuptools issue ### Security diff --git a/collection_manager/collection_manager/services/CollectionWatcher.py b/collection_manager/collection_manager/services/CollectionWatcher.py index 94863fd..fd9f73e 100644 --- a/collection_manager/collection_manager/services/CollectionWatcher.py +++ b/collection_manager/collection_manager/services/CollectionWatcher.py @@ -241,5 +241,10 @@ def _handle_event(self, event): else: modified_time = int(os.path.getmtime(path)) self._loop.create_task(self._callback(path, modified_time, collection)) + else: + logger.error(f'Event for file {path} will be discarded as it is not owned by the collection it ' + f'matched to: {collection.dataset_id}. This should not happen. Please report this ' + f'with the relevant logs and collection configuration to dev@sdap.apache.org or ' + f'https://issues.apache.org/jira/projects/SDAP/issues/') except IsADirectoryError: return diff --git a/collection_manager/collection_manager/services/S3Observer.py b/collection_manager/collection_manager/services/S3Observer.py index 5ee84d6..64bd03c 100644 --- a/collection_manager/collection_manager/services/S3Observer.py +++ b/collection_manager/collection_manager/services/S3Observer.py @@ -152,7 +152,7 @@ async def _get_s3_files(self, path: str): return new_cache def _get_object_key(full_path: str): - key = urlparse(full_path).path.strip("/") + key = urlparse(full_path).path.lstrip("/") return key From f0adadecc3c3067d550f3c560f8824a2893a2d77 Mon Sep 17 00:00:00 2001 From: rileykk Date: Tue, 20 Jan 2026 15:36:43 -0800 Subject: [PATCH 2/2] Add mismatch path to error log --- .../collection_manager/services/CollectionWatcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collection_manager/collection_manager/services/CollectionWatcher.py b/collection_manager/collection_manager/services/CollectionWatcher.py index fd9f73e..0c524a5 100644 --- a/collection_manager/collection_manager/services/CollectionWatcher.py +++ b/collection_manager/collection_manager/services/CollectionWatcher.py @@ -243,8 +243,8 @@ def _handle_event(self, event): self._loop.create_task(self._callback(path, modified_time, collection)) else: logger.error(f'Event for file {path} will be discarded as it is not owned by the collection it ' - f'matched to: {collection.dataset_id}. This should not happen. Please report this ' - f'with the relevant logs and collection configuration to dev@sdap.apache.org or ' - f'https://issues.apache.org/jira/projects/SDAP/issues/') + f'matched to: {collection.dataset_id} ({collection.path}). This should not happen. ' + f'Please report this with the relevant logs and collection configuration to ' + f'dev@sdap.apache.org or https://issues.apache.org/jira/projects/SDAP/issues/') except IsADirectoryError: return