From 25ef72329b19d3fd83cd335966e1000289a65875 Mon Sep 17 00:00:00 2001 From: Karthick Date: Tue, 16 Dec 2025 11:08:02 +0000 Subject: [PATCH 1/2] MODIS: Remove XML metadata dependency --- datasets/modis/Dockerfile | 11 ++++--- datasets/modis/README.md | 2 +- datasets/modis/dataset.yaml | 2 +- datasets/modis/misc.py | 18 ---------- datasets/modis/modis.py | 58 ++++++++++++++++++++++++--------- datasets/modis/requirements.txt | 2 +- 6 files changed, 51 insertions(+), 42 deletions(-) delete mode 100644 datasets/modis/misc.py diff --git a/datasets/modis/Dockerfile b/datasets/modis/Dockerfile index 17cd53099..76f9fe5dd 100644 --- a/datasets/modis/Dockerfile +++ b/datasets/modis/Dockerfile @@ -21,15 +21,16 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10 # See https://github.com/mapbox/rasterio/issues/1289 ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt -# Install Python 3.8 -RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \ - && bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \ - && rm -rf "Mambaforge-$(uname)-$(uname -m).sh" +# Install Python via Miniforge (Mambaforge was deprecated) +RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" \ + && bash "Miniforge3-$(uname)-$(uname -m).sh" -b -p /opt/conda \ + && rm -rf "Miniforge3-$(uname)-$(uname -m).sh" ENV PATH /opt/conda/bin:$PATH ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH -RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5 +# Install Python and GDAL with HDF4 support (required for MODIS .hdf files) +RUN mamba install -y -c conda-forge python=3.10 gdal libgdal-hdf4 pip setuptools cython numpy RUN python -m pip install --upgrade pip diff --git a/datasets/modis/README.md b/datasets/modis/README.md index 2d489db08..61f41a703 100644 --- a/datasets/modis/README.md +++ b/datasets/modis/README.md @@ -21,7 +21,7 @@ Note the force reinstall of `rasterio` in the Dockerfile is necessary for raster The update workflows were registered with ```shell -ls -1 datasets/modis/collection/ | xargs -I {} pctasks dataset process-items goes-update --is-update-workflow --dataset datasets/modis/dataset.yaml -u -c {} +ls -1 datasets/modis/collection/ | xargs -I {} bash -c 'echo y | pctasks dataset process-items goes-update --is-update-workflow --dataset datasets/modis/dataset.yaml -u -c {}' ``` diff --git a/datasets/modis/dataset.yaml b/datasets/modis/dataset.yaml index 86c0579f7..f31a44bab 100644 --- a/datasets/modis/dataset.yaml +++ b/datasets/modis/dataset.yaml @@ -1,5 +1,5 @@ id: modis -image: ${{ args.registry }}/pctasks-modis:2023.7.6.0 +image: ${{ args.registry }}/pctasks-modis:2025.12.15.0 args: - registry diff --git a/datasets/modis/misc.py b/datasets/modis/misc.py deleted file mode 100644 index ef94b030a..000000000 --- a/datasets/modis/misc.py +++ /dev/null @@ -1,18 +0,0 @@ -# Add back in the platform property which NASA removed from their XML on March 13 2024 -# On the MODIS side terra is distributed as MOD and aqua as MYD, -# but Within MPC both are distributed as MODxxx -def add_platform_field(item, href, logger): - if ("platform" not in item.properties) or (item.properties["platform"] == ""): - logger.debug("platform field missing, filling it in based on original xml href") - try: - if href.split('/')[4][0:3] == "MOD": - item.properties["platform"] = "terra" - elif href.split('/')[4][0:3] == "MYD": - item.properties["platform"] = "aqua" - elif href.split('/')[4][0:3] == "MCD": - item.properties["platform"] = "terra,aqua" - else: - logger.warning("href did not contain MOD/MYD/MCD in the usual spot") - except Exception as e: - logger.warning(f"href did not contain MOD/MYD/MCD in the usual spot, got error: {e}") - return item diff --git a/datasets/modis/modis.py b/datasets/modis/modis.py index bc52d258e..56735b5f4 100644 --- a/datasets/modis/modis.py +++ b/datasets/modis/modis.py @@ -6,10 +6,7 @@ import pystac import stactools.modis.cog import stactools.modis.stac -from azure.core.exceptions import ResourceNotFoundError -from stactools.core.utils.antimeridian import Strategy from stactools.modis.file import File -from misc import add_platform_field from pctasks.core.models.task import WaitTaskResult from pctasks.core.storage import StorageFactory @@ -25,6 +22,42 @@ COG_CONTAINER = "blob://modiseuwest/modis-061-cogs/" +# Add back in the platform property which NASA removed from their XML on March 13 2024 +# On the MODIS side terra is distributed as MOD and aqua as MYD, +# but Within MPC both are distributed as MODxxx +# Copied the method from misc.py and deleted the file +def add_platform_field(item, href, logger): + """ + add_platform_field # noqa: E501 + + Adds the platform field to a STAC item based on the HDF file href. + NASA removed this property from their XML metadata on March 13, 2024. + + :param item: The STAC item to update + :type item: pystac.Item + :param href: The href path containing MOD/MYD/MCD prefix + :type href: str + :param logger: Logger instance for debug/warning messages + :type logger: logging.Logger + :return: The updated STAC item with platform field + :rtype: pystac.Item + """ + if ("platform" not in item.properties) or (item.properties["platform"] == ""): + logger.debug("platform field missing, filling it in based on original xml href") + try: + if href.split('/')[4][0:3] == "MOD": + item.properties["platform"] = "terra" + elif href.split('/')[4][0:3] == "MYD": + item.properties["platform"] = "aqua" + elif href.split('/')[4][0:3] == "MCD": + item.properties["platform"] = "terra,aqua" + else: + logger.warning("href did not contain MOD/MYD/MCD in the usual spot") + except Exception as e: + logger.warning(f"href did not contain MOD/MYD/MCD in the usual spot, got error: {e}") + return item + + class MODISCollection(Collection): @classmethod def create_item( @@ -50,17 +83,9 @@ def create_item( file = File(os.path.join(temporary_directory, os.path.basename(asset_uri))) logger.debug(f"Downloading {asset_uri}") asset_storage.download_file(asset_path, file.hdf_href) - logger.debug(f"Downloading {asset_uri}.xml") - try: - asset_storage.download_file(f"{asset_path}.xml", file.xml_href) - except ResourceNotFoundError as e: - logger.warning(f"Missing XML file, skipping: {e}") - return [] logger.debug("Creating item") - item = stactools.modis.stac.create_item( - file.xml_href, antimeridian_strategy=Strategy.NORMALIZE - ) + item = stactools.modis.stac.create_item(file.hdf_href) if create_cogs: logger.debug(f"Adding COGS to item {item}") @@ -83,10 +108,11 @@ def create_item( file = File(asset_storage.get_url(asset_path)) logger.debug(f"Setting HDF asset href to {file.hdf_href}") item.assets["hdf"].href = file.hdf_href - logger.debug(f"Setting metadata asset href to {file.xml_href}") - item.assets["metadata"].href = file.xml_href - item.assets["metadata"].href = file.xml_href - item = add_platform_field(item, file.xml_href, logger) + # Remove metadata asset if it exists since XML files are no longer provided + if "metadata" in item.assets: + del item.assets["metadata"] + + item = add_platform_field(item, file.hdf_href, logger) return [item] diff --git a/datasets/modis/requirements.txt b/datasets/modis/requirements.txt index 5f8097612..df5ab480c 100644 --- a/datasets/modis/requirements.txt +++ b/datasets/modis/requirements.txt @@ -1 +1 @@ -git+https://github.com/stactools-packages/modis@419101223609805f9ac9d2a38401448a36331460 +git+https://github.com/stactools-packages/modis@8854ceb263907e7b42a183fb5a79476baace3219 From 5f6d598b8fb1713b9d8ede9a2013d63331255dc1 Mon Sep 17 00:00:00 2001 From: Karthick Date: Wed, 17 Dec 2025 14:16:01 +0000 Subject: [PATCH 2/2] minor update to readme --- datasets/modis/README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datasets/modis/README.md b/datasets/modis/README.md index 61f41a703..e5c8a34fb 100644 --- a/datasets/modis/README.md +++ b/datasets/modis/README.md @@ -30,10 +30,12 @@ ls -1 datasets/modis/collection/ | xargs -I {} bash -c 'echo y | pctasks dataset ``` ls -1 datasets/modis/collection/ | \ - xargs -I {} pctasks dataset process-items update \ + xargs -I {} bash -c 'echo y | pctasks dataset process-items update \ -c {} \ --workflow-id {}-update \ --is-update-workflow \ --dataset datasets/modis/dataset.yaml \ - --upsert -``` \ No newline at end of file + --upsert' +``` + +After running the Dynamic Updates command, the ingestion workflow will start using the latest code changes. \ No newline at end of file