From b7b6545c11cd6e624769464457ea145062b188a5 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 8 Dec 2025 12:40:17 +0100 Subject: [PATCH 01/16] Add ImpactForecast --- climada/engine/__init__.py | 1 + climada/engine/impact_forecast.py | 70 +++++++++++++++++++++ climada/engine/test/test_impact.py | 40 ++++++------ climada/engine/test/test_impact_forecast.py | 49 +++++++++++++++ 4 files changed, 142 insertions(+), 18 deletions(-) diff --git a/climada/engine/__init__.py b/climada/engine/__init__.py index ef8292f75d..1970f7706b 100755 --- a/climada/engine/__init__.py +++ b/climada/engine/__init__.py @@ -22,3 +22,4 @@ from .cost_benefit import * from .impact import * from .impact_calc import * +from .impact_forecast import ImpactForecast diff --git a/climada/engine/impact_forecast.py b/climada/engine/impact_forecast.py index eb69b01b5d..4ff5279d97 100644 --- a/climada/engine/impact_forecast.py +++ b/climada/engine/impact_forecast.py @@ -18,3 +18,73 @@ Define Forecast variant of Impact. """ + +import logging + +import numpy as np + +from ..util import log_level +from ..util.forecast import Forecast +from .impact import Impact + +LOGGER = logging.getLogger(__name__) + + +class ImpactForecast(Forecast, Impact): + """An impact object with forecast information""" + + def __init__( + self, + *, + lead_time: np.ndarray | None, + member: np.ndarray | None, + **impact_kwargs, + ): + """Initialize the impact forecast. + + Parameters + ---------- + lead_time : np.ndarray, optional + The lead time associated with each event entry + member : np.ndarray, optional + The ensemble member associated with each event entry + impact_kwargs + Keyword-arguments passed to ~:py:class`climada.engine.impact.Impact`. + """ + # TODO: Maybe assert array lengths? + super().__init__(lead_time, member, **impact_kwargs) + + @classmethod + def from_impact( + cls, impact: Impact, lead_time: np.ndarray | None, member: np.ndarray | None + ): + """Create an impact forecast from an impact object and forecast information. 
+ + Parameters + ---------- + impact : climada.engine.impact.Impact + The impact object whose data to use in the forecast object + lead_time : np.ndarray, optional + The lead time associated with each event entry + member : np.ndarray, optional + The ensemble member associated with each event entry + """ + with log_level("WARNING", "climada.engine.impact"): + return cls( + lead_time=lead_time, + member=member, + event_id=impact.event_id, + event_name=impact.event_name, + date=impact.date, + frequency=impact.frequency, + frequency_unit=impact.frequency_unit, + coord_exp=impact.coord_exp, + crs=impact.crs, + eai_exp=impact.eai_exp, + at_event=impact.at_event, + tot_value=impact.tot_value, + aai_agg=impact.aai_agg, + unit=impact.unit, + imp_mat=impact.imp_mat, + haz_type=impact.haz_type, + ) diff --git a/climada/engine/test/test_impact.py b/climada/engine/test/test_impact.py index 38b3def3d8..f91fc2de98 100644 --- a/climada/engine/test/test_impact.py +++ b/climada/engine/test/test_impact.py @@ -47,26 +47,30 @@ STR_DT = h5py.special_dtype(vlen=str) -def dummy_impact(): - """Return an impact object for testing""" - return Impact( - event_id=np.arange(6) + 10, - event_name=[0, 1, "two", "three", 30, 31], - date=np.arange(6), - coord_exp=np.array([[1, 2], [1.5, 2.5]]), - crs=DEF_CRS, - eai_exp=np.array([7.2, 7.2]), - at_event=np.array([0, 2, 4, 6, 60, 62]), - frequency=np.array([1 / 6, 1 / 6, 1, 1, 1 / 30, 1 / 30]), - tot_value=7, - aai_agg=14.4, - unit="USD", - frequency_unit="1/month", - imp_mat=sparse.csr_matrix( +def impact_kwargs(): + return { + "event_id": np.arange(6) + 10, + "event_name": [0, 1, "two", "three", 30, 31], + "date": np.arange(6), + "coord_exp": np.array([[1, 2], [1.5, 2.5]]), + "crs": DEF_CRS, + "eai_exp": np.array([7.2, 7.2]), + "at_event": np.array([0, 2, 4, 6, 60, 62]), + "frequency": np.array([1 / 6, 1 / 6, 1, 1, 1 / 30, 1 / 30]), + "tot_value": 7, + "aai_agg": 14.4, + "unit": "USD", + "frequency_unit": "1/month", + "imp_mat": sparse.csr_matrix( np.array([[0, 0], [1, 1], [2, 2], [3, 3], [30, 30], [31, 31]]) ), - haz_type="TC", - ) + "haz_type": "TC", + } + + +def dummy_impact(): + """Return an impact object for testing""" + return Impact(**impact_kwargs()) def dummy_impact_yearly(): diff --git a/climada/engine/test/test_impact_forecast.py b/climada/engine/test/test_impact_forecast.py index 8fe6fa72d4..2ba30d9f94 100644 --- a/climada/engine/test/test_impact_forecast.py +++ b/climada/engine/test/test_impact_forecast.py @@ -18,3 +18,52 @@ Tests for Impact Forecast. 
""" + +import numpy as np +import numpy.testing as npt +import pandas as pd +import pytest +from scipy.sparse import csr_matrix + +from climada.engine import Impact, ImpactForecast + +from .test_impact import impact_kwargs + + +@pytest.fixture +def impact(): + return Impact(**impact_kwargs()) + + +def assert_impact_kwargs(impact: Impact, **kwargs): + for key, value in kwargs.items(): + attr = getattr(impact, key) + if isinstance(value, (np.ndarray, list)): + npt.assert_array_equal(attr, value) + elif isinstance(value, csr_matrix): + npt.assert_array_equal(attr.todense(), value.todense()) + else: + assert attr == value + + +class TestImpactForecastInit: + lead_time = pd.date_range("2000-01-01", "2000-01-02", periods=6).to_numpy() + member = np.arange(6) + + def test_impact_forecast_init(self): + forecast1 = ImpactForecast( + lead_time=self.lead_time, + member=self.member, + **impact_kwargs(), + ) + npt.assert_array_equal(forecast1.lead_time, self.lead_time) + npt.assert_array_equal(forecast1.member, self.member) + assert_impact_kwargs(forecast1, **impact_kwargs()) + + def test_impact_forecast_from_impact(self, impact): + forecast = ImpactForecast.from_impact( + impact, lead_time=self.lead_time, member=self.member + ) + npt.assert_array_equal(forecast.lead_time, self.lead_time) + npt.assert_array_equal(forecast.member, self.member) + assert_impact_kwargs(forecast, **impact_kwargs()) From 3b077a0cb0cd6506d399083dd8b56f6f684ff4f1 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 8 Dec 2025 12:44:37 +0100 Subject: [PATCH 02/16] Add fixture for impact kwargs --- climada/engine/test/test_impact_forecast.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/climada/engine/test/test_impact_forecast.py b/climada/engine/test/test_impact_forecast.py index 2ba30d9f94..20f5420ae3 100644 --- a/climada/engine/test/test_impact_forecast.py +++ b/climada/engine/test/test_impact_forecast.py @@ -27,12 +27,17 @@ from climada.engine import Impact, ImpactForecast -from .test_impact import impact_kwargs +from .test_impact import impact_kwargs as imp_kwargs @pytest.fixture -def impact(): - return Impact(**impact_kwargs()) +def impact_kwargs(): + return imp_kwargs() + + +@pytest.fixture +def impact(impact_kwargs): + return Impact(**impact_kwargs) def assert_impact_kwargs(impact: Impact, **kwargs): @@ -50,20 +55,20 @@ class TestImpactForecastInit: lead_time = pd.date_range("2000-01-01", "2000-01-02", periods=6).to_numpy() member = np.arange(6) - def test_impact_forecast_init(self): + def test_impact_forecast_init(self, impact_kwargs): forecast1 = ImpactForecast( lead_time=self.lead_time, member=self.member, - **impact_kwargs(), + **impact_kwargs, ) npt.assert_array_equal(forecast1.lead_time, self.lead_time) npt.assert_array_equal(forecast1.member, self.member) - assert_impact_kwargs(forecast1, **impact_kwargs()) + assert_impact_kwargs(forecast1, **impact_kwargs) - def test_impact_forecast_from_impact(self, impact): + def test_impact_forecast_from_impact(self, impact, impact_kwargs): forecast = ImpactForecast.from_impact( impact, lead_time=self.lead_time, member=self.member ) npt.assert_array_equal(forecast.lead_time, self.lead_time) npt.assert_array_equal(forecast.member, self.member) - assert_impact_kwargs(forecast, **impact_kwargs()) + assert_impact_kwargs(forecast, **impact_kwargs) From 44bf09a4ad2316cd7be7ec0ea686f6b3381f284f Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 
8 Dec 2025 12:55:42 +0100 Subject: [PATCH 03/16] Use kwarg assignment in ImpactForecast.__init__ --- climada/engine/impact_forecast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/climada/engine/impact_forecast.py b/climada/engine/impact_forecast.py index 4ff5279d97..d534e32ead 100644 --- a/climada/engine/impact_forecast.py +++ b/climada/engine/impact_forecast.py @@ -52,7 +52,7 @@ def __init__( Keyword-arguments passed to ~:py:class`climada.engine.impact.Impact`. """ # TODO: Maybe assert array lengths? - super().__init__(lead_time, member, **impact_kwargs) + super().__init__(lead_time=lead_time, member=member, **impact_kwargs) @classmethod def from_impact( From 347fcf1877f7e0b38977f6bc498e9cc68c8bb2ee Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 8 Dec 2025 13:08:40 +0100 Subject: [PATCH 04/16] Add test for ImpactForecast.select --- climada/engine/test/test_impact_forecast.py | 73 +++++++++++++-------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/climada/engine/test/test_impact_forecast.py b/climada/engine/test/test_impact_forecast.py index 20f5420ae3..65cb72d389 100644 --- a/climada/engine/test/test_impact_forecast.py +++ b/climada/engine/test/test_impact_forecast.py @@ -40,35 +40,56 @@ def impact(impact_kwargs): return Impact(**impact_kwargs) -def assert_impact_kwargs(impact: Impact, **kwargs): - for key, value in kwargs.items(): - attr = getattr(impact, key) - if isinstance(value, (np.ndarray, list)): - npt.assert_array_equal(attr, value) - elif isinstance(value, csr_matrix): - npt.assert_array_equal(attr.todense(), value.todense()) - else: - assert attr == value +@pytest.fixture +def lead_time(): + return pd.date_range("2000-01-01", "2000-01-02", periods=6).to_numpy() -class TestImpactForecastInit: - lead_time = pd.date_range("2000-01-01", "2000-01-02", periods=6).to_numpy() - member = np.arange(6) +@pytest.fixture +def member(): + return np.arange(6) + + +@pytest.fixture +def impact_forecast(impact, lead_time, member): + return ImpactForecast.from_impact(impact, lead_time=lead_time, member=member) + - def test_impact_forecast_init(self, impact_kwargs): +class TestImpactForecastInit: + def assert_impact_kwargs(self, impact: Impact, **kwargs): + for key, value in kwargs.items(): + attr = getattr(impact, key) + if isinstance(value, (np.ndarray, list)): + npt.assert_array_equal(attr, value) + elif isinstance(value, csr_matrix): + npt.assert_array_equal(attr.todense(), value.todense()) + else: + assert attr == value + + def test_impact_forecast_init(self, impact_kwargs, lead_time, member): forecast1 = ImpactForecast( - lead_time=self.lead_time, - member=self.member, + lead_time=lead_time, + member=member, **impact_kwargs, ) - npt.assert_array_equal(forecast1.lead_time, self.lead_time) - npt.assert_array_equal(forecast1.member, self.member) - assert_impact_kwargs(forecast1, **impact_kwargs) - - def test_impact_forecast_from_impact(self, impact, impact_kwargs): - forecast = ImpactForecast.from_impact( - impact, lead_time=self.lead_time, member=self.member - ) - npt.assert_array_equal(forecast.lead_time, self.lead_time) - npt.assert_array_equal(forecast.member, self.member) - assert_impact_kwargs(forecast, **impact_kwargs) + npt.assert_array_equal(forecast1.lead_time, lead_time) + npt.assert_array_equal(forecast1.member, member) + self.assert_impact_kwargs(forecast1, **impact_kwargs) + + def test_impact_forecast_from_impact( + self, impact_forecast, impact_kwargs, lead_time, member + ): + 
npt.assert_array_equal(impact_forecast.lead_time, lead_time) + npt.assert_array_equal(impact_forecast.member, member) + self.assert_impact_kwargs(impact_forecast, **impact_kwargs) + + +def test_impact_forecast_select(impact_forecast, lead_time, member): + """Check if Impact.select works on the derived class""" + impact_fc = impact_forecast.select(event_ids=[12, 10]) + # NOTE: Events keep their original order + npt.assert_array_equal( + impact_fc.event_id, impact_forecast.event_id[np.array([0, 2])] + ) + npt.assert_array_equal(impact_fc.member, member[np.array([0, 2])]) + npt.assert_array_equal(impact_fc.lead_time, lead_time[np.array([0, 2])]) From 4d36a0de8152971c9868041a87b6fc2683165cff Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 8 Dec 2025 15:12:22 +0100 Subject: [PATCH 05/16] Fix merge issue --- climada/engine/test/test_impact_forecast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/climada/engine/test/test_impact_forecast.py b/climada/engine/test/test_impact_forecast.py index 65cb72d389..754b9b4a90 100644 --- a/climada/engine/test/test_impact_forecast.py +++ b/climada/engine/test/test_impact_forecast.py @@ -42,7 +42,7 @@ def impact(impact_kwargs): @pytest.fixture def lead_time(): - return pd.date_range("2000-01-01", "2000-01-02", periods=6).to_numpy() + return pd.timedelta_range(start="1 day", periods=6).to_numpy() @pytest.fixture From 78878b769b79ea83f7e6304d8db6496ff741018f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evelyn=20M=C3=BChlhofer?= Date: Mon, 8 Dec 2025 15:40:38 +0100 Subject: [PATCH 06/16] minor test modification --- climada/engine/test/test_impact_forecast.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/climada/engine/test/test_impact_forecast.py b/climada/engine/test/test_impact_forecast.py index 754b9b4a90..14184cda6e 100644 --- a/climada/engine/test/test_impact_forecast.py +++ b/climada/engine/test/test_impact_forecast.py @@ -56,6 +56,7 @@ def impact_forecast(impact, lead_time, member): class TestImpactForecastInit: + def assert_impact_kwargs(self, impact: Impact, **kwargs): for key, value in kwargs.items(): attr = getattr(impact, key) @@ -84,9 +85,11 @@ def test_impact_forecast_from_impact( self.assert_impact_kwargs(impact_forecast, **impact_kwargs) -def test_impact_forecast_select(impact_forecast, lead_time, member): +def test_impact_forecast_select(impact_forecast, lead_time, member, impact_kwargs): """Check if Impact.select works on the derived class""" - impact_fc = impact_forecast.select(event_ids=[12, 10]) + + event_ids = impact_kwargs["event_id"][np.array([2, 0])] + impact_fc = impact_forecast.select(event_ids=event_ids) # NOTE: Events keep their original order npt.assert_array_equal( impact_fc.event_id, impact_forecast.event_id[np.array([0, 2])] From f79f4a153def1c82ad1bdf9a4c5bf80226ebc079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evelyn=20M=C3=BChlhofer?= Date: Tue, 9 Dec 2025 16:16:23 +0100 Subject: [PATCH 07/16] fix formatting --- climada/hazard/forecast.py | 134 +++++++++++++++++++++++++++++++++++++ climada/hazard/xarray.py | 22 +++--- 2 files changed, 148 insertions(+), 8 deletions(-) diff --git a/climada/hazard/forecast.py b/climada/hazard/forecast.py index c2705134a0..6488700a81 100644 --- a/climada/hazard/forecast.py +++ b/climada/hazard/forecast.py @@ -20,10 +20,14 @@ """ import logging +import pathlib +from typing import Any, Dict, Optional import numpy as np +import xarray as xr from climada.hazard.base import Hazard +from climada.hazard.xarray 
import HazardXarrayReader from climada.util.forecast import Forecast LOGGER = logging.getLogger(__name__) @@ -89,3 +93,133 @@ def from_hazard(cls, hazard: Hazard, lead_time: np.ndarray, member: np.ndarray): intensity=hazard.intensity, fraction=hazard.fraction, ) + + @classmethod + def from_xarray_raster( + cls, + data: xr.Dataset | pathlib.Path | str, + hazard_type: str, + intensity_unit: str, + *, + intensity: str = "intensity", + coordinate_vars: Optional[Dict[str, str]] = None, + data_vars: Optional[Dict[str, str]] = None, + crs: str = "EPSG:4326", + rechunk: bool = False, + open_dataset_kws: dict[str, Any] | None = None, + ): + """Read forecast hazard data from an xarray Dataset + + This extends the parent :py:meth:`~climada.hazard.base.Hazard.from_xarray_raster` + to handle forecast dimensions (lead_time and member). For forecast data, the + "event" dimension is constructed from the Cartesian product of lead_time and + member dimensions, so you don't need to specify an "event" coordinate. + + Parameters + ---------- + data : xarray.Dataset or Path or str + The filepath to read the data from or the already opened dataset + hazard_type : str + The type identifier of the hazard + intensity_unit : str + The physical units of the intensity + intensity : str, optional + Identifier of the DataArray containing the hazard intensity data + coordinate_vars : dict(str, str), optional + Mapping from default coordinate names to coordinate names in the data. + For HazardForecast, should include: + + - ``"leadtime"``: name of the lead time coordinate (required) + - ``"members"``: name of the ensemble member coordinate (required) + - ``"longitude"``: name of longitude coordinate (default: "longitude") + - ``"latitude"``: name of latitude coordinate (default: "latitude") + + Note: The "event" coordinate is automatically constructed from lead_time + and member, so it should not be specified. + data_vars : dict(str, str), optional + Mapping from default variable names to variable names in the data + crs : str, optional + Coordinate reference system identifier. Defaults to "EPSG:4326" + rechunk : bool, optional + Whether to rechunk the dataset before processing. Defaults to False + open_dataset_kws : dict, optional + Keyword arguments passed to xarray.open_dataset if data is a file path + + Returns + ------- + HazardForecast + A forecast hazard object with lead_time and member attributes populated + + See Also + -------- + :py:meth:`climada.hazard.base.Hazard.from_xarray_raster` + Parent method documentation for standard hazard loading + """ + + # Open dataset if needed + + hazard_type = "PR" + intensity_unit = "mm/h" + coordinate_vars = { + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + "event": "event", + } + + if isinstance(data, (pathlib.Path, str)): + open_dataset_kws = open_dataset_kws or {} + open_dataset_kws = {"chunks": "auto"} | open_dataset_kws + dset = xr.open_dataset(data) # , **open_dataset_kws + else: + dset = data + + # Dynamically extract the data variable name + data_var_names = list(dset.data_vars.keys()) + if len(data_var_names) == 0: + raise ValueError("Dataset has no data variables") + intensity = data_var_names[0] # Use first data variable name + + # Extract forecast coordinates + coordinate_vars = coordinate_vars or {} + for key in ["lead_time", "member"]: + if key not in coordinate_vars: + raise ValueError( + f"coordinate_vars must include '{key}' key. 
" + f"Available coordinates: {list(dset.coords.keys())}" + ) + leadtime_var = coordinate_vars["lead_time"] + member_var = coordinate_vars["member"] + + dset = ds.assign_coords( + event=( + ("lead_time", "eps"), + np.zeros((len(ds["lead_time"]), len(ds["eps"]))), + ) + ) + + dset_squeezed = dset.squeeze() + + # Prepare coordinate_vars for parent call + # Remove forecast-specific keys that the parent doesn't understand + parent_coord_vars = { + k: v for k, v in coordinate_vars.items() if k not in ["member", "lead_time"] + } + parent_coord_vars["event"] = "event" + + reader = HazardXarrayReader( + data=dset_squeezed, + coordinate_vars=parent_coord_vars, + intensity=intensity, + ) + + kwargs = reader.get_hazard_kwargs() | { + "haz_type": hazard_type, + "units": intensity_unit, + "lead_time": reader.data_stacked[leadtime_var].values, + "member": reader.data_stacked[member_var].values, + } + + # Convert to HazardForecast with forecast attributes + return cls(**Hazard._check_and_cast_attrs(kwargs)) diff --git a/climada/hazard/xarray.py b/climada/hazard/xarray.py index df7fc9bf67..7e04f430ce 100644 --- a/climada/hazard/xarray.py +++ b/climada/hazard/xarray.py @@ -238,6 +238,8 @@ class HazardXarrayReader: ---------- data : xr.Dataset The data to be read as hazard. + data_stacked : xr.Dataset + The internally stacked (vectorized) version of ``data``. intensity : str The name of the variable containing the hazard intensity information. Default: ``"intensity"`` @@ -254,6 +256,7 @@ class HazardXarrayReader: """ data: xr.Dataset + data_stacked: xr.Dataset = field(init=False) intensity: str = "intensity" coordinate_vars: InitVar[dict[str, str] | None] = field(default=None, kw_only=True) data_vars: dict[str, str] | None = field(default=None, kw_only=True) @@ -344,7 +347,7 @@ def get_hazard_kwargs(self) -> dict[str, Any]: # preserve order. However, we want longitude to run faster than latitude. # So we use 'dict' without values, as 'dict' preserves insertion order # (dict keys behave like a set). - data = data.stack( + self.data_stacked = data.stack( event=self.data_dims["event"], lat_lon=list( dict.fromkeys( @@ -355,20 +358,20 @@ def get_hazard_kwargs(self) -> dict[str, Any]: # Transform coordinates into centroids centroids = Centroids( - lat=data[self.coords["latitude"]].to_numpy(), - lon=data[self.coords["longitude"]].to_numpy(), + lat=self.data_stacked[self.coords["latitude"]].to_numpy(), + lon=self.data_stacked[self.coords["longitude"]].to_numpy(), crs=self.crs, ) # Read the intensity data LOGGER.debug("Loading Hazard intensity from DataArray '%s'", self.intensity) - intensity_matrix = _to_csr_matrix(data[self.intensity]) + intensity_matrix = _to_csr_matrix(self.data_stacked[self.intensity]) # Create a DataFrame storing access information for each of data_vars # NOTE: Each row will be passed as arguments to # `load_from_xarray_or_return_default`, see its docstring for further # explanation of the DataFrame columns / keywords. 
- num_events = data.sizes["event"] + num_events = self.data_stacked.sizes["event"] data_ident = pd.DataFrame( data={ # The attribute of the Hazard class where the data will be stored @@ -384,10 +387,12 @@ def get_hazard_kwargs(self) -> dict[str, Any]: np.array(range(num_events), dtype=int) + 1, list( _year_month_day_accessor( - data[self.coords["event"]], strict=False + self.data_stacked[self.coords["event"]], strict=False ).flat ), - _date_to_ordinal_accessor(data[self.coords["event"]], strict=False), + _date_to_ordinal_accessor( + self.data_stacked[self.coords["event"]], strict=False + ), ], # The accessor for the data in the Dataset "accessor": [ @@ -411,10 +416,11 @@ def get_hazard_kwargs(self) -> dict[str, Any]: # Set the Hazard attributes for _, ident in data_ident.iterrows(): self.hazard_kwargs[ident["hazard_attr"]] = ( - _load_from_xarray_or_return_default(data=data, **ident) + _load_from_xarray_or_return_default(data=self.data_stacked, **ident) ) # Done! + LOGGER.debug("Hazard successfully loaded. Number of events: %i", num_events) self.hazard_kwargs.update(centroids=centroids, intensity=intensity_matrix) return self.hazard_kwargs From d01aef453a61b36bbd56d1bf335c588f09763fec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evelyn=20M=C3=BChlhofer?= Date: Tue, 9 Dec 2025 17:08:20 +0100 Subject: [PATCH 08/16] fc-specific date and event_name attrs --- climada/hazard/forecast.py | 109 +++++++++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 28 deletions(-) diff --git a/climada/hazard/forecast.py b/climada/hazard/forecast.py index 6488700a81..6d7a95b42f 100644 --- a/climada/hazard/forecast.py +++ b/climada/hazard/forecast.py @@ -21,7 +21,7 @@ import logging import pathlib -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional import numpy as np import xarray as xr @@ -54,9 +54,67 @@ def __init__( **hazard_kwargs keyword arguments to pass to :py:class:`~climada.hazard.base.Hazard` See py:meth`~climada.hazard.base.Hazard.__init__` for details. + + Notes + ----- + If event_name or date are not provided in hazard_kwargs, they will + be automatically generated from the Cartesian product of lead_time and member. + Event names are formatted as "lt_{hours}h_m_{member}" where hours is the + lead time converted to hours. Dates are set to 0 (undefined) by default + since forecast events don't have historical dates. event_id is preserved + from hazard_kwargs if provided. """ + auto_generate = ( + "event_name" not in hazard_kwargs and "date" not in hazard_kwargs + ) + super().__init__(lead_time=lead_time, member=member, **hazard_kwargs) + if auto_generate and len(self.lead_time) > 0 and len(self.member) > 0: + self._set_event_attrs_from_forecast_dims() + + def _set_event_attrs_from_forecast_dims(self) -> None: + """ + Set event_name and date from lead_time and member dimensions. + + This method generates event attributes based on the Cartesian product of + lead_time and member arrays. It should have the same length as the number + of events in the hazard intensity matrix. event_id is left unchanged. + """ + n_events = len(self.lead_time) + if n_events != len(self.member): + raise ValueError( + f"Length mismatch: lead_time has {len(self.lead_time)} elements " + f"but member has {len(self.member)} elements. They should be equal " + f"in a stacked forecast hazard." 
+ ) + + self.event_name = [ + f"lt_{self._format_lead_time(lt)}_m_{m}" + for lt, m in zip(self.lead_time, self.member) + ] + + self.date = np.zeros(n_events, dtype=int) + + @staticmethod + def _format_lead_time(lead_time: np.timedelta64) -> str: + """ + Format lead_time as hours for event names. + + Parameters + ---------- + lead_time : np.timedelta64 + Lead time to format + + Returns + ------- + str + Formatted lead time as "{hours}h" + """ + # Convert to hours + hours = lead_time / np.timedelta64(1, "h") + return f"{hours:.0f}h" + @classmethod def from_hazard(cls, hazard: Hazard, lead_time: np.ndarray, member: np.ndarray): """ @@ -77,6 +135,7 @@ def from_hazard(cls, hazard: Hazard, lead_time: np.ndarray, member: np.ndarray): A HazardForecast object with the same attributes as the input hazard, but with lead_time and member attributes set from instance of HazardForecast. """ + # Keep event_id from hazard but let event_name and date be auto-generated return cls( lead_time=lead_time, member=member, @@ -87,8 +146,6 @@ def from_hazard(cls, hazard: Hazard, lead_time: np.ndarray, member: np.ndarray): event_id=hazard.event_id, frequency=hazard.frequency, frequency_unit=hazard.frequency_unit, - event_name=hazard.event_name, - date=hazard.date, orig=hazard.orig, intensity=hazard.intensity, fraction=hazard.fraction, @@ -101,7 +158,7 @@ def from_xarray_raster( hazard_type: str, intensity_unit: str, *, - intensity: str = "intensity", + intensity: Optional[str] = None, coordinate_vars: Optional[Dict[str, str]] = None, data_vars: Optional[Dict[str, str]] = None, crs: str = "EPSG:4326", @@ -128,9 +185,8 @@ def from_xarray_raster( coordinate_vars : dict(str, str), optional Mapping from default coordinate names to coordinate names in the data. For HazardForecast, should include: - - - ``"leadtime"``: name of the lead time coordinate (required) - - ``"members"``: name of the ensemble member coordinate (required) + - ``"lead_time"``: name of the lead time coordinate (required) + - ``"member"``: name of the ensemble member coordinate (required) - ``"longitude"``: name of longitude coordinate (default: "longitude") - ``"latitude"``: name of latitude coordinate (default: "latitude") @@ -157,29 +213,23 @@ def from_xarray_raster( """ # Open dataset if needed - - hazard_type = "PR" - intensity_unit = "mm/h" - coordinate_vars = { - "longitude": "lon", - "latitude": "lat", - "lead_time": "lead_time", - "member": "eps", - "event": "event", - } - if isinstance(data, (pathlib.Path, str)): open_dataset_kws = open_dataset_kws or {} open_dataset_kws = {"chunks": "auto"} | open_dataset_kws - dset = xr.open_dataset(data) # , **open_dataset_kws + dset = xr.open_dataset(data, **open_dataset_kws) else: dset = data - # Dynamically extract the data variable name - data_var_names = list(dset.data_vars.keys()) - if len(data_var_names) == 0: - raise ValueError("Dataset has no data variables") - intensity = data_var_names[0] # Use first data variable name + if intensity is None: + data_var_names = list(dset.data_vars.keys()) + if len(data_var_names) == 0: + raise ValueError("Dataset has no data variables") + intensity = data_var_names[0] + LOGGER.info( + "No intensity variable specified. 
" + "Assuming intensity variable is '%s'", + intensity, + ) # Extract forecast coordinates coordinate_vars = coordinate_vars or {} @@ -192,17 +242,16 @@ def from_xarray_raster( leadtime_var = coordinate_vars["lead_time"] member_var = coordinate_vars["member"] - dset = ds.assign_coords( + dset = dset.assign_coords( event=( - ("lead_time", "eps"), - np.zeros((len(ds["lead_time"]), len(ds["eps"]))), + (leadtime_var, member_var), + np.zeros((len(dset[leadtime_var]), len(dset[member_var]))), ) ) dset_squeezed = dset.squeeze() # Prepare coordinate_vars for parent call - # Remove forecast-specific keys that the parent doesn't understand parent_coord_vars = { k: v for k, v in coordinate_vars.items() if k not in ["member", "lead_time"] } @@ -221,5 +270,9 @@ def from_xarray_raster( "member": reader.data_stacked[member_var].values, } + # Remove event_name and date so they get auto-generated from lead_time/member + kwargs.pop("event_name", None) + kwargs.pop("date", None) + # Convert to HazardForecast with forecast attributes return cls(**Hazard._check_and_cast_attrs(kwargs)) From 3d7e37aebca91e94c1ace821bb8f2e67b19b0880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evelyn=20M=C3=BChlhofer?= Date: Wed, 10 Dec 2025 12:05:19 +0100 Subject: [PATCH 09/16] tests for xarray hazard fc reader --- climada/hazard/test/test_forecast.py | 160 ++++++++++++++++++++++++++- 1 file changed, 159 insertions(+), 1 deletion(-) diff --git a/climada/hazard/test/test_forecast.py b/climada/hazard/test/test_forecast.py index 6c8b29c2bb..f3a6aa41c9 100644 --- a/climada/hazard/test/test_forecast.py +++ b/climada/hazard/test/test_forecast.py @@ -19,10 +19,14 @@ Tests for Hazard Forecast. """ +import datetime as dt +from pathlib import Path + import numpy as np import numpy.testing as npt import pandas as pd import pytest +import xarray as xr from scipy.sparse import csr_matrix from climada.hazard.base import Hazard @@ -85,7 +89,20 @@ def test_from_hazard(lead_time, member, hazard, haz_kwargs): assert isinstance(haz_fc_from_haz, HazardForecast) npt.assert_array_equal(haz_fc_from_haz.lead_time, lead_time) npt.assert_array_equal(haz_fc_from_haz.member, member) - assert_hazard_kwargs(haz_fc_from_haz, **haz_kwargs) + + # Check most hazard kwargs (excluding event_name and date which are auto-generated) + check_kwargs = { + k: v for k, v in haz_kwargs.items() if k not in ["event_name", "date"] + } + assert_hazard_kwargs(haz_fc_from_haz, **check_kwargs) + + # Check that event_name and date are auto-generated from lead_time and member + assert len(haz_fc_from_haz.event_name) == len(lead_time) + assert len(haz_fc_from_haz.date) == len(lead_time) + # Date should be all zeros for forecast + npt.assert_array_equal(haz_fc_from_haz.date, np.zeros(len(lead_time), dtype=int)) + # Event names should be formatted with lead_time and member + assert haz_fc_from_haz.event_name[0] == f"lt_1h_m_{member[0]}" def test_hazard_forecast_select(haz_fc, lead_time, member): @@ -95,3 +112,144 @@ def test_hazard_forecast_select(haz_fc, lead_time, member): npt.assert_array_equal(haz_fc_select.event_id, haz_fc.event_id[np.array([3, 0])]) npt.assert_array_equal(haz_fc_select.member, member[np.array([3, 0])]) npt.assert_array_equal(haz_fc_select.lead_time, lead_time[np.array([3, 0])]) + + +@pytest.fixture(scope="module") +def forecast_netcdf_file(tmp_path_factory): + """Create a NetCDF file with forecast data structure""" + tmpdir = tmp_path_factory.mktemp("forecast_data") + netcdf_path = tmpdir / "forecast_data.nc" + + n_eps = 5 + n_lead_time = 4 + n_lat = 3 + n_lon = 4 
+ + eps = np.array([3, 8, 13, 16, 20]) + ref_time = np.array([dt.datetime(2025, 12, 8, 6, 0, 0)], dtype="datetime64[ns]") + lead_time_vals = pd.timedelta_range("3h", periods=n_lead_time, freq="2h").to_numpy() + lon = np.array([10.0, 10.5, 11.0, 11.5]) + lat = np.array([45.0, 45.5, 46.0]) + + valid_time = ref_time[0] + lead_time_vals + + np.random.seed(42) + intensity = np.random.rand(n_eps, 1, n_lead_time, n_lat, n_lon) * 10 + + # Create xarray Dataset + dset = xr.Dataset( + { + "__xarray_dataarray_variable__": ( + ["eps", "ref_time", "lead_time", "lat", "lon"], + intensity, + ), + }, + coords={ + "eps": eps, + "ref_time": ref_time, + "lead_time": lead_time_vals, + "lon": lon, + "lat": lat, + "valid_time": (["lead_time"], valid_time), + }, + ) + dset.to_netcdf(netcdf_path) + + return { + "path": netcdf_path, + "n_eps": n_eps, + "n_lead_time": n_lead_time, + "n_lat": n_lat, + "n_lon": n_lon, + "eps": eps, + "lead_time": lead_time_vals, + "lon": lon, + "lat": lat, + } + + +def test_from_xarray_raster_basic(forecast_netcdf_file): + """Test basic loading of forecast hazard from xarray""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + }, + ) + + # Check that it's a HazardForecast instance + assert isinstance(haz_fc, HazardForecast) + + # Check dimensions - after stacking, we should have n_eps * n_lead_time events + expected_n_events = ( + forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] + ) + assert len(haz_fc.event_id) == expected_n_events + assert len(haz_fc.lead_time) == expected_n_events + assert len(haz_fc.member) == expected_n_events + + # Check that lead_time and member are correctly extracted + npt.assert_array_equal(np.unique(haz_fc.member), forecast_netcdf_file["eps"]) + + # Check intensity shape (events x centroids) + expected_n_centroids = forecast_netcdf_file["n_lat"] * forecast_netcdf_file["n_lon"] + assert haz_fc.intensity.shape == (expected_n_events, expected_n_centroids) + + # Check centroids + assert len(haz_fc.centroids.lat) == expected_n_centroids + assert len(haz_fc.centroids.lon) == expected_n_centroids + + +def test_from_xarray_raster_event_names(forecast_netcdf_file): + """Test that event names are auto-generated from lead_time and member""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + }, + ) + + # Check that event names are generated with lead_time in hours + expected_n_events = ( + forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] + ) + assert len(haz_fc.event_name) == expected_n_events + + # First event should be for first lead_time and first member + # Lead time should be in hours (e.g., "lt_3h_m_3") + first_lead_hours = forecast_netcdf_file["lead_time"][0] / np.timedelta64(1, "h") + expected_first_name = ( + f"lt_{first_lead_hours:.0f}h_m_{forecast_netcdf_file['eps'][0]}" + ) + assert haz_fc.event_name[0] == expected_first_name + + +def test_from_xarray_raster_dates(forecast_netcdf_file): + """Test that dates are set to 0 for forecast events""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + 
"member": "eps", + }, + ) + + # Check that all dates are 0 (undefined for forecast) + expected_n_events = ( + forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] + ) + npt.assert_array_equal(haz_fc.date, np.zeros(expected_n_events, dtype=int)) From 5d0f06252eb319f2b1120e93c7f9afc42cf9c5bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evelyn=20M=C3=BChlhofer?= Date: Wed, 10 Dec 2025 14:27:54 +0100 Subject: [PATCH 10/16] fix failing forecast tests --- climada/engine/test/test_impact_forecast.py | 4 + climada/hazard/forecast.py | 51 +------ climada/hazard/test/test_forecast.py | 141 ++++++++++++++++++++ 3 files changed, 152 insertions(+), 44 deletions(-) diff --git a/climada/engine/test/test_impact_forecast.py b/climada/engine/test/test_impact_forecast.py index a6f184812b..cba77e1ca5 100644 --- a/climada/engine/test/test_impact_forecast.py +++ b/climada/engine/test/test_impact_forecast.py @@ -19,10 +19,14 @@ Tests for Impact Forecast. """ +import datetime as dt +from pathlib import Path + import numpy as np import numpy.testing as npt import pandas as pd import pytest +import xarray as xr from scipy.sparse import csr_matrix from climada.engine import Impact, ImpactForecast diff --git a/climada/hazard/forecast.py b/climada/hazard/forecast.py index 6ac49b2f55..5f75a88813 100644 --- a/climada/hazard/forecast.py +++ b/climada/hazard/forecast.py @@ -72,50 +72,13 @@ def __init__( super().__init__(lead_time=lead_time, member=member, **hazard_kwargs) - if auto_generate and len(self.lead_time) > 0 and len(self.member) > 0: - self._set_event_attrs_from_forecast_dims() - - def _set_event_attrs_from_forecast_dims(self) -> None: - """ - Set event_name and date from lead_time and member dimensions. - - This method generates event attributes based on the Cartesian product of - lead_time and member arrays. It should have the same length as the number - of events in the hazard intensity matrix. event_id is left unchanged. - """ - n_events = len(self.lead_time) - if n_events != len(self.member): - raise ValueError( - f"Length mismatch: lead_time has {len(self.lead_time)} elements " - f"but member has {len(self.member)} elements. They should be equal " - f"in a stacked forecast hazard." - ) - - self.event_name = [ - f"lt_{self._format_lead_time(lt)}_m_{m}" - for lt, m in zip(self.lead_time, self.member) - ] - - self.date = np.zeros(n_events, dtype=int) - - @staticmethod - def _format_lead_time(lead_time: np.timedelta64) -> str: - """ - Format lead_time as hours for event names. - - Parameters - ---------- - lead_time : np.timedelta64 - Lead time to format - - Returns - ------- - str - Formatted lead time as "{hours}h" - """ - # Convert to hours - hours = lead_time / np.timedelta64(1, "h") - return f"{hours:.0f}h" + # Validate that lead_time and member have matching lengths + if len(self.lead_time) > 0 and len(self.member) > 0: + if len(self.lead_time) != len(self.member): + raise ValueError( + f"Forecast.lead_time and Forecast.member must have the same length. " + f"Got {len(self.lead_time)} lead times and {len(self.member)} members." 
+ ) if auto_generate and len(self.lead_time) > 0 and len(self.member) > 0: self._set_event_attrs_from_forecast_dims() diff --git a/climada/hazard/test/test_forecast.py b/climada/hazard/test/test_forecast.py index a4ec3c214d..c91a599674 100644 --- a/climada/hazard/test/test_forecast.py +++ b/climada/hazard/test/test_forecast.py @@ -124,6 +124,147 @@ def test_hazard_forecast_concat(haz_fc, lead_time, member): npt.assert_array_equal(haz_fc_concat.member, np.concatenate([member, member])) +@pytest.fixture(scope="module") +def forecast_netcdf_file(tmp_path_factory): + """Create a NetCDF file with forecast data structure""" + tmpdir = tmp_path_factory.mktemp("forecast_data") + netcdf_path = tmpdir / "forecast_data.nc" + + n_eps = 5 + n_lead_time = 4 + n_lat = 3 + n_lon = 4 + + eps = np.array([3, 8, 13, 16, 20]) + ref_time = np.array([dt.datetime(2025, 12, 8, 6, 0, 0)], dtype="datetime64[ns]") + lead_time_vals = pd.timedelta_range("3h", periods=n_lead_time, freq="2h").to_numpy() + lon = np.array([10.0, 10.5, 11.0, 11.5]) + lat = np.array([45.0, 45.5, 46.0]) + + valid_time = ref_time[0] + lead_time_vals + + np.random.seed(42) + intensity = np.random.rand(n_eps, 1, n_lead_time, n_lat, n_lon) * 10 + + # Create xarray Dataset + dset = xr.Dataset( + { + "__xarray_dataarray_variable__": ( + ["eps", "ref_time", "lead_time", "lat", "lon"], + intensity, + ), + }, + coords={ + "eps": eps, + "ref_time": ref_time, + "lead_time": lead_time_vals, + "lon": lon, + "lat": lat, + "valid_time": (["lead_time"], valid_time), + }, + ) + dset.to_netcdf(netcdf_path) + + return { + "path": netcdf_path, + "n_eps": n_eps, + "n_lead_time": n_lead_time, + "n_lat": n_lat, + "n_lon": n_lon, + "eps": eps, + "lead_time": lead_time_vals, + "lon": lon, + "lat": lat, + } + + +def test_from_xarray_raster_basic(forecast_netcdf_file): + """Test basic loading of forecast hazard from xarray""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + }, + ) + + # Check that it's a HazardForecast instance + assert isinstance(haz_fc, HazardForecast) + + # Check dimensions - after stacking, we should have n_eps * n_lead_time events + expected_n_events = ( + forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] + ) + assert len(haz_fc.event_id) == expected_n_events + assert len(haz_fc.lead_time) == expected_n_events + assert len(haz_fc.member) == expected_n_events + + # Check that lead_time and member are correctly extracted + npt.assert_array_equal(np.unique(haz_fc.member), forecast_netcdf_file["eps"]) + + # Check intensity shape (events x centroids) + expected_n_centroids = forecast_netcdf_file["n_lat"] * forecast_netcdf_file["n_lon"] + assert haz_fc.intensity.shape == (expected_n_events, expected_n_centroids) + + # Check centroids + assert len(haz_fc.centroids.lat) == expected_n_centroids + assert len(haz_fc.centroids.lon) == expected_n_centroids + + +def test_from_xarray_raster_event_names(forecast_netcdf_file): + """Test that event names are auto-generated from lead_time and member""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + }, + ) + + # Check that event names are generated with lead_time in hours + expected_n_events = ( + forecast_netcdf_file["n_eps"] * 
forecast_netcdf_file["n_lead_time"] + ) + assert len(haz_fc.event_name) == expected_n_events + + # First event should be for first lead_time and first member + # Lead time should be in hours (e.g., "lt_3h_m_3") + first_lead_hours = forecast_netcdf_file["lead_time"][0] / np.timedelta64(1, "h") + expected_first_name = ( + f"lt_{first_lead_hours:.0f}h_m_{forecast_netcdf_file['eps'][0]}" + ) + assert haz_fc.event_name[0] == expected_first_name + + +def test_from_xarray_raster_dates(forecast_netcdf_file): + """Test that dates are set to 0 for forecast events""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + }, + ) + + # Check that all dates are 0 (undefined for forecast) + expected_n_events = ( + forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] + ) + npt.assert_array_equal(haz_fc.date, np.zeros(expected_n_events, dtype=int)) + + class TestSelect: @pytest.mark.parametrize( From c9041fe6084abd6103d40012a7a34f8810fa4a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evelyn=20M=C3=BChlhofer?= Date: Wed, 10 Dec 2025 14:31:27 +0100 Subject: [PATCH 11/16] remove unused kwargs --- climada/hazard/forecast.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/climada/hazard/forecast.py b/climada/hazard/forecast.py index 5f75a88813..3752c23f13 100644 --- a/climada/hazard/forecast.py +++ b/climada/hazard/forecast.py @@ -244,9 +244,7 @@ def from_xarray_raster( *, intensity: Optional[str] = None, coordinate_vars: Optional[Dict[str, str]] = None, - data_vars: Optional[Dict[str, str]] = None, crs: str = "EPSG:4326", - rechunk: bool = False, open_dataset_kws: dict[str, Any] | None = None, ): """Read forecast hazard data from an xarray Dataset @@ -276,12 +274,8 @@ def from_xarray_raster( Note: The "event" coordinate is automatically constructed from lead_time and member, so it should not be specified. - data_vars : dict(str, str), optional - Mapping from default variable names to variable names in the data crs : str, optional Coordinate reference system identifier. Defaults to "EPSG:4326" - rechunk : bool, optional - Whether to rechunk the dataset before processing. 
Defaults to False open_dataset_kws : dict, optional Keyword arguments passed to xarray.open_dataset if data is a file path From 464e5927c732318c54fadba0d746232013f76107 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evelyn=20M=C3=BChlhofer?= Date: Wed, 10 Dec 2025 14:47:46 +0100 Subject: [PATCH 12/16] add crs and private method tests --- climada/hazard/forecast.py | 1 + climada/hazard/test/test_forecast.py | 88 ++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/climada/hazard/forecast.py b/climada/hazard/forecast.py index 3752c23f13..b2bcc546ad 100644 --- a/climada/hazard/forecast.py +++ b/climada/hazard/forecast.py @@ -339,6 +339,7 @@ def from_xarray_raster( data=dset_squeezed, coordinate_vars=parent_coord_vars, intensity=intensity, + crs=crs, ) kwargs = reader.get_hazard_kwargs() | { diff --git a/climada/hazard/test/test_forecast.py b/climada/hazard/test/test_forecast.py index c91a599674..84ea6ca3b9 100644 --- a/climada/hazard/test/test_forecast.py +++ b/climada/hazard/test/test_forecast.py @@ -27,9 +27,11 @@ import pandas as pd import pytest import xarray as xr +from scipy import sparse from scipy.sparse import csr_matrix from climada.hazard.base import Hazard +from climada.hazard.centroids.centr import Centroids from climada.hazard.forecast import HazardForecast from climada.hazard.test.test_base import hazard_kwargs @@ -355,6 +357,92 @@ def test_derived_select_null(self, haz_fc, haz_kwargs): ) +def test_check_sizes(haz_fc): + """Test that _check_sizes validates matching lengths""" + # Should pass with matching lengths + haz_fc._check_sizes() + + # Test with mismatched member length - manipulate after creation + haz_fc_bad = HazardForecast( + lead_time=haz_fc.lead_time, + member=haz_fc.member, + event_id=haz_fc.event_id, + event_name=haz_fc.event_name, + date=haz_fc.date, + haz_type=haz_fc.haz_type, + units=haz_fc.units, + centroids=haz_fc.centroids, + intensity=haz_fc.intensity, + fraction=haz_fc.fraction, + ) + # Manipulate member array directly to bypass __init__ validation + haz_fc_bad.member = haz_fc.member[:-1] + with pytest.raises(ValueError, match="Forecast.member"): + haz_fc_bad._check_sizes() + + # Test with mismatched lead_time length + haz_fc_bad2 = HazardForecast( + lead_time=haz_fc.lead_time, + member=haz_fc.member, + event_id=haz_fc.event_id, + event_name=haz_fc.event_name, + date=haz_fc.date, + haz_type=haz_fc.haz_type, + units=haz_fc.units, + centroids=haz_fc.centroids, + intensity=haz_fc.intensity, + fraction=haz_fc.fraction, + ) + # Manipulate lead_time array directly to bypass __init__ validation + haz_fc_bad2.lead_time = haz_fc.lead_time[:-1] + with pytest.raises(ValueError, match="Forecast.lead_time"): + haz_fc_bad2._check_sizes() + + +def test_set_event_attrs_from_forecast_dims(): + """Test that _set_event_attrs_from_forecast_dims generates event attributes correctly""" + lead_time = pd.timedelta_range("3h", periods=4, freq="2h").to_numpy() + member = np.array([1, 2, 3, 4]) + + # Create a HazardForecast without event_name and date (they will be auto-generated) + haz_fc = HazardForecast( + lead_time=lead_time, + member=member, + haz_type="TC", + units="m/s", + event_id=np.array([10, 20, 30, 40]), + intensity=sparse.csr_matrix(np.random.rand(4, 3)), + centroids=Centroids(lat=np.array([1, 2, 3]), lon=np.array([4, 5, 6])), + ) + + # Check that event_name was auto-generated + assert len(haz_fc.event_name) == 4 + assert haz_fc.event_name[0] == "lt_3h_m_1" + assert haz_fc.event_name[1] == "lt_5h_m_2" + assert haz_fc.event_name[2] == "lt_7h_m_3" + 
assert haz_fc.event_name[3] == "lt_9h_m_4" + + # Check that date was set to zeros + npt.assert_array_equal(haz_fc.date, np.zeros(4, dtype=int)) + + # Test that it raises error when lead_time and member have different lengths + haz_fc_bad = HazardForecast( + lead_time=lead_time, + member=member, + haz_type="TC", + units="m/s", + event_id=np.array([10, 20, 30, 40]), + event_name=["a", "b", "c", "d"], # Provide event_name to bypass auto-generation + date=np.array([1, 2, 3, 4]), + intensity=sparse.csr_matrix(np.random.rand(4, 3)), + centroids=Centroids(lat=np.array([1, 2, 3]), lon=np.array([4, 5, 6])), + ) + # Now manipulate arrays to create mismatch and call the method directly + haz_fc_bad.member = member[:-1] + with pytest.raises(ValueError, match="Length mismatch"): + haz_fc_bad._set_event_attrs_from_forecast_dims() + + def test_write_read_hazard_forecast(haz_fc, tmp_path): file_name = tmp_path / "test_hazard_forecast.h5" From b2a691d3cd007a33889eba0509e9206dd3af5aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Evelyn=20M=C3=BChlhofer?= Date: Wed, 10 Dec 2025 15:16:59 +0100 Subject: [PATCH 13/16] add crs attr to test file --- climada/hazard/test/test_forecast.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/climada/hazard/test/test_forecast.py b/climada/hazard/test/test_forecast.py index cf3b63761a..bf4150d036 100644 --- a/climada/hazard/test/test_forecast.py +++ b/climada/hazard/test/test_forecast.py @@ -153,6 +153,8 @@ def forecast_netcdf_file(tmp_path_factory): tmpdir = tmp_path_factory.mktemp("forecast_data") netcdf_path = tmpdir / "forecast_data.nc" + crs = "EPSG:4326" + n_eps = 5 n_lead_time = 4 n_lat = 3 @@ -198,6 +200,7 @@ def forecast_netcdf_file(tmp_path_factory): "lead_time": lead_time_vals, "lon": lon, "lat": lat, + "crs": crs, } @@ -250,6 +253,7 @@ def test_from_xarray_raster_event_names(forecast_netcdf_file): "lead_time": "lead_time", "member": "eps", }, + crs=forecast_netcdf_file["crs"], ) # Check that event names are generated with lead_time in hours @@ -279,6 +283,7 @@ def test_from_xarray_raster_dates(forecast_netcdf_file): "lead_time": "lead_time", "member": "eps", }, + crs=forecast_netcdf_file["crs"], ) # Check that all dates are 0 (undefined for forecast) From 96ccc5854b2cbf14cdb32124997ba6eac6a34961 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Wed, 10 Dec 2025 16:19:59 +0100 Subject: [PATCH 14/16] Simplify data parsing and tests --- climada/hazard/forecast.py | 83 +----- climada/hazard/test/test_forecast.py | 373 ++++++++++----------------- 2 files changed, 148 insertions(+), 308 deletions(-) diff --git a/climada/hazard/forecast.py b/climada/hazard/forecast.py index 48c0c14f6e..b3fd1a0618 100644 --- a/climada/hazard/forecast.py +++ b/climada/hazard/forecast.py @@ -57,74 +57,9 @@ def __init__( **hazard_kwargs keyword arguments to pass to :py:class:`~climada.hazard.base.Hazard` See py:meth`~climada.hazard.base.Hazard.__init__` for details. - - Notes - ----- - If event_name or date are not provided in hazard_kwargs, they will - be automatically generated from the Cartesian product of lead_time and member. - Event names are formatted as "lt_{hours}h_m_{member}" where hours is the - lead time converted to hours. Dates are set to 0 (undefined) by default - since forecast events don't have historical dates. event_id is preserved - from hazard_kwargs if provided. 
""" - auto_generate = ( - "event_name" not in hazard_kwargs and "date" not in hazard_kwargs - ) - super().__init__(lead_time=lead_time, member=member, **hazard_kwargs) - - # Validate that lead_time and member have matching lengths - if len(self.lead_time) > 0 and len(self.member) > 0: - if len(self.lead_time) != len(self.member): - raise ValueError( - f"Forecast.lead_time and Forecast.member must have the same length. " - f"Got {len(self.lead_time)} lead times and {len(self.member)} members." - ) - - if auto_generate and len(self.lead_time) > 0 and len(self.member) > 0: - self._set_event_attrs_from_forecast_dims() - - def _set_event_attrs_from_forecast_dims(self) -> None: - """ - Set event_name and date from lead_time and member dimensions. - - This method generates event attributes based on the Cartesian product of - lead_time and member arrays. It should have the same length as the number - of events in the hazard intensity matrix. event_id is left unchanged. - """ - n_events = len(self.lead_time) - if n_events != len(self.member): - raise ValueError( - f"Length mismatch: lead_time has {len(self.lead_time)} elements " - f"but member has {len(self.member)} elements. They should be equal " - f"in a stacked forecast hazard." - ) - - self.event_name = [ - f"lt_{self._format_lead_time(lt)}_m_{m}" - for lt, m in zip(self.lead_time, self.member) - ] - - self.date = np.zeros(n_events, dtype=int) - - @staticmethod - def _format_lead_time(lead_time: np.timedelta64) -> str: - """ - Format lead_time as hours for event names. - - Parameters - ---------- - lead_time : np.timedelta64 - Lead time to format - - Returns - ------- - str - Formatted lead time as "{hours}h" - """ - # Convert to hours - hours = lead_time / np.timedelta64(1, "h") - return f"{hours:.0f}h" + self._check_sizes() @classmethod def from_hazard(cls, hazard: Hazard, lead_time: np.ndarray, member: np.ndarray): @@ -146,7 +81,6 @@ def from_hazard(cls, hazard: Hazard, lead_time: np.ndarray, member: np.ndarray): A HazardForecast object with the same attributes as the input hazard, but with lead_time and member attributes set from instance of HazardForecast. 
""" - # Keep event_id from hazard but let event_name and date be auto-generated return cls( lead_time=lead_time, member=member, @@ -158,6 +92,8 @@ def from_hazard(cls, hazard: Hazard, lead_time: np.ndarray, member: np.ndarray): frequency=hazard.frequency, frequency_unit=hazard.frequency_unit, orig=hazard.orig, + event_name=hazard.event_name, + date=hazard.date, intensity=hazard.intensity, fraction=hazard.fraction, ) @@ -461,13 +397,16 @@ def from_xarray_raster( kwargs = reader.get_hazard_kwargs() | { "haz_type": hazard_type, "units": intensity_unit, - "lead_time": reader.data_stacked[leadtime_var].values, - "member": reader.data_stacked[member_var].values, + "lead_time": reader.data_stacked[leadtime_var].to_numpy(), + "member": reader.data_stacked[member_var].to_numpy(), } - # Remove event_name and date so they get auto-generated from lead_time/member - kwargs.pop("event_name", None) - kwargs.pop("date", None) + # Generate from lead_time/member + kwargs["event_name"] = [ + f"lt_{lt / np.timedelta64(1, 'h'):.0f}h_m_{m}" + for lt, m in zip(kwargs["lead_time"], kwargs["member"]) + ] + kwargs["date"] = np.zeros_like(kwargs["date"], dtype=int) # Convert to HazardForecast with forecast attributes return cls(**Hazard._check_and_cast_attrs(kwargs)) diff --git a/climada/hazard/test/test_forecast.py b/climada/hazard/test/test_forecast.py index bf4150d036..a9c6fd8775 100644 --- a/climada/hazard/test/test_forecast.py +++ b/climada/hazard/test/test_forecast.py @@ -27,7 +27,6 @@ import pandas as pd import pytest import xarray as xr -from scipy import sparse from scipy.sparse import csr_matrix from climada.hazard.base import Hazard @@ -98,20 +97,7 @@ def test_from_hazard(lead_time, member, hazard, haz_kwargs): assert isinstance(haz_fc_from_haz, HazardForecast) npt.assert_array_equal(haz_fc_from_haz.lead_time, lead_time) npt.assert_array_equal(haz_fc_from_haz.member, member) - - # Check most hazard kwargs (excluding event_name and date which are auto-generated) - check_kwargs = { - k: v for k, v in haz_kwargs.items() if k not in ["event_name", "date"] - } - assert_hazard_kwargs(haz_fc_from_haz, **check_kwargs) - - # Check that event_name and date are auto-generated from lead_time and member - assert len(haz_fc_from_haz.event_name) == len(lead_time) - assert len(haz_fc_from_haz.date) == len(lead_time) - # Date should be all zeros for forecast - npt.assert_array_equal(haz_fc_from_haz.date, np.zeros(len(lead_time), dtype=int)) - # Event names should be formatted with lead_time and member - assert haz_fc_from_haz.event_name[0] == f"lt_1h_m_{member[0]}" + assert_hazard_kwargs(haz_fc_from_haz, **haz_kwargs) class TestHazardForecastConcat: @@ -147,150 +133,151 @@ def test_type_fail(self, haz_fc, hazard): Hazard.concat([haz_fc, hazard]) -@pytest.fixture(scope="module") -def forecast_netcdf_file(tmp_path_factory): - """Create a NetCDF file with forecast data structure""" - tmpdir = tmp_path_factory.mktemp("forecast_data") - netcdf_path = tmpdir / "forecast_data.nc" - - crs = "EPSG:4326" - - n_eps = 5 - n_lead_time = 4 - n_lat = 3 - n_lon = 4 - - eps = np.array([3, 8, 13, 16, 20]) - ref_time = np.array([dt.datetime(2025, 12, 8, 6, 0, 0)], dtype="datetime64[ns]") - lead_time_vals = pd.timedelta_range("3h", periods=n_lead_time, freq="2h").to_numpy() - lon = np.array([10.0, 10.5, 11.0, 11.5]) - lat = np.array([45.0, 45.5, 46.0]) - - valid_time = ref_time[0] + lead_time_vals - - np.random.seed(42) - intensity = np.random.rand(n_eps, 1, n_lead_time, n_lat, n_lon) * 10 - - # Create xarray Dataset - dset = xr.Dataset( - { 
- "__xarray_dataarray_variable__": ( - ["eps", "ref_time", "lead_time", "lat", "lon"], - intensity, - ), - }, - coords={ +class TestXarrayReader: + + @pytest.fixture() + def forecast_netcdf_file(self, tmp_path_factory): + """Create a NetCDF file with forecast data structure""" + tmpdir = tmp_path_factory.mktemp("forecast_data") + netcdf_path = tmpdir / "forecast_data.nc" + + crs = "EPSG:4326" + + n_eps = 5 + n_lead_time = 4 + n_lat = 3 + n_lon = 4 + + eps = np.array([3, 8, 13, 16, 20]) + ref_time = np.array([dt.datetime(2025, 12, 8, 6, 0, 0)], dtype="datetime64[ns]") + lead_time_vals = pd.timedelta_range( + "3h", periods=n_lead_time, freq="2h" + ).to_numpy() + lon = np.array([10.0, 10.5, 11.0, 11.5]) + lat = np.array([45.0, 45.5, 46.0]) + + valid_time = ref_time[0] + lead_time_vals + + np.random.seed(42) + intensity = np.random.rand(n_eps, 1, n_lead_time, n_lat, n_lon) * 10 + + # Create xarray Dataset + dset = xr.Dataset( + { + "__xarray_dataarray_variable__": ( + ["eps", "ref_time", "lead_time", "lat", "lon"], + intensity, + ), + }, + coords={ + "eps": eps, + "ref_time": ref_time, + "lead_time": lead_time_vals, + "lon": lon, + "lat": lat, + "valid_time": (["lead_time"], valid_time), + }, + ) + dset.to_netcdf(netcdf_path) + + return { + "path": netcdf_path, + "n_eps": n_eps, + "n_lead_time": n_lead_time, + "n_lat": n_lat, + "n_lon": n_lon, "eps": eps, - "ref_time": ref_time, "lead_time": lead_time_vals, "lon": lon, "lat": lat, - "valid_time": (["lead_time"], valid_time), - }, - ) - dset.to_netcdf(netcdf_path) - - return { - "path": netcdf_path, - "n_eps": n_eps, - "n_lead_time": n_lead_time, - "n_lat": n_lat, - "n_lon": n_lon, - "eps": eps, - "lead_time": lead_time_vals, - "lon": lon, - "lat": lat, - "crs": crs, - } - - -def test_from_xarray_raster_basic(forecast_netcdf_file): - """Test basic loading of forecast hazard from xarray""" - haz_fc = HazardForecast.from_xarray_raster( - forecast_netcdf_file["path"], - hazard_type="PR", - intensity_unit="mm/h", - coordinate_vars={ - "longitude": "lon", - "latitude": "lat", - "lead_time": "lead_time", - "member": "eps", - }, - ) + "crs": crs, + } + + def test_from_xarray_raster_basic(self, forecast_netcdf_file): + """Test basic loading of forecast hazard from xarray""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + }, + ) - # Check that it's a HazardForecast instance - assert isinstance(haz_fc, HazardForecast) + # Check that it's a HazardForecast instance + assert isinstance(haz_fc, HazardForecast) - # Check dimensions - after stacking, we should have n_eps * n_lead_time events - expected_n_events = ( - forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] - ) - assert len(haz_fc.event_id) == expected_n_events - assert len(haz_fc.lead_time) == expected_n_events - assert len(haz_fc.member) == expected_n_events - - # Check that lead_time and member are correctly extracted - npt.assert_array_equal(np.unique(haz_fc.member), forecast_netcdf_file["eps"]) - - # Check intensity shape (events x centroids) - expected_n_centroids = forecast_netcdf_file["n_lat"] * forecast_netcdf_file["n_lon"] - assert haz_fc.intensity.shape == (expected_n_events, expected_n_centroids) - - # Check centroids - assert len(haz_fc.centroids.lat) == expected_n_centroids - assert len(haz_fc.centroids.lon) == expected_n_centroids - - -def 
test_from_xarray_raster_event_names(forecast_netcdf_file): - """Test that event names are auto-generated from lead_time and member""" - haz_fc = HazardForecast.from_xarray_raster( - forecast_netcdf_file["path"], - hazard_type="PR", - intensity_unit="mm/h", - coordinate_vars={ - "longitude": "lon", - "latitude": "lat", - "lead_time": "lead_time", - "member": "eps", - }, - crs=forecast_netcdf_file["crs"], - ) + # Check dimensions - after stacking, we should have n_eps * n_lead_time events + expected_n_events = ( + forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] + ) + assert len(haz_fc.event_id) == expected_n_events + assert len(haz_fc.lead_time) == expected_n_events + assert len(haz_fc.member) == expected_n_events - # Check that event names are generated with lead_time in hours - expected_n_events = ( - forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] - ) - assert len(haz_fc.event_name) == expected_n_events + # Check that lead_time and member are correctly extracted + npt.assert_array_equal(np.unique(haz_fc.member), forecast_netcdf_file["eps"]) - # First event should be for first lead_time and first member - # Lead time should be in hours (e.g., "lt_3h_m_3") - first_lead_hours = forecast_netcdf_file["lead_time"][0] / np.timedelta64(1, "h") - expected_first_name = ( - f"lt_{first_lead_hours:.0f}h_m_{forecast_netcdf_file['eps'][0]}" - ) - assert haz_fc.event_name[0] == expected_first_name - - -def test_from_xarray_raster_dates(forecast_netcdf_file): - """Test that dates are set to 0 for forecast events""" - haz_fc = HazardForecast.from_xarray_raster( - forecast_netcdf_file["path"], - hazard_type="PR", - intensity_unit="mm/h", - coordinate_vars={ - "longitude": "lon", - "latitude": "lat", - "lead_time": "lead_time", - "member": "eps", - }, - crs=forecast_netcdf_file["crs"], - ) + # Check intensity shape (events x centroids) + expected_n_centroids = ( + forecast_netcdf_file["n_lat"] * forecast_netcdf_file["n_lon"] + ) + assert haz_fc.intensity.shape == (expected_n_events, expected_n_centroids) + + # Check centroids + assert len(haz_fc.centroids.lat) == expected_n_centroids + assert len(haz_fc.centroids.lon) == expected_n_centroids + + def test_from_xarray_raster_event_names(self, forecast_netcdf_file): + """Test that event names are auto-generated from lead_time and member""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + "longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + }, + crs=forecast_netcdf_file["crs"], + ) - # Check that all dates are 0 (undefined for forecast) - expected_n_events = ( - forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] - ) - npt.assert_array_equal(haz_fc.date, np.zeros(expected_n_events, dtype=int)) + # Check that event names are generated with lead_time in hours + expected_n_events = ( + forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] + ) + assert len(haz_fc.event_name) == expected_n_events + + event_names_expected = [ + f"lt_{lt / np.timedelta64(1, 'h'):.0f}h_m_{mm}" + for lt, mm in zip(haz_fc.lead_time, haz_fc.member) + ] + npt.assert_array_equal(haz_fc.event_name, event_names_expected) + + def test_from_xarray_raster_dates(self, forecast_netcdf_file): + """Test that dates are set to 0 for forecast events""" + haz_fc = HazardForecast.from_xarray_raster( + forecast_netcdf_file["path"], + hazard_type="PR", + intensity_unit="mm/h", + coordinate_vars={ + 
"longitude": "lon", + "latitude": "lat", + "lead_time": "lead_time", + "member": "eps", + }, + crs=forecast_netcdf_file["crs"], + ) + + # Check that all dates are 0 (undefined for forecast) + expected_n_events = ( + forecast_netcdf_file["n_eps"] * forecast_netcdf_file["n_lead_time"] + ) + npt.assert_array_equal(haz_fc.date, np.zeros(expected_n_events, dtype=int)) class TestSelect: @@ -383,92 +370,6 @@ def test_derived_select_null(self, haz_fc, haz_kwargs): ) -def test_check_sizes(haz_fc): - """Test that _check_sizes validates matching lengths""" - # Should pass with matching lengths - haz_fc._check_sizes() - - # Test with mismatched member length - manipulate after creation - haz_fc_bad = HazardForecast( - lead_time=haz_fc.lead_time, - member=haz_fc.member, - event_id=haz_fc.event_id, - event_name=haz_fc.event_name, - date=haz_fc.date, - haz_type=haz_fc.haz_type, - units=haz_fc.units, - centroids=haz_fc.centroids, - intensity=haz_fc.intensity, - fraction=haz_fc.fraction, - ) - # Manipulate member array directly to bypass __init__ validation - haz_fc_bad.member = haz_fc.member[:-1] - with pytest.raises(ValueError, match="Forecast.member"): - haz_fc_bad._check_sizes() - - # Test with mismatched lead_time length - haz_fc_bad2 = HazardForecast( - lead_time=haz_fc.lead_time, - member=haz_fc.member, - event_id=haz_fc.event_id, - event_name=haz_fc.event_name, - date=haz_fc.date, - haz_type=haz_fc.haz_type, - units=haz_fc.units, - centroids=haz_fc.centroids, - intensity=haz_fc.intensity, - fraction=haz_fc.fraction, - ) - # Manipulate lead_time array directly to bypass __init__ validation - haz_fc_bad2.lead_time = haz_fc.lead_time[:-1] - with pytest.raises(ValueError, match="Forecast.lead_time"): - haz_fc_bad2._check_sizes() - - -def test_set_event_attrs_from_forecast_dims(): - """Test that _set_event_attrs_from_forecast_dims generates event attributes correctly""" - lead_time = pd.timedelta_range("3h", periods=4, freq="2h").to_numpy() - member = np.array([1, 2, 3, 4]) - - # Create a HazardForecast without event_name and date (they will be auto-generated) - haz_fc = HazardForecast( - lead_time=lead_time, - member=member, - haz_type="TC", - units="m/s", - event_id=np.array([10, 20, 30, 40]), - intensity=sparse.csr_matrix(np.random.rand(4, 3)), - centroids=Centroids(lat=np.array([1, 2, 3]), lon=np.array([4, 5, 6])), - ) - - # Check that event_name was auto-generated - assert len(haz_fc.event_name) == 4 - assert haz_fc.event_name[0] == "lt_3h_m_1" - assert haz_fc.event_name[1] == "lt_5h_m_2" - assert haz_fc.event_name[2] == "lt_7h_m_3" - assert haz_fc.event_name[3] == "lt_9h_m_4" - - # Check that date was set to zeros - npt.assert_array_equal(haz_fc.date, np.zeros(4, dtype=int)) - - # Test that it raises error when lead_time and member have different lengths - haz_fc_bad = HazardForecast( - lead_time=lead_time, - member=member, - haz_type="TC", - units="m/s", - event_id=np.array([10, 20, 30, 40]), - event_name=["a", "b", "c", "d"], # Provide event_name to bypass auto-generation - date=np.array([1, 2, 3, 4]), - intensity=sparse.csr_matrix(np.random.rand(4, 3)), - centroids=Centroids(lat=np.array([1, 2, 3]), lon=np.array([4, 5, 6])), - ) - # Now manipulate arrays to create mismatch and call the method directly - haz_fc_bad.member = member[:-1] - with pytest.raises(ValueError, match="Length mismatch"): - haz_fc_bad._set_event_attrs_from_forecast_dims() - - def test_write_read_hazard_forecast(haz_fc, tmp_path): file_name = tmp_path / "test_hazard_forecast.h5" From b5e7d7afe263a142c1f29fa9f92c93c380312754 
Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Wed, 10 Dec 2025 16:27:18 +0100 Subject: [PATCH 15/16] Restore test_impact_forecast.py --- climada/engine/test/test_impact_forecast.py | 27 +++++---------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/climada/engine/test/test_impact_forecast.py b/climada/engine/test/test_impact_forecast.py index c17dadb2f1..f571018598 100644 --- a/climada/engine/test/test_impact_forecast.py +++ b/climada/engine/test/test_impact_forecast.py @@ -45,13 +45,15 @@ def impact(impact_kwargs): @pytest.fixture -def lead_time(): - return pd.timedelta_range(start="1 day", periods=6).to_numpy() +def lead_time(impact_kwargs): + return pd.timedelta_range( + start="1 day", periods=len(impact_kwargs["event_id"]) + ).to_numpy() @pytest.fixture -def member(): - return np.arange(6) +def member(impact_kwargs): + return np.arange(len(impact_kwargs["event_id"])) @pytest.fixture @@ -59,23 +61,6 @@ def impact_forecast(impact, lead_time, member): return ImpactForecast.from_impact(impact, lead_time=lead_time, member=member) -class TestImpactForecastInit: - - @pytest.fixture - def lead_time(impact_kwargs): - return pd.timedelta_range( - start="1 day", periods=len(impact_kwargs["event_id"]) - ).to_numpy() - - @pytest.fixture - def member(impact_kwargs): - return np.arange(len(self, impact_kwargs["event_id"])) - - @pytest.fixture - def impact_forecast(impact, lead_time, member): - return ImpactForecast.from_impact(impact, lead_time=lead_time, member=member) - - class TestImpactForecastInit: def assert_impact_kwargs(self, impact: Impact, **kwargs): From 9685a810eb1b2b1d3291931425e9936b874b8149 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 15 Dec 2025 16:05:40 +0100 Subject: [PATCH 16/16] Skip tests for incompatible xarray versions --- climada/hazard/test/test_forecast.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/climada/hazard/test/test_forecast.py b/climada/hazard/test/test_forecast.py index 6dbf175974..c667db664c 100644 --- a/climada/hazard/test/test_forecast.py +++ b/climada/hazard/test/test_forecast.py @@ -27,6 +27,7 @@ import pandas as pd import pytest import xarray as xr +from packaging.version import Version from scipy.sparse import csr_matrix from climada.hazard.base import Hazard @@ -34,6 +35,13 @@ from climada.hazard.forecast import HazardForecast from climada.hazard.test.test_base import hazard_kwargs +# See https://docs.xarray.dev/en/stable/whats-new.html#id80 +xarray_leadtime = pytest.mark.skipif( + (Version(xr.__version__) < Version("2025.07.0")) + and (Version(xr.__version__) >= Version("2025.04.0")), + reason="xarray timedelta bug", +) + @pytest.fixture def haz_kwargs(): @@ -193,6 +201,7 @@ def forecast_netcdf_file(self, tmp_path_factory): "crs": crs, } + @xarray_leadtime def test_from_xarray_raster_basic(self, forecast_netcdf_file): """Test basic loading of forecast hazard from xarray""" haz_fc = HazardForecast.from_xarray_raster( @@ -231,6 +240,7 @@ def test_from_xarray_raster_basic(self, forecast_netcdf_file): assert len(haz_fc.centroids.lat) == expected_n_centroids assert len(haz_fc.centroids.lon) == expected_n_centroids + @xarray_leadtime def test_from_xarray_raster_event_names(self, forecast_netcdf_file): """Test that event names are auto-generated from lead_time and member""" haz_fc = HazardForecast.from_xarray_raster( @@ -258,6 +268,7 @@ def test_from_xarray_raster_event_names(self, forecast_netcdf_file): ] 
npt.assert_array_equal(haz_fc.event_name, event_names_expected) + @xarray_leadtime def test_from_xarray_raster_dates(self, forecast_netcdf_file): """Test that dates are set to 0 for forecast events""" haz_fc = HazardForecast.from_xarray_raster(
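
Note on the forecast event conventions used throughout these patches: each forecast event is identified purely by its lead time (in whole hours) and its ensemble member, via names of the form lt_<hours>h_m_<member>, and the ordinal date is set to zero because a single calendar date is not meaningful for a forecast ensemble. The following is a minimal standalone sketch of that derivation, using the same f-string format as the patches above; the helper name make_forecast_event_attrs is hypothetical and serves only to illustrate the scheme, it is not part of the CLIMADA API.

import numpy as np
import pandas as pd


def make_forecast_event_attrs(lead_time, member):
    """Illustrative helper (not CLIMADA API): derive event names and dates
    from forecast dimensions, mirroring the lt_<hours>h_m_<member> scheme."""
    if len(lead_time) != len(member):
        raise ValueError("Length mismatch between lead_time and member")
    # Name each event by its lead time in whole hours and its ensemble member
    event_name = [
        f"lt_{lt / np.timedelta64(1, 'h'):.0f}h_m_{m}"
        for lt, m in zip(lead_time, member)
    ]
    # Calendar dates are undefined for forecast members, so set them to zero
    date = np.zeros(len(lead_time), dtype=int)
    return event_name, date


# Example: four lead times starting at 3 h in 2 h steps, members 1-4
lead_time = pd.timedelta_range("3h", periods=4, freq="2h").to_numpy()
member = np.array([1, 2, 3, 4])
names, dates = make_forecast_event_attrs(lead_time, member)
print(names)  # ['lt_3h_m_1', 'lt_5h_m_2', 'lt_7h_m_3', 'lt_9h_m_4']
print(dates)  # [0 0 0 0]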