From 45a57af4611bf43ebaaeaf06ee456c8b8be98e2b Mon Sep 17 00:00:00 2001 From: Ingmar Nitze Date: Wed, 11 Dec 2024 15:00:19 +0100 Subject: [PATCH 1/3] changed to pyproject.toml --- environment.yml | 13 ------------- noaaplotter/download_utils.py | 7 ++----- pyproject.toml | 34 ++++++++++++++++++++++++++++++++++ setup.py | 27 --------------------------- 4 files changed, 36 insertions(+), 45 deletions(-) delete mode 100644 environment.yml create mode 100755 pyproject.toml delete mode 100644 setup.py diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 9de09dc..0000000 --- a/environment.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: noaaplotter -channels: - - conda-forge - - defaults -dependencies: - - matplotlib>=3.9 - - numpy>=2.2 - - pandas>=2.2 - - python>=3.10 - - requests - - joblib>=1.4 - - tqdm>=4.67 - - geemap>=0.35 diff --git a/noaaplotter/download_utils.py b/noaaplotter/download_utils.py index b7c7389..6a60dbf 100644 --- a/noaaplotter/download_utils.py +++ b/noaaplotter/download_utils.py @@ -13,7 +13,7 @@ from noaaplotter.utils import dl_noaa_api, assign_numeric_datatypes -def download_from_noaa(output_file, start_date, end_date, datatypes, loc_name, station_id, noaa_api_token, n_jobs=4): +def download_from_noaa(output_file, start_date, end_date, station_id, noaa_api_token, datatypes=['TMIN', 'TMAX', 'PRCP', 'SNOW'], loc_name='', n_jobs=4): # remove file if exists if os.path.exists(output_file): os.remove(output_file) @@ -34,9 +34,6 @@ def download_from_noaa(output_file, start_date, end_date, datatypes, loc_name, s delayed(dl_noaa_api)(i, datatypes, station_id, noaa_api_token, start_date, end_date, split_size) for i in tqdm.tqdm(split_range[:]) ) - # drop empty/None from datasets_list - datasets_list = [i for i in datasets_list if i is not None] - # Merge subsets and create DataFrame df = pd.concat(datasets_list) @@ -92,4 +89,4 @@ def download_era5_from_gee(latitude, longitude, end_date, start_date, output_fil df_renamed['SNWD'] = '' output_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] df_save = df_renamed[output_cols].astype(str) - df_save.to_csv(output_file, index=False) + df_save.to_csv(output_file, index=False) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100755 index 0000000..a8aea2d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,34 @@ +[build-system] +requires = ["setuptools>=75.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "noaaplotter" +version = "0.5.3" +description = "Package to plot fancy climate/weather data of NOAA" +authors = [ + { name = "Ingmar Nitze", email = "ingmar.nitze@awi.de" } +] +readme = "README.md" # Specify a README file if available +# homepage = "https://github.com/initze/noaaplotter" +keywords = ["climate", "weather", "NOAA", "plotting"] + +requires-python = ">=3.10" +dependencies = [ + "pandas>=2.2", + "numpy>=2.2", + "matplotlib>=3.9", + "requests", + "joblib>=1.4", + "tqdm>=4.67", + "geemap>=0.35" +] + +[tool.setuptools.packages.find] +where = ["noaaplotter"] + +[project.scripts] +plot_daily = "scripts.plot_daily:main" +plot_monthly = "scripts.plot_monthly:main" +download_data = "scripts.download_data:main" +download_data_ERA5 = "scripts.download_data_ERA5:main" \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 259b0b8..0000000 --- a/setup.py +++ /dev/null @@ -1,27 +0,0 @@ -from distutils.core import setup - -setup( - name='noaaplotter', - version='0.5.2', - packages=['noaaplotter'], - url='https://github.com/initze/noaaplotter', - license='', - author='Ingmar Nitze', - author_email='ingmar.nitze@awi.de', - description='Package to plot fancy climate/weather data of NOAA', - install_requires=[ - 'pandas>=2.2', - 'numpy>=2.2', - 'matplotlib>=3.9', - 'requests', - 'joblib>=1.4', - 'tqdm>=4.67', - 'geemap>=0.35' - ], - scripts=[ - 'scripts/plot_daily.py', - 'scripts/plot_monthly.py', - 'scripts/download_data.py', - 'scripts/download_data_ERA5.py' - ] -) From e6c39989706ef8761b539944d7341dbab58d8ec0 Mon Sep 17 00:00:00 2001 From: Ingmar Nitze Date: Wed, 11 Dec 2024 15:49:02 +0100 Subject: [PATCH 2/3] major overhaul for toml conversion --- examples/example_daily_series.py | 2 +- examples/example_daily_series_winter.py | 2 +- examples/example_monthly_series.py | 2 +- noaaplotter/__init__.py | 1 - noaaplotter/dataset.py | 382 ------------------------ noaaplotter/download_utils.py | 92 ------ noaaplotter/noaaplotter.py | 16 +- noaaplotter/plot_utils.py | 49 --- noaaplotter/utils.py | 110 ------- pyproject.toml | 14 +- scripts/download_data.py | 51 ---- scripts/download_data_ERA5.py | 54 ---- scripts/download_data_SST.py | 100 ------- scripts/plot_daily.py | 102 ------- scripts/plot_monthly.py | 72 ----- 15 files changed, 18 insertions(+), 1031 deletions(-) delete mode 100644 noaaplotter/dataset.py delete mode 100644 noaaplotter/download_utils.py delete mode 100644 noaaplotter/plot_utils.py delete mode 100644 noaaplotter/utils.py delete mode 100644 scripts/download_data.py delete mode 100644 scripts/download_data_ERA5.py delete mode 100644 scripts/download_data_SST.py delete mode 100644 scripts/plot_daily.py delete mode 100644 scripts/plot_monthly.py diff --git a/examples/example_daily_series.py b/examples/example_daily_series.py index 2990dcc..4fe6395 100644 --- a/examples/example_daily_series.py +++ b/examples/example_daily_series.py @@ -6,7 +6,7 @@ author: Ingmar Nitze """ -from noaaplotter.noaaplotter import NOAAPlotter +from src.noaaplotter import NOAAPlotter import logging def main(): diff --git a/examples/example_daily_series_winter.py b/examples/example_daily_series_winter.py index cc7e510..27bb40a 100644 --- a/examples/example_daily_series_winter.py +++ b/examples/example_daily_series_winter.py @@ -6,7 +6,7 @@ author: Ingmar Nitze """ -from noaaplotter.noaaplotter import NOAAPlotter +from src.noaaplotter import NOAAPlotter import logging def main(): diff --git a/examples/example_monthly_series.py b/examples/example_monthly_series.py index e4eb25b..6e6048e 100644 --- a/examples/example_monthly_series.py +++ b/examples/example_monthly_series.py @@ -7,7 +7,7 @@ author: Ingmar Nitze """ -from noaaplotter.noaaplotter import NOAAPlotter +from src.noaaplotter import NOAAPlotter import logging def main(): diff --git a/noaaplotter/__init__.py b/noaaplotter/__init__.py index 3eb0b87..e69de29 100644 --- a/noaaplotter/__init__.py +++ b/noaaplotter/__init__.py @@ -1 +0,0 @@ -from noaaplotter import * \ No newline at end of file diff --git a/noaaplotter/dataset.py b/noaaplotter/dataset.py deleted file mode 100644 index aea6154..0000000 --- a/noaaplotter/dataset.py +++ /dev/null @@ -1,382 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -######################## -# Credits here -# author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research -# contact: ingmar.nitze@awi.de -# version: 2020-12-09 - -######################## -import numpy as np -import os -from .utils import * -numeric_only = True - -class NOAAPlotterDailySummariesDataset(object): - """ - This class/module creates nice plots of observed weather data from NOAA - """ - - def __init__(self, - input_filepath=None, - location=None, - remove_feb29=False): - self.input_switch = None - self.input_filepath = input_filepath - self.location = location - self.noaa_token = None - self.noaa_location = None - self.remove_feb29 = remove_feb29 - self.data = None - self._check_data_loading() - if self.input_switch == 'file': - self._load_file() - elif self.input_switch == 'noaa_api': - self._load_noaa() - self._validate_location() - self._update_datatypes() - self._get_datestring() - self._get_tmean() - self._remove_feb29() - self._filter_to_location() - - def print_locations(self): - """ - Print all locations names - """ - print(self.data['NAME'].unique()) - - def _check_data_loading(self): - """ - function check if all requirements for loading options are met - File loading: - * input_filepath - """ - if os.path.exists(self.input_filepath): - self.input_switch = 'file' - elif self.noaa_token and self.noaa_location: - self.input_switch = 'noaa_api' - else: - raise ImportError("Please enter either correct file path or noaa station_id and API token") - - def _load_file(self): - """ - load csv file into Pandas DataFrame - :return: - """ - self.data = pd.read_csv(self.input_filepath) - - def _load_noaa(self): - """ - load data through NOAA API - """ - pass - - def _save_noaa(self): - """ - save loaded NOAA API data to temporary csv file - """ - - def _validate_location(self): - """ - raise error and message if location name cannot be found - :return: - """ - if not self.location and len(pd.unique(self.data['NAME']) == 1): - pass - elif not self.location and len(pd.unique(self.data['NAME']) > 1): - raise ValueError( - 'There is more than one location in the dataset. Please choose a location using the -loc option! ' - 'Valid Location identifiers: {0} ' - .format(self.data['NAME'].unique())) - else: - filt = self.data['NAME'].str.lower().str.contains(self.location.lower()) - if filt.sum() == 0: - raise ValueError('Location Name is not valid! Valid Location identifiers: {0}' - .format(self.data['NAME'].unique())) - - def _update_datatypes(self): - """ - define 'DATE' as datetime - :return: - """ - self.data['DATE'] = pd.to_datetime(self.data['DATE']) - - def _get_datestring(self): - """ - write specific date formats - :return: - """ - self.data['DATE_MD'] = self.data['DATE'].dt.strftime('%m-%d') - self.data['DATE_YM'] = self.data['DATE'].dt.strftime('%Y-%m') - self.data['DATE_M'] = self.data['DATE'].dt.strftime('%m') - - def _get_tmean(self): - """ - calculate mean daily temperature from min and max - :return: - """ - # TODO: check for cases where TMIN and TMAX are empty (e.g. Schonefeld). There TAVG is the main field - self.data['TMEAN'] = self.data[['TMIN', 'TMAX']].mean(axis=1) - - def _remove_feb29(self): - """ - Function to remove February 29 from the data - :return: - """ - if self.remove_feb29: - self.data = self.data[self.data['DATE_MD'] != '02-29'] - - def _filter_to_location(self): - """ - Filter dataset to the defined location - :return: - """ - if self.location: - filt = self.data['NAME'].str.lower().str.contains(self.location.lower()) - if len(filt) > 0: - self.data = self.data.loc[filt] - else: - raise ValueError('Location Name is not valid') - - def filter_to_climate(self, climate_start, climate_end): - """ - Function to create filtered dataset covering the defined climate normal period - :return: - """ - df_clim = self.data[(self.data['DATE'] >= climate_start) & (self.data['DATE'] <= climate_end)] - return df_clim - - @staticmethod - def get_monthly_stats(df): - """ - calculate monthly statistics - :param df: - :type df: pandas.DataFrame - :return: - """ - df_out = pd.DataFrame() - df_out['tmean_doy_mean'] = df[['DATE', 'TMEAN']].groupby(df['DATE_YM']).mean(numeric_only=numeric_only).TMEAN - df_out['tmean_doy_std'] = df[['DATE', 'TMEAN']].groupby(df['DATE_YM']).std(numeric_only=numeric_only).TMEAN - df_out['tmax_doy_max'] = df[['DATE', 'TMAX']].groupby(df['DATE_YM']).max(numeric_only=numeric_only).TMAX - df_out['tmax_doy_std'] = df[['DATE', 'TMAX']].groupby(df['DATE_YM']).std(numeric_only=numeric_only).TMAX - df_out['tmin_doy_min'] = df[['DATE', 'TMIN']].groupby(df['DATE_YM']).min(numeric_only=numeric_only).TMIN - df_out['tmin_doy_std'] = df[['DATE', 'TMIN']].groupby(df['DATE_YM']).std(numeric_only=numeric_only).TMIN - if 'SNOW' in df.columns: - df_out['snow_doy_mean'] = df[['DATE', 'SNOW']].groupby(df['DATE_YM']).mean(numeric_only=numeric_only).SNOW - df_out['prcp_sum'] = df[['DATE', 'PRCP']].groupby(df['DATE_YM']).sum(numeric_only=numeric_only).PRCP - return df_out - - @staticmethod - def get_monthy_climate(df): - """ - :param df: - :return: - """ - df_out = pd.DataFrame() - df = df.data - df['Month'] = df.reset_index().apply(lambda x: int(x['DATE_MD'][:2]), axis=1).values - df_out['tmean_mean'] = df[['Month', 'TMEAN']].groupby(df['Month']).mean(numeric_only=numeric_only).TMEAN - df_out['tmean_std'] = df[['Month', 'TMEAN']].groupby(df['Month']).std(numeric_only=numeric_only).TMEAN - df_out['tmax_max'] = df[['Month', 'TMAX']].groupby(df['Month']).max(numeric_only=numeric_only).TMAX - df_out['tmax_std'] = df[['Month', 'TMAX']].groupby(df['Month']).std(numeric_only=numeric_only).TMAX - df_out['tmin_min'] = df[['Month', 'TMIN']].groupby(df['Month']).min(numeric_only=numeric_only).TMIN - df_out['tmin_std'] = df[['Month', 'TMIN']].groupby(df['Month']).std(numeric_only=numeric_only).TMIN - if 'SNOW' in df.columns: - df_out['snow_mean'] = df[['Month', 'SNOW']].groupby(df['Month']).mean(numeric_only=numeric_only).SNOW - unique_years = len(np.unique(df.apply(lambda x: parse_dates_YM(x['DATE_YM']).year, axis=1))) - df_out['prcp_mean'] = df[['Month', 'PRCP']].groupby(df['Month']).mean(numeric_only=numeric_only).PRCP * unique_years - return df_out.reset_index(drop=False) - - -class NOAAPlotterDailyClimateDataset(object): - # TODO: make main class sub subclasses for daily/monthly - def __init__(self, daily_dataset, start='1981-01-01', end='2010-12-31', filtersize=7, impute_feb29=True): - """ - :param start: - :param end: - :param filtersize: - :param impute_feb29: - """ - self.start = parse_dates(start) - self.end = parse_dates(end) - self.filtersize = filtersize - self.impute_feb29 = impute_feb29 - self.daily_dataset = daily_dataset - self.data_daily = None - self.data = None - self.date_range_valid = False - - # validate date range - self._validate_date_range() - # filter daily to date range - self._filter_to_climate() - # calculate daily statistics - self._calculate_climate_statistics() - # mean imputation for 29 February - self._impute_feb29() - # filter if desired - self._run_filter() - # make completeness report - - def _validate_date_range(self): - if self.daily_dataset.data['DATE'].max() >= self.end: - if self.daily_dataset.data['DATE'].min() <= self.end: - self.date_range_valid = True - else: - raise ('Dataset is insufficient to calculate climate normals!') - - def _filter_to_climate(self): - """ - calculate climate dataset - :return: - """ - df_clim = self.daily_dataset.data[(self.daily_dataset.data['DATE'] >= self.start) & - (self.daily_dataset.data['DATE'] <= self.end)] - df_clim = df_clim[(df_clim['DATE_MD'] != '02-29')] - self.data_daily = df_clim - - def _calculate_climate_statistics(self): - """ - Function to calculate major statistics - :param self.data_daily: - :type self.data_daily: pandas.DataFrame - :return: - """ - df_out = pd.DataFrame() - df_out['tmean_doy_mean'] = self.data_daily[['DATE', 'TMEAN']].groupby(self.data_daily['DATE_MD']).mean(numeric_only=numeric_only).TMEAN - df_out['tmean_doy_std'] = self.data_daily[['DATE', 'TMEAN']].groupby(self.data_daily['DATE_MD']).std().TMEAN - df_out['tmean_doy_max'] = self.data_daily[['DATE', 'TMEAN']].groupby(self.data_daily['DATE_MD']).max(numeric_only=numeric_only).TMEAN - df_out['tmean_doy_min'] = self.data_daily[['DATE', 'TMEAN']].groupby(self.data_daily['DATE_MD']).min(numeric_only=numeric_only).TMEAN - df_out['tmax_doy_max'] = self.data_daily[['DATE', 'TMAX']].groupby(self.data_daily['DATE_MD']).max(numeric_only=numeric_only).TMAX - df_out['tmax_doy_std'] = self.data_daily[['DATE', 'TMAX']].groupby(self.data_daily['DATE_MD']).std().TMAX - df_out['tmin_doy_min'] = self.data_daily[['DATE', 'TMIN']].groupby(self.data_daily['DATE_MD']).min(numeric_only=numeric_only).TMIN - df_out['tmin_doy_std'] = self.data_daily[['DATE', 'TMIN']].groupby(self.data_daily['DATE_MD']).std().TMIN - if 'SNOW' in self.data_daily.columns: - df_out['snow_doy_mean'] = self.data_daily[['DATE', 'SNOW']].groupby(self.data_daily['DATE_MD']).mean(numeric_only=numeric_only).SNOW - self.data = df_out - - def _impute_feb29(self): - """ - Function for mean imputation of February 29. - :return: - """ - if self.impute_feb29: - self.data.loc['02-29'] = self.data.loc['02-28':'03-01'].mean(axis=0) - self.data.sort_index(inplace=True) - - def _run_filter(self): - """ - Function to run rolling mean filter on climate series to smooth out short fluctuations - :return: - """ - if self.filtersize % 2 != 0: - data_roll = pd.concat([self.data.iloc[-self.filtersize:], - self.data, - self.data[:self.filtersize]]).rolling(self.filtersize).mean() - self.data = data_roll[self.filtersize: -self.filtersize] - - def _make_report(self): - """ - Function to create report on climate data completeness - :return: - """ - # input climate series (e.g. 1981-01-01 - 2010-12-31) - pass - - -class NOAAPlotterMonthlyClimateDataset(object): - def __init__(self, daily_dataset, start='1981-01-01', end='2010-12-31', impute_feb29=True): - self.daily_dataset = daily_dataset - self.monthly_aggregate = None - self.start = parse_dates(start) - self.end = parse_dates(end) - self.impute_feb29 = impute_feb29 - self._validate_date_range() - - def _validate_date_range(self): - if self.daily_dataset.data['DATE'].max() >= self.end: - if self.daily_dataset.data['DATE'].min() <= self.end: - self.date_range_valid = True - else: - raise ('Dataset is insufficient to calculate climate normals!') - - def _filter_to_climate(self): - """ - calculate climate dataset - :return: - """ - df_clim = self.daily_dataset.data[(self.daily_dataset.data['DATE'] >= self.start) & - (self.daily_dataset.data['DATE'] <= self.end)] - df_clim = df_clim[(df_clim['DATE_MD'] != '02-29')] - self.data_daily = df_clim - - def filter_to_date(self): - """ - calculate climate dataset - :return: - """ - df_clim = self.daily_dataset.data[(self.daily_dataset.data['DATE'] >= self.start) & - (self.daily_dataset.data['DATE'] <= self.end)] - df_clim = df_clim[(df_clim['DATE_MD'] != '02-29')] - return df_clim - - def _impute_feb29(self): - """ - Function for mean imputation of February 29. - :return: - """ - pass - - def calculate_monthly_statistics(self): - """ - Function to calculate monthly statistics. - :return: - """ - - df_out = pd.DataFrame() - data_filtered = self.filter_to_date() - df_out['tmean_doy_mean'] = data_filtered[['DATE', 'TMEAN']].groupby(data_filtered['DATE_YM']).mean(numeric_only=numeric_only).TMEAN - df_out['tmean_doy_std'] = data_filtered[['DATE', 'TMEAN']].groupby(data_filtered['DATE_YM']).std(numeric_only=numeric_only).TMEAN - df_out['tmax_doy_max'] = data_filtered[['DATE', 'TMAX']].groupby(data_filtered['DATE_YM']).max(numeric_only=numeric_only).TMAX - df_out['tmax_doy_std'] = data_filtered[['DATE', 'TMAX']].groupby(data_filtered['DATE_YM']).std(numeric_only=numeric_only).TMAX - df_out['tmin_doy_min'] = data_filtered[['DATE', 'TMIN']].groupby(data_filtered['DATE_YM']).min(numeric_only=numeric_only).TMIN - df_out['tmin_doy_std'] = data_filtered[['DATE', 'TMIN']].groupby(data_filtered['DATE_YM']).std(numeric_only=numeric_only).TMIN - if 'SNOW' in data_filtered.columns: - df_out['snow_doy_mean'] = data_filtered[['DATE', 'SNOW']].groupby(data_filtered['DATE_YM']).mean(numeric_only=numeric_only).SNOW - df_out['prcp_sum'] = data_filtered[['DATE', 'PRCP']].groupby(data_filtered['DATE_YM']).sum(numeric_only=numeric_only).PRCP - self.monthly_aggregate = df_out - - def calculate_monthly_climate(self): - """ - Function to calculate monthly climate statistics. - :return: - """ - df_out = pd.DataFrame() - data_filtered = self.filter_to_date() - - data_filtered['DATE'] = data_filtered.apply(lambda x: parse_dates_YM(x['DATE_YM']), axis=1) - data_filtered['Month'] = data_filtered.apply(lambda x: parse_dates_YM(x['DATE_YM']).month, axis=1) - data_filtered['Year'] = data_filtered.apply(lambda x: parse_dates_YM(x['DATE_YM']).year, axis=1) - - df_out['tmean_doy_mean'] = data_filtered[['DATE', 'TMEAN']].groupby(data_filtered['Month']).mean(numeric_only=numeric_only).TMEAN - df_out['tmean_doy_std'] = data_filtered[['DATE', 'TMEAN']].groupby(data_filtered['Month']).std(numeric_only=numeric_only).TMEAN - df_out['tmax_doy_max'] = data_filtered[['DATE', 'TMAX']].groupby(data_filtered['Month']).max(numeric_only=numeric_only).TMAX - df_out['tmax_doy_std'] = data_filtered[['DATE', 'TMAX']].groupby(data_filtered['Month']).std(numeric_only=numeric_only).TMAX - df_out['tmin_doy_min'] = data_filtered[['DATE', 'TMIN']].groupby(data_filtered['Month']).min(numeric_only=numeric_only).TMIN - df_out['tmin_doy_std'] = data_filtered[['DATE', 'TMIN']].groupby(data_filtered['Month']).std(numeric_only=numeric_only).TMIN - if 'SNOW' in data_filtered.columns: - df_out['snow_doy_mean'] = data_filtered[['DATE', 'SNOW']].groupby(data_filtered['Month']).mean(numeric_only=numeric_only).SNOW - df_out['prcp_sum'] = data_filtered[['DATE', 'PRCP']].groupby(data_filtered['Month']).mean(numeric_only=numeric_only).PRCP * 30 - # df_out = df_out.set_index('DATE_YM', drop=False) - self.monthly_climate = df_out - - def _make_report(self): - """ - Function to create report on climate data completeness - :return: - """ - # input climate series (e.g. 1981-01-01 - 2010-12-31) - - pass \ No newline at end of file diff --git a/noaaplotter/download_utils.py b/noaaplotter/download_utils.py deleted file mode 100644 index 6a60dbf..0000000 --- a/noaaplotter/download_utils.py +++ /dev/null @@ -1,92 +0,0 @@ -import csv -import os -from datetime import datetime - -import numpy as np -import pandas as pd -import tqdm -from joblib import Parallel, delayed - -import geemap -import ee - -from noaaplotter.utils import dl_noaa_api, assign_numeric_datatypes - - -def download_from_noaa(output_file, start_date, end_date, station_id, noaa_api_token, datatypes=['TMIN', 'TMAX', 'PRCP', 'SNOW'], loc_name='', n_jobs=4): - # remove file if exists - if os.path.exists(output_file): - os.remove(output_file) - # Make query string - dtypes_string = '&'.join([f'datatypeid={dt}' for dt in datatypes]) - # convert datestring to dt - dt_start = datetime.strptime(start_date, '%Y-%m-%d') - dt_end = datetime.strptime(end_date, '%Y-%m-%d') - # calculate number of days - n_days = (dt_end - dt_start).days - # calculate number of splits to fit into 1000 lines/rows - split_size = np.floor(1000 / len(datatypes)) - # calculate splits - split_range = np.arange(0, n_days, split_size) - # Data Loading - print('Downloading data through NOAA API') - datasets_list = Parallel(n_jobs=n_jobs)( - delayed(dl_noaa_api)(i, datatypes, station_id, noaa_api_token, start_date, end_date, split_size) - for i in tqdm.tqdm(split_range[:]) - ) - # Merge subsets and create DataFrame - df = pd.concat(datasets_list) - - df_pivot = assign_numeric_datatypes(df) - df_pivot['DATE'] = df_pivot.apply(lambda x: datetime.fromisoformat(x['DATE']).strftime('%Y-%m-%d'), axis=1) - - df_pivot = df_pivot.reset_index(drop=False) - dr = pd.DataFrame(pd.date_range(start=start_date, end=end_date), columns=['DATE']) - dr['DATE'] = dr['DATE'].astype(str) - df_merged = pd.concat([df_pivot.set_index('DATE'), dr.set_index('DATE')], join='outer', axis=1, - sort=True) - df_merged['DATE'] = df_merged.index - df_merged['NAME'] = loc_name - df_merged['TAVG'] = None - df_merged['SNWD'] = None - final_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] - df_final = df_merged[final_cols] - df_final = df_final.replace({np.nan: None}) - print(f'Saving data to {output_file}') - df_final.to_csv(output_file, index=False, quoting=csv.QUOTE_ALL) - return 0 - - -def download_era5_from_gee(latitude, longitude, end_date, start_date, output_file): - ee.Initialize() - EE_LAYER = 'ECMWF/ERA5/DAILY' - location = ee.Geometry.Point([longitude, latitude]) - # load ImageCollection - col = ee.ImageCollection(EE_LAYER).filterBounds(location).filterDate(start_date, end_date) - # Download data - print("Start downloading daily ERA5 data.") - print("Download may take a while.\n1yr: ~5 seconds\n10yrs: ~35 seconds\n50yrs: ~8 min") - result = geemap.extract_pixel_values(col, region=location) - out_dict = result.getInfo() - df_gee = pd.DataFrame(data=[out_dict.keys(), out_dict.values()]).T - # parse dates and values - df_gee['time'] = df_gee[0].apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}') - df_gee['feature'] = df_gee[0].apply(lambda x: x[9:]) - df_gee['value'] = df_gee[1] - df = df_gee.pivot_table(values='value', columns=['feature'], index='time') # .reset_index(drop=False) - # #### recalculate values - df_new = pd.DataFrame(index=df.index) - temperature_cols = ['mean_2m_air_temperature', 'minimum_2m_air_temperature', 'maximum_2m_air_temperature', - 'dewpoint_2m_temperature'] - precipitation_cols = ['total_precipitation'] - df_joined = df_new.join(df[temperature_cols] - 273.15).join(df[precipitation_cols] * 1e3).reset_index(drop=False) - # Create Output - rename_dict = {'time': 'DATE', 'total_precipitation': 'PRCP', 'mean_2m_air_temperature': 'TAVG', - 'maximum_2m_air_temperature': 'TMAX', 'minimum_2m_air_temperature': 'TMIN'} - df_renamed = df_joined.rename(columns=rename_dict) - df_renamed['NAME'] = '' - df_renamed['STATION'] = '' - df_renamed['SNWD'] = '' - output_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] - df_save = df_renamed[output_cols].astype(str) - df_save.to_csv(output_file, index=False) \ No newline at end of file diff --git a/noaaplotter/noaaplotter.py b/noaaplotter/noaaplotter.py index 1cd726a..d9245d7 100644 --- a/noaaplotter/noaaplotter.py +++ b/noaaplotter/noaaplotter.py @@ -8,16 +8,16 @@ # version: 2021-09-06 import numpy as np +from matplotlib import dates ######################## -from matplotlib import pyplot as plt, dates -import matplotlib.dates as mdates - -from .dataset import NOAAPlotterDailyClimateDataset as DS_daily -from .dataset import NOAAPlotterDailySummariesDataset as Dataset -from .dataset import NOAAPlotterMonthlyClimateDataset as DS_monthly -from .plot_utils import * -from .utils import * +from matplotlib import pyplot as plt + +from noaaplotter.utils.dataset import NOAAPlotterDailyClimateDataset as DS_daily +from noaaplotter.utils.dataset import NOAAPlotterDailySummariesDataset as Dataset +from noaaplotter.utils.dataset import NOAAPlotterMonthlyClimateDataset as DS_monthly +from noaaplotter.utils.plot_utils import * +from noaaplotter.utils.utils import * pd.plotting.register_matplotlib_converters() numeric_only = True diff --git a/noaaplotter/plot_utils.py b/noaaplotter/plot_utils.py deleted file mode 100644 index 34ae3fc..0000000 --- a/noaaplotter/plot_utils.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -######################## -# Credits here -# author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research -# contact: ingmar.nitze@awi.de -# version: 2021-09-11 - -######################## - -# TODO: move to external file -def setup_monthly_plot_props(information, anomaly): - plot_kwargs = {} - if information == 'Temperature': - plot_kwargs['cmap'] = 'RdBu_r' - plot_kwargs['fc_low'] = '#4393c3' - plot_kwargs['fc_high'] = '#d6604d' - if anomaly: - plot_kwargs['value_column'] = 'tmean_diff' - plot_kwargs['y_label'] = 'Temperature departure [°C]' - plot_kwargs['title'] = 'Monthly departure from climatological mean (1981-2010)' - plot_kwargs['legend_label_above'] = 'Above average' - plot_kwargs['legend_label_below'] = 'Below average' - else: - plot_kwargs['value_column'] = 'tmean_doy_mean' - plot_kwargs['y_label'] = 'Temperature [°C]' - plot_kwargs['title'] = 'Monthly Mean Temperature' - plot_kwargs['legend_label_above'] = 'Above freezing' - plot_kwargs['legend_label_below'] = 'Below freezing' - - elif information == 'Precipitation': - plot_kwargs['fc_low'] = '#d6604d' - plot_kwargs['fc_high'] = '#4393c3' - if anomaly: - plot_kwargs['cmap'] = 'RdBu' - plot_kwargs['value_column'] = 'prcp_diff' - plot_kwargs['y_label'] = 'Precipitation departure [mm]' - plot_kwargs['title'] = 'Monthly departure from climatological mean (1981-2010)' - plot_kwargs['legend_label_above'] = 'Above average' - plot_kwargs['legend_label_below'] = 'Below average' - else: - plot_kwargs['cmap'] = 'Blues' - plot_kwargs['value_column'] = 'prcp_sum' - plot_kwargs['y_label'] = 'Precipitation [mm]' - plot_kwargs['title'] = 'Monthly Precipitation' - plot_kwargs['legend_label_below'] = '' - plot_kwargs['legend_label_above'] = 'Monthly Precipitation' - return plot_kwargs \ No newline at end of file diff --git a/noaaplotter/utils.py b/noaaplotter/utils.py deleted file mode 100644 index ee9367c..0000000 --- a/noaaplotter/utils.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -######################## -# Credits here -# author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research -# contact: ingmar.nitze@awi.de -# version: 2020-12-09 - -######################## -import datetime as dt -from datetime import timedelta -import requests, json -import pandas as pd - - -#import datetime - - -def parse_dates(date): - """ - - :param date: - :return: - """ - if isinstance(date, str): - return dt.datetime.strptime(date, '%Y-%m-%d') - elif isinstance(date, dt.datetime) or isinstance(date, dt.date): - return date - else: - raise ('Wrong date format. Either use native datetime format or "YYYY-mm-dd"') - - -def calc_trailing_mean(df, length, feature, new_feature): - """ - :param df: - :param length: - :param feature: - :param new_feature: - :return: - - """ - df[new_feature] = df[feature].rolling(length).mean() - return df - - -def parse_dates_YM(date): - """ - :param date: - :return: - """ - if isinstance(date, str): - return dt.datetime.strptime(date, '%Y-%m') - elif isinstance(date, dt.datetime): - return date - else: - raise('Wrong date format. Either use native datetime format or "YYYY-mm-dd"') - - -def dl_noaa_api(i, dtypes, station_id, Token, date_start, date_end, split_size): - """ - function to download from NOAA API - """ - dt_start = dt.datetime.strptime(date_start, '%Y-%m-%d') - dt_end = dt.datetime.strptime(date_end, '%Y-%m-%d') - - split_start = dt_start + timedelta(days=i) - split_end = dt_start + timedelta(days=i + split_size - 1) - if split_end > dt_end: - split_end = dt_end - - date_start_split = split_start.strftime('%Y-%m-%d') - date_end_split = split_end.strftime('%Y-%m-%d') - - # make the api call - request_url = 'https://www.ncei.noaa.gov/access/services/data/v1' - request_params = dict( - dataset = 'daily-summaries', - dataTypes = dtypes,#['PRCP', 'TMIN', 'TMAX'], - stations = station_id, - limit = 1000, - startDate = date_start_split, - endDate= date_end_split, - units='metric', - format='json' - ) - r = requests.get( - request_url, - params=request_params, - headers={'token': Token}) - - # workaround to skip empty returns (no data within period) - try: - # load the api response as a json - d = json.loads(r.text) - result = pd.DataFrame(d) - except json.JSONDecodeError: - print(f"Warning: No data available for period {date_start_split} to {date_end_split}. Skipping.") - result = None - return result - - -def assign_numeric_datatypes(df): - for col in df.columns: - if df[col].dtype == 'object': - try: - df[col] = pd.to_numeric(df[col]) - except: - pass - return df \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a8aea2d..b2d2c8a 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=75.0", "wheel"] +requires = ["setuptools>=42", "wheel"] build-backend = "setuptools.build_meta" [project] @@ -9,11 +9,11 @@ description = "Package to plot fancy climate/weather data of NOAA" authors = [ { name = "Ingmar Nitze", email = "ingmar.nitze@awi.de" } ] +license = { text = "" } readme = "README.md" # Specify a README file if available # homepage = "https://github.com/initze/noaaplotter" keywords = ["climate", "weather", "NOAA", "plotting"] -requires-python = ">=3.10" dependencies = [ "pandas>=2.2", "numpy>=2.2", @@ -25,10 +25,10 @@ dependencies = [ ] [tool.setuptools.packages.find] -where = ["noaaplotter"] +include = ["noaaplotter*"] [project.scripts] -plot_daily = "scripts.plot_daily:main" -plot_monthly = "scripts.plot_monthly:main" -download_data = "scripts.download_data:main" -download_data_ERA5 = "scripts.download_data_ERA5:main" \ No newline at end of file +plot_daily = "noaaplotter.scripts.plot_daily:main" # Adjust if necessary +plot_monthly = "noaaplotter.scripts.plot_monthly:main" # Adjust if necessary +download_data = "noaaplotter.scripts.download_data:main" # Adjust if necessary +download_data_ERA5 = "noaaplotter.scripts.download_data_ERA5:main" # Adjust if necessary diff --git a/scripts/download_data.py b/scripts/download_data.py deleted file mode 100644 index f459b21..0000000 --- a/scripts/download_data.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# Imports -import argparse -from noaaplotter.utils import dl_noaa_api -from noaaplotter.download_utils import download_from_noaa - - -def main(): - """ - Main Function - :return: - """ - ##### Parse arguments ##### - parser = argparse.ArgumentParser(description='Parse arguments.') - - parser.add_argument('-o', dest='output_file', type=str, required=True, - default='data/data.csv', - help='csv file to save results') - - parser.add_argument('-t', dest='token', type=str, required=False, - default='', - help='NOAA API token') - - parser.add_argument('-sid', dest='station_id', type=str, required=False, - default='', - help='NOAA Station ID, e.g. "GHCND:USW00026616" for Kotzebue, only if loading through NOAA API') - - parser.add_argument('-loc', dest='loc_name', type=str, required=False, - default='', - help='Location name') - - parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW']) - - parser.add_argument('-start', dest='start_date', type=str, required=True, - help='start date of plot ("yyyy-mm-dd")') - - parser.add_argument('-end', dest='end_date', type=str, required=True, - help='end date of plot ("yyyy-mm-dd")') - - parser.add_argument('-n_jobs', dest='n_jobs', type=int, required=False, default=1, - help='number of parallel processes') - - args = parser.parse_args() - - download_from_noaa(output_file=args.output_file, start_date=args.start_date, end_date=args.end_date, \ - datatypes=args.datatypes, noaa_api_token=args.token, loc_name=args.loc_name, \ - station_id=args.station_id, n_jobs=args.n_jobs) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/download_data_ERA5.py b/scripts/download_data_ERA5.py deleted file mode 100644 index ac96b46..0000000 --- a/scripts/download_data_ERA5.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# Imports -import argparse -import os - -from noaaplotter.download_utils import download_era5_from_gee - - -def main(): - """ - Main Function - :return: - """ - ##### Parse arguments ##### - parser = argparse.ArgumentParser(description='Parse arguments.') - - parser.add_argument('-o', dest='output_file', type=str, required=True, - default='data/data.csv', - help='csv file to save results') - - parser.add_argument('-lat', dest='lat', type=float, required=True, - help='Latitude of selected location') - - parser.add_argument('-lon', dest='lon', type=float, required=True, - help='Longitude of selected location') - - parser.add_argument('-loc', dest='loc_name', type=str, required=False, - default='', - help='Location name') - - parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW']) - - parser.add_argument('-start', dest='start_date', type=str, required=True, - help='start date of plot ("yyyy-mm-dd")') - - parser.add_argument('-end', dest='end_date', type=str, required=True, - help='end date of plot ("yyyy-mm-dd")') - - args = parser.parse_args() - - # remove file if exists - if os.path.exists(args.output_file): - os.remove(args.output_file) - - download_era5_from_gee(latitude=args.lat, - longitude = args.lon, - end_date= args.end_date, - start_date = args.start_date, - output_file = args.output_file) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/download_data_SST.py b/scripts/download_data_SST.py deleted file mode 100644 index 06aa660..0000000 --- a/scripts/download_data_SST.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# Imports -import argparse -import csv -from datetime import datetime -import numpy as np -import os -import pandas as pd -import tqdm -from joblib import delayed, Parallel -from noaaplotter.utils import dl_noaa_api -import ee -import geemap - -def main(): - """ - Main Function - :return: - """ - ##### Parse arguments ##### - parser = argparse.ArgumentParser(description='Parse arguments.') - - parser.add_argument('-o', dest='output_file', type=str, required=True, - default='data/data.csv', - help='csv file to save results') - - parser.add_argument('-lat', dest='lat', type=float, required=True, - help='Latitude of selected location') - - parser.add_argument('-lon', dest='lon', type=float, required=True, - help='Longitude of selected location') - - parser.add_argument('-loc', dest='loc_name', type=str, required=False, - default='', - help='Location name') - - #parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW']) - - parser.add_argument('-start', dest='start_date', type=str, required=True, - help='start date of plot ("yyyy-mm-dd")') - - parser.add_argument('-end', dest='end_date', type=str, required=True, - help='end date of plot ("yyyy-mm-dd")') - - args = parser.parse_args() - - # remove file if exists - if os.path.exists(args.output_file): - os.remove(args.output_file) - - ee.Initialize() - - EE_LAYER = "NOAA/CDR/OISST/V2_1" - - location = ee.Geometry.Point([args.lon, args.lat]) - - # load ImageCollection - col = ee.ImageCollection(EE_LAYER).filterBounds(location).filterDate(args.start_date, args.end_date).select('sst') - - # Download data - print("Start downloading NOAA CDR OISST v02r01 data.") - print("Download may take a while.\n1yr: ~5 seconds\n10yrs: ~35 seconds\n50yrs: ~8 min") - - out_dict = geemap.extract_pixel_values(col, location, getInfo=True) - df_gee = pd.DataFrame(data=[out_dict.keys(), out_dict.values()]).T - - # parse dates and values - df_gee['time'] = df_gee[0].apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}') - df_gee['feature'] = df_gee[0].apply(lambda x: x[9:]) - df_gee['value'] = df_gee[1] - - df = df_gee.pivot_table(values='value', columns=['feature'], index='time')#.reset_index(drop=False) - - # #### recalculate values - df_new = pd.DataFrame(index=df.index) - - temperature_cols = ['sst'] - #precipitation_cols = ['total_precipitation'] - df_joined = df_new.join(df[temperature_cols]*0.01)#.join(df[precipitation_cols] *1e3).reset_index(drop=False) - - # Create Output - df_joined.reset_index(drop=False, inplace=True) - rename_dict = {'time': 'DATE', 'sst': 'TMAX'} - df_renamed = df_joined.rename(columns=rename_dict) - df_renamed['NAME'] = '' - df_renamed['STATION'] = '' - df_renamed['SNWD'] = '' - df_renamed['PRCP'] = '' - df_renamed['TAVG'] = df_renamed['TMAX'] - df_renamed['TMIN'] = df_renamed['TMAX'] - - output_cols = ["STATION","NAME","DATE","PRCP","SNWD","TAVG","TMAX","TMIN"] - df_save = df_renamed[output_cols].astype(str) - - df_save.to_csv(args.output_file, index=False) - - -if __name__ == "__main__": - main() diff --git a/scripts/plot_daily.py b/scripts/plot_daily.py deleted file mode 100644 index b4dafd2..0000000 --- a/scripts/plot_daily.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -from noaaplotter.noaaplotter import NOAAPlotter -import argparse - -def main(): - """ - Main Function - :return: - """ - ##### Parse arguments ##### - parser = argparse.ArgumentParser(description='Parse arguments.') - - parser.add_argument('-infile', dest='infile', type=str, required=True, - default='data/temp.csv', - help='input file with climate data') - - parser.add_argument('-t', dest='token', type=str, required=False, - default='', - help='NOAA API token, only if loading through NOAA API') - - parser.add_argument('-sid', dest='station_id', type=str, required=False, - default='', - help='NOAA Station ID, e.g. "GHCND:USW00026616" for Kotzebue, only if loading through NOAA API') - - parser.add_argument('-start', dest='start_date', type=str, required=True, - help='start date of plot ("yyyy-mm-dd")') - - parser.add_argument('-end', dest='end_date', type=str, required=True, - help='end date of plot ("yyyy-mm-dd")') - - parser.add_argument('-loc', dest='location', required=False, - type=str, default=None, - help='Location name, must be in data file') - - parser.add_argument('-save_plot', dest='save_path', type=str, required=False, - default=None, - help='filepath for plot') - - parser.add_argument('-t_range', dest='t_range', type=float, nargs=2, required=False, - default=[None, None], - help='temperature range in plot') - - parser.add_argument('-p_range', dest='p_range', type=float, required=False, - default=None, - help='maximum precipitation value in plot') - - parser.add_argument('-s_range', dest='s_range', type=float, required=False, - default=None, - help='maximum snow accumulation value in plot') - - parser.add_argument('-snow_acc', dest='snow_acc', required=False, - default=False, action='store_true', - help='show snow accumulation, only useful for plotting winter season (e.g. July to June') - - parser.add_argument('-filtersize', dest='filtersize', type=int, required=False, - default=7, - help='parameter to smooth climate temperature series by n days for smoother visual appearance. ' - 'default value: 7') - - parser.add_argument('-dpi', dest='dpi', type=float, required=False, - default=100, - help='dpi for plot output') - - parser.add_argument('-plot', dest='show_plot', required=False, - default=False, action='store_true', - help='Location name, must be in data file') - - parser.add_argument('-figsize', dest='figsize', type=float, nargs=2, required=False, - default=[9, 6], - help='figure size in inches width x height. 15 10 recommended for 1 year, 30 10 for 2 years ...') - - parser.add_argument('-title', dest='title', type=str, required=False, - default=None, - help='Plot title') - - args = parser.parse_args() - - ##### Download from NOAA ##### - - ##### Run Plotting function ##### - n = NOAAPlotter(args.infile, - location=args.location, - climate_filtersize=args.filtersize) - - n.plot_weather_series(start_date=args.start_date, - end_date=args.end_date, - show_snow_accumulation=args.snow_acc, - #kwargs_fig={'dpi':args.dpi, 'figsize':args.figsize}, - plot_extrema=True, - show_plot=args.show_plot, - save_path=args.save_path, - plot_tmin=args.t_range[0], - plot_tmax=args.t_range[1], - plot_pmax=args.p_range, - plot_snowmax=args.s_range, - dpi=args.dpi, - figsize=args.figsize, - title=args.title) - -if __name__ == "__main__": - main() diff --git a/scripts/plot_monthly.py b/scripts/plot_monthly.py deleted file mode 100644 index 3a3b52a..0000000 --- a/scripts/plot_monthly.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -from noaaplotter.noaaplotter import NOAAPlotter -import argparse - -def main(): - """ - Main Function - :return: - """ - ##### Parse arguments ##### - parser = argparse.ArgumentParser(description='Parse arguments.') - - parser.add_argument('-infile', dest='infile', type=str, required=True, - help='input file with climate data') - - parser.add_argument('-start', dest='start_date', type=str, required=True, - help='start date of plot ("yyyy-mm-dd")') - - parser.add_argument('-end', dest='end_date', type=str, required=True, - help='end date of plot ("yyyy-mm-dd")') - - parser.add_argument('-loc', dest='location', required=False, - type=str, default=None, - help='Location name, must be in data file') - - parser.add_argument('-save_plot', dest='save_path', type=str, required=False, - default=None, - help='filepath for plot') - - parser.add_argument('-type', dest='type', type=str, required=True, - help='Attribute Type: {Temperature, Precipitation}', - default='Temperature') - - parser.add_argument('-trail', dest='trailing_mean', type=int, required=False, - default=None, - help='trailing/rolling mean value in months') - - parser.add_argument('-anomaly', dest='anomaly', required=False, - default=False, action='store_true', - help='show anomaly from climate') - - parser.add_argument('-dpi', dest='dpi', type=float, required=False, - default=100, - help='dpi for plot output') - - parser.add_argument('-plot', dest='show_plot', required=False, - default=False, action='store_true', - help='Location name, must be in data file') - - parser.add_argument('-figsize', dest='figsize', type=float, nargs=2, required=False, - default=[9, 4], - help='figure size in inches width x height. 9 4 recommended 30 years') - - args = parser.parse_args() - - ##### Run Plotting function ##### - n = NOAAPlotter(args.infile, - location=args.location) - - n.plot_monthly_barchart(args.start_date, - args.end_date, - information=args.type, - anomaly=args.anomaly, - trailing_mean=args.trailing_mean, - show_plot=args.show_plot, - dpi=args.dpi, - figsize=args.figsize, - save_path=args.save_path) - -if __name__ == "__main__": - main() From 39fbb2b333918f2aee8275f907cd7d39cbc7efc5 Mon Sep 17 00:00:00 2001 From: Ingmar Nitze Date: Wed, 11 Dec 2024 15:49:48 +0100 Subject: [PATCH 3/3] moved files --- .../__pycache__/download_data.cpython-310.pyc | Bin 0 -> 1554 bytes .../__pycache__/plot_daily.cpython-310.pyc | Bin 0 -> 2657 bytes noaaplotter/scripts/download_data.py | 96 +++++ noaaplotter/scripts/download_data_ERA5.py | 54 +++ noaaplotter/scripts/download_data_SST.py | 100 +++++ noaaplotter/scripts/plot_daily.py | 102 +++++ noaaplotter/scripts/plot_monthly.py | 72 ++++ noaaplotter/utils/__init__.py | 0 .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 192 bytes .../utils/__pycache__/dataset.cpython-310.pyc | Bin 0 -> 13759 bytes .../download_utils.cpython-310.pyc | Bin 0 -> 5066 bytes .../__pycache__/plot_utils.cpython-310.pyc | Bin 0 -> 1148 bytes .../utils/__pycache__/utils.cpython-310.pyc | Bin 0 -> 2347 bytes noaaplotter/utils/dataset.py | 382 ++++++++++++++++++ noaaplotter/utils/download_utils.py | 177 ++++++++ noaaplotter/utils/plot_utils.py | 49 +++ noaaplotter/utils/utils.py | 110 +++++ 17 files changed, 1142 insertions(+) create mode 100644 noaaplotter/scripts/__pycache__/download_data.cpython-310.pyc create mode 100644 noaaplotter/scripts/__pycache__/plot_daily.cpython-310.pyc create mode 100644 noaaplotter/scripts/download_data.py create mode 100644 noaaplotter/scripts/download_data_ERA5.py create mode 100644 noaaplotter/scripts/download_data_SST.py create mode 100644 noaaplotter/scripts/plot_daily.py create mode 100644 noaaplotter/scripts/plot_monthly.py create mode 100755 noaaplotter/utils/__init__.py create mode 100644 noaaplotter/utils/__pycache__/__init__.cpython-310.pyc create mode 100644 noaaplotter/utils/__pycache__/dataset.cpython-310.pyc create mode 100644 noaaplotter/utils/__pycache__/download_utils.cpython-310.pyc create mode 100644 noaaplotter/utils/__pycache__/plot_utils.cpython-310.pyc create mode 100644 noaaplotter/utils/__pycache__/utils.cpython-310.pyc create mode 100755 noaaplotter/utils/dataset.py create mode 100755 noaaplotter/utils/download_utils.py create mode 100755 noaaplotter/utils/plot_utils.py create mode 100755 noaaplotter/utils/utils.py diff --git a/noaaplotter/scripts/__pycache__/download_data.cpython-310.pyc b/noaaplotter/scripts/__pycache__/download_data.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0173ecf6ac851b1d5aec194aefb920f55231436d GIT binary patch literal 1554 zcmaJ>&2QX96t}&zUhii@_(%$cPM0EKsk@s-B8LbeR;gMd=r+n0T70pbeX|>HZI785 z7sYB%aNv^v0T37dC0{x9!WorAdE?C{TLB&G{p|O9Gw);Ojd{D>FmT-Z?b})G7{(u3 zSsfNCpQFef1Tesi3{StU$c!z|G7YEz`O)wyj6n6VB{o^*2LoAiT#9Qy$92ZYHH!`4 z-zO0dz@G{p`w0(%iMB0=j-+$hN0EObC}VDd3D%kU%y??dEgeZ!pb9qBz&W$c1zU#( zvQ20~dxh=5+Bv(v!oC3;=j@v+>|5}*BG8-}@XoI$Y@SumJ#WF4GTyuJ-UW9R-Y>Zi zR=8_GRTVS14&9~KpY30aXQrw>HRldW9i@TNL}{V4Q93AV@Zp(*yLDr1xF6*=8WhL9 zAWZ0;ESac~Cnd8dn94-5S1S4XVIU-GLO#&|){gWESL>(BbM zd-sd|(e1sj#}Bu+w|91TZ|!#JlneSLS2;V**iFil=#++2s@o<^W>lRBp3P3^G9t!` z_@rl43Zcqkg!0i5%deLd)!&p)X&P}wKklC5=*4jlpzHqI8Qv@db7ct>4#L9G zHT-&=o4nRb{5e0Cg}rz`H?^CqNfsY7QN|8L5JfDaso)bPC06K&iq5{HVNF9D!RzST z<_Br$mq#R8+S1gpjxci93kUlt)s5wKhUcAC_L=Zn0f4{kwxS}HC`c4ylT^Y)d5vEk`3-2jQyjr ze_=RCW^o{Vu1=U3==$_YC?@>vPj*)I!Qy7f!Nvclya?%OL1MgWIyd+3Y_$*EfwF<*Tee~BxYOyT&x&d&GUs@Oonqbnu$ xGZjX%e^F4WF1CNUShv6LmwUPbTcztyV-8uwKGW?a^|3gWmgQ8OZyF|f`47u?ldb>& literal 0 HcmV?d00001 diff --git a/noaaplotter/scripts/__pycache__/plot_daily.cpython-310.pyc b/noaaplotter/scripts/__pycache__/plot_daily.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27301b6d4c23ae5608ab1d59b164e96a142c8d79 GIT binary patch literal 2657 zcma)8Npl-T6rLH47He_5BzEkCq~ojtYq66!U_yvVNN@mSVkn3%Qxm&GWyT`Vs zk`rA0AE4mEKceQ!$!BgH2(Mcsd7M;HjOx94{raulU-!tbREiottzVyYZ%u33pK_3X z3>@4?;;YC&1HG@gdP~=(Z1jzm0U0oV(pnj6LiUBh=4s|f4QGuVMb+F9`e{`?E^U3a zv9aCvg`jM!EtR7ugTg7K2T1%6UDJfNtAh^4q5evHsqLy1A_G~-K^_W+Ien;|fFf#3 zP=-oMI|-)}?NmxT4KpgjESx^fCjMt&PU+@Ty0fqVCDbm$IXFMg^bTB5F&E)tDkgzT z!h|KPU=bF(23&?KhxsJ-Rk((8a^`x<;|5d{?Yk-Md$5d_C8$9i8snTzSP@y&uENcf z?Jc;iBCNrkl=gl2Ac=4;6=5AdOzOJ}A7Op>QuTcd_mhq`Msa@5{i40nMgFC}TR@sX zD#9m+nODY3W4DB(GExO;60*ooAx$Ao!-EA)y&tOg49c@er;*Me&B3RK6$HfMj#hmX zT`3ZLzO)^WJPN(GaC}c`))^Hc^VZcMn%cG*r^II6(50T>^{U<~0p)Gx1kxqS*Br0o z^l6+MzQ2hoU<bQup@M8aTqd;Mt&^q4pFP-mxc=?Vcg<#Vb!}~BZHaVz zMn3mNM4yNBf8@&5Fbxy2%*KR;ZPMtHl4Z-3_#G19;UhPe2Kdxmw+66Ot(wflQ^&HB zU0!@{WtaRJ#d&_$H~D-1gH{dQbHrdr*KE`O1T#Is$j-wnAS&ndxumph;~rkWil zS9w(h`Edos8n^eTB}LUh@+4;(jiNx*2^37ljP_w0VC+DeMyBfmt5 zu8;h<=kF7{-45NbuU3hi^qr_l3kxmc!bp)@bKfI36xN=Eh+5&3C!t4A?11b(&e`fr4q;HL;c{aD?nE>$9?M74G;#u) z;oiyw7>>KXpE_pg#pjecl#}NJA~%1)RgFV?^xboKX!nU71Qg?WZCWSEDIBiPI=O@J zg20J0$c{j!o<(WgDe*&rXFbZ+613ndDc~S9I!;#=pHGCN33NY_fQz1?N@K1^MW|^5*PGw`UDf7j;_q)cfpyA)$hQplz8UgcnXq9VG&D*Q#abx(^;SE(k*iL^?4+gR6;x|kJiT{SI zoGTlKZs@sx%I4ee-0N}%=Ux{x<_VigGk0P<@-3Pt?aJ9<%#r?0zk->|n!1VE3+Rcy zhU5kO$@lPA{EJ<~LgRvE$;vIOnr+Re+ODVM;r?Af^RbV-#Vt#%af)445%0P_gnfEX SVwX#JmkrEq6s)3dzWx`{fbtyx literal 0 HcmV?d00001 diff --git a/noaaplotter/scripts/download_data.py b/noaaplotter/scripts/download_data.py new file mode 100644 index 0000000..2c9b7ea --- /dev/null +++ b/noaaplotter/scripts/download_data.py @@ -0,0 +1,96 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Imports +import argparse + +from noaaplotter.utils.download_utils import download_from_noaa + + +def main(): + """ + Main Function + :return: + """ + ##### Parse arguments ##### + parser = argparse.ArgumentParser(description="Parse arguments.") + + parser.add_argument( + "-o", + dest="output_file", + type=str, + required=True, + default="data/data.csv", + help="csv file to save results", + ) + + parser.add_argument( + "-t", dest="token", type=str, required=False, default="", help="NOAA API token" + ) + + parser.add_argument( + "-sid", + dest="station_id", + type=str, + required=False, + default="", + help='NOAA Station ID, e.g. "GHCND:USW00026616" for Kotzebue, only if loading through NOAA API', + ) + + parser.add_argument( + "-loc", + dest="loc_name", + type=str, + required=False, + default="", + help="Location name", + ) + + parser.add_argument( + "-dt", + dest="datatypes", + type=list, + required=False, + default=["TMIN", "TMAX", "PRCP", "SNOW"], + ) + + parser.add_argument( + "-start", + dest="start_date", + type=str, + required=True, + help='start date of plot ("yyyy-mm-dd")', + ) + + parser.add_argument( + "-end", + dest="end_date", + type=str, + required=True, + help='end date of plot ("yyyy-mm-dd")', + ) + + parser.add_argument( + "-n_jobs", + dest="n_jobs", + type=int, + required=False, + default=1, + help="number of parallel processes", + ) + + args = parser.parse_args() + + download_from_noaa( + output_file=args.output_file, + start_date=args.start_date, + end_date=args.end_date, + datatypes=args.datatypes, + noaa_api_token=args.token, + loc_name=args.loc_name, + station_id=args.station_id, + n_jobs=args.n_jobs, + ) + + +if __name__ == "__main__": + main() diff --git a/noaaplotter/scripts/download_data_ERA5.py b/noaaplotter/scripts/download_data_ERA5.py new file mode 100644 index 0000000..f84400f --- /dev/null +++ b/noaaplotter/scripts/download_data_ERA5.py @@ -0,0 +1,54 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Imports +import argparse +import os + +from src.download_utils import download_era5_from_gee + + +def main(): + """ + Main Function + :return: + """ + ##### Parse arguments ##### + parser = argparse.ArgumentParser(description='Parse arguments.') + + parser.add_argument('-o', dest='output_file', type=str, required=True, + default='data/data.csv', + help='csv file to save results') + + parser.add_argument('-lat', dest='lat', type=float, required=True, + help='Latitude of selected location') + + parser.add_argument('-lon', dest='lon', type=float, required=True, + help='Longitude of selected location') + + parser.add_argument('-loc', dest='loc_name', type=str, required=False, + default='', + help='Location name') + + parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW']) + + parser.add_argument('-start', dest='start_date', type=str, required=True, + help='start date of plot ("yyyy-mm-dd")') + + parser.add_argument('-end', dest='end_date', type=str, required=True, + help='end date of plot ("yyyy-mm-dd")') + + args = parser.parse_args() + + # remove file if exists + if os.path.exists(args.output_file): + os.remove(args.output_file) + + download_era5_from_gee(latitude=args.lat, + longitude = args.lon, + end_date= args.end_date, + start_date = args.start_date, + output_file = args.output_file) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/noaaplotter/scripts/download_data_SST.py b/noaaplotter/scripts/download_data_SST.py new file mode 100644 index 0000000..a51f8d0 --- /dev/null +++ b/noaaplotter/scripts/download_data_SST.py @@ -0,0 +1,100 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Imports +import argparse +import csv +from datetime import datetime +import numpy as np +import os +import pandas as pd +import tqdm +from joblib import delayed, Parallel +from noaaplotter.utils. import dl_noaa_api +import ee +import geemap + +def main(): + """ + Main Function + :return: + """ + ##### Parse arguments ##### + parser = argparse.ArgumentParser(description='Parse arguments.') + + parser.add_argument('-o', dest='output_file', type=str, required=True, + default='data/data.csv', + help='csv file to save results') + + parser.add_argument('-lat', dest='lat', type=float, required=True, + help='Latitude of selected location') + + parser.add_argument('-lon', dest='lon', type=float, required=True, + help='Longitude of selected location') + + parser.add_argument('-loc', dest='loc_name', type=str, required=False, + default='', + help='Location name') + + #parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW']) + + parser.add_argument('-start', dest='start_date', type=str, required=True, + help='start date of plot ("yyyy-mm-dd")') + + parser.add_argument('-end', dest='end_date', type=str, required=True, + help='end date of plot ("yyyy-mm-dd")') + + args = parser.parse_args() + + # remove file if exists + if os.path.exists(args.output_file): + os.remove(args.output_file) + + ee.Initialize() + + EE_LAYER = "NOAA/CDR/OISST/V2_1" + + location = ee.Geometry.Point([args.lon, args.lat]) + + # load ImageCollection + col = ee.ImageCollection(EE_LAYER).filterBounds(location).filterDate(args.start_date, args.end_date).select('sst') + + # Download data + print("Start downloading NOAA CDR OISST v02r01 data.") + print("Download may take a while.\n1yr: ~5 seconds\n10yrs: ~35 seconds\n50yrs: ~8 min") + + out_dict = geemap.extract_pixel_values(col, location, getInfo=True) + df_gee = pd.DataFrame(data=[out_dict.keys(), out_dict.values()]).T + + # parse dates and values + df_gee['time'] = df_gee[0].apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}') + df_gee['feature'] = df_gee[0].apply(lambda x: x[9:]) + df_gee['value'] = df_gee[1] + + df = df_gee.pivot_table(values='value', columns=['feature'], index='time')#.reset_index(drop=False) + + # #### recalculate values + df_new = pd.DataFrame(index=df.index) + + temperature_cols = ['sst'] + #precipitation_cols = ['total_precipitation'] + df_joined = df_new.join(df[temperature_cols]*0.01)#.join(df[precipitation_cols] *1e3).reset_index(drop=False) + + # Create Output + df_joined.reset_index(drop=False, inplace=True) + rename_dict = {'time': 'DATE', 'sst': 'TMAX'} + df_renamed = df_joined.rename(columns=rename_dict) + df_renamed['NAME'] = '' + df_renamed['STATION'] = '' + df_renamed['SNWD'] = '' + df_renamed['PRCP'] = '' + df_renamed['TAVG'] = df_renamed['TMAX'] + df_renamed['TMIN'] = df_renamed['TMAX'] + + output_cols = ["STATION","NAME","DATE","PRCP","SNWD","TAVG","TMAX","TMIN"] + df_save = df_renamed[output_cols].astype(str) + + df_save.to_csv(args.output_file, index=False) + + +if __name__ == "__main__": + main() diff --git a/noaaplotter/scripts/plot_daily.py b/noaaplotter/scripts/plot_daily.py new file mode 100644 index 0000000..b4dafd2 --- /dev/null +++ b/noaaplotter/scripts/plot_daily.py @@ -0,0 +1,102 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from noaaplotter.noaaplotter import NOAAPlotter +import argparse + +def main(): + """ + Main Function + :return: + """ + ##### Parse arguments ##### + parser = argparse.ArgumentParser(description='Parse arguments.') + + parser.add_argument('-infile', dest='infile', type=str, required=True, + default='data/temp.csv', + help='input file with climate data') + + parser.add_argument('-t', dest='token', type=str, required=False, + default='', + help='NOAA API token, only if loading through NOAA API') + + parser.add_argument('-sid', dest='station_id', type=str, required=False, + default='', + help='NOAA Station ID, e.g. "GHCND:USW00026616" for Kotzebue, only if loading through NOAA API') + + parser.add_argument('-start', dest='start_date', type=str, required=True, + help='start date of plot ("yyyy-mm-dd")') + + parser.add_argument('-end', dest='end_date', type=str, required=True, + help='end date of plot ("yyyy-mm-dd")') + + parser.add_argument('-loc', dest='location', required=False, + type=str, default=None, + help='Location name, must be in data file') + + parser.add_argument('-save_plot', dest='save_path', type=str, required=False, + default=None, + help='filepath for plot') + + parser.add_argument('-t_range', dest='t_range', type=float, nargs=2, required=False, + default=[None, None], + help='temperature range in plot') + + parser.add_argument('-p_range', dest='p_range', type=float, required=False, + default=None, + help='maximum precipitation value in plot') + + parser.add_argument('-s_range', dest='s_range', type=float, required=False, + default=None, + help='maximum snow accumulation value in plot') + + parser.add_argument('-snow_acc', dest='snow_acc', required=False, + default=False, action='store_true', + help='show snow accumulation, only useful for plotting winter season (e.g. July to June') + + parser.add_argument('-filtersize', dest='filtersize', type=int, required=False, + default=7, + help='parameter to smooth climate temperature series by n days for smoother visual appearance. ' + 'default value: 7') + + parser.add_argument('-dpi', dest='dpi', type=float, required=False, + default=100, + help='dpi for plot output') + + parser.add_argument('-plot', dest='show_plot', required=False, + default=False, action='store_true', + help='Location name, must be in data file') + + parser.add_argument('-figsize', dest='figsize', type=float, nargs=2, required=False, + default=[9, 6], + help='figure size in inches width x height. 15 10 recommended for 1 year, 30 10 for 2 years ...') + + parser.add_argument('-title', dest='title', type=str, required=False, + default=None, + help='Plot title') + + args = parser.parse_args() + + ##### Download from NOAA ##### + + ##### Run Plotting function ##### + n = NOAAPlotter(args.infile, + location=args.location, + climate_filtersize=args.filtersize) + + n.plot_weather_series(start_date=args.start_date, + end_date=args.end_date, + show_snow_accumulation=args.snow_acc, + #kwargs_fig={'dpi':args.dpi, 'figsize':args.figsize}, + plot_extrema=True, + show_plot=args.show_plot, + save_path=args.save_path, + plot_tmin=args.t_range[0], + plot_tmax=args.t_range[1], + plot_pmax=args.p_range, + plot_snowmax=args.s_range, + dpi=args.dpi, + figsize=args.figsize, + title=args.title) + +if __name__ == "__main__": + main() diff --git a/noaaplotter/scripts/plot_monthly.py b/noaaplotter/scripts/plot_monthly.py new file mode 100644 index 0000000..3a3b52a --- /dev/null +++ b/noaaplotter/scripts/plot_monthly.py @@ -0,0 +1,72 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from noaaplotter.noaaplotter import NOAAPlotter +import argparse + +def main(): + """ + Main Function + :return: + """ + ##### Parse arguments ##### + parser = argparse.ArgumentParser(description='Parse arguments.') + + parser.add_argument('-infile', dest='infile', type=str, required=True, + help='input file with climate data') + + parser.add_argument('-start', dest='start_date', type=str, required=True, + help='start date of plot ("yyyy-mm-dd")') + + parser.add_argument('-end', dest='end_date', type=str, required=True, + help='end date of plot ("yyyy-mm-dd")') + + parser.add_argument('-loc', dest='location', required=False, + type=str, default=None, + help='Location name, must be in data file') + + parser.add_argument('-save_plot', dest='save_path', type=str, required=False, + default=None, + help='filepath for plot') + + parser.add_argument('-type', dest='type', type=str, required=True, + help='Attribute Type: {Temperature, Precipitation}', + default='Temperature') + + parser.add_argument('-trail', dest='trailing_mean', type=int, required=False, + default=None, + help='trailing/rolling mean value in months') + + parser.add_argument('-anomaly', dest='anomaly', required=False, + default=False, action='store_true', + help='show anomaly from climate') + + parser.add_argument('-dpi', dest='dpi', type=float, required=False, + default=100, + help='dpi for plot output') + + parser.add_argument('-plot', dest='show_plot', required=False, + default=False, action='store_true', + help='Location name, must be in data file') + + parser.add_argument('-figsize', dest='figsize', type=float, nargs=2, required=False, + default=[9, 4], + help='figure size in inches width x height. 9 4 recommended 30 years') + + args = parser.parse_args() + + ##### Run Plotting function ##### + n = NOAAPlotter(args.infile, + location=args.location) + + n.plot_monthly_barchart(args.start_date, + args.end_date, + information=args.type, + anomaly=args.anomaly, + trailing_mean=args.trailing_mean, + show_plot=args.show_plot, + dpi=args.dpi, + figsize=args.figsize, + save_path=args.save_path) + +if __name__ == "__main__": + main() diff --git a/noaaplotter/utils/__init__.py b/noaaplotter/utils/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/noaaplotter/utils/__pycache__/__init__.cpython-310.pyc b/noaaplotter/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa5ac97d18675176c424d7be9e74a065dde40f23 GIT binary patch literal 192 zcmd1j<>g`k0*6JB=^*+sh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o10bKeITqAVt5R zC_gJTxujS>D7CmCKd(4d-`z7fJ}5XIF5#J%o|{+{pI?%ZTBM(spO{#XlV4JjS`=?! sYJ|+!FD=Q;Db|mV&&lHNU$&f0Jozug8%>k literal 0 HcmV?d00001 diff --git a/noaaplotter/utils/__pycache__/dataset.cpython-310.pyc b/noaaplotter/utils/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d331c29b2927454d4eb0945f6ac6460e326b65ef GIT binary patch literal 13759 zcmbVTOOqT|R<6v-s>-geez%^MHEEB8-L~GwW~32D!jg=@Dr>A}uzG-`R5!c3TJ`eH zEJU6gBE88KJLaRG<_Gc=yO94|K3vQsAhvg&nZsoYn_nx*op@Kt6lr))Kab6yqIG|mN8QZqQ) zYF5qRJf-H<0?tLXsFrY^R?BJ+&Ly>1?ZbIS?Nd3cD>%=oL+VwW=e-4WSbgiO z%mZU>5q(F!CjBL4VjcnYjrQQo2^R4t+Z<^ z^I@shYeKl{v(HVhR`CJC%A3pdJ9?_bs@cwu}%%u-*3j zaL(QE0%G+1Ku7pE5H!3B=Hw2{Vu^17Be+2;#kpdImhaWqwT+$8Q*26To@DfXoSxk} zS@Uacb+WBnTV6HrPk!S0?N-zGPJVdxhWp74H@$JSxdFy%f+la}jozPa6y?u0dVeLOyRsW=pGfdlDagr> zeM+ye<6d+EvX`NZ1DQwWW5^L0gR+zDzAfBL>9`Kc22Q13cap7uTvQsKpF)II`Qo+9 z<&|uhYZF+{0CSk{G;5!AyeEdYmzlaYGMJUgLYf z7+{5uAZlCL$A&H`ik_vc8(eu#??ao{`xHe{MuURNJ}|yCR&w3X(-p3FnpHBrBRuTX z)`cG0`>ay~lX*?(hxL{QfrPD{R-3@SQ_-H&@PZW06-ZG6bS@oz%Ndlh^oGu1_BxNI zd*yoF1Isvg74(i*6ZKPVX$`(~C_WCQ)4?3beU1+a!3^A*aw<*bT)ckO5kW1l*rD0- zLrY{i@7=5Ufgf>yf$yT>(bH(c>8p))O9z*=#!C)!m?F{JKs}4gGdO%cGn+3OCGe?j zc7Ju(%^RZ#CU4*(um_n(*t9M4F}E%&R?6&NNy$X2J5~QqL1V z_>^WMUe~d7uCgww3Mbgqt_Bnd;QT8#O zrpic0whFY=?Ar%7WFt1`N4z82Ij$LiYzLnRKN_sizc*Btz}`l?{&? zDAC5EjGbzw*=z;QEf0#Q(^M&f5r)^VfC8E{>3*>aa|K+!(`exwY*w01tI6$*OfK{i z20d|eXs(=C2~{^+EzGLY1MK*)z5;t356AlNHRt;@;?74YItBGrTd#TAKj-}L)O|-& z0J`5z0m=|nPKPTI2Vhqe<&|l%+hMluH8strewB?_pd#Mi@kE7%dFbaxB?zYju3qblk9R5KxhjW&3*f8-oZ(2V;oGTfYF>hGk zSl@Kt*6f6c$`+Q=h&!_vWy!9Hk?u|hS?Untxdx*N*)sWjn#kD$5kAnRu1O6XLeuku-i7W-fXg7iaS;aRd_&b zwi1<(>+Jc#af_w^nd^}zNg=~n@RQLUqLU(m7@EpJGRl$N)p%0BVn4;&&j7rNX zOsShY+Bkkxg$2^fy>wsmg8SW+$l^%f4F z7Jh$jzj4TT0oP~9<(*7S2UsIwDs#gmK~+{+4^zU$x@I*cP_5j za8FlYG@)4-=`^4e=u3*DhZymsiY) zN)}%=mO9I%3=gU*C;uYqKlS}56NMUrMA7qTNec0Oq5882Fz$jxH^6P+9IN|ry1Xli z_Xp1+iYc5c-YwmsVLNj+B1@tt(}zZAqEF-9&vK{E96xiG>u01fSZN;PcUkKXc?PIJDyDj+j0>T{(S!n9FoiH#<|B*akx9F61i2j;Hc-dXuMu)9zXQn> zI>CQVJZ1XQ@RoaC3EW(Isq_M1HsEeKzYO&-;N~hHR z6Azil?@QhJv6q!!I7011C#HA z$MP)_YOY zIp$wDg0Pd3)`?5IM_QVErq8lr+z5+MYIa~6YgM<^tZ#?2lD2VGYuk?DD@$ZE*ezSGA_ZI8=Cm&o7?Y6GAT{wX& zbK(qzQ|W@j!Ulq`_N{Gwj#K|lHe3WfNfl->pJ-}XyegVQq1vi<8ckncCG2rFzk_DQ z3{ABj<{>_<&ai_SF3@;2qD`RG1^!R*j87$H+eeJvUs3umBu^tK2jq$Rib4Mee9spH z^0Z~%hct*qKyai?q=@M{n2@H}=VD@6cnx8Y%8A4k0$UXzaZ}*6NK!^&ihVGVq~ug2 zDLp*XF%0=Sl9W-H1P@^#DV&3f9djA7sFo>_TUoV7?R`ke+}AyxO6IvS zLN}bd7}(AvaJa$kr1Qko9?gwHu<#=&wFi7fwk&SQ*K`Mv-OnOh0zowH4J&i|e7(}R zr7G|LGtm)xIn%wotN)R5+_snqSfLZ=VT3Dn|9x~KjN%XKUjxc8ca01m>4b|ISSv;{ zPx)V)iu#G{uh}B|khi#($UHA0lucGy*cw~rPdJSX$N_-^MY$Wh&Q=(P){wtuhrF5&kvPblb zzD67pCroP(P7;#9%DWflD(yB@VIDDJ$MZG)v>~;lKjzqVHq?lkL8+)6{Rw-ba>V&D zo9!qR7TuH8T&-S3vsQ`{vo06o4=p5I9Nofy#6$ieG6s~HsQZ?WOtX(r_KJRAC1 znt{4zkgAv1P!KeIX-!S4FQ5raV&kev%x<=n{#}l5u<7}S(qo!h(|aLu$xquE%P@0{ z`-?xf_t|-S#x9`E*^4+&+5DAEyJ(n8vxYob#ghX+vfTw7J~MM^Rfp>qh@g~s5v!{v#l1?BFn< zivOM!&zw4a>iFq1$KN^qHMRe0mCHbJ2S(Lhp+9zS}>i=$`mAVzSrJFo?= zC55b<=^YFw_${Lu-w5vPK7|RNZ7uY9#hmB_AvkBOA*z~ws z7@E@a?>HopRV)0S+Pv6H|80PfKOSsD3&gobCMyO>dcf?EFiA}Kl=2^u zp+^k#%qTL9NEQ)BqC+H);V;05t1L4WOeRzM7k~-~5*7Q3#y#pUcszwq(h{jrlZ(TqIy6@ApQBOiOL(%%831uFRb5G0>-kO{PY(LJ^sf%~oU070qi>0Jpy=|_>E%Girg z>~>b43gYh(wj|p+4XsWB1BQYKG83938~9@3k>m<-&P%QfdOyk}P74G*vA&%3c1pv3 z0fo2V=vKYXM4{|Kq;pa0OHC#|Y{F+zG^c)}h0jz!)Qksa&H35}SM z?_-s+Gv$>+W!f+pGX1F)>>SuF% zQEZ#V?vzaP7yGl;|8?KmZI^n^(!d^(>3N8jU|su(*}XZIesT6yd;B7zpK6@5Q*AZc zbuaLmD9xsikFfA_>fI%`QMnDzQPA(_O2t(eQ}P&gIr^T%cMQ6TMd%irHk#FyqTb|1 zjSUM;QRqYmN7Gzusu@kz*QBw2$foBmN>6;o25$Tt4xf>bohh33Ap|@{d)b~UO>5q* z;u!d4adiQQ&k!~ZboRs_24>?A1FiVOKqPqN!@zv}VPGNtFwl-a44jHT3@oZ;*rB}x zANENKp}svxnjR@ut1SH|KlVvnk;^mah#wVwl!wm&;#en(UKHzC66-($DSh!ra|Ril z?smA4#L4c%K_C9ykdOy(HS6&kf)2kzA-v5B+s=fz&17pYsE-JVWgM zkG{q(dv~u~|1l@{cxrG{S_B0JjeZclRD$H5f(9Bn%8go!FPS@fd-AaUB0JoGr2 z1P@`DhbByvhwkAi#_yu@%slism;?`DqKZxAUjKzaM;YlJ#(vkD8EIln9PniJv+-N! z`rL1P9WCznN2z(~x9+Kzhp%{sYdFvGsUq$@i*1r@r@qLZNa!W@-eYr_&2xQXsch(v z09RgF=u2(>HJ^OR^yP6r%dDkl0r5$;=O^P!aC!+5=X+{MZ-j?ZQVdj5q9n>kWNs=N zWbEKW_x_l}WG1uw>${5F^Y9M{;D7}D`B({npVwDe0WZ*@HZhv-)eu!gZ( zf-bafvg#t5>LP((PDy<&uFeEarHJ<>)fF{WI2Y^QB+}M@LyUvV3J;9xk9eD0EY{}) zQA{xQnoK($F$Yx?LVmM*?&UdQWJB=EUhJcPlFoOWOZX6X?{bdf9Td99cb!LnOv7*D z(%57bUu-@r?scJkJ}OVWPwGYSP?7&J10R5z Mna}6XEdSB}1I1v*9{>OV literal 0 HcmV?d00001 diff --git a/noaaplotter/utils/__pycache__/download_utils.cpython-310.pyc b/noaaplotter/utils/__pycache__/download_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e14e75f7eb3427a674c5cb1baf4c18d50a41876f GIT binary patch literal 5066 zcmaJ_TW{RP73Pp!F898=*p}=#*`$e^ZDYw!Y9KWf!%C#2N*vj>)VL;)67&wW=9){c zhqJP^;1(_H!s>&N_Blx*73hQiKz|G$`qG#B9|XD7{mxL56*$cbJaeBpJaf+ZJ}R!& z${K#3{^Mr%pT{-r-_$tzGtjt))|=r(f6|#`TB*RR+~UQD%$edg zFFn+pXzU0x>C2$+XC;!{52Z)l*F52cp&w>Nba`7oZ?kO1 zOOl`)xzVuii=g9TsVBDve)7b`ZXKKU-2Y9ZEaD-PB!6R?Cbb^h)l+SciHg*A4XQzD zwrg^o8xOT@ZAp8;_So2#gSKHOEX%pIIc*0x5Nvwyp0v7u+N_j z^i;T&R9g2_ zPJh70HC`cY(i*kLHQCs0rZs3)mlJ6nJ&m-{n}l&|4>exrjU7F0@aBSs-YGu8CwDMI zHhR-|j%}OUj8E<8yxC=ZdPg5O(X=8VWH>oEtyE;FHy^r$~ zJLY&Io#@S`6Y2~T<;(&!Mmf7*o?Otn8b9@SHl9o;(#ctEJe5xEo|Ja)6xRZ>=M=2* zLaKePeW{a8(y1O=d_tb?y~t1R7`Ouq4)NmZ*X&FO_jLHvQKM^J!;`?UVXlWTVP~9|ZiJ#~PeRpF5Z7T#6D_De-kMA0)wm&ksc0^E)z`zwRf4I7uc`ez)IBZd%}(7ZC}hsvF8oKSV~{G=PtZ?v^_tRL72?*_+Avo9(Pr9Zm^Y= z7s4Qsowz@^)FRdCJ&tRmGlw%bMBI02=edP2?s#Ex36+B-U)!(UJo{Sz>}&kVf8e`< zEHZj|Y5$ZU>bB@*Tk^Juhuzz))vJq(t;K6A2o5W9d-1L0IUAA@-XcX_hxjAeurX#* zlkm2bZEM?wZJj)2+jvm?L}S`rB#JfdF+*&M0#;-d9x82JZxB4z1!bU&1&^()bYS)* zv;BxuH2PVcUW}%?GQQ(SSveVmfpn8#}=LQh%lN@4Rx@*HaeZ0Pi?t0w8#y)d zhaibJV$t_xW+zf?P$qoLGIsx&3kzi+dm;4tx43udB9xc++E$jn|GMB*n{mYGQ`WM-}| ze!QHS>x-X!keO?%HVv%)|GkrZ@MnJMq`ex?t2W_9AI<4IvrcflJB!mX?# z{KS{8(k3e{A>`f{Uf*}h|0(Wq0w)mG*R2frSS{>23s}FHT~mI6;E)U1lRaxDk#Or>KuyS2Q7jy$bbQ z91J4?LJ?pTTA76!uFyd)AVWBnJo=QrScTVsnu$x<1_^S*vH6BO2sUF0dVqI83x{=m z?vyuz$O~QgL82z9X6}r~NDbwTlh6CzBnFcYA*z)!o7Ss(m6i2Ly{_B(3#@6{-`0y| z!`4k^v+~obd6G>s6OXR`tZ&PfjgkDX$?9rUtYW(kVfn$24xu#k9-ic15c}9}at6#U z?zal6Wv4|6tmpa;JE4tBc`LO}fV|8c^b)e$cT906wa37CFapA@N?P8pRk;ZyF7QIi zQk_s9xD8zAr5zi%Z1sw1Ev@r%Y6Of|c8qajR40Z2OfF-ermXB%Q{d}(;s%g&4S53S zSlfyivMo45ombP!=Nhl2Rml361wu*Q7*E_(v+rr`=ID)b3%`wF)S;Z&l5y+6^exIB zhcKX~<=KUaU&1ot6^POK+fojai}UmM?%kVAXpCt2%FE#_z=E+Cq zH_wk6+zZ03v&pazGz|PiG_a9l^?kTp2iYKrqqYr_n7b5Q2bFcAVh$3R%IwG%It+pGYLMEUTA>JkdK-I*H5Sht?4sxyV z?+$&+DMlABc9lj7gyu*&(!VdoC0g(e>QP#k_i&@(6BNA6=;eLGPvr2ieQv5d60@dxRUZ;Mr#eN$JN~!J+ zMd;KI8oBi{CNhS+h8#8wkFX#WwSuDOI-Z1Hu5Q*@T|bVHGZCm|W*T`Y9+lQt(`DVD zP&TdlH#%!F^QmQ+O#eph)ghK}@FR0j9#x_FJ3tQM;2zwZvvC1|se^a}7rVg2v4t@+ zEdU=2sRe1JMyjg-E^8lUKbkf+t%DWrnhti~s22<53&M{8;tt==@B=EsPR6OraKe3qZ&r@knFgoB!c ze7}IP34R(>`bAJ_Dqq?!TZ&S@{ESlPgq?_WZzeUdFK9n33Yyk_Wp&hCe(&QO@6Rt^ zU%W8Cw77ER=IE6*3b?}*fnY%7Aevh=M+MQ`=%b~5O4;grTP^9`@mpT&-ff`JT>1Q# zxY+vrg;oN0k9bl(e{M@8=y~f|*M}iSOW70!tVrwAW!1m4f;*qz1)Rn_~*iBm){q! zl9EIq#W@mmnc_T&HzC>u;Zl8rM2Cb=f|K|uM5g8BaGp98Eu2HGJ)7x1*wqS9A@D+c zEI9TDeoSby?37J;q=FGws_q~BL zO^Mv=ND!U-e(2_wc8W;&D-jUnFvVx)9e*nkw@690E-s^VDvBJrlHf*NNKQHzX%1Xo zaWhJFVu{2D5bbHl4uOAi$bF|2#!**Qb5^JL-3n$o_VTiOW%1_nbw%ZV5I-*cKrZY`&1li$0vypPx%k#?{-fCZ_VZ&^lv-p0T_`&2xy zh=Ifdv3};#fgM7{| z2mT2sZu|y*hOeABao>s8NxNl82v_m*+VA7{dHhz1Xf&n}jHRz1yeAeyKh2^z%COjm zC$7Up5iNZ|M~IdWQG3iaxzQMdz&yL~#FY|4KKfK10ROBkw`qA$rej-(RshEVCjhHk zXi$=msRi*Q>`ejI0PBDaz$V}=z}tY+zy(d(g4l3p01fvJpyAE}8txq6kUKzh9<)Da zT1L}o=bAmb0I~722x#iv1^l1(((ugk@XWp8nfrjo!vjFjre+OZjNi8XR@dIMAYziJ zM6#oa1GtnM2#h1;dg+(lP1minQWHgn0*S~uQF=K7JZi1J0KE2htS zGLmkT^)6k$kuS93I0#Pk_zpBS)>g)klOS!~q#IH?J2H18r_ncf6?DxHyFx-gje3=Y zA|Sl4YY`U;pIs2?#j*ug&ZbM|_^}^RCz9eLb5-mdus9N7%$(PIhxp(SUnTZJFCY>N z)nn2L1tF%OViI?ruNK!%qI@1ZnOqh=i~4#YW-5trkpKNSOG4!+ZkTuU4NR4iHT+#i zR;jXR{kqzpv3{Gln1_{RU5*$gC+DHq+jfrE7tNzN$Z2aU5H#WJC43oBY(55-F-Hl~_W@)w^-dxv%c7 z6JniTD6jns5K_WhU-@(Pffrure}F0~F|%hw5`oy$&Cblu&F#*|Z;MXHH(>no^UmmI z%P@XJ;q)*;xC^iN8VEHgNsR%4*-VYh9GIlmSp!S2*#le8&cLM>wSO=M4eC%AW{)u-DTbxC_AFUD*r zMeh+4WswWk`}{#adeo2pUU-m?vY1B&Zm^dZaa^W_l#EC3e{kot+?&WG6*XNik5mid zAy|a}78fzh_Tyq*P`|PgLR&Aa+0rzS z-f^>98vEd9S+;bHxQIEtc#HQ)( zJt<4E(d!)^9!aeJ7Y~O_h@N2lAOWri9TxWvZcmpe6xq?5m}Jne2@}ch z;8wXAlROzJdn}5)>!}8s3nl?Mfq8sXGBE(C0MLQ%S5BH_39NyF%PlCLL4)NK8Rbm! zL<;5Bkfj`1?6Vwt#MR9tRAZ0DlyNb=`6T8!6#GWF4X1{mgz*9FEPk45Fbhk@lY)j* z8>2F9t%v>nq%7fs_3iG``3l7QLPOc^9Lka*fE;JfCQQI?tPz~<3fedrTcK>pzLUx< zscOQmxS*t7ZOvD;vRMoG)$%{SBCQ<+3y*WTH9u7};pre) z2=nL?t_W2wXJV4d^B~irX@unw#bvSy!4RJS3G9Fb<|X1mPx_`$Y|A9)ANLR2N1aRD zH>TS*p?~e*%_Wd+V!q*bz|9$K!faq)>ps2j&M6YccPmE_)-9bCRJ%d#Ma&ud`#Z$b309_lyBAsMdhB_zwUHHSmmqnCMGANp1 z*W@dpQWn^qkHKmzL^8@3rg-nfI 1): + raise ValueError( + 'There is more than one location in the dataset. Please choose a location using the -loc option! ' + 'Valid Location identifiers: {0} ' + .format(self.data['NAME'].unique())) + else: + filt = self.data['NAME'].str.lower().str.contains(self.location.lower()) + if filt.sum() == 0: + raise ValueError('Location Name is not valid! Valid Location identifiers: {0}' + .format(self.data['NAME'].unique())) + + def _update_datatypes(self): + """ + define 'DATE' as datetime + :return: + """ + self.data['DATE'] = pd.to_datetime(self.data['DATE']) + + def _get_datestring(self): + """ + write specific date formats + :return: + """ + self.data['DATE_MD'] = self.data['DATE'].dt.strftime('%m-%d') + self.data['DATE_YM'] = self.data['DATE'].dt.strftime('%Y-%m') + self.data['DATE_M'] = self.data['DATE'].dt.strftime('%m') + + def _get_tmean(self): + """ + calculate mean daily temperature from min and max + :return: + """ + # TODO: check for cases where TMIN and TMAX are empty (e.g. Schonefeld). There TAVG is the main field + self.data['TMEAN'] = self.data[['TMIN', 'TMAX']].mean(axis=1) + + def _remove_feb29(self): + """ + Function to remove February 29 from the data + :return: + """ + if self.remove_feb29: + self.data = self.data[self.data['DATE_MD'] != '02-29'] + + def _filter_to_location(self): + """ + Filter dataset to the defined location + :return: + """ + if self.location: + filt = self.data['NAME'].str.lower().str.contains(self.location.lower()) + if len(filt) > 0: + self.data = self.data.loc[filt] + else: + raise ValueError('Location Name is not valid') + + def filter_to_climate(self, climate_start, climate_end): + """ + Function to create filtered dataset covering the defined climate normal period + :return: + """ + df_clim = self.data[(self.data['DATE'] >= climate_start) & (self.data['DATE'] <= climate_end)] + return df_clim + + @staticmethod + def get_monthly_stats(df): + """ + calculate monthly statistics + :param df: + :type df: pandas.DataFrame + :return: + """ + df_out = pd.DataFrame() + df_out['tmean_doy_mean'] = df[['DATE', 'TMEAN']].groupby(df['DATE_YM']).mean(numeric_only=numeric_only).TMEAN + df_out['tmean_doy_std'] = df[['DATE', 'TMEAN']].groupby(df['DATE_YM']).std(numeric_only=numeric_only).TMEAN + df_out['tmax_doy_max'] = df[['DATE', 'TMAX']].groupby(df['DATE_YM']).max(numeric_only=numeric_only).TMAX + df_out['tmax_doy_std'] = df[['DATE', 'TMAX']].groupby(df['DATE_YM']).std(numeric_only=numeric_only).TMAX + df_out['tmin_doy_min'] = df[['DATE', 'TMIN']].groupby(df['DATE_YM']).min(numeric_only=numeric_only).TMIN + df_out['tmin_doy_std'] = df[['DATE', 'TMIN']].groupby(df['DATE_YM']).std(numeric_only=numeric_only).TMIN + if 'SNOW' in df.columns: + df_out['snow_doy_mean'] = df[['DATE', 'SNOW']].groupby(df['DATE_YM']).mean(numeric_only=numeric_only).SNOW + df_out['prcp_sum'] = df[['DATE', 'PRCP']].groupby(df['DATE_YM']).sum(numeric_only=numeric_only).PRCP + return df_out + + @staticmethod + def get_monthy_climate(df): + """ + :param df: + :return: + """ + df_out = pd.DataFrame() + df = df.data + df['Month'] = df.reset_index().apply(lambda x: int(x['DATE_MD'][:2]), axis=1).values + df_out['tmean_mean'] = df[['Month', 'TMEAN']].groupby(df['Month']).mean(numeric_only=numeric_only).TMEAN + df_out['tmean_std'] = df[['Month', 'TMEAN']].groupby(df['Month']).std(numeric_only=numeric_only).TMEAN + df_out['tmax_max'] = df[['Month', 'TMAX']].groupby(df['Month']).max(numeric_only=numeric_only).TMAX + df_out['tmax_std'] = df[['Month', 'TMAX']].groupby(df['Month']).std(numeric_only=numeric_only).TMAX + df_out['tmin_min'] = df[['Month', 'TMIN']].groupby(df['Month']).min(numeric_only=numeric_only).TMIN + df_out['tmin_std'] = df[['Month', 'TMIN']].groupby(df['Month']).std(numeric_only=numeric_only).TMIN + if 'SNOW' in df.columns: + df_out['snow_mean'] = df[['Month', 'SNOW']].groupby(df['Month']).mean(numeric_only=numeric_only).SNOW + unique_years = len(np.unique(df.apply(lambda x: parse_dates_YM(x['DATE_YM']).year, axis=1))) + df_out['prcp_mean'] = df[['Month', 'PRCP']].groupby(df['Month']).mean(numeric_only=numeric_only).PRCP * unique_years + return df_out.reset_index(drop=False) + + +class NOAAPlotterDailyClimateDataset(object): + # TODO: make main class sub subclasses for daily/monthly + def __init__(self, daily_dataset, start='1981-01-01', end='2010-12-31', filtersize=7, impute_feb29=True): + """ + :param start: + :param end: + :param filtersize: + :param impute_feb29: + """ + self.start = parse_dates(start) + self.end = parse_dates(end) + self.filtersize = filtersize + self.impute_feb29 = impute_feb29 + self.daily_dataset = daily_dataset + self.data_daily = None + self.data = None + self.date_range_valid = False + + # validate date range + self._validate_date_range() + # filter daily to date range + self._filter_to_climate() + # calculate daily statistics + self._calculate_climate_statistics() + # mean imputation for 29 February + self._impute_feb29() + # filter if desired + self._run_filter() + # make completeness report + + def _validate_date_range(self): + if self.daily_dataset.data['DATE'].max() >= self.end: + if self.daily_dataset.data['DATE'].min() <= self.end: + self.date_range_valid = True + else: + raise ('Dataset is insufficient to calculate climate normals!') + + def _filter_to_climate(self): + """ + calculate climate dataset + :return: + """ + df_clim = self.daily_dataset.data[(self.daily_dataset.data['DATE'] >= self.start) & + (self.daily_dataset.data['DATE'] <= self.end)] + df_clim = df_clim[(df_clim['DATE_MD'] != '02-29')] + self.data_daily = df_clim + + def _calculate_climate_statistics(self): + """ + Function to calculate major statistics + :param self.data_daily: + :type self.data_daily: pandas.DataFrame + :return: + """ + df_out = pd.DataFrame() + df_out['tmean_doy_mean'] = self.data_daily[['DATE', 'TMEAN']].groupby(self.data_daily['DATE_MD']).mean(numeric_only=numeric_only).TMEAN + df_out['tmean_doy_std'] = self.data_daily[['DATE', 'TMEAN']].groupby(self.data_daily['DATE_MD']).std().TMEAN + df_out['tmean_doy_max'] = self.data_daily[['DATE', 'TMEAN']].groupby(self.data_daily['DATE_MD']).max(numeric_only=numeric_only).TMEAN + df_out['tmean_doy_min'] = self.data_daily[['DATE', 'TMEAN']].groupby(self.data_daily['DATE_MD']).min(numeric_only=numeric_only).TMEAN + df_out['tmax_doy_max'] = self.data_daily[['DATE', 'TMAX']].groupby(self.data_daily['DATE_MD']).max(numeric_only=numeric_only).TMAX + df_out['tmax_doy_std'] = self.data_daily[['DATE', 'TMAX']].groupby(self.data_daily['DATE_MD']).std().TMAX + df_out['tmin_doy_min'] = self.data_daily[['DATE', 'TMIN']].groupby(self.data_daily['DATE_MD']).min(numeric_only=numeric_only).TMIN + df_out['tmin_doy_std'] = self.data_daily[['DATE', 'TMIN']].groupby(self.data_daily['DATE_MD']).std().TMIN + if 'SNOW' in self.data_daily.columns: + df_out['snow_doy_mean'] = self.data_daily[['DATE', 'SNOW']].groupby(self.data_daily['DATE_MD']).mean(numeric_only=numeric_only).SNOW + self.data = df_out + + def _impute_feb29(self): + """ + Function for mean imputation of February 29. + :return: + """ + if self.impute_feb29: + self.data.loc['02-29'] = self.data.loc['02-28':'03-01'].mean(axis=0) + self.data.sort_index(inplace=True) + + def _run_filter(self): + """ + Function to run rolling mean filter on climate series to smooth out short fluctuations + :return: + """ + if self.filtersize % 2 != 0: + data_roll = pd.concat([self.data.iloc[-self.filtersize:], + self.data, + self.data[:self.filtersize]]).rolling(self.filtersize).mean() + self.data = data_roll[self.filtersize: -self.filtersize] + + def _make_report(self): + """ + Function to create report on climate data completeness + :return: + """ + # input climate series (e.g. 1981-01-01 - 2010-12-31) + pass + + +class NOAAPlotterMonthlyClimateDataset(object): + def __init__(self, daily_dataset, start='1981-01-01', end='2010-12-31', impute_feb29=True): + self.daily_dataset = daily_dataset + self.monthly_aggregate = None + self.start = parse_dates(start) + self.end = parse_dates(end) + self.impute_feb29 = impute_feb29 + self._validate_date_range() + + def _validate_date_range(self): + if self.daily_dataset.data['DATE'].max() >= self.end: + if self.daily_dataset.data['DATE'].min() <= self.end: + self.date_range_valid = True + else: + raise ('Dataset is insufficient to calculate climate normals!') + + def _filter_to_climate(self): + """ + calculate climate dataset + :return: + """ + df_clim = self.daily_dataset.data[(self.daily_dataset.data['DATE'] >= self.start) & + (self.daily_dataset.data['DATE'] <= self.end)] + df_clim = df_clim[(df_clim['DATE_MD'] != '02-29')] + self.data_daily = df_clim + + def filter_to_date(self): + """ + calculate climate dataset + :return: + """ + df_clim = self.daily_dataset.data[(self.daily_dataset.data['DATE'] >= self.start) & + (self.daily_dataset.data['DATE'] <= self.end)] + df_clim = df_clim[(df_clim['DATE_MD'] != '02-29')] + return df_clim + + def _impute_feb29(self): + """ + Function for mean imputation of February 29. + :return: + """ + pass + + def calculate_monthly_statistics(self): + """ + Function to calculate monthly statistics. + :return: + """ + + df_out = pd.DataFrame() + data_filtered = self.filter_to_date() + df_out['tmean_doy_mean'] = data_filtered[['DATE', 'TMEAN']].groupby(data_filtered['DATE_YM']).mean(numeric_only=numeric_only).TMEAN + df_out['tmean_doy_std'] = data_filtered[['DATE', 'TMEAN']].groupby(data_filtered['DATE_YM']).std(numeric_only=numeric_only).TMEAN + df_out['tmax_doy_max'] = data_filtered[['DATE', 'TMAX']].groupby(data_filtered['DATE_YM']).max(numeric_only=numeric_only).TMAX + df_out['tmax_doy_std'] = data_filtered[['DATE', 'TMAX']].groupby(data_filtered['DATE_YM']).std(numeric_only=numeric_only).TMAX + df_out['tmin_doy_min'] = data_filtered[['DATE', 'TMIN']].groupby(data_filtered['DATE_YM']).min(numeric_only=numeric_only).TMIN + df_out['tmin_doy_std'] = data_filtered[['DATE', 'TMIN']].groupby(data_filtered['DATE_YM']).std(numeric_only=numeric_only).TMIN + if 'SNOW' in data_filtered.columns: + df_out['snow_doy_mean'] = data_filtered[['DATE', 'SNOW']].groupby(data_filtered['DATE_YM']).mean(numeric_only=numeric_only).SNOW + df_out['prcp_sum'] = data_filtered[['DATE', 'PRCP']].groupby(data_filtered['DATE_YM']).sum(numeric_only=numeric_only).PRCP + self.monthly_aggregate = df_out + + def calculate_monthly_climate(self): + """ + Function to calculate monthly climate statistics. + :return: + """ + df_out = pd.DataFrame() + data_filtered = self.filter_to_date() + + data_filtered['DATE'] = data_filtered.apply(lambda x: parse_dates_YM(x['DATE_YM']), axis=1) + data_filtered['Month'] = data_filtered.apply(lambda x: parse_dates_YM(x['DATE_YM']).month, axis=1) + data_filtered['Year'] = data_filtered.apply(lambda x: parse_dates_YM(x['DATE_YM']).year, axis=1) + + df_out['tmean_doy_mean'] = data_filtered[['DATE', 'TMEAN']].groupby(data_filtered['Month']).mean(numeric_only=numeric_only).TMEAN + df_out['tmean_doy_std'] = data_filtered[['DATE', 'TMEAN']].groupby(data_filtered['Month']).std(numeric_only=numeric_only).TMEAN + df_out['tmax_doy_max'] = data_filtered[['DATE', 'TMAX']].groupby(data_filtered['Month']).max(numeric_only=numeric_only).TMAX + df_out['tmax_doy_std'] = data_filtered[['DATE', 'TMAX']].groupby(data_filtered['Month']).std(numeric_only=numeric_only).TMAX + df_out['tmin_doy_min'] = data_filtered[['DATE', 'TMIN']].groupby(data_filtered['Month']).min(numeric_only=numeric_only).TMIN + df_out['tmin_doy_std'] = data_filtered[['DATE', 'TMIN']].groupby(data_filtered['Month']).std(numeric_only=numeric_only).TMIN + if 'SNOW' in data_filtered.columns: + df_out['snow_doy_mean'] = data_filtered[['DATE', 'SNOW']].groupby(data_filtered['Month']).mean(numeric_only=numeric_only).SNOW + df_out['prcp_sum'] = data_filtered[['DATE', 'PRCP']].groupby(data_filtered['Month']).mean(numeric_only=numeric_only).PRCP * 30 + # df_out = df_out.set_index('DATE_YM', drop=False) + self.monthly_climate = df_out + + def _make_report(self): + """ + Function to create report on climate data completeness + :return: + """ + # input climate series (e.g. 1981-01-01 - 2010-12-31) + + pass \ No newline at end of file diff --git a/noaaplotter/utils/download_utils.py b/noaaplotter/utils/download_utils.py new file mode 100755 index 0000000..39f8e98 --- /dev/null +++ b/noaaplotter/utils/download_utils.py @@ -0,0 +1,177 @@ +import csv +import datetime as dt +import json +import os +from datetime import datetime, timedelta + +import ee +import geemap +import numpy as np +import pandas as pd +import requests +import tqdm +from joblib import Parallel, delayed + +from noaaplotter.utils.utils import assign_numeric_datatypes + + +def download_from_noaa( + output_file, + start_date, + end_date, + datatypes, + loc_name, + station_id, + noaa_api_token, + n_jobs=4, +): + # remove file if exists + if os.path.exists(output_file): + os.remove(output_file) + # Make query string + dtypes_string = "&".join([f"datatypeid={dt}" for dt in datatypes]) + # convert datestring to dt + dt_start = datetime.strptime(start_date, "%Y-%m-%d") + dt_end = datetime.strptime(end_date, "%Y-%m-%d") + # calculate number of days + n_days = (dt_end - dt_start).days + # calculate number of splits to fit into 1000 lines/rows + split_size = np.floor(1000 / len(datatypes)) + # calculate splits + split_range = np.arange(0, n_days, split_size) + # Data Loading + print("Downloading data through NOAA API") + datasets_list = Parallel(n_jobs=n_jobs)( + delayed(dl_noaa_api)( + i, datatypes, station_id, noaa_api_token, start_date, end_date, split_size + ) + for i in tqdm.tqdm(split_range[:]) + ) + # drop empty/None from datasets_list + datasets_list = [i for i in datasets_list if i is not None] + + # Merge subsets and create DataFrame + df = pd.concat(datasets_list) + + df_pivot = assign_numeric_datatypes(df) + df_pivot["DATE"] = df_pivot.apply( + lambda x: datetime.fromisoformat(x["DATE"]).strftime("%Y-%m-%d"), axis=1 + ) + + df_pivot = df_pivot.reset_index(drop=False) + dr = pd.DataFrame(pd.date_range(start=start_date, end=end_date), columns=["DATE"]) + dr["DATE"] = dr["DATE"].astype(str) + df_merged = pd.concat( + [df_pivot.set_index("DATE"), dr.set_index("DATE")], + join="outer", + axis=1, + sort=True, + ) + df_merged["DATE"] = df_merged.index + df_merged["NAME"] = loc_name + df_merged["TAVG"] = None + df_merged["SNWD"] = None + final_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] + df_final = df_merged[final_cols] + df_final = df_final.replace({np.nan: None}) + print(f"Saving data to {output_file}") + df_final.to_csv(output_file, index=False, quoting=csv.QUOTE_ALL) + return 0 + + +def dl_noaa_api(i, dtypes, station_id, Token, date_start, date_end, split_size): + """ + function to download from NOAA API + """ + dt_start = dt.datetime.strptime(date_start, "%Y-%m-%d") + dt_end = dt.datetime.strptime(date_end, "%Y-%m-%d") + + split_start = dt_start + timedelta(days=i) + split_end = dt_start + timedelta(days=i + split_size - 1) + if split_end > dt_end: + split_end = dt_end + + date_start_split = split_start.strftime("%Y-%m-%d") + date_end_split = split_end.strftime("%Y-%m-%d") + + # make the api call + request_url = "https://www.ncei.noaa.gov/access/services/data/v1" + request_params = dict( + dataset="daily-summaries", + dataTypes=dtypes, # ['PRCP', 'TMIN', 'TMAX'], + stations=station_id, + limit=1000, + startDate=date_start_split, + endDate=date_end_split, + units="metric", + format="json", + ) + r = requests.get(request_url, params=request_params, headers={"token": Token}) + + # workaround to skip empty returns (no data within period) + try: + # load the api response as a json + d = json.loads(r.text) + result = pd.DataFrame(d) + except json.JSONDecodeError: + print( + f"Warning: No data available for period {date_start_split} to {date_end_split}. Skipping." + ) + result = None + return result + + +def download_era5_from_gee(latitude, longitude, end_date, start_date, output_file): + ee.Initialize() + EE_LAYER = "ECMWF/ERA5/DAILY" + location = ee.Geometry.Point([longitude, latitude]) + # load ImageCollection + col = ( + ee.ImageCollection(EE_LAYER) + .filterBounds(location) + .filterDate(start_date, end_date) + ) + # Download data + print("Start downloading daily ERA5 data.") + print( + "Download may take a while.\n1yr: ~5 seconds\n10yrs: ~35 seconds\n50yrs: ~8 min" + ) + result = geemap.extract_pixel_values(col, region=location) + out_dict = result.getInfo() + df_gee = pd.DataFrame(data=[out_dict.keys(), out_dict.values()]).T + # parse dates and values + df_gee["time"] = df_gee[0].apply(lambda x: f"{x[:4]}-{x[4:6]}-{x[6:8]}") + df_gee["feature"] = df_gee[0].apply(lambda x: x[9:]) + df_gee["value"] = df_gee[1] + df = df_gee.pivot_table( + values="value", columns=["feature"], index="time" + ) # .reset_index(drop=False) + # #### recalculate values + df_new = pd.DataFrame(index=df.index) + temperature_cols = [ + "mean_2m_air_temperature", + "minimum_2m_air_temperature", + "maximum_2m_air_temperature", + "dewpoint_2m_temperature", + ] + precipitation_cols = ["total_precipitation"] + df_joined = ( + df_new.join(df[temperature_cols] - 273.15) + .join(df[precipitation_cols] * 1e3) + .reset_index(drop=False) + ) + # Create Output + rename_dict = { + "time": "DATE", + "total_precipitation": "PRCP", + "mean_2m_air_temperature": "TAVG", + "maximum_2m_air_temperature": "TMAX", + "minimum_2m_air_temperature": "TMIN", + } + df_renamed = df_joined.rename(columns=rename_dict) + df_renamed["NAME"] = "" + df_renamed["STATION"] = "" + df_renamed["SNWD"] = "" + output_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] + df_save = df_renamed[output_cols].astype(str) + df_save.to_csv(output_file, index=False) diff --git a/noaaplotter/utils/plot_utils.py b/noaaplotter/utils/plot_utils.py new file mode 100755 index 0000000..08b591b --- /dev/null +++ b/noaaplotter/utils/plot_utils.py @@ -0,0 +1,49 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +######################## +# Credits here +# author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research +# contact: ingmar.nitze@awi.de +# version: 2021-09-11 + +######################## + +# TODO: move to external file +def setup_monthly_plot_props(information, anomaly): + plot_kwargs = {} + if information == 'Temperature': + plot_kwargs['cmap'] = 'RdBu_r' + plot_kwargs['fc_low'] = '#4393c3' + plot_kwargs['fc_high'] = '#d6604d' + if anomaly: + plot_kwargs['value_column'] = 'tmean_diff' + plot_kwargs['y_label'] = 'Temperature departure [°C]' + plot_kwargs['title'] = 'Monthly departure from climatological mean (1981-2010)' + plot_kwargs['legend_label_above'] = 'Above average' + plot_kwargs['legend_label_below'] = 'Below average' + else: + plot_kwargs['value_column'] = 'tmean_doy_mean' + plot_kwargs['y_label'] = 'Temperature [°C]' + plot_kwargs['title'] = 'Monthly Mean Temperature' + plot_kwargs['legend_label_above'] = 'Above freezing' + plot_kwargs['legend_label_below'] = 'Below freezing' + + elif information == 'Precipitation': + plot_kwargs['fc_low'] = '#d6604d' + plot_kwargs['fc_high'] = '#4393c3' + if anomaly: + plot_kwargs['cmap'] = 'RdBu' + plot_kwargs['value_column'] = 'prcp_diff' + plot_kwargs['y_label'] = 'Precipitation departure [mm]' + plot_kwargs['title'] = 'Monthly departure from climatological mean (1981-2010)' + plot_kwargs['legend_label_above'] = 'Above average' + plot_kwargs['legend_label_below'] = 'Below average' + else: + plot_kwargs['cmap'] = 'Blues' + plot_kwargs['value_column'] = 'prcp_sum' + plot_kwargs['y_label'] = 'Precipitation [mm]' + plot_kwargs['title'] = 'Monthly Precipitation' + plot_kwargs['legend_label_below'] = '' + plot_kwargs['legend_label_above'] = 'Monthly Precipitation' + return plot_kwargs \ No newline at end of file diff --git a/noaaplotter/utils/utils.py b/noaaplotter/utils/utils.py new file mode 100755 index 0000000..ad3b827 --- /dev/null +++ b/noaaplotter/utils/utils.py @@ -0,0 +1,110 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +######################## +# Credits here +# author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research +# contact: ingmar.nitze@awi.de +# version: 2020-12-09 + +######################## +import datetime as dt +from datetime import timedelta +import requests, json +import pandas as pd + + +#import datetime + + +def parse_dates(date): + """ + + :param date: + :return: + """ + if isinstance(date, str): + return dt.datetime.strptime(date, '%Y-%m-%d') + elif isinstance(date, dt.datetime) or isinstance(date, dt.date): + return date + else: + raise ('Wrong date format. Either use native datetime format or "YYYY-mm-dd"') + + +def calc_trailing_mean(df, length, feature, new_feature): + """ + :param df: + :param length: + :param feature: + :param new_feature: + :return: + + """ + df[new_feature] = df[feature].rolling(length).mean() + return df + + +def parse_dates_YM(date): + """ + :param date: + :return: + """ + if isinstance(date, str): + return dt.datetime.strptime(date, '%Y-%m') + elif isinstance(date, dt.datetime): + return date + else: + raise('Wrong date format. Either use native datetime format or "YYYY-mm-dd"') + + +def dl_noaa_api(i, dtypes, station_id, Token, date_start, date_end, split_size): + """ + function to download from NOAA API + """ + dt_start = dt.datetime.strptime(date_start, '%Y-%m-%d') + dt_end = dt.datetime.strptime(date_end, '%Y-%m-%d') + + split_start = dt_start + timedelta(days=i) + split_end = dt_start + timedelta(days=i + split_size - 1) + if split_end > dt_end: + split_end = dt_end + + date_start_split = split_start.strftime('%Y-%m-%d') + date_end_split = split_end.strftime('%Y-%m-%d') + + # make the api call + request_url = 'https://www.ncei.noaa.gov/access/services/data/v1' + request_params = dict( + dataset = 'daily-summaries', + dataTypes = dtypes,#['PRCP', 'TMIN', 'TMAX'], + stations = station_id, + limit = 1000, + startDate = date_start_split, + endDate= date_end_split, + units='metric', + format='json' + ) + r = requests.get( + request_url, + params=request_params, + headers={'token': Token}) + + # workaround to skip empty returns (no data within period) + try: + # load the api response as a json + d = json.loads(r.text) + result = pd.DataFrame(d) + except json.JSONDecodeError: + print(f"Warning: No data available for period {date_start_split} to {date_end_split}. Skipping.") + result = None + return result + + +def assign_numeric_datatypes(df): + for col in df.columns: + if df[col].dtype == 'object': + try: + df[col] = pd.to_numeric(df[col]) + except: + pass + return df \ No newline at end of file