Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
53eb712
initial commit
lixiliu Sep 16, 2025
ea1ce5b
clean up
lixiliu Sep 16, 2025
4e04cd0
Add time_wrap feature for tz col conversion
lixiliu Sep 17, 2025
ed297e1
Rename IndexTimeRangeLocalTime to IndexTimeRangeWithTZColumn
lixiliu Sep 17, 2025
87a116e
Rename INDEX_LOCAL to INDEX_TZ_COL
lixiliu Sep 17, 2025
d65a2b3
Rework time_zone_converter to output tz naive col
lixiliu Sep 24, 2025
38691a3
Common API
lixiliu Sep 24, 2025
db8ab6b
Revise DatetimeRangeWithTZColumn class and downstream funcs
lixiliu Oct 16, 2025
ceb6692
keep time_zone in output for convert by col
lixiliu Oct 18, 2025
977dfdd
some cleanup
lixiliu Oct 25, 2025
4d5922a
mostly changing from ZoneInfo to tzinfo as typehint
lixiliu Oct 30, 2025
329ed70
fix mypy errors
lixiliu Oct 30, 2025
63798a7
more mypy stuff
lixiliu Oct 30, 2025
aebd0a2
Add pytest for time_utils
lixiliu Oct 30, 2025
f2d245d
refactor time util funcs
lixiliu Oct 31, 2025
3d7464a
Fix pytest issue
lixiliu Oct 31, 2025
df784af
Apply suggestions from code review
lixiliu Oct 31, 2025
dda0b78
fix mypy!
lixiliu Oct 31, 2025
f95785f
Apply suggestions from code review
lixiliu Nov 7, 2025
9ac6e69
Apply code review suggestions 2
lixiliu Nov 7, 2025
191eacb
review suggestions
lixiliu Nov 8, 2025
72c886c
address comments v3
lixiliu Nov 13, 2025
5a0b441
potential parquet fix for spark hive
lixiliu Nov 18, 2025
b51571e
update sql functions
lixiliu Nov 19, 2025
177cbb3
Show error
lixiliu Nov 19, 2025
48f7ef2
temp
lixiliu Nov 19, 2025
15bb2a4
final
lixiliu Nov 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ Source = "https://github.com/NREL/chronify"
files = [
"src",
]
disable_error_code = ["no-untyped-call"]
strict = true

[tool.pytest.ini_options]
Expand Down
6 changes: 4 additions & 2 deletions src/chronify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
from chronify.time_configs import (
AnnualTimeRange,
DatetimeRange,
DatetimeRangeWithTZColumn,
IndexTimeRangeNTZ,
IndexTimeRangeTZ,
IndexTimeRangeLocalTime,
IndexTimeRangeWithTZColumn,
RepresentativePeriodTimeNTZ,
RepresentativePeriodTimeTZ,
TimeBaseModel,
Expand All @@ -37,7 +38,8 @@
"ConflictingInputsError",
"CsvTableSchema",
"DatetimeRange",
"IndexTimeRangeLocalTime",
"DatetimeRangeWithTZColumn",
"IndexTimeRangeWithTZColumn",
"IndexTimeRangeNTZ",
"IndexTimeRangeTZ",
"InvalidOperation",
Expand Down
5 changes: 4 additions & 1 deletion src/chronify/annual_time_range_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@ def __init__(self, model: AnnualTimeRange) -> None:
super().__init__()
self._model = model

def iter_timestamps(self) -> Generator[int, None, None]:
def _iter_timestamps(self) -> Generator[int, None, None]:
for i in range(1, self._model.length + 1):
yield i

def list_timestamps(self) -> list[int]:
return list(self._iter_timestamps())

def list_distinct_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[Any]:
raise NotImplementedError

Expand Down
5 changes: 4 additions & 1 deletion src/chronify/column_representative_time_range_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,12 @@ def __init__(self, model: ColumnRepresentativeBase):
msg = f"No time generator for ColumnRepresentative time with time_config {type(self._model)}"
raise exceptions.InvalidOperation(msg)

def iter_timestamps(self) -> Generator[tuple[int, ...], None, None]:
def _iter_timestamps(self) -> Generator[tuple[int, ...], None, None]:
yield from self._handler._iter_timestamps()

def list_timestamps(self) -> list[tuple[int, ...]]:
return list(self._iter_timestamps())

def list_distinct_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[tuple[int, ...]]:
return self._handler.list_distinct_timestamps_from_dataframe(df)

Expand Down
13 changes: 10 additions & 3 deletions src/chronify/csv_time_series_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ def __init__(self, store: Store) -> None:
def _check_input_format(data_file: Path) -> None:
valid_extensions = [".csv"]
if data_file.suffix not in valid_extensions:
msg = f"{data_file.name} does not have a file extension in the supported extensions: {valid_extensions}"
msg = (
f"{data_file.name} does not have a file extension in the "
f"supported extensions: {valid_extensions}"
)
raise InvalidValue(msg)

@staticmethod
Expand All @@ -130,7 +133,10 @@ def _ingest_data(self, data: pd.DataFrame, table_name: str, year: int, length: i
def _create_schemas(
csv_fmt: CsvTimeSeriesFormats, name: str, year: int, length: int
) -> tuple[PivotedTableSchema | None, TableSchema]:
"""Create a PivotedTableSchema if necessary, and a TableSchema for both the time format and datetime format."""
"""
Create a PivotedTableSchema if necessary, and a TableSchema for both
the time format and datetime format.
"""
create_pivoted_schema = True
pivoted_dimension_name = "hour"
value_columns = [str(x) for x in range(1, 25)]
Expand Down Expand Up @@ -166,7 +172,8 @@ def ingest_to_datetime(
self, data_file: Path, table_name: str, data_year: int, length: int
) -> None:
"""
Given a file of csv time series data, convert the time format to datetime timestamps
Given a file of csv time series data, convert the time format to datetime
timestamps
and ingest into database
"""
self._check_input_format(data_file)
Expand Down
125 changes: 106 additions & 19 deletions src/chronify/datetime_range_generator.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,54 @@
from datetime import datetime, timedelta
from datetime import datetime, tzinfo
from typing import Generator, Optional
from zoneinfo import ZoneInfo
from itertools import chain

import pandas as pd

from chronify.time import (
LeapDayAdjustmentType,
)
from chronify.time_configs import (
DatetimeRange,
)
from chronify.time_utils import adjust_timestamp_by_dst_offset
from chronify.time_configs import DatetimeRanges, DatetimeRange, DatetimeRangeWithTZColumn
from chronify.time_utils import adjust_timestamp_by_dst_offset, get_tzname
from chronify.time_range_generator_base import TimeRangeGeneratorBase
from chronify.exceptions import InvalidValue


class DatetimeRangeGenerator(TimeRangeGeneratorBase):
"""Generates datetime ranges based on a DatetimeRange model."""
class DatetimeRangeGeneratorBase(TimeRangeGeneratorBase):
"""Base class that generates datetime ranges based on a DatetimeRange model."""

def __init__(
self,
model: DatetimeRange,
model: DatetimeRanges,
leap_day_adjustment: Optional[LeapDayAdjustmentType] = None,
) -> None:
self._model = model
self._adjustment = leap_day_adjustment or LeapDayAdjustmentType.NONE

def iter_timestamps(self) -> Generator[datetime, None, None]:
def _iter_timestamps(
self, start: Optional[datetime] = None
) -> Generator[datetime, None, None]:
"""
if start is supplied, override self._model.start
"""
if start is None:
start = self._model.start
tz = start.tzinfo

for i in range(self._model.length):
if self._model.start_time_is_tz_naive():
if not tz:
cur = adjust_timestamp_by_dst_offset(
self._model.start + i * self._model.resolution, self._model.resolution
start + i * self._model.resolution, self._model.resolution
)
else:
tz = self._model.start.tzinfo
# always step in standard time
cur_utc = (
self._model.start.astimezone(ZoneInfo("UTC")) + i * self._model.resolution
)
cur_utc = start.astimezone(ZoneInfo("UTC")) + i * self._model.resolution
cur = adjust_timestamp_by_dst_offset(
cur_utc.astimezone(tz), self._model.resolution
)

is_leap_year = (
pd.Timestamp(f"{cur.year}-01-01") + timedelta(days=365)
pd.Timestamp(f"{cur.year}-01-01") + pd.Timedelta(days=365)
).year == cur.year
if not is_leap_year:
yield pd.Timestamp(cur)
Expand All @@ -65,8 +71,89 @@ def iter_timestamps(self) -> Generator[datetime, None, None]:
):
yield pd.Timestamp(cur)

def list_distinct_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[datetime]:
return sorted(df[self._model.time_column].unique())

def list_time_columns(self) -> list[str]:
return self._model.list_time_columns()

def list_distinct_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[datetime]:
result = sorted(df[self._model.time_column].unique())
if not isinstance(result[0], datetime):
result = [pd.Timestamp(x) for x in result]
return result


class DatetimeRangeGenerator(DatetimeRangeGeneratorBase):
    """Timestamp generator for a single ``DatetimeRange`` model.

    All iteration logic lives in the base class; this subclass only pins the
    accepted model type and exposes the timestamps as a list.
    """

    def __init__(
        self,
        model: DatetimeRange,
        leap_day_adjustment: Optional[LeapDayAdjustmentType] = None,
    ) -> None:
        """Store the model and the optional leap-day adjustment via the base class."""
        super().__init__(model, leap_day_adjustment=leap_day_adjustment)
        # Guard against a DatetimeRangeWithTZColumn (or other model) being passed in.
        assert isinstance(self._model, DatetimeRange)

    def list_timestamps(self) -> list[datetime]:
        """Return every timestamp produced by the base-class iterator, in order."""
        return [*self._iter_timestamps()]


class DatetimeRangeGeneratorExternalTimeZone(DatetimeRangeGeneratorBase):
    """Generates datetime ranges based on a DatetimeRangeWithTZColumn model.

    The generated timestamps are always tz-naive. They can be listed per time
    zone, keyed by the time zone name, with ``list_timestamps_by_time_zone``,
    and may afterwards be localized using that name.

    # TODO: add offset as a column
    """

    def __init__(
        self,
        model: DatetimeRangeWithTZColumn,
        leap_day_adjustment: Optional[LeapDayAdjustmentType] = None,
    ) -> None:
        """Store the model and require that it carries at least one time zone.

        Raises
        ------
        InvalidValue
            If ``model.get_time_zones()`` is empty.
        """
        super().__init__(model, leap_day_adjustment=leap_day_adjustment)
        assert isinstance(self._model, DatetimeRangeWithTZColumn)
        # Truthiness check so that any empty value (not only ``[]``) is rejected
        # here instead of failing later while iterating the time zones.
        if not self._model.get_time_zones():
            msg = (
                f"DatetimeRangeWithTZColumn.time_zones needs to be instantiated for "
                f"DatetimeRangeGeneratorExternalTimeZone: {self._model}"
            )
            raise InvalidValue(msg)

    def _list_timestamps(self, time_zone: Optional[tzinfo]) -> list[datetime]:
        """Always return tz-naive timestamps relative to the input time_zone.

        The start handed to the base iterator depends on the model start:

        - naive start, time zone given: the start is stamped with that zone;
        - naive start, no time zone: the model start is used unchanged;
        - aware start, time zone given: the start is converted into that zone;
        - aware start, no time zone: the tz info is dropped from the start.
        """
        model_start = self._model.start
        start: Optional[datetime]
        if self._model.start_time_is_tz_naive():
            # ``None`` tells ``_iter_timestamps`` to fall back to the model start.
            start = model_start.replace(tzinfo=time_zone) if time_zone else None
        elif time_zone:
            start = model_start.astimezone(time_zone)
        else:
            start = model_start.replace(tzinfo=None)
        return [ts.replace(tzinfo=None) for ts in self._iter_timestamps(start=start)]

    def list_timestamps(self) -> list[datetime]:
        """Return the timestamps of all time zones, concatenated in time zone order."""
        by_time_zone = self.list_timestamps_by_time_zone()
        return list(chain.from_iterable(by_time_zone.values()))

    def list_timestamps_by_time_zone(self) -> dict[str, list[datetime]]:
        """For each time zone, return the full timestamp iteration (duplicates allowed).

        Keys are the names returned by ``get_tzname``; if two time zones share a
        name, the later one replaces the earlier one.
        """
        return {
            get_tzname(tz): self._list_timestamps(tz) for tz in self._model.get_time_zones()
        }

    def list_distinct_timestamps_by_time_zone_from_dataframe(
        self, df: pd.DataFrame
    ) -> dict[str, list[datetime]]:
        """Return the sorted distinct timestamps found in ``df`` for each time zone.

        ``df`` must contain the model's time zone column and time column. The
        time column is parsed with ``pd.to_datetime`` on a copy, so the caller's
        dataframe is left unmodified.
        """
        tz_col = self._model.get_time_zone_column()
        t_col = self._model.time_column
        # Copy only the two columns needed; converting in place on ``df`` would
        # silently change the dtype of the caller's time column.
        pairs = df[[tz_col, t_col]].copy()
        pairs[t_col] = pd.to_datetime(pairs[t_col])
        pairs = pairs.drop_duplicates()
        return {
            tz_name: sorted(pairs.loc[pairs[tz_col] == tz_name, t_col].tolist())
            for tz_name in sorted(pairs[tz_col].unique())
        }
8 changes: 8 additions & 0 deletions src/chronify/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ class InvalidOperation(ChronifyExceptionBase):
"""Raised when an invalid operation is requested."""


class InvalidModel(ChronifyExceptionBase):
"""Raised when an invalid model is passed."""


class InvalidParameter(ChronifyExceptionBase):
"""Raised when an invalid parameter is passed."""

Expand All @@ -22,6 +26,10 @@ class InvalidValue(ChronifyExceptionBase):
"""Raised when an invalid value is passed."""


class MissingValue(ChronifyExceptionBase):
    """Raised when an expected value is missing."""


class MissingParameter(ChronifyExceptionBase):
"""Raised when a parameter is not found or missing."""

Expand Down
5 changes: 4 additions & 1 deletion src/chronify/index_time_range_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@ def __init__(self, model: IndexTimeRangeBase) -> None:
super().__init__()
self._model = model

def iter_timestamps(self) -> Generator[int, None, None]:
def _iter_timestamps(self) -> Generator[int, None, None]:
yield from range(self._model.start, self._model.length + self._model.start)

def list_timestamps(self) -> list[int]:
return list(self._iter_timestamps())

def list_distinct_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[Any]:
return sorted(df[self._model.time_column].unique())

Expand Down
14 changes: 7 additions & 7 deletions src/chronify/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing_extensions import Annotated

from chronify.base_models import ChronifyBaseModel
from chronify.exceptions import InvalidParameter
from chronify.exceptions import InvalidParameter, InvalidValue
from chronify.time_configs import TimeConfig


Expand Down Expand Up @@ -67,7 +67,7 @@ def check_name(cls, name: str) -> str:
_check_name(name)
if name.lower() == "table":
msg = f"Table schema cannot use {name=}."
raise ValueError(msg)
raise InvalidValue(msg)
return name

@field_validator("value_column")
Expand Down Expand Up @@ -102,7 +102,7 @@ def check_column(cls, value_columns: str) -> str:
def check_time_array_id_columns(cls, value: list[str]) -> list[str]:
if value:
msg = f"PivotedTableSchema doesn't yet support time_array_id_columns: {value}"
raise ValueError(msg)
raise InvalidValue(msg)
return value

def list_columns(self) -> list[str]:
Expand All @@ -124,7 +124,7 @@ def check_name(cls, name: str) -> str:
_check_name(name)
if name.lower() == "table":
msg = f"Table schema cannot use {name=}."
raise ValueError(msg)
raise InvalidValue(msg)
return name

@field_validator("time_configs")
Expand Down Expand Up @@ -239,11 +239,11 @@ def fix_data_type(cls, data: dict[str, Any]) -> dict[str, Any]:
if val is None:
options = sorted(_COLUMN_TYPES.keys()) + list(_DB_TYPES)
msg = f"{dtype=} must be one of {options}"
raise ValueError(msg)
raise InvalidValue(msg)
data["dtype"] = val()
else:
msg = f"dtype is an unsupported type: {type(dtype)}. It must be a str or type."
raise ValueError(msg)
raise InvalidValue(msg)
return data


Expand Down Expand Up @@ -287,4 +287,4 @@ class CsvTableSchemaSingleTimeArrayPivotedByComponent(CsvTableSchema):
def _check_name(name: str) -> None:
if not REGEX_NAME_REQUIREMENT.search(name):
msg = f"A name can only have alphanumeric characters: {name=}"
raise ValueError(msg)
raise InvalidValue(msg)
13 changes: 8 additions & 5 deletions src/chronify/representative_time_range_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ def __init__(self, model: RepresentativePeriodTimeBase) -> None:
case RepresentativePeriodFormat.ONE_WEEKDAY_DAY_AND_ONE_WEEKEND_DAY_PER_MONTH_BY_HOUR:
self._handler = OneWeekdayDayAndWeekendDayPerMonthByHourHandler()

def iter_timestamps(self) -> Generator[NamedTuple, None, None]:
return self._handler.iter_timestamps()
def _iter_timestamps(self) -> Generator[NamedTuple, None, None]:
return self._handler._iter_timestamps()

def list_timestamps(self) -> list[NamedTuple]:
return list(self._iter_timestamps())

def list_distinct_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[Any]:
columns = self._model.list_time_columns()
Expand Down Expand Up @@ -77,7 +80,7 @@ def get_time_type(self) -> str:
"""Return the time type name representing the data."""

@abc.abstractmethod
def iter_timestamps(self) -> Generator[Any, None, None]:
def _iter_timestamps(self) -> Generator[Any, None, None]:
"""Return an iterator over all time indexes in the table.
Type of the time is dependent on the class.
"""
Expand All @@ -97,7 +100,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
def get_time_type() -> str:
return OneWeekPerMonthByHour.__name__

def iter_timestamps(self) -> Generator[OneWeekPerMonthByHour, None, None]:
def _iter_timestamps(self) -> Generator[OneWeekPerMonthByHour, None, None]:
for month in range(1, 13):
for dow in range(7):
for hour in range(24):
Expand All @@ -123,7 +126,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
def get_time_type() -> str:
return OneWeekdayDayOneWeekendDayPerMonthByHour.__name__

def iter_timestamps(self) -> Generator[OneWeekdayDayOneWeekendDayPerMonthByHour, None, None]:
def _iter_timestamps(self) -> Generator[OneWeekdayDayOneWeekendDayPerMonthByHour, None, None]:
for month in range(1, 13):
for is_weekday in [False, True]:
for hour in range(24):
Expand Down
Loading