Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
Literal,
Self,
TypeVar,
cast,
final,
overload,
)
Expand All @@ -38,10 +39,12 @@
from pandas.core.dtypes.common import (
ensure_str,
is_string_dtype,
is_timedelta64_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
from pandas import (
ArrowDtype,
DataFrame,
Expand Down Expand Up @@ -71,6 +74,7 @@
)
from pandas.io.parsers.readers import validate_integer

# Time resolutions used as the target unit when timedelta labels are
# serialized in epoch form (see _format_timedelta_labels).
DateUnit = Literal["s", "ms", "us", "ns"]
if TYPE_CHECKING:
from collections.abc import (
Callable,
Expand Down Expand Up @@ -222,6 +226,47 @@ def to_json(
return None


def _format_timedelta_labels(index, date_format: str, date_unit: str | None):
"""
Format TimedeltaIndex labels for JSON serialization.

Rules:
- Timedelta values → ISO 8601 (iso) or integer (epoch)
- NaT MUST stay missing so JSON encodes it as null
"""

# Fast-path: empty index
if len(index) == 0:
return index

values = index._values # ndarray[td64]
result: list[object] = []

if date_format == "iso":
for val in values:
if isna(val):
# critical: preserve missing → JSON null
result.append("null")
else:
td = pd.Timedelta(val)
result.append(td.isoformat())

else: # epoch
if date_unit is None:
unit: DateUnit = "ms"
else:
unit = cast(DateUnit, date_unit)

for val in values:
if isna(val):
result.append("null")
else:
td = pd.Timedelta(val).as_unit(unit)
result.append(int(td._value))

return Index(result, dtype=object)


class Writer(ABC):
_default_orient: str

Expand Down Expand Up @@ -287,6 +332,12 @@ def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
def _format_axes(self) -> None:
    """
    Validate the index and convert timedelta labels before encoding.

    Raises
    ------
    ValueError
        If the index has duplicate labels and ``orient`` is ``"index"``.
    """
    has_duplicate_labels = not self.obj.index.is_unique
    if has_duplicate_labels and self.orient == "index":
        raise ValueError(f"Series index must be unique for orient='{self.orient}'")

    # GH#63236: only timedelta-typed labels need reformatting.
    if not is_timedelta64_dtype(self.obj.index.dtype):
        return

    # Work on a shallow copy; presumably so the object handed to the writer
    # is not mutated when its index is replaced below.
    self.obj = self.obj.copy(deep=False)
    formatted_labels = _format_timedelta_labels(
        self.obj.index, self.date_format, self.date_unit
    )
    self.obj.index = formatted_labels


class FrameWriter(Writer):
Expand Down Expand Up @@ -317,6 +368,29 @@ def _format_axes(self) -> None:
raise ValueError(
f"DataFrame columns must be unique for orient='{self.orient}'."
)
# FIX:GH#63236 format Timedelta labels (Index and Columns) correctly
if (
not isinstance(self.obj.index, MultiIndex)
and is_timedelta64_dtype(self.obj.index.dtype)
) or (
not isinstance(self.obj.columns, MultiIndex)
and is_timedelta64_dtype(self.obj.columns.dtype)
):
self.obj = self.obj.copy(deep=False)

if not isinstance(self.obj.index, MultiIndex) and is_timedelta64_dtype(
self.obj.index.dtype
):
self.obj.index = _format_timedelta_labels(
self.obj.index, self.date_format, self.date_unit
)

if not isinstance(self.obj.columns, MultiIndex) and is_timedelta64_dtype(
self.obj.columns.dtype
):
self.obj.columns = _format_timedelta_labels(
self.obj.columns, self.date_format, self.date_unit
)


class JSONTableWriter(FrameWriter):
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1794,9 +1794,9 @@ def test_read_json_with_very_long_file_path(self, compression):
"date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")]
)
def test_timedelta_as_label(self, date_format, key, unit, request):
if unit != "ns":
mark = pytest.mark.xfail(reason="GH#63236 failure to round-trip")
request.applymarker(mark)
# if unit != "ns":
# mark = pytest.mark.xfail(reason="GH#63236 failure to round-trip")
# request.applymarker(mark)
df = DataFrame([[1]], columns=[pd.Timedelta("1D").as_unit(unit)])
expected = f'{{"{key}":{{"0":1}}}}'

Expand Down
Loading