diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index dda20fe8aeb21..7de1ffbacbde6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -820,6 +820,7 @@ Other API changes :meth:`~DataFrame.ffill`, :meth:`~DataFrame.bfill`, :meth:`~DataFrame.interpolate`, :meth:`~DataFrame.where`, :meth:`~DataFrame.mask`, :meth:`~DataFrame.clip`) now return the modified DataFrame or Series (``self``) instead of ``None`` when ``inplace=True`` (:issue:`63207`) +- All Index constructors now copy ``numpy.ndarray`` and ``ExtensionArray`` inputs by default when ``copy=None``, consistent with :class:`Series` behavior (:issue:`63388`) .. --------------------------------------------------------------------------- .. _whatsnew_300.deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a07226ef0f50a..ecfd26a412c2d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -505,12 +505,8 @@ def __new__( if not copy and isinstance(data, (ABCSeries, Index)): refs = data._references - if isinstance(data, (ExtensionArray, np.ndarray)): - # GH 63306 - if copy is not False: - if dtype is None or astype_is_view(data.dtype, dtype): - data = data.copy() - copy = False + # GH 63306, GH 63388 + data, copy = cls._maybe_copy_array_input(data, copy, dtype) # range if isinstance(data, (range, RangeIndex)): @@ -5197,6 +5193,21 @@ def _raise_scalar_data_error(cls, data): "was passed" ) + @classmethod + def _maybe_copy_array_input( + cls, data, copy: bool | None, dtype + ) -> tuple[Any, bool]: + """ + Ensure that the input data is copied if necessary. 
+ GH#63388 + """ + if isinstance(data, (ExtensionArray, np.ndarray)): + if copy is not False: + if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): + data = data.copy() + copy = False + return data, bool(copy) + def _validate_fill_value(self, value): """ Check if the value can be inserted into our array without casting, diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index a07e18b1892fd..72b009a344193 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -181,8 +181,13 @@ class DatetimeIndex(DatetimeTimedeltaMixin): If True parse dates in `data` with the year first order. dtype : numpy.dtype or DatetimeTZDtype or str, default None Note that the only NumPy dtype allowed is `datetime64[ns]`. - copy : bool, default False - Make a copy of input ndarray. + copy : bool, default None + Whether to copy input data, only relevant for array, Series, and Index + inputs (for other input, e.g. a list, a new array is created anyway). + Defaults to True for array input and False for Index/Series. + Set to False to avoid copying array input at your own risk (if you + know the input data won't be modified elsewhere). + Set to True to force copying Series/Index input up front. name : label, default None Name to be stored in the index. 
@@ -669,7 +674,7 @@ def __new__( dayfirst: bool = False, yearfirst: bool = False, dtype: Dtype | None = None, - copy: bool = False, + copy: bool | None = None, name: Hashable | None = None, ) -> Self: if is_scalar(data): @@ -679,6 +684,9 @@ def __new__( name = maybe_extract_name(name, data, cls) + # GH#63388 + data, copy = cls._maybe_copy_array_input(data, copy, dtype) + if ( isinstance(data, DatetimeArray) and freq is lib.no_default diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 7bb64503a469e..1def317bc1a88 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -169,8 +169,13 @@ class IntervalIndex(ExtensionIndex): neither. dtype : dtype or None, default None If None, dtype will be inferred. - copy : bool, default False - Copy the input data. + copy : bool, default None + Whether to copy input data, only relevant for array, Series, and Index + inputs (for other input, e.g. a list, a new array is created anyway). + Defaults to True for array input and False for Index/Series. + Set to False to avoid copying array input at your own risk (if you + know the input data won't be modified elsewhere). + Set to True to force copying Series/Index input up front. name : object, optional Name to be stored in the index. 
verify_integrity : bool, default True @@ -252,12 +257,15 @@ def __new__( data, closed: IntervalClosedType | None = None, dtype: Dtype | None = None, - copy: bool = False, + copy: bool | None = None, name: Hashable | None = None, verify_integrity: bool = True, ) -> Self: name = maybe_extract_name(name, data, cls) + # GH#63388 + data, copy = cls._maybe_copy_array_input(data, copy, dtype) + with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray( data, diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c3ad466a114a9..b8a25ab0da693 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -101,8 +101,13 @@ class PeriodIndex(DatetimeIndexOpsMixin): One of pandas period strings or corresponding objects. dtype : str or PeriodDtype, default None A dtype from which to extract a freq. - copy : bool - Make a copy of input ndarray. + copy : bool, default None + Whether to copy input data, only relevant for array, Series, and Index + inputs (for other input, e.g. a list, a new array is created anyway). + Defaults to True for array input and False for Index/Series. + Set to False to avoid copying array input at your own risk (if you + know the input data won't be modified elsewhere). + Set to True to force copying Series/Index input up front. name : str, default None Name of the resulting PeriodIndex. @@ -220,7 +225,7 @@ def __new__( data=None, freq=None, dtype: Dtype | None = None, - copy: bool = False, + copy: bool | None = None, name: Hashable | None = None, ) -> Self: refs = None @@ -231,6 +236,9 @@ def __new__( freq = validate_dtype_freq(dtype, freq) + # GH#63388 + data, copy = cls._maybe_copy_array_input(data, copy, dtype) + # PeriodIndex allow PeriodIndex(period_index, freq=different) # Let's not encourage that kind of behavior in PeriodArray. 
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2a3d5137242d0..725ef8cae7120 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -81,8 +81,13 @@ class TimedeltaIndex(DatetimeTimedeltaMixin): dtype : numpy.dtype or str, default None Valid ``numpy`` dtypes are ``timedelta64[ns]``, ``timedelta64[us]``, ``timedelta64[ms]``, and ``timedelta64[s]``. - copy : bool - Make a copy of input array. + copy : bool, default None + Whether to copy input data, only relevant for array, Series, and Index + inputs (for other input, e.g. a list, a new array is created anyway). + Defaults to True for array input and False for Index/Series. + Set to False to avoid copying array input at your own risk (if you + know the input data won't be modified elsewhere). + Set to True to force copying Series/Index input up front. name : object Name to be stored in the index. @@ -158,11 +163,14 @@ def __new__( data=None, freq=lib.no_default, dtype=None, - copy: bool = False, + copy: bool | None = None, name=None, ): name = maybe_extract_name(name, data, cls) + # GH#63388 + data, copy = cls._maybe_copy_array_input(data, copy, dtype) + if is_scalar(data): cls._raise_scalar_data_error(data) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 60a80f9af78c5..b4c6060b11403 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -707,7 +707,7 @@ def test_array_object_dtype(self, arr1d): def test_array_tz(self, arr1d): # GH#23524 arr = arr1d - dti = self.index_cls(arr1d) + dti = self.index_cls(arr1d, copy=False) copy_false = None if np_version_gt2 else False expected = dti.asi8.view("M8[ns]") diff --git a/pandas/tests/copy_view/index/test_datetimeindex.py b/pandas/tests/copy_view/index/test_datetimeindex.py index 6194ea8b122c9..ddbff4bafdeff 100644 --- a/pandas/tests/copy_view/index/test_datetimeindex.py +++ 
b/pandas/tests/copy_view/index/test_datetimeindex.py @@ -1,12 +1,15 @@ +import numpy as np import pytest from pandas import ( DatetimeIndex, Series, Timestamp, + array, date_range, ) import pandas._testing as tm +from pandas.tests.copy_view.util import get_array pytestmark = pytest.mark.filterwarnings( "ignore:Setting a value on a view:FutureWarning" @@ -54,3 +57,30 @@ def test_index_values(): idx = date_range("2019-12-31", periods=3, freq="D") result = idx.values assert result.flags.writeable is False + + +def test_constructor_copy_input_datetime_ndarray_default(): + # GH 63388 + arr = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]") + idx = DatetimeIndex(arr) + assert not np.shares_memory(arr, get_array(idx)) + + +def test_constructor_copy_input_datetime_ea_default(): + # GH 63388 + arr = array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]") + idx = DatetimeIndex(arr) + assert not tm.shares_memory(arr, idx.array) + + +def test_series_from_temporary_datetimeindex_readonly_data(): + # GH 63388 + arr = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]") + arr.flags.writeable = False + ser = Series(DatetimeIndex(arr)) + assert not np.shares_memory(arr, get_array(ser)) + ser.iloc[0] = Timestamp("2020-01-01") + expected = Series( + [Timestamp("2020-01-01"), Timestamp("2020-01-02")], dtype="datetime64[ns]" + ) + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/copy_view/index/test_intervalindex.py b/pandas/tests/copy_view/index/test_intervalindex.py new file mode 100644 index 0000000000000..d30415d05e4e4 --- /dev/null +++ b/pandas/tests/copy_view/index/test_intervalindex.py @@ -0,0 +1,29 @@ +import numpy as np + +from pandas import ( + Interval, + IntervalIndex, + Series, + array, +) +import pandas._testing as tm +from pandas.tests.copy_view.util import get_array + + +def test_constructor_copy_input_interval_ea_default(): + # GH 63388 + arr = array([Interval(0, 1), Interval(1, 2)]) + idx = IntervalIndex(arr) + assert not 
tm.shares_memory(arr, idx.array) + + +def test_series_from_temporary_intervalindex_readonly_data(): + # GH 63388 + arr = array([Interval(0, 1), Interval(1, 2)]) + arr._left.flags.writeable = False + arr._right.flags.writeable = False + ser = Series(IntervalIndex(arr)) + assert not np.shares_memory(arr._left, get_array(ser)._left) + ser.iloc[0] = Interval(5, 6) + expected = Series([Interval(5, 6), Interval(1, 2)], dtype="interval[int64, right]") + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/copy_view/index/test_periodindex.py b/pandas/tests/copy_view/index/test_periodindex.py index 2887b191038d2..5f741d123f4a8 100644 --- a/pandas/tests/copy_view/index/test_periodindex.py +++ b/pandas/tests/copy_view/index/test_periodindex.py @@ -1,12 +1,15 @@ +import numpy as np import pytest from pandas import ( Period, PeriodIndex, Series, + array, period_range, ) import pandas._testing as tm +from pandas.tests.copy_view.util import get_array pytestmark = pytest.mark.filterwarnings( "ignore:Setting a value on a view:FutureWarning" @@ -21,3 +24,24 @@ def test_periodindex(box): expected = idx.copy(deep=True) ser.iloc[0] = Period("2020-12-31") tm.assert_index_equal(idx, expected) + + +def test_constructor_copy_input_period_ea_default(): + # GH 63388 + arr = array(["2020-01-01", "2020-01-02"], dtype="period[D]") + idx = PeriodIndex(arr) + assert not tm.shares_memory(arr, idx.array) + + +def test_series_from_temporary_periodindex_readonly_data(): + # GH 63388 + arr = array(["2020-01-01", "2020-01-02"], dtype="period[D]") + arr._ndarray.flags.writeable = False + ser = Series(PeriodIndex(arr)) + assert not np.shares_memory(arr._ndarray, get_array(ser)) + ser.iloc[0] = Period("2022-01-01", freq="D") + expected = Series( + [Period("2022-01-01", freq="D"), Period("2020-01-02", freq="D")], + dtype="period[D]", + ) + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/copy_view/index/test_timedeltaindex.py b/pandas/tests/copy_view/index/test_timedeltaindex.py 
index 6984df86b00e3..9f4dcc39fe6f6 100644 --- a/pandas/tests/copy_view/index/test_timedeltaindex.py +++ b/pandas/tests/copy_view/index/test_timedeltaindex.py @@ -1,12 +1,15 @@ +import numpy as np import pytest from pandas import ( Series, Timedelta, TimedeltaIndex, + array, timedelta_range, ) import pandas._testing as tm +from pandas.tests.copy_view.util import get_array pytestmark = pytest.mark.filterwarnings( "ignore:Setting a value on a view:FutureWarning" @@ -27,3 +30,30 @@ def test_timedeltaindex(cons): expected = idx.copy(deep=True) ser.iloc[0] = Timedelta("5 days") tm.assert_index_equal(idx, expected) + + +def test_constructor_copy_input_timedelta_ndarray_default(): + # GH 63388 + arr = np.array([1, 2], dtype="timedelta64[ns]") + idx = TimedeltaIndex(arr) + assert not np.shares_memory(arr, get_array(idx)) + + +def test_constructor_copy_input_timedelta_ea_default(): + # GH 63388 + arr = array([1, 2], dtype="timedelta64[ns]") + idx = TimedeltaIndex(arr) + assert not tm.shares_memory(arr, idx.array) + + +def test_series_from_temporary_timedeltaindex_readonly_data(): + # GH 63388 + arr = np.array([1, 2], dtype="timedelta64[ns]") + arr.flags.writeable = False + ser = Series(TimedeltaIndex(arr)) + assert not np.shares_memory(arr, get_array(ser)) + ser.iloc[0] = Timedelta(days=1) + expected = Series( + [Timedelta(days=1), Timedelta(nanoseconds=2)], dtype="timedelta64[ns]" + ) + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 4aa4c81558fa9..06ef4c057b5cb 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1138,7 +1138,7 @@ def test_index_cast_datetime64_other_units(self): def test_constructor_int64_nocopy(self): # GH#1624 arr = np.arange(1000, dtype=np.int64) - index = DatetimeIndex(arr) + index = DatetimeIndex(arr, copy=False) arr[50:100] = -1 assert (index.asi8[50:100] == 
-1).all()