done
This commit is contained in:
		| @ -0,0 +1,69 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Module-level pytest mark: ignore FutureWarnings whose message starts with
# "Setting a value on a view" for the tests in this module.
pytestmark = pytest.mark.filterwarnings(
    "ignore:Setting a value on a view:FutureWarning"
)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "cons",
    [
        lambda x: DatetimeIndex(x),
        lambda x: DatetimeIndex(DatetimeIndex(x)),
    ],
)
def test_datetimeindex(using_copy_on_write, cons):
    """Build a DatetimeIndex from a Series, then write to the Series.

    ``cons`` wraps the Series in ``DatetimeIndex`` once or twice. When
    ``using_copy_on_write`` is truthy the index must still equal the snapshot
    taken before the write; otherwise nothing is asserted.
    """
    dt = date_range("2019-12-31", periods=3, freq="D")
    ser = Series(dt)
    idx = cons(ser)
    # Deep copy taken before the Series is modified.
    expected = idx.copy(deep=True)
    ser.iloc[0] = Timestamp("2020-12-31")
    if using_copy_on_write:
        tm.assert_index_equal(idx, expected)
|  | ||||
|  | ||||
def test_datetimeindex_tz_convert(using_copy_on_write):
    """Write to the source Series after ``DatetimeIndex(ser).tz_convert(...)``.

    When ``using_copy_on_write`` is truthy the converted index must equal the
    snapshot taken before the write; otherwise nothing is asserted.
    """
    dt = date_range("2019-12-31", periods=3, freq="D", tz="Europe/Berlin")
    ser = Series(dt)
    idx = DatetimeIndex(ser).tz_convert("US/Eastern")
    # Deep copy taken before the Series is modified.
    expected = idx.copy(deep=True)
    ser.iloc[0] = Timestamp("2020-12-31", tz="Europe/Berlin")
    if using_copy_on_write:
        tm.assert_index_equal(idx, expected)
|  | ||||
|  | ||||
def test_datetimeindex_tz_localize(using_copy_on_write):
    """Write to the source Series after ``DatetimeIndex(ser).tz_localize(...)``.

    When ``using_copy_on_write`` is truthy the localized index must equal the
    snapshot taken before the write; otherwise nothing is asserted.
    """
    dt = date_range("2019-12-31", periods=3, freq="D")
    ser = Series(dt)
    idx = DatetimeIndex(ser).tz_localize("Europe/Berlin")
    # Deep copy taken before the Series is modified.
    expected = idx.copy(deep=True)
    ser.iloc[0] = Timestamp("2020-12-31")
    if using_copy_on_write:
        tm.assert_index_equal(idx, expected)
|  | ||||
|  | ||||
def test_datetimeindex_isocalendar(using_copy_on_write):
    """Write to the source Series after ``DatetimeIndex(ser).isocalendar()``.

    The object under test is the ``.index`` of the returned frame. When
    ``using_copy_on_write`` is truthy it must equal the snapshot taken before
    the write; otherwise nothing is asserted.
    """
    dt = date_range("2019-12-31", periods=3, freq="D")
    ser = Series(dt)
    df = DatetimeIndex(ser).isocalendar()
    # Deep copy of the result's index taken before the Series is modified.
    expected = df.index.copy(deep=True)
    ser.iloc[0] = Timestamp("2020-12-31")
    if using_copy_on_write:
        tm.assert_index_equal(df.index, expected)
|  | ||||
|  | ||||
def test_index_values(using_copy_on_write):
    """Check the ``writeable`` flag of ``.values`` on a ``date_range`` index.

    The array is expected to be read-only when ``using_copy_on_write`` is
    truthy and writeable otherwise.
    """
    idx = date_range("2019-12-31", periods=3, freq="D")
    result = idx.values
    if using_copy_on_write:
        assert result.flags.writeable is False
    else:
        assert result.flags.writeable is True
| @ -0,0 +1,184 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
def index_view(index_data=None):
    """Return an index and a frame slice that were built from the same data.

    A two-column frame (``"a"`` from ``index_data``, ``"b"`` constant 1.5) is
    sliced with ``df[:]`` and then ``"a"`` is moved into the index with
    ``set_index("a", drop=True)``.

    Parameters
    ----------
    index_data : list-like, optional
        Values for column ``"a"``. Defaults to ``[1, 2]``.

    Returns
    -------
    tuple
        ``(idx, view)`` where ``idx`` is the index of the re-indexed frame and
        ``view`` is the ``df[:]`` slice taken before ``set_index``.
    """
    # Use a None sentinel so the default list is not shared between calls.
    if index_data is None:
        index_data = [1, 2]
    df = DataFrame({"a": index_data, "b": 1.5})
    view = df[:]
    df = df.set_index("a", drop=True)
    idx = df.index
    return idx, view
|  | ||||
|  | ||||
def test_set_index_update_column(using_copy_on_write, warn_copy_on_write):
    """Write to column "a" after ``set_index("a", drop=False)``.

    With ``using_copy_on_write`` the index must equal its pre-write snapshot;
    otherwise the written value is expected to appear in the index.
    """
    df = DataFrame({"a": [1, 2], "b": 1})
    df = df.set_index("a", drop=False)
    # Deep copy taken before the column is modified.
    expected = df.index.copy(deep=True)
    with tm.assert_cow_warning(warn_copy_on_write):
        df.iloc[0, 0] = 100
    if using_copy_on_write:
        tm.assert_index_equal(df.index, expected)
    else:
        tm.assert_index_equal(df.index, Index([100, 2], name="a"))
|  | ||||
|  | ||||
def test_set_index_drop_update_column(using_copy_on_write):
    """Write through a ``df[:]`` slice taken before ``set_index("a", drop=True)``.

    The index is compared with its pre-write snapshot unconditionally; the
    ``using_copy_on_write`` argument is not read in the body.
    """
    df = DataFrame({"a": [1, 2], "b": 1.5})
    view = df[:]
    df = df.set_index("a", drop=True)
    # Deep copy taken before the slice is modified.
    expected = df.index.copy(deep=True)
    view.iloc[0, 0] = 100
    tm.assert_index_equal(df.index, expected)
|  | ||||
|  | ||||
def test_set_index_series(using_copy_on_write, warn_copy_on_write):
    """Write to a Series after it was passed to ``DataFrame.set_index``.

    With ``using_copy_on_write`` the frame's index must equal its pre-write
    snapshot; otherwise the written value is expected to appear in the index.
    """
    df = DataFrame({"a": [1, 2], "b": 1.5})
    ser = Series([10, 11])
    df = df.set_index(ser)
    # Deep copy taken before the Series is modified.
    expected = df.index.copy(deep=True)
    with tm.assert_cow_warning(warn_copy_on_write):
        ser.iloc[0] = 100
    if using_copy_on_write:
        tm.assert_index_equal(df.index, expected)
    else:
        tm.assert_index_equal(df.index, Index([100, 11]))
|  | ||||
|  | ||||
def test_assign_index_as_series(using_copy_on_write, warn_copy_on_write):
    """Write to a Series after it was assigned to ``df.index``.

    With ``using_copy_on_write`` the frame's index must equal its pre-write
    snapshot; otherwise the written value is expected to appear in the index.
    """
    df = DataFrame({"a": [1, 2], "b": 1.5})
    ser = Series([10, 11])
    df.index = ser
    # Deep copy taken before the Series is modified.
    expected = df.index.copy(deep=True)
    with tm.assert_cow_warning(warn_copy_on_write):
        ser.iloc[0] = 100
    if using_copy_on_write:
        tm.assert_index_equal(df.index, expected)
    else:
        tm.assert_index_equal(df.index, Index([100, 11]))
|  | ||||
|  | ||||
def test_assign_index_as_index(using_copy_on_write, warn_copy_on_write):
    """Write to a Series after ``Index(ser)`` was assigned to ``df.index``.

    The local name for the intermediate Index is rebound to ``None`` before
    the write. With ``using_copy_on_write`` the frame's index must equal its
    pre-write snapshot; otherwise the written value is expected to appear.
    """
    df = DataFrame({"a": [1, 2], "b": 1.5})
    ser = Series([10, 11])
    rhs_index = Index(ser)
    df.index = rhs_index
    rhs_index = None  # overwrite to clear reference
    # Deep copy taken before the Series is modified.
    expected = df.index.copy(deep=True)
    with tm.assert_cow_warning(warn_copy_on_write):
        ser.iloc[0] = 100
    if using_copy_on_write:
        tm.assert_index_equal(df.index, expected)
    else:
        tm.assert_index_equal(df.index, Index([100, 11]))
|  | ||||
|  | ||||
def test_index_from_series(using_copy_on_write, warn_copy_on_write):
    """Write to a Series after building ``Index(ser)`` from it.

    With ``using_copy_on_write`` the index must equal its pre-write snapshot;
    otherwise the written value is expected to appear in the index.
    """
    ser = Series([1, 2])
    idx = Index(ser)
    # Deep copy taken before the Series is modified.
    expected = idx.copy(deep=True)
    with tm.assert_cow_warning(warn_copy_on_write):
        ser.iloc[0] = 100
    if using_copy_on_write:
        tm.assert_index_equal(idx, expected)
    else:
        tm.assert_index_equal(idx, Index([100, 2]))
|  | ||||
|  | ||||
def test_index_from_series_copy(using_copy_on_write):
    """Check that writing to a Series keeps its array after ``Index(ser, copy=True)``.

    The Series' array is captured before the write and must still share memory
    with the Series' array afterwards. ``using_copy_on_write`` is not read in
    the body.
    """
    ser = Series([1, 2])
    # NOTE(review): idx is never read (hence the noqa); presumably it is bound
    # to a name so the Index stays alive during the write -- confirm.
    idx = Index(ser, copy=True)  # noqa: F841
    arr = get_array(ser)
    ser.iloc[0] = 100
    assert np.shares_memory(get_array(ser), arr)
|  | ||||
|  | ||||
def test_index_from_index(using_copy_on_write, warn_copy_on_write):
    """Write to a Series after building ``Index(Index(ser))`` from it.

    With ``using_copy_on_write`` the outer index must equal its pre-write
    snapshot; otherwise the written value is expected to appear in the index.
    """
    ser = Series([1, 2])
    idx = Index(ser)
    idx = Index(idx)
    # Deep copy taken before the Series is modified.
    expected = idx.copy(deep=True)
    with tm.assert_cow_warning(warn_copy_on_write):
        ser.iloc[0] = 100
    if using_copy_on_write:
        tm.assert_index_equal(idx, expected)
    else:
        tm.assert_index_equal(idx, Index([100, 2]))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "func",
    [
        lambda x: x._shallow_copy(x._values),
        lambda x: x.view(),
        lambda x: x.take([0, 1]),
        lambda x: x.repeat([1, 1]),
        lambda x: x[slice(0, 2)],
        lambda x: x[[0, 1]],
        lambda x: x._getitem_slice(slice(0, 2)),
        lambda x: x.delete([]),
        lambda x: x.rename("b"),
        lambda x: x.astype("Int64", copy=False),
    ],
    ids=[
        "_shallow_copy",
        "view",
        "take",
        "repeat",
        "getitem_slice",
        "getitem_list",
        "_getitem_slice",
        "delete",
        "rename",
        "astype",
    ],
)
def test_index_ops(using_copy_on_write, func, request):
    """Apply an Index operation, then write to a frame built from the same data.

    ``func`` is one of the parametrized Index operations; the index and the
    frame slice come from ``index_view()``. When ``using_copy_on_write`` is
    truthy the operation's result must equal the snapshot taken before the
    write; otherwise nothing is asserted.
    """
    idx, view_ = index_view()
    # Deep copy taken before the operation and the write.
    expected = idx.copy(deep=True)
    # The "astype" case returns Int64, so the snapshot is cast to match.
    if "astype" in request.node.callspec.id:
        expected = expected.astype("Int64")
    idx = func(idx)
    view_.iloc[0, 0] = 100
    if using_copy_on_write:
        # Names are not compared (the "rename" case changes the index name).
        tm.assert_index_equal(idx, expected, check_names=False)
|  | ||||
|  | ||||
def test_infer_objects(using_copy_on_write):
    """Write to the related frame after ``Index.infer_objects(copy=False)``.

    The index and frame slice come from ``index_view(["a", "b"])``. When
    ``using_copy_on_write`` is truthy the result must equal the snapshot taken
    before the write; otherwise nothing is asserted.
    """
    idx, view_ = index_view(["a", "b"])
    # Deep copy taken before the operation and the write.
    expected = idx.copy(deep=True)
    idx = idx.infer_objects(copy=False)
    view_.iloc[0, 0] = "aaaa"
    if using_copy_on_write:
        tm.assert_index_equal(idx, expected, check_names=False)
|  | ||||
|  | ||||
def test_index_to_frame(using_copy_on_write):
    """Check memory sharing of ``Index.to_frame()`` and that writes don't leak.

    With ``using_copy_on_write`` the frame column must share memory with the
    index values and the block must be flagged as referenced; otherwise the
    column must not share memory. In both modes a write to the frame must
    leave the index equal to its snapshot.
    """
    idx = Index([1, 2, 3], name="a")
    expected = idx.copy(deep=True)
    df = idx.to_frame()
    if using_copy_on_write:
        assert np.shares_memory(get_array(df, "a"), idx._values)
        assert not df._mgr._has_no_reference(0)
    else:
        assert not np.shares_memory(get_array(df, "a"), idx._values)

    df.iloc[0, 0] = 100
    tm.assert_index_equal(idx, expected)
|  | ||||
|  | ||||
def test_index_values(using_copy_on_write):
    """Check the ``writeable`` flag of ``Index.values`` for an integer index.

    The array is expected to be read-only when ``using_copy_on_write`` is
    truthy and writeable otherwise.
    """
    idx = Index([1, 2, 3])
    result = idx.values
    if using_copy_on_write:
        assert result.flags.writeable is False
    else:
        assert result.flags.writeable is True
| @ -0,0 +1,30 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Period, | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Module-level pytest mark: ignore FutureWarnings whose message starts with
# "Setting a value on a view" for the tests in this module.
pytestmark = pytest.mark.filterwarnings(
    "ignore:Setting a value on a view:FutureWarning"
)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "cons",
    [
        lambda x: PeriodIndex(x),
        lambda x: PeriodIndex(PeriodIndex(x)),
    ],
)
def test_periodindex(using_copy_on_write, cons):
    """Build a PeriodIndex from a Series, then write to the Series.

    ``cons`` wraps the Series in ``PeriodIndex`` once or twice. When
    ``using_copy_on_write`` is truthy the index must still equal the snapshot
    taken before the write; otherwise nothing is asserted.
    """
    dt = period_range("2019-12-31", periods=3, freq="D")
    ser = Series(dt)
    idx = cons(ser)
    # Deep copy taken before the Series is modified.
    expected = idx.copy(deep=True)
    ser.iloc[0] = Period("2020-12-31")
    if using_copy_on_write:
        tm.assert_index_equal(idx, expected)
| @ -0,0 +1,30 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     TimedeltaIndex, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Module-level pytest mark: ignore FutureWarnings whose message starts with
# "Setting a value on a view" for the tests in this module.
pytestmark = pytest.mark.filterwarnings(
    "ignore:Setting a value on a view:FutureWarning"
)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "cons",
    [
        lambda x: TimedeltaIndex(x),
        lambda x: TimedeltaIndex(TimedeltaIndex(x)),
    ],
)
def test_timedeltaindex(using_copy_on_write, cons):
    """Build a TimedeltaIndex from a Series, then write to the Series.

    ``cons`` wraps the Series in ``TimedeltaIndex`` once or twice. When
    ``using_copy_on_write`` is truthy the index must still equal the snapshot
    taken before the write; otherwise nothing is asserted.
    """
    dt = timedelta_range("1 day", periods=3)
    ser = Series(dt)
    idx = cons(ser)
    # Deep copy taken before the Series is modified.
    expected = idx.copy(deep=True)
    ser.iloc[0] = Timedelta("5 days")
    if using_copy_on_write:
        tm.assert_index_equal(idx, expected)
| @ -0,0 +1,218 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat.numpy import np_version_gt2 | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
| # ----------------------------------------------------------------------------- | ||||
| # Copy/view behaviour for accessing underlying array of Series/DataFrame | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "method",
    [
        lambda ser: ser.values,
        lambda ser: np.asarray(ser),
        lambda ser: np.array(ser, copy=False),
    ],
    ids=["values", "asarray", "array"],
)
def test_series_values(using_copy_on_write, method):
    """Check view/read-only behaviour of the array obtained from a Series.

    ``method`` extracts a NumPy array via ``.values``, ``np.asarray`` or
    ``np.array(..., copy=False)``. With ``using_copy_on_write`` the array must
    share memory with the Series but be read-only; otherwise it must be
    writeable and writes through it must be visible in the Series.
    """
    ser = Series([1, 2, 3], name="name")
    ser_orig = ser.copy()

    arr = method(ser)

    if using_copy_on_write:
        # .values still gives a view but is read-only
        assert np.shares_memory(arr, get_array(ser, "name"))
        assert arr.flags.writeable is False

        # mutating series through arr therefore doesn't work
        with pytest.raises(ValueError, match="read-only"):
            arr[0] = 0
        tm.assert_series_equal(ser, ser_orig)

        # mutating the series itself still works
        ser.iloc[0] = 0
        assert ser.values[0] == 0
    else:
        assert arr.flags.writeable is True
        arr[0] = 0
        assert ser.iloc[0] == 0
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "method", | ||||
|     [ | ||||
|         lambda df: df.values, | ||||
|         lambda df: np.asarray(df), | ||||
|         lambda ser: np.array(ser, copy=False), | ||||
|     ], | ||||
|     ids=["values", "asarray", "array"], | ||||
| ) | ||||
| def test_dataframe_values(using_copy_on_write, using_array_manager, method): | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     arr = method(df) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # .values still gives a view but is read-only | ||||
|         assert np.shares_memory(arr, get_array(df, "a")) | ||||
|         assert arr.flags.writeable is False | ||||
|  | ||||
|         # mutating series through arr therefore doesn't work | ||||
|         with pytest.raises(ValueError, match="read-only"): | ||||
|             arr[0, 0] = 0 | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|         # mutating the series itself still works | ||||
|         df.iloc[0, 0] = 0 | ||||
|         assert df.values[0, 0] == 0 | ||||
|     else: | ||||
|         assert arr.flags.writeable is True | ||||
|         arr[0, 0] = 0 | ||||
|         if not using_array_manager: | ||||
|             assert df.iloc[0, 0] == 0 | ||||
|         else: | ||||
|             tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|  | ||||
def test_series_to_numpy(using_copy_on_write):
    """Check view/read-only behaviour of ``Series.to_numpy``.

    Three calls are exercised: the default call, ``copy=True`` and
    ``dtype="float64"``. Only the default call depends on
    ``using_copy_on_write``; the other two must always return a writeable
    array that does not share memory with the Series.
    """
    ser = Series([1, 2, 3], name="name")
    ser_orig = ser.copy()

    # default: copy=False, no dtype or NAs
    arr = ser.to_numpy()
    if using_copy_on_write:
        # to_numpy still gives a view but is read-only
        assert np.shares_memory(arr, get_array(ser, "name"))
        assert arr.flags.writeable is False

        # mutating series through arr therefore doesn't work
        with pytest.raises(ValueError, match="read-only"):
            arr[0] = 0
        tm.assert_series_equal(ser, ser_orig)

        # mutating the series itself still works
        ser.iloc[0] = 0
        assert ser.values[0] == 0
    else:
        assert arr.flags.writeable is True
        arr[0] = 0
        assert ser.iloc[0] == 0

    # specify copy=True gives a writeable array
    ser = Series([1, 2, 3], name="name")
    arr = ser.to_numpy(copy=True)
    assert not np.shares_memory(arr, get_array(ser, "name"))
    assert arr.flags.writeable is True

    # specifying a dtype that already causes a copy also gives a writeable array
    ser = Series([1, 2, 3], name="name")
    arr = ser.to_numpy(dtype="float64")
    assert not np.shares_memory(arr, get_array(ser, "name"))
    assert arr.flags.writeable is True
|  | ||||
|  | ||||
@pytest.mark.parametrize("order", ["F", "C"])
def test_ravel_read_only(using_copy_on_write, order):
    """Check ``Series.ravel`` for both memory orders.

    The call must emit a FutureWarning matching "is deprecated". The result
    must always share memory with the Series and, when ``using_copy_on_write``
    is truthy, be read-only.
    """
    ser = Series([1, 2, 3])
    with tm.assert_produces_warning(FutureWarning, match="is deprecated"):
        arr = ser.ravel(order=order)
    if using_copy_on_write:
        assert arr.flags.writeable is False
    assert np.shares_memory(get_array(ser), arr)
|  | ||||
|  | ||||
def test_series_array_ea_dtypes(using_copy_on_write):
    """Check ``np.asarray`` on an "Int64" Series, with and without a dtype.

    In both calls the array must share memory with the Series; it must be
    read-only when ``using_copy_on_write`` is truthy and writeable otherwise.
    """
    ser = Series([1, 2, 3], dtype="Int64")
    arr = np.asarray(ser, dtype="int64")
    assert np.shares_memory(arr, get_array(ser))
    if using_copy_on_write:
        assert arr.flags.writeable is False
    else:
        assert arr.flags.writeable is True

    # Same checks without an explicit target dtype.
    arr = np.asarray(ser)
    assert np.shares_memory(arr, get_array(ser))
    if using_copy_on_write:
        assert arr.flags.writeable is False
    else:
        assert arr.flags.writeable is True
|  | ||||
|  | ||||
def test_dataframe_array_ea_dtypes(using_copy_on_write):
    """Check ``np.asarray`` on an "Int64" DataFrame, with and without a dtype.

    In both calls the array must share memory with column "a"; it must be
    read-only when ``using_copy_on_write`` is truthy and writeable otherwise.
    """
    df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
    arr = np.asarray(df, dtype="int64")
    assert np.shares_memory(arr, get_array(df, "a"))
    if using_copy_on_write:
        assert arr.flags.writeable is False
    else:
        assert arr.flags.writeable is True

    # Same checks without an explicit target dtype.
    arr = np.asarray(df)
    assert np.shares_memory(arr, get_array(df, "a"))
    if using_copy_on_write:
        assert arr.flags.writeable is False
    else:
        assert arr.flags.writeable is True
|  | ||||
|  | ||||
def test_dataframe_array_string_dtype(using_copy_on_write, using_array_manager):
    """Check ``np.asarray`` on a DataFrame with "string" dtype.

    Memory sharing with column "a" is only asserted when
    ``using_array_manager`` is falsy. The array must be read-only when
    ``using_copy_on_write`` is truthy and writeable otherwise.
    """
    df = DataFrame({"a": ["a", "b"]}, dtype="string")
    arr = np.asarray(df)
    if not using_array_manager:
        assert np.shares_memory(arr, get_array(df, "a"))
    if using_copy_on_write:
        assert arr.flags.writeable is False
    else:
        assert arr.flags.writeable is True
|  | ||||
|  | ||||
| def test_dataframe_multiple_numpy_dtypes(): | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1.5}) | ||||
|     arr = np.asarray(df) | ||||
|     assert not np.shares_memory(arr, get_array(df, "a")) | ||||
|     assert arr.flags.writeable is True | ||||
|  | ||||
|     if np_version_gt2: | ||||
|         # copy=False semantics are only supported in NumPy>=2. | ||||
|  | ||||
|         msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" | ||||
|         with pytest.raises(FutureWarning, match=msg): | ||||
|             arr = np.array(df, copy=False) | ||||
|  | ||||
|     arr = np.array(df, copy=True) | ||||
|     assert arr.flags.writeable is True | ||||
|  | ||||
|  | ||||
def test_dataframe_single_block_copy_true():
    """Check that ``np.array(df, copy=True)`` returns an independent array.

    The result must not share memory with column "a" and must be writeable.
    """
    # the copy=False/None cases are tested above in test_dataframe_values
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    arr = np.array(df, copy=True)
    assert not np.shares_memory(arr, get_array(df, "a"))
    assert arr.flags.writeable is True
|  | ||||
|  | ||||
def test_values_is_ea(using_copy_on_write):
    """Check the ``writeable`` flag of ``np.asarray`` on a datetime DataFrame.

    The array is expected to be read-only when ``using_copy_on_write`` is
    truthy and writeable otherwise.
    """
    df = DataFrame({"a": date_range("2012-01-01", periods=3)})
    arr = np.asarray(df)
    if using_copy_on_write:
        assert arr.flags.writeable is False
    else:
        assert arr.flags.writeable is True
|  | ||||
|  | ||||
def test_empty_dataframe():
    """Check that ``np.asarray`` on an empty DataFrame gives a writeable array."""
    df = DataFrame()
    arr = np.asarray(df)
    assert arr.flags.writeable is True
| @ -0,0 +1,287 @@ | ||||
| import pickle | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import HAS_PYARROW | ||||
| from pandas.compat.pyarrow import pa_version_under12p0 | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
def test_astype_single_dtype(using_copy_on_write):
    """Check memory sharing of ``df.astype("float64")`` on a mixed frame.

    Column "c" is built from a float; "a" and "b" from ints. With
    ``using_copy_on_write`` only "c" is expected to share memory with the
    original; without it no column may share memory. Writes to either frame
    must not show up in the other.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5})
    df_orig = df.copy()
    df2 = df.astype("float64")

    if using_copy_on_write:
        assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    else:
        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))

    # mutating df2 triggers a copy-on-write for that column/block
    df2.iloc[0, 2] = 5.5
    if using_copy_on_write:
        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
    tm.assert_frame_equal(df, df_orig)

    # mutating parent also doesn't update result
    df2 = df.astype("float64")
    df.iloc[0, 2] = 5.5
    tm.assert_frame_equal(df2, df_orig.astype("float64"))
|  | ||||
|  | ||||
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
@pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
    """Check memory sharing of ``astype`` between 64-bit integer dtypes.

    With ``using_copy_on_write`` the result is expected to share memory with
    the original until it is written to; without it the result must not share
    memory. Writes to either frame must not show up in the other.
    """
    # The pyarrow-backed target dtype needs pyarrow; skip if it is missing.
    if new_dtype == "int64[pyarrow]":
        pytest.importorskip("pyarrow")
    df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
    df_orig = df.copy()
    df2 = df.astype(new_dtype)

    if using_copy_on_write:
        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    else:
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))

    # mutating df2 triggers a copy-on-write for that column/block
    df2.iloc[0, 0] = 10
    if using_copy_on_write:
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    tm.assert_frame_equal(df, df_orig)

    # mutating parent also doesn't update result
    df2 = df.astype(new_dtype)
    df.iloc[0, 0] = 100
    tm.assert_frame_equal(df2, df_orig.astype(new_dtype))
|  | ||||
|  | ||||
@pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
def test_astype_different_target_dtype(using_copy_on_write, dtype):
    """Check that ``astype`` to a different dtype yields independent data.

    The result must never share memory with the original and, with
    ``using_copy_on_write``, its block must be flagged as having no reference.
    Writes to either frame must not show up in the other.
    """
    # The pyarrow-backed target dtype needs pyarrow; skip if it is missing.
    if dtype == "int32[pyarrow]":
        pytest.importorskip("pyarrow")
    df = DataFrame({"a": [1, 2, 3]})
    df_orig = df.copy()
    df2 = df.astype(dtype)

    assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    if using_copy_on_write:
        assert df2._mgr._has_no_reference(0)

    df2.iloc[0, 0] = 5
    tm.assert_frame_equal(df, df_orig)

    # mutating parent also doesn't update result
    df2 = df.astype(dtype)
    df.iloc[0, 0] = 100
    tm.assert_frame_equal(df2, df_orig.astype(dtype))
|  | ||||
|  | ||||
@td.skip_array_manager_invalid_test
def test_astype_numpy_to_ea():
    """Check that ``astype("Int64")`` shares memory when the CoW option is on.

    The conversion runs inside ``option_context("mode.copy_on_write", True)``,
    so this test does not use the ``using_copy_on_write`` fixture.
    """
    ser = Series([1, 2, 3])
    with pd.option_context("mode.copy_on_write", True):
        result = ser.astype("Int64")
    assert np.shares_memory(get_array(ser), get_array(result))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype, new_dtype", [("object", "string"), ("string", "object")]
)
def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype):
    """Check ``astype`` between "object" and "string", then write to the result.

    With ``using_copy_on_write`` the result is expected to share memory with
    the original; without it, it must not. A write to the result must leave
    the original unchanged.
    """
    df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
    df_orig = df.copy()
    df2 = df.astype(new_dtype)

    if using_copy_on_write:
        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    else:
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))

    df2.iloc[0, 0] = "x"
    tm.assert_frame_equal(df, df_orig)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype, new_dtype", [("object", "string"), ("string", "object")]
)
def test_astype_string_and_object_update_original(
    using_copy_on_write, dtype, new_dtype
):
    """Check ``astype`` between "object" and "string", then write to the original.

    With ``using_copy_on_write`` the result is expected to share memory with
    the original; without it, it must not. A write to the original must leave
    the converted frame unchanged.
    """
    df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
    df2 = df.astype(new_dtype)
    # Here the snapshot is of the converted frame, not of the original.
    df_orig = df2.copy()

    if using_copy_on_write:
        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    else:
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))

    df.iloc[0, 0] = "x"
    tm.assert_frame_equal(df2, df_orig)
|  | ||||
|  | ||||
def test_astype_str_copy_on_pickle_roundrip():
    """Check that ``astype(str)`` leaves a pickle-roundtripped Series unchanged.

    The ``astype`` result is discarded; the roundtripped Series must still
    equal the original afterwards.
    """
    # TODO(infer_string) this test can be removed after 3.0 (once str is the default)
    # https://github.com/pandas-dev/pandas/issues/54654
    # ensure_string_array may alter array inplace
    base = Series(np.array([(1, 2), None, 1], dtype="object"))
    base_copy = pickle.loads(pickle.dumps(base))
    base_copy.astype(str)
    tm.assert_series_equal(base, base_copy)
|  | ||||
|  | ||||
def test_astype_string_copy_on_pickle_roundrip(any_string_dtype):
    """Check that ``astype`` to a string dtype leaves the source Series unchanged.

    The ``astype`` result is discarded; the pickle-roundtripped Series must
    still equal the original afterwards.
    """
    # https://github.com/pandas-dev/pandas/issues/54654
    # ensure_string_array may alter array inplace
    base = Series(np.array([(1, 2), None, 1], dtype="object"))
    base_copy = pickle.loads(pickle.dumps(base))
    base_copy.astype(any_string_dtype)
    tm.assert_series_equal(base, base_copy)
|  | ||||
|  | ||||
def test_astype_string_read_only_on_pickle_roundrip(any_string_dtype):
    """Check ``astype`` to a string dtype on a Series with a read-only array.

    The roundtripped Series' array is marked non-writeable before the call;
    the Series must still equal the original afterwards.
    """
    # https://github.com/pandas-dev/pandas/issues/54654
    # ensure_string_array may alter read-only array inplace
    base = Series(np.array([(1, 2), None, 1], dtype="object"))
    base_copy = pickle.loads(pickle.dumps(base))
    base_copy._values.flags.writeable = False
    base_copy.astype(any_string_dtype)
    tm.assert_series_equal(base, base_copy)
|  | ||||
|  | ||||
def test_astype_dict_dtypes(using_copy_on_write):
    """Check memory sharing of ``astype`` with a per-column dtype mapping.

    "a" (ints) and "c" (already float64) are mapped to "float64"; "b" is not
    in the mapping. With ``using_copy_on_write`` "b" and "c" are expected to
    share memory with the original until written to, while "a" is not;
    without it no column may share memory. The original must stay unchanged.
    """
    df = DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}
    )
    df_orig = df.copy()
    df2 = df.astype({"a": "float64", "c": "float64"})

    if using_copy_on_write:
        assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
        assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
    else:
        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))

    # mutating df2 triggers a copy-on-write for that column/block
    df2.iloc[0, 2] = 5.5
    if using_copy_on_write:
        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))

    df2.iloc[0, 1] = 10
    if using_copy_on_write:
        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
    tm.assert_frame_equal(df, df_orig)
|  | ||||
|  | ||||
def test_astype_different_datetime_resos(using_copy_on_write):
    """Check ``astype("datetime64[ms]")`` on a ``date_range`` column.

    The result must not share memory with the original and, with
    ``using_copy_on_write``, its block must be flagged as having no reference.
    """
    df = DataFrame({"a": date_range("2019-12-31", periods=2, freq="D")})
    result = df.astype("datetime64[ms]")

    assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
    if using_copy_on_write:
        assert result._mgr._has_no_reference(0)
|  | ||||
|  | ||||
def test_astype_different_timezones(using_copy_on_write):
    """Check ``astype`` to another timezone at the same ("ns") resolution.

    With ``using_copy_on_write`` the result's block must be flagged as
    referenced and must share memory with the original. Nothing is asserted
    otherwise.
    """
    df = DataFrame(
        {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")}
    )
    result = df.astype("datetime64[ns, Europe/Berlin]")
    if using_copy_on_write:
        assert not result._mgr._has_no_reference(0)
        assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|  | ||||
|  | ||||
def test_astype_different_timezones_different_reso(using_copy_on_write):
    """Check ``astype`` to another timezone and to "ms" resolution.

    With ``using_copy_on_write`` the result's block must be flagged as having
    no reference and must not share memory with the original. Nothing is
    asserted otherwise.
    """
    df = DataFrame(
        {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")}
    )
    result = df.astype("datetime64[ms, Europe/Berlin]")
    if using_copy_on_write:
        assert result._mgr._has_no_reference(0)
        assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|  | ||||
|  | ||||
def test_astype_arrow_timestamp(using_copy_on_write):
    """Check ``astype("timestamp[ns][pyarrow]")`` on an "M8[ns]" column.

    Skipped when pyarrow cannot be imported. With ``using_copy_on_write`` the
    result's block must be flagged as referenced; whether the underlying
    pyarrow array shares memory with the original depends on
    ``pa_version_under12p0``. Nothing is asserted otherwise.
    """
    pytest.importorskip("pyarrow")
    df = DataFrame(
        {
            "a": [
                Timestamp("2020-01-01 01:01:01.000001"),
                Timestamp("2020-01-01 01:01:01.000001"),
            ]
        },
        dtype="M8[ns]",
    )
    result = df.astype("timestamp[ns][pyarrow]")
    if using_copy_on_write:
        assert not result._mgr._has_no_reference(0)
        # Memory is expected to be shared only when the flag is falsy.
        if pa_version_under12p0:
            assert not np.shares_memory(
                get_array(df, "a"), get_array(result, "a")._pa_array
            )
        else:
            assert np.shares_memory(
                get_array(df, "a"), get_array(result, "a")._pa_array
            )
|  | ||||
|  | ||||
def test_convert_dtypes_infer_objects(using_copy_on_write):
    """Check ``Series.convert_dtypes`` with all ``convert_*`` flags disabled.

    With ``using_copy_on_write`` the result is expected to share memory with
    the original; without it, it must not. A write to the result must leave
    the original unchanged.
    """
    ser = Series(["a", "b", "c"])
    ser_orig = ser.copy()
    result = ser.convert_dtypes(
        convert_integer=False,
        convert_boolean=False,
        convert_floating=False,
        convert_string=False,
    )

    if using_copy_on_write:
        assert tm.shares_memory(get_array(ser), get_array(result))
    else:
        assert not np.shares_memory(get_array(ser), get_array(result))

    result.iloc[0] = "x"
    tm.assert_series_equal(ser, ser_orig)
|  | ||||
|  | ||||
def test_convert_dtypes(using_copy_on_write, using_infer_string):
    """Check memory sharing of ``DataFrame.convert_dtypes`` with default options.

    With ``using_copy_on_write`` columns "b", "c" and "d" are expected to
    share memory with the original; column "a" is expected to share memory
    unless ``using_infer_string`` and ``HAS_PYARROW`` are both truthy. Without
    it no column may share memory. Writes to the result must leave the
    original unchanged.
    """
    df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]})
    df_orig = df.copy()
    df2 = df.convert_dtypes()

    if using_copy_on_write:
        if using_infer_string and HAS_PYARROW:
            # TODO the default nullable string dtype still uses python storage
            # this should be changed to pyarrow if installed
            assert not tm.shares_memory(get_array(df2, "a"), get_array(df, "a"))
        else:
            assert tm.shares_memory(get_array(df2, "a"), get_array(df, "a"))
        assert tm.shares_memory(get_array(df2, "d"), get_array(df, "d"))
        assert tm.shares_memory(get_array(df2, "b"), get_array(df, "b"))
        assert tm.shares_memory(get_array(df2, "c"), get_array(df, "c"))
    else:
        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
        assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
        assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
        assert not np.shares_memory(get_array(df2, "d"), get_array(df, "d"))

    df2.iloc[0, 0] = "x"
    df2.iloc[0, 1] = 10
    tm.assert_frame_equal(df, df_orig)
| @ -0,0 +1,254 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import PY311 | ||||
| from pandas.errors import ( | ||||
|     ChainedAssignmentError, | ||||
|     SettingWithCopyWarning, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     option_context, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_methods_iloc_warn(using_copy_on_write): | ||||
|     # Only exercised without CoW: each inplace method is called directly on | ||||
|     # the temporary df.iloc[:, 0] inside assert_cow_warning(match="A value"). | ||||
|     # Nothing is checked when using_copy_on_write is true. | ||||
|     if not using_copy_on_write: | ||||
|         df = DataFrame({"a": [1, 2, 3], "b": 1}) | ||||
|         with tm.assert_cow_warning(match="A value"): | ||||
|             df.iloc[:, 0].replace(1, 5, inplace=True) | ||||
|  | ||||
|         with tm.assert_cow_warning(match="A value"): | ||||
|             df.iloc[:, 0].fillna(1, inplace=True) | ||||
|  | ||||
|         with tm.assert_cow_warning(match="A value"): | ||||
|             df.iloc[:, 0].interpolate(inplace=True) | ||||
|  | ||||
|         with tm.assert_cow_warning(match="A value"): | ||||
|             df.iloc[:, 0].ffill(inplace=True) | ||||
|  | ||||
|         with tm.assert_cow_warning(match="A value"): | ||||
|             df.iloc[:, 0].bfill(inplace=True) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "func, args", | ||||
|     [ | ||||
|         ("replace", (4, 5)), | ||||
|         ("fillna", (1,)), | ||||
|         ("interpolate", ()), | ||||
|         ("bfill", ()), | ||||
|         ("ffill", ()), | ||||
|     ], | ||||
| ) | ||||
| def test_methods_iloc_getitem_item_cache( | ||||
|     func, args, using_copy_on_write, warn_copy_on_write | ||||
| ): | ||||
|     # ensure we don't incorrectly raise chained assignment warning because | ||||
|     # of the item cache / iloc not setting the item cache | ||||
|     df_orig = DataFrame({"a": [1, 2, 3], "b": 1}) | ||||
|  | ||||
|     # the first four scenarios bind the Series to a name before calling the | ||||
|     # inplace method; they run outside any warning-asserting context manager | ||||
|     df = df_orig.copy() | ||||
|     ser = df.iloc[:, 0] | ||||
|     getattr(ser, func)(*args, inplace=True) | ||||
|  | ||||
|     # parent that holds item_cache is dead, so don't increase ref count | ||||
|     df = df_orig.copy() | ||||
|     ser = df.copy()["a"] | ||||
|     getattr(ser, func)(*args, inplace=True) | ||||
|  | ||||
|     df = df_orig.copy() | ||||
|     df["a"]  # populate the item_cache | ||||
|     ser = df.iloc[:, 0]  # iloc creates a new object | ||||
|     getattr(ser, func)(*args, inplace=True) | ||||
|  | ||||
|     df = df_orig.copy() | ||||
|     df["a"]  # populate the item_cache | ||||
|     ser = df["a"] | ||||
|     getattr(ser, func)(*args, inplace=True) | ||||
|  | ||||
|     # the last two scenarios call the method on the temporary df["a"] | ||||
|     df = df_orig.copy() | ||||
|     df["a"]  # populate the item_cache | ||||
|     # TODO(CoW-warn) because of the usage of *args, this doesn't warn on Py3.11+ | ||||
|     if using_copy_on_write: | ||||
|         with tm.raises_chained_assignment_error(not PY311): | ||||
|             getattr(df["a"], func)(*args, inplace=True) | ||||
|     else: | ||||
|         with tm.assert_cow_warning(not PY311, match="A value"): | ||||
|             getattr(df["a"], func)(*args, inplace=True) | ||||
|  | ||||
|     df = df_orig.copy() | ||||
|     ser = df["a"]  # populate the item_cache and keep ref | ||||
|     if using_copy_on_write: | ||||
|         with tm.raises_chained_assignment_error(not PY311): | ||||
|             getattr(df["a"], func)(*args, inplace=True) | ||||
|     else: | ||||
|         # ideally also warns on the default mode, but the ser' _cacher | ||||
|         # messes up the refcount + even in warning mode this doesn't trigger | ||||
|         # the warning of Py3.11+ (see above) | ||||
|         with tm.assert_cow_warning(warn_copy_on_write and not PY311, match="A value"): | ||||
|             getattr(df["a"], func)(*args, inplace=True) | ||||
|  | ||||
|  | ||||
| def test_methods_iloc_getitem_item_cache_fillna( | ||||
|     using_copy_on_write, warn_copy_on_write | ||||
| ): | ||||
|     # ensure we don't incorrectly raise chained assignment warning because | ||||
|     # of the item cache / iloc not setting the item cache | ||||
|     # (fillna is called directly here, without getattr / *args, and the | ||||
|     # expectations below carry no PY311 condition) | ||||
|     df_orig = DataFrame({"a": [1, 2, 3], "b": 1}) | ||||
|  | ||||
|     # the first four scenarios bind the Series to a name first and run | ||||
|     # outside any warning-asserting context manager | ||||
|     df = df_orig.copy() | ||||
|     ser = df.iloc[:, 0] | ||||
|     ser.fillna(1, inplace=True) | ||||
|  | ||||
|     # parent that holds item_cache is dead, so don't increase ref count | ||||
|     df = df_orig.copy() | ||||
|     ser = df.copy()["a"] | ||||
|     ser.fillna(1, inplace=True) | ||||
|  | ||||
|     df = df_orig.copy() | ||||
|     df["a"]  # populate the item_cache | ||||
|     ser = df.iloc[:, 0]  # iloc creates a new object | ||||
|     ser.fillna(1, inplace=True) | ||||
|  | ||||
|     df = df_orig.copy() | ||||
|     df["a"]  # populate the item_cache | ||||
|     ser = df["a"] | ||||
|     ser.fillna(1, inplace=True) | ||||
|  | ||||
|     # the last two scenarios call fillna on the temporary df["a"] | ||||
|     df = df_orig.copy() | ||||
|     df["a"]  # populate the item_cache | ||||
|     if using_copy_on_write: | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].fillna(1, inplace=True) | ||||
|     else: | ||||
|         with tm.assert_cow_warning(match="A value"): | ||||
|             df["a"].fillna(1, inplace=True) | ||||
|  | ||||
|     df = df_orig.copy() | ||||
|     ser = df["a"]  # populate the item_cache and keep ref | ||||
|     if using_copy_on_write: | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].fillna(1, inplace=True) | ||||
|     else: | ||||
|         # TODO(CoW-warn) ideally also warns on the default mode, but the ser' _cacher | ||||
|         # messes up the refcount | ||||
|         with tm.assert_cow_warning(warn_copy_on_write, match="A value"): | ||||
|             df["a"].fillna(1, inplace=True) | ||||
|  | ||||
|  | ||||
| # TODO(CoW-warn) expand the cases | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] | ||||
| ) | ||||
| def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): | ||||
|     # ensure we only get a single warning for those typical cases of chained | ||||
|     # assignment | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1}) | ||||
|  | ||||
|     # using custom check instead of tm.assert_produces_warning because that doesn't | ||||
|     # fail if multiple warnings are raised | ||||
|     with pytest.warns() as record: | ||||
|         df["a"][indexer] = 0 | ||||
|     assert len(record) == 1 | ||||
|     # CoW: the single warning must be a ChainedAssignmentError; otherwise a | ||||
|     # FutureWarning whose message mentions "ChainedAssignmentError" | ||||
|     if using_copy_on_write: | ||||
|         assert record[0].category == ChainedAssignmentError | ||||
|     else: | ||||
|         assert record[0].category == FutureWarning | ||||
|         assert "ChainedAssignmentError" in record[0].message.args[0] | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings("ignore::pandas.errors.SettingWithCopyWarning") | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", ["a", ["a", "b"], slice(0, 2), np.array([True, False, True])] | ||||
| ) | ||||
| def test_frame_setitem(indexer, using_copy_on_write): | ||||
|     # Chained setitem on a row slice of the frame: df[0:3][indexer] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1}) | ||||
|  | ||||
|     # without CoW, SettingWithCopyWarning is passed along as an extra warning | ||||
|     extra_warnings = () if using_copy_on_write else (SettingWithCopyWarning,) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(extra_warnings=extra_warnings): | ||||
|             df[0:3][indexer] = 10 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] | ||||
| ) | ||||
| def test_series_iloc_setitem(indexer): | ||||
|     # Chained assignment through a column's .iloc: df["a"].iloc[indexer] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1}) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].iloc[indexer] = 0 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] | ||||
| ) | ||||
| def test_frame_iloc_setitem(indexer, using_copy_on_write): | ||||
|     # Chained assignment through .iloc of a row slice: df[0:3].iloc[indexer] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1}) | ||||
|  | ||||
|     # without CoW, SettingWithCopyWarning is passed along as an extra warning | ||||
|     extra_warnings = () if using_copy_on_write else (SettingWithCopyWarning,) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(extra_warnings=extra_warnings): | ||||
|             df[0:3].iloc[indexer] = 10 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] | ||||
| ) | ||||
| def test_series_loc_setitem(indexer): | ||||
|     # Chained assignment through a column's .loc: df["a"].loc[indexer] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1}) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].loc[indexer] = 0 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", [0, [0, 1], (0, "a"), slice(0, 2), np.array([True, False, True])] | ||||
| ) | ||||
| def test_frame_loc_setitem(indexer, using_copy_on_write): | ||||
|     # Chained assignment through .loc of a row slice: df[0:3].loc[indexer] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1}) | ||||
|  | ||||
|     # without CoW, SettingWithCopyWarning is passed along as an extra warning | ||||
|     extra_warnings = () if using_copy_on_write else (SettingWithCopyWarning,) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(extra_warnings=extra_warnings): | ||||
|             df[0:3].loc[indexer] = 10 | ||||
|  | ||||
|  | ||||
| def test_series_at_setitem(): | ||||
|     # Chained assignment through a column's .at: df["a"].at[0] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1}) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].at[0] = 0 | ||||
|  | ||||
|  | ||||
| def test_frame_at_setitem(): | ||||
|     # Chained assignment through .at of a row slice: df[0:3].at[0, "a"] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1}) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df[0:3].at[0, "a"] = 10 | ||||
|  | ||||
|  | ||||
| def test_series_iat_setitem(): | ||||
|     # Chained assignment through a column's .iat: df["a"].iat[0] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1}) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].iat[0] = 0 | ||||
|  | ||||
|  | ||||
| def test_frame_iat_setitem(): | ||||
|     # Chained assignment through .iat of a row slice: df[0:3].iat[0, 0] = ... | ||||
|     df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1}) | ||||
|  | ||||
|     with option_context("chained_assignment", "warn"): | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df[0:3].iat[0, 0] = 10 | ||||
							
								
								
									
										101
									
								
								lib/python3.11/site-packages/pandas/tests/copy_view/test_clip.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								lib/python3.11/site-packages/pandas/tests/copy_view/test_clip.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,101 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     option_context, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
| def test_clip_inplace_reference(using_copy_on_write, warn_copy_on_write): | ||||
|     # Inplace clip that changes a value (1.5 -> 2) while `view = df[:]` is | ||||
|     # still alive. | ||||
|     df = DataFrame({"a": [1.5, 2, 3]}) | ||||
|     df_copy = df.copy() | ||||
|     arr_a = get_array(df, "a") | ||||
|     view = df[:] | ||||
|     if warn_copy_on_write: | ||||
|         with tm.assert_cow_warning(): | ||||
|             df.clip(lower=2, inplace=True) | ||||
|     else: | ||||
|         df.clip(lower=2, inplace=True) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # df moved to new memory, neither object tracks a reference any more | ||||
|         # and the view still holds the pre-clip values | ||||
|         assert not np.shares_memory(get_array(df, "a"), arr_a) | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         assert view._mgr._has_no_reference(0) | ||||
|         tm.assert_frame_equal(df_copy, view) | ||||
|     else: | ||||
|         # without CoW df keeps using the original array | ||||
|         assert np.shares_memory(get_array(df, "a"), arr_a) | ||||
|  | ||||
|  | ||||
| def test_clip_inplace_reference_no_op(using_copy_on_write): | ||||
|     # Inplace clip with lower=0, below every value in the column, while | ||||
|     # `view = df[:]` is still alive. | ||||
|     df = DataFrame({"a": [1.5, 2, 3]}) | ||||
|     df_copy = df.copy() | ||||
|     arr_a = get_array(df, "a") | ||||
|     view = df[:] | ||||
|     df.clip(lower=0, inplace=True) | ||||
|  | ||||
|     # in both modes df must still use the original array | ||||
|     assert np.shares_memory(get_array(df, "a"), arr_a) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # df and view are both expected to still track a reference | ||||
|         assert not df._mgr._has_no_reference(0) | ||||
|         assert not view._mgr._has_no_reference(0) | ||||
|         tm.assert_frame_equal(df_copy, view) | ||||
|  | ||||
|  | ||||
| def test_clip_inplace(using_copy_on_write): | ||||
|     # Inplace clip with no other object created from df: the frame must keep | ||||
|     # using its original array. | ||||
|     df = DataFrame({"a": [1.5, 2, 3]}) | ||||
|     arr_a = get_array(df, "a") | ||||
|     df.clip(lower=2, inplace=True) | ||||
|  | ||||
|     assert np.shares_memory(get_array(df, "a"), arr_a) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| def test_clip(using_copy_on_write): | ||||
|     # Non-inplace clip that changes a value (1.5 -> 2): the result must not | ||||
|     # share memory with the source, and the source must stay unchanged. | ||||
|     df = DataFrame({"a": [1.5, 2, 3]}) | ||||
|     df_orig = df.copy() | ||||
|     df2 = df.clip(lower=2) | ||||
|  | ||||
|     assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|     tm.assert_frame_equal(df_orig, df) | ||||
|  | ||||
|  | ||||
| def test_clip_no_op(using_copy_on_write): | ||||
|     # Non-inplace clip with lower=0, below every value in the column. | ||||
|     df = DataFrame({"a": [1.5, 2, 3]}) | ||||
|     df2 = df.clip(lower=0) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # result shares memory with df, and df tracks that reference | ||||
|         assert not df._mgr._has_no_reference(0) | ||||
|         assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_clip_chained_inplace(using_copy_on_write): | ||||
|     # Inplace clip called on temporaries obtained by indexing df. | ||||
|     df = DataFrame({"a": [1, 4, 2], "b": 1}) | ||||
|     df_orig = df.copy() | ||||
|     if using_copy_on_write: | ||||
|         # both forms must raise the chained-assignment error and leave df | ||||
|         # unchanged | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].clip(1, 2, inplace=True) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df[["a"]].clip(1, 2, inplace=True) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|     else: | ||||
|         # only the single-column form is expected to warn | ||||
|         with tm.assert_produces_warning(FutureWarning, match="inplace method"): | ||||
|             df["a"].clip(1, 2, inplace=True) | ||||
|  | ||||
|         # the remaining two forms must produce no warning at all with | ||||
|         # mode.chained_assignment set to None | ||||
|         with tm.assert_produces_warning(None): | ||||
|             with option_context("mode.chained_assignment", None): | ||||
|                 df[["a"]].clip(1, 2, inplace=True) | ||||
|  | ||||
|         with tm.assert_produces_warning(None): | ||||
|             with option_context("mode.chained_assignment", None): | ||||
|                 df[df["a"] > 1].clip(1, 2, inplace=True) | ||||
| @ -0,0 +1,382 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Period, | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     TimedeltaIndex, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
| # ----------------------------------------------------------------------------- | ||||
| # Copy/view behaviour for Series / DataFrame constructors | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [None, "int64"]) | ||||
| def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write): | ||||
|     # Case: constructing a Series from another Series object follows CoW rules: | ||||
|     # a new object is returned and thus mutations are not propagated | ||||
|     ser = Series([1, 2, 3], name="name") | ||||
|  | ||||
|     # default is copy=False -> new Series is a shallow copy / view of original | ||||
|     result = Series(ser, dtype=dtype) | ||||
|  | ||||
|     # the shallow copy still shares memory | ||||
|     assert np.shares_memory(get_array(ser), get_array(result)) | ||||
|  | ||||
|     # under CoW the result's block must track a reference | ||||
|     if using_copy_on_write: | ||||
|         assert result._mgr.blocks[0].refs.has_reference() | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # mutating new series copy doesn't mutate original | ||||
|         result.iloc[0] = 0 | ||||
|         assert ser.iloc[0] == 1 | ||||
|         # mutating triggered a copy-on-write -> no longer shares memory | ||||
|         assert not np.shares_memory(get_array(ser), get_array(result)) | ||||
|     else: | ||||
|         # mutating shallow copy does mutate original | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             result.iloc[0] = 0 | ||||
|         assert ser.iloc[0] == 0 | ||||
|         # and still shares memory | ||||
|         assert np.shares_memory(get_array(ser), get_array(result)) | ||||
|  | ||||
|     # the same when modifying the parent | ||||
|     result = Series(ser, dtype=dtype) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # mutating original doesn't mutate new series | ||||
|         ser.iloc[0] = 0 | ||||
|         assert result.iloc[0] == 1 | ||||
|     else: | ||||
|         # mutating original does mutate shallow copy | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             ser.iloc[0] = 0 | ||||
|         assert result.iloc[0] == 0 | ||||
|  | ||||
|  | ||||
| def test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write): | ||||
|     # Case: constructing a Series from another Series with specifying an index | ||||
|     # that potentially requires a reindex of the values | ||||
|     ser = Series([1, 2, 3], name="name") | ||||
|  | ||||
|     # passing an index that doesn't actually require a reindex of the values | ||||
|     # -> without CoW we get an actual mutating view | ||||
|     for index in [ | ||||
|         ser.index, | ||||
|         ser.index.copy(), | ||||
|         list(ser.index), | ||||
|         ser.index.rename("idx"), | ||||
|     ]: | ||||
|         result = Series(ser, index=index) | ||||
|         assert np.shares_memory(ser.values, result.values) | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             result.iloc[0] = 0 | ||||
|         # CoW keeps the parent's first value; otherwise the write goes | ||||
|         # through to `ser` | ||||
|         if using_copy_on_write: | ||||
|             assert ser.iloc[0] == 1 | ||||
|         else: | ||||
|             assert ser.iloc[0] == 0 | ||||
|  | ||||
|     # ensure that if an actual reindex is needed, we don't have any refs | ||||
|     # (mutating the result wouldn't trigger CoW) | ||||
|     result = Series(ser, index=[0, 1, 2, 3]) | ||||
|     assert not np.shares_memory(ser.values, result.values) | ||||
|     if using_copy_on_write: | ||||
|         assert not result._mgr.blocks[0].refs.has_reference() | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("fastpath", [False, True]) | ||||
| @pytest.mark.parametrize("dtype", [None, "int64"]) | ||||
| @pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)]) | ||||
| @pytest.mark.parametrize( | ||||
|     "arr", [np.array([1, 2, 3], dtype="int64"), pd.array([1, 2, 3], dtype="Int64")] | ||||
| ) | ||||
| def test_series_from_array(using_copy_on_write, idx, dtype, fastpath, arr): | ||||
|     # Series built from a numpy / masked array: under CoW it must not share | ||||
|     # memory with the input, so a later write to `arr` is not visible in it. | ||||
|     # fastpath=True is only kept when an index is given and dtype is None | ||||
|     if idx is None or dtype is not None: | ||||
|         fastpath = False | ||||
|     # the deprecation warning is asserted for every parametrization, since | ||||
|     # the keyword is always passed explicitly | ||||
|     msg = "The 'fastpath' keyword in pd.Series is deprecated" | ||||
|     with tm.assert_produces_warning(DeprecationWarning, match=msg): | ||||
|         ser = Series(arr, dtype=dtype, index=idx, fastpath=fastpath) | ||||
|     ser_orig = ser.copy() | ||||
|     # compare against arr._data when the array has that attribute, else arr | ||||
|     data = getattr(arr, "_data", arr) | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(ser), data) | ||||
|     else: | ||||
|         assert np.shares_memory(get_array(ser), data) | ||||
|  | ||||
|     arr[0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         tm.assert_series_equal(ser, ser_orig) | ||||
|     else: | ||||
|         expected = Series([100, 2, 3], dtype=dtype if dtype is not None else arr.dtype) | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("copy", [True, False, None]) | ||||
| def test_series_from_array_different_dtype(using_copy_on_write, copy): | ||||
|     # Requesting int32 from an int64 array must never share memory, for | ||||
|     # every parametrized value of `copy` | ||||
|     arr = np.array([1, 2, 3], dtype="int64") | ||||
|     ser = Series(arr, dtype="int32", copy=copy) | ||||
|     assert not np.shares_memory(get_array(ser), arr) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "idx", | ||||
|     [ | ||||
|         Index([1, 2]), | ||||
|         DatetimeIndex([Timestamp("2019-12-31"), Timestamp("2020-12-31")]), | ||||
|         PeriodIndex([Period("2019-12-31"), Period("2020-12-31")]), | ||||
|         TimedeltaIndex([Timedelta("1 days"), Timedelta("2 days")]), | ||||
|     ], | ||||
| ) | ||||
| def test_series_from_index(using_copy_on_write, idx): | ||||
|     # Series built from an Index: shares memory (with a tracked reference) | ||||
|     # only under CoW. | ||||
|     ser = Series(idx) | ||||
|     expected = idx.copy(deep=True) | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(ser), get_array(idx)) | ||||
|         assert not ser._mgr._has_no_reference(0) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(ser), get_array(idx)) | ||||
|     # in both modes, writing to the Series must leave the Index unchanged | ||||
|     ser.iloc[0] = ser.iloc[1] | ||||
|     tm.assert_index_equal(idx, expected) | ||||
|  | ||||
|  | ||||
| def test_series_from_index_different_dtypes(using_copy_on_write): | ||||
|     # Requesting int32 from an int64 Index: no shared memory, and under CoW | ||||
|     # no reference is tracked on the result. | ||||
|     idx = Index([1, 2, 3], dtype="int64") | ||||
|     ser = Series(idx, dtype="int32") | ||||
|     assert not np.shares_memory(get_array(ser), get_array(idx)) | ||||
|     if using_copy_on_write: | ||||
|         assert ser._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") | ||||
| @pytest.mark.parametrize("fastpath", [False, True]) | ||||
| @pytest.mark.parametrize("dtype", [None, "int64"]) | ||||
| @pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)]) | ||||
| def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath): | ||||
|     # Series built from another Series' manager (ser._mgr): the two share | ||||
|     # memory in both modes; under CoW a reference is tracked so that a write | ||||
|     # to the new Series leaves the original unchanged. | ||||
|     ser = Series([1, 2, 3], dtype="int64") | ||||
|     ser_orig = ser.copy() | ||||
|     msg = "The 'fastpath' keyword in pd.Series is deprecated" | ||||
|     with tm.assert_produces_warning(DeprecationWarning, match=msg): | ||||
|         ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath, index=idx) | ||||
|     assert np.shares_memory(get_array(ser), get_array(ser2)) | ||||
|     if using_copy_on_write: | ||||
|         assert not ser2._mgr._has_no_reference(0) | ||||
|  | ||||
|     ser2.iloc[0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         tm.assert_series_equal(ser, ser_orig) | ||||
|     else: | ||||
|         # without CoW the write is visible through the original Series | ||||
|         expected = Series([100, 2, 3]) | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|  | ||||
| def test_series_from_block_manager_different_dtype(using_copy_on_write): | ||||
|     # Passing ser._mgr together with a different dtype (int32 vs int64): | ||||
|     # a DeprecationWarning is expected, the result must not share memory and, | ||||
|     # under CoW, must track no reference. | ||||
|     ser = Series([1, 2, 3], dtype="int64") | ||||
|     msg = "Passing a SingleBlockManager to Series" | ||||
|     with tm.assert_produces_warning(DeprecationWarning, match=msg): | ||||
|         ser2 = Series(ser._mgr, dtype="int32") | ||||
|     assert not np.shares_memory(get_array(ser), get_array(ser2)) | ||||
|     if using_copy_on_write: | ||||
|         assert ser2._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("use_mgr", [True, False]) | ||||
| @pytest.mark.parametrize("columns", [None, ["a"]]) | ||||
| def test_dataframe_constructor_mgr_or_df( | ||||
|     using_copy_on_write, warn_copy_on_write, columns, use_mgr | ||||
| ): | ||||
|     # DataFrame built from either another DataFrame or its manager (df._mgr). | ||||
|     # NOTE(review): `columns` is parametrized but never passed to | ||||
|     # DataFrame(...) in this body - confirm whether that is intended. | ||||
|     df = DataFrame({"a": [1, 2, 3]}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     # a DeprecationWarning is only expected when the manager is passed | ||||
|     if use_mgr: | ||||
|         data = df._mgr | ||||
|         warn = DeprecationWarning | ||||
|     else: | ||||
|         data = df | ||||
|         warn = None | ||||
|     msg = "Passing a BlockManager to DataFrame" | ||||
|     with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): | ||||
|         new_df = DataFrame(data) | ||||
|  | ||||
|     assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) | ||||
|     with tm.assert_cow_warning(warn_copy_on_write and not use_mgr): | ||||
|         new_df.iloc[0] = 100 | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # the write detached new_df; the source is unchanged | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|     else: | ||||
|         # still one shared buffer, so both frames show the write | ||||
|         assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) | ||||
|         tm.assert_frame_equal(df, new_df) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [None, "int64", "Int64"]) | ||||
| @pytest.mark.parametrize("index", [None, [0, 1, 2]]) | ||||
| @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]]) | ||||
| def test_dataframe_from_dict_of_series( | ||||
|     request, using_copy_on_write, warn_copy_on_write, columns, index, dtype | ||||
| ): | ||||
|     # Case: constructing a DataFrame from Series objects with copy=False | ||||
|     # has to do a lazy copy following CoW rules | ||||
|     # (the default for DataFrame(dict) is still to copy to ensure consolidation) | ||||
|     # NOTE(review): the `request` fixture is not used in this body | ||||
|     s1 = Series([1, 2, 3]) | ||||
|     s2 = Series([4, 5, 6]) | ||||
|     s1_orig = s1.copy() | ||||
|     expected = DataFrame( | ||||
|         {"a": [1, 2, 3], "b": [4, 5, 6]}, index=index, columns=columns, dtype=dtype | ||||
|     ) | ||||
|  | ||||
|     result = DataFrame( | ||||
|         {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False | ||||
|     ) | ||||
|  | ||||
|     # the shallow copy still shares memory | ||||
|     assert np.shares_memory(get_array(result, "a"), get_array(s1)) | ||||
|  | ||||
|     # mutating the new dataframe doesn't mutate original under CoW; | ||||
|     # without CoW the write is visible in s1 | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         result.iloc[0, 0] = 10 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(s1)) | ||||
|         tm.assert_series_equal(s1, s1_orig) | ||||
|     else: | ||||
|         assert s1.iloc[0] == 10 | ||||
|  | ||||
|     # the same when modifying the parent series | ||||
|     s1 = Series([1, 2, 3]) | ||||
|     s2 = Series([4, 5, 6]) | ||||
|     result = DataFrame( | ||||
|         {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False | ||||
|     ) | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         s1.iloc[0] = 10 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(s1)) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|     else: | ||||
|         assert result.iloc[0, 0] == 10 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [None, "int64"]) | ||||
| def test_dataframe_from_dict_of_series_with_reindex(dtype): | ||||
|     # Case: constructing a DataFrame from Series objects with copy=False | ||||
|     # and passing an index that requires an actual (no-view) reindex -> need | ||||
|     # to ensure the result doesn't have refs set up to unnecessarily trigger | ||||
|     # a copy on write | ||||
|     s1 = Series([1, 2, 3]) | ||||
|     s2 = Series([4, 5, 6]) | ||||
|     df = DataFrame({"a": s1, "b": s2}, index=[1, 2, 3], dtype=dtype, copy=False) | ||||
|  | ||||
|     # df should own its memory, so mutating shouldn't trigger a copy | ||||
|     arr_before = get_array(df, "a") | ||||
|     assert not np.shares_memory(arr_before, get_array(s1)) | ||||
|     df.iloc[0, 0] = 100 | ||||
|     # same buffer before and after the write means no copy was made | ||||
|     arr_after = get_array(df, "a") | ||||
|     assert np.shares_memory(arr_before, arr_after) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("cons", [Series, Index]) | ||||
| @pytest.mark.parametrize( | ||||
|     "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], object)] | ||||
| ) | ||||
| def test_dataframe_from_series_or_index( | ||||
|     using_copy_on_write, warn_copy_on_write, data, dtype, cons | ||||
| ): | ||||
|     # DataFrame built from a Series or an Index with a matching dtype: | ||||
|     # memory is shared, and under CoW a reference is tracked. | ||||
|     obj = cons(data, dtype=dtype) | ||||
|     obj_orig = obj.copy() | ||||
|     df = DataFrame(obj, dtype=dtype) | ||||
|     assert np.shares_memory(get_array(obj), get_array(df, 0)) | ||||
|     if using_copy_on_write: | ||||
|         assert not df._mgr._has_no_reference(0) | ||||
|  | ||||
|     # overwrite the first cell with the last element of `data` | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df.iloc[0, 0] = data[-1] | ||||
|     # the source object is only checked for being unchanged under CoW | ||||
|     if using_copy_on_write: | ||||
|         tm.assert_equal(obj, obj_orig) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("cons", [Series, Index]) | ||||
| def test_dataframe_from_series_or_index_different_dtype(using_copy_on_write, cons): | ||||
|     # Requesting int32 from an int64 Series / Index: no shared memory, and | ||||
|     # under CoW no reference is tracked on the result. | ||||
|     obj = cons([1, 2], dtype="int64") | ||||
|     df = DataFrame(obj, dtype="int32") | ||||
|     assert not np.shares_memory(get_array(obj), get_array(df, 0)) | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| def test_dataframe_from_series_infer_datetime(using_copy_on_write): | ||||
|     # DataFrame built from an object-dtype Series holding Timestamps: a | ||||
|     # FutureWarning matching "Dtype inference" is expected, the result must | ||||
|     # not share memory and, under CoW, must track no reference. | ||||
|     ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object) | ||||
|     with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): | ||||
|         df = DataFrame(ser) | ||||
|     assert not np.shares_memory(get_array(ser), get_array(df, 0)) | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("index", [None, [0, 1, 2]]) | ||||
| def test_dataframe_from_dict_of_series_with_dtype(index): | ||||
|     # Variant of above, but now passing a dtype that causes a copy | ||||
|     # -> need to ensure the result doesn't have refs set up to unnecessarily | ||||
|     # trigger a copy on write | ||||
|     # (s1 holds floats while dtype="int64" is requested) | ||||
|     s1 = Series([1.0, 2.0, 3.0]) | ||||
|     s2 = Series([4, 5, 6]) | ||||
|     df = DataFrame({"a": s1, "b": s2}, index=index, dtype="int64", copy=False) | ||||
|  | ||||
|     # df should own its memory, so mutating shouldn't trigger a copy | ||||
|     arr_before = get_array(df, "a") | ||||
|     assert not np.shares_memory(arr_before, get_array(s1)) | ||||
|     df.iloc[0, 0] = 100 | ||||
|     # same buffer before and after the write means no copy was made | ||||
|     arr_after = get_array(df, "a") | ||||
|     assert np.shares_memory(arr_before, arr_after) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("copy", [False, None, True]) | ||||
| def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager): | ||||
|     # DataFrame built from a 2D numpy array for each `copy` setting. | ||||
|     arr = np.array([[1, 2], [3, 4]]) | ||||
|     df = DataFrame(arr, copy=copy) | ||||
|  | ||||
|     # `and` binds tighter than `or`, so the condition reads: | ||||
|     #   (CoW and copy is not False) or copy is True | ||||
|     #   or (array manager and copy is None) | ||||
|     if ( | ||||
|         using_copy_on_write | ||||
|         and copy is not False | ||||
|         or copy is True | ||||
|         or (using_array_manager and copy is None) | ||||
|     ): | ||||
|         assert not np.shares_memory(get_array(df, 0), arr) | ||||
|     else: | ||||
|         assert np.shares_memory(get_array(df, 0), arr) | ||||
|  | ||||
|  | ||||
| def test_dataframe_from_records_with_dataframe(using_copy_on_write, warn_copy_on_write): | ||||
|     # DataFrame.from_records(df) is expected to emit a FutureWarning and to | ||||
|     # return a frame whose column "a" shares memory with the input frame. | ||||
|     df = DataFrame({"a": [1, 2, 3]}) | ||||
|     df_orig = df.copy() | ||||
|     with tm.assert_produces_warning(FutureWarning): | ||||
|         df2 = DataFrame.from_records(df) | ||||
|     if using_copy_on_write: | ||||
|         # With CoW the shared data must be tracked as referenced on df. | ||||
|         assert not df._mgr._has_no_reference(0) | ||||
|     assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|     # In warn mode the write into df2 is expected to raise the CoW warning. | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df2.iloc[0, 0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         # CoW: the write must not leak into the original frame. | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|     else: | ||||
|         # No CoW: both frames alias the same data, so df sees the write too. | ||||
|         tm.assert_frame_equal(df, df2) | ||||
|  | ||||
|  | ||||
| def test_frame_from_dict_of_index(using_copy_on_write): | ||||
|     # A DataFrame built with copy=False from a dict holding an Index shares the | ||||
|     # Index's values; with CoW a later write must leave the Index untouched. | ||||
|     idx = Index([1, 2, 3]) | ||||
|     expected = idx.copy(deep=True) | ||||
|     df = DataFrame({"a": idx}, copy=False) | ||||
|     assert np.shares_memory(get_array(df, "a"), idx._values) | ||||
|     if using_copy_on_write: | ||||
|         # The shared data must be tracked as referenced ... | ||||
|         assert not df._mgr._has_no_reference(0) | ||||
|  | ||||
|         # ... so that writing into the frame does not modify the Index. | ||||
|         df.iloc[0, 0] = 100 | ||||
|         tm.assert_index_equal(idx, expected) | ||||
| @ -0,0 +1,106 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import DataFrame | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
| def test_assigning_to_same_variable_removes_references(using_copy_on_write): | ||||
|     # Rebinding ``df`` to the result of reset_index() drops the only name bound | ||||
|     # to the original frame; position 1 (column "a") must then report no | ||||
|     # reference under CoW. | ||||
|     df = DataFrame({"a": [1, 2, 3]}) | ||||
|     df = df.reset_index() | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(1) | ||||
|     arr = get_array(df, "a") | ||||
|     df.iloc[0, 1] = 100  # Write into a | ||||
|  | ||||
|     # Same buffer before and after -> the write did not trigger a copy. | ||||
|     assert np.shares_memory(arr, get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_setitem_dont_track_unnecessary_references(using_copy_on_write): | ||||
|     # Overwriting column "b" must not leave column "a" looking referenced: | ||||
|     # a later write into "a" is asserted to reuse the same buffer. | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) | ||||
|  | ||||
|     df["b"] = 100 | ||||
|     arr = get_array(df, "a") | ||||
|     # We split the block in setitem, if we are not careful the new blocks will | ||||
|     # reference each other triggering a copy | ||||
|     df.iloc[0, 0] = 100 | ||||
|     assert np.shares_memory(arr, get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_setitem_with_view_copies(using_copy_on_write, warn_copy_on_write): | ||||
|     # While ``view`` is alive, a write into df after a column setitem must copy | ||||
|     # under CoW so that the view keeps the original values. | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) | ||||
|     view = df[:] | ||||
|     expected = df.copy() | ||||
|  | ||||
|     df["b"] = 100 | ||||
|     arr = get_array(df, "a") | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df.iloc[0, 0] = 100  # Check that we correctly track reference | ||||
|     if using_copy_on_write: | ||||
|         # Column "a" now lives in a new buffer and the view is unchanged. | ||||
|         assert not np.shares_memory(arr, get_array(df, "a")) | ||||
|         tm.assert_frame_equal(view, expected) | ||||
|  | ||||
|  | ||||
| def test_setitem_with_view_invalidated_does_not_copy( | ||||
|     using_copy_on_write, warn_copy_on_write, request | ||||
| ): | ||||
|     # A view is created and then dropped before writing into df; ideally the | ||||
|     # write would then happen in place. Under CoW this is currently marked as | ||||
|     # an expected failure (see the xfail applied below). | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) | ||||
|     view = df[:] | ||||
|  | ||||
|     df["b"] = 100 | ||||
|     arr = get_array(df, "a") | ||||
|     # Drop the only name bound to the view. | ||||
|     view = None  # noqa: F841 | ||||
|     # TODO(CoW-warn) false positive? -> block gets split because of `df["b"] = 100` | ||||
|     # which introduces additional refs, even when those of `view` go out of scope | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df.iloc[0, 0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         # Setitem split the block. Since the old block shared data with view | ||||
|         # all the new blocks are referencing view and each other. When view | ||||
|         # goes out of scope, they don't share data with any other block, | ||||
|         # so we should not trigger a copy | ||||
|         # The xfail marker is attached at runtime, only on the CoW path. | ||||
|         mark = pytest.mark.xfail( | ||||
|             reason="blk.delete does not track references correctly" | ||||
|         ) | ||||
|         request.applymarker(mark) | ||||
|         assert np.shares_memory(arr, get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_out_of_scope(using_copy_on_write): | ||||
|     # A column subset is returned from a helper whose local parent frame is | ||||
|     # no longer bound once the helper returns; under CoW neither of the | ||||
|     # result's first two blocks may then report a reference. | ||||
|     def func(): | ||||
|         df = DataFrame({"a": [1, 2], "b": 1.5, "c": 1}) | ||||
|         # create some subset | ||||
|         result = df[["a", "b"]] | ||||
|         return result | ||||
|  | ||||
|     result = func() | ||||
|     if using_copy_on_write: | ||||
|         assert not result._mgr.blocks[0].refs.has_reference() | ||||
|         assert not result._mgr.blocks[1].refs.has_reference() | ||||
|  | ||||
|  | ||||
| def test_delete(using_copy_on_write): | ||||
|     # Deleting a column, then rebinding df to a one-column subset: under CoW | ||||
|     # none of the inspected blocks may report a reference at either step. | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"] | ||||
|     ) | ||||
|     del df["b"] | ||||
|     if using_copy_on_write: | ||||
|         # Two blocks are inspected after the middle column was removed. | ||||
|         assert not df._mgr.blocks[0].refs.has_reference() | ||||
|         assert not df._mgr.blocks[1].refs.has_reference() | ||||
|  | ||||
|     # Rebinding drops the name bound to the previous frame. | ||||
|     df = df[["a"]] | ||||
|     if using_copy_on_write: | ||||
|         assert not df._mgr.blocks[0].refs.has_reference() | ||||
|  | ||||
|  | ||||
| def test_delete_reference(using_copy_on_write): | ||||
|     # Deleting a column while another frame (``x = df[:]``) is still alive: | ||||
|     # under CoW the blocks of df and of x must all report a reference. | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"] | ||||
|     ) | ||||
|     x = df[:] | ||||
|     del df["b"] | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr.blocks[0].refs.has_reference() | ||||
|         assert df._mgr.blocks[1].refs.has_reference() | ||||
|         assert x._mgr.blocks[0].refs.has_reference() | ||||
| @ -0,0 +1,397 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     concat, | ||||
|     merge, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
| def test_concat_frames(using_copy_on_write): | ||||
|     # concat(axis=1) of two frames: with CoW the result shares each input's | ||||
|     # column until the result is written to; without CoW nothing is shared. | ||||
|     df = DataFrame({"b": ["a"] * 3}, dtype=object) | ||||
|     df2 = DataFrame({"a": ["a"] * 3}, dtype=object) | ||||
|     df_orig = df.copy() | ||||
|     result = concat([df, df2], axis=1) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(df, "b")) | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     # Write into the first result column ("b"). | ||||
|     result.iloc[0, 0] = "d" | ||||
|     if using_copy_on_write: | ||||
|         # Only the written column is detached; "a" is still shared. | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     # Write into the second result column ("a"). | ||||
|     result.iloc[0, 1] = "d" | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) | ||||
|     # In both modes the first input frame must be unchanged. | ||||
|     tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|  | ||||
| def test_concat_frames_updating_input(using_copy_on_write): | ||||
|     # Like the concat(axis=1) sharing check, but the writes go into the input | ||||
|     # frames; the concatenated result must keep its original values. | ||||
|     df = DataFrame({"b": ["a"] * 3}, dtype=object) | ||||
|     df2 = DataFrame({"a": ["a"] * 3}, dtype=object) | ||||
|     result = concat([df, df2], axis=1) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(df, "b")) | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     # Snapshot of the result taken before either input is modified. | ||||
|     expected = result.copy() | ||||
|     df.iloc[0, 0] = "d" | ||||
|     if using_copy_on_write: | ||||
|         # Writing into df detaches "b" only; "a" is still shared with df2. | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df, "b")) | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     df2.iloc[0, 0] = "d" | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a")) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_series(using_copy_on_write): | ||||
|     # concat(axis=1) of two named Series: with CoW each result column shares | ||||
|     # the corresponding Series' values until the result is written to. | ||||
|     ser = Series([1, 2], name="a") | ||||
|     ser2 = Series([3, 4], name="b") | ||||
|     ser_orig = ser.copy() | ||||
|     ser2_orig = ser2.copy() | ||||
|     result = concat([ser, ser2], axis=1) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), ser.values) | ||||
|         assert np.shares_memory(get_array(result, "b"), ser2.values) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), ser.values) | ||||
|         assert not np.shares_memory(get_array(result, "b"), ser2.values) | ||||
|  | ||||
|     # Write into column "a" of the result. | ||||
|     result.iloc[0, 0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), ser.values) | ||||
|         assert np.shares_memory(get_array(result, "b"), ser2.values) | ||||
|  | ||||
|     # Write into column "b" of the result. | ||||
|     result.iloc[0, 1] = 1000 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "b"), ser2.values) | ||||
|     # Neither input Series may have been modified. | ||||
|     tm.assert_series_equal(ser, ser_orig) | ||||
|     tm.assert_series_equal(ser2, ser2_orig) | ||||
|  | ||||
|  | ||||
| def test_concat_frames_chained(using_copy_on_write): | ||||
|     # Nested concat calls: with CoW the final result still shares memory with | ||||
|     # all three original inputs, including those passed to the inner concat. | ||||
|     df1 = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) | ||||
|     df2 = DataFrame({"c": [4, 5, 6]}) | ||||
|     df3 = DataFrame({"d": [4, 5, 6]}) | ||||
|     result = concat([concat([df1, df2], axis=1), df3], axis=1) | ||||
|     expected = result.copy() | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "c"), get_array(df2, "c")) | ||||
|         assert np.shares_memory(get_array(result, "d"), get_array(df3, "d")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "c"), get_array(df2, "c")) | ||||
|         assert not np.shares_memory(get_array(result, "d"), get_array(df3, "d")) | ||||
|  | ||||
|     # Modify an input of the inner concat; the result must be unaffected. | ||||
|     df1.iloc[0, 0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|  | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_series_chained(using_copy_on_write): | ||||
|     # Nested concat calls on Series: with CoW the final frame still shares | ||||
|     # memory with all three original Series. | ||||
|     ser1 = Series([1, 2, 3], name="a") | ||||
|     ser2 = Series([4, 5, 6], name="c") | ||||
|     ser3 = Series([4, 5, 6], name="d") | ||||
|     result = concat([concat([ser1, ser2], axis=1), ser3], axis=1) | ||||
|     expected = result.copy() | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(ser1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "c"), get_array(ser2, "c")) | ||||
|         assert np.shares_memory(get_array(result, "d"), get_array(ser3, "d")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(ser1, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "c"), get_array(ser2, "c")) | ||||
|         assert not np.shares_memory(get_array(result, "d"), get_array(ser3, "d")) | ||||
|  | ||||
|     # Modify an input of the inner concat; the result must be unaffected. | ||||
|     ser1.iloc[0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(ser1, "a")) | ||||
|  | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_series_updating_input(using_copy_on_write): | ||||
|     # Writes go into the input Series after concat(axis=1); the concatenated | ||||
|     # frame must keep the values it had at construction time. | ||||
|     ser = Series([1, 2], name="a") | ||||
|     ser2 = Series([3, 4], name="b") | ||||
|     expected = DataFrame({"a": [1, 2], "b": [3, 4]}) | ||||
|     result = concat([ser, ser2], axis=1) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(ser, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(ser, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) | ||||
|  | ||||
|     ser.iloc[0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         # Only "a" is detached; "b" is still shared with ser2. | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(ser, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     ser2.iloc[0] = 1000 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(ser2, "b")) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_concat_mixed_series_frame(using_copy_on_write): | ||||
|     # concat(axis=1) of a DataFrame and a Series: with CoW every result column | ||||
|     # shares memory with its source until that source is written to. | ||||
|     df = DataFrame({"a": [1, 2, 3], "c": 1}) | ||||
|     ser = Series([4, 5, 6], name="d") | ||||
|     result = concat([df, ser], axis=1) | ||||
|     expected = result.copy() | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|         assert np.shares_memory(get_array(result, "c"), get_array(df, "c")) | ||||
|         assert np.shares_memory(get_array(result, "d"), get_array(ser, "d")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "c"), get_array(df, "c")) | ||||
|         assert not np.shares_memory(get_array(result, "d"), get_array(ser, "d")) | ||||
|  | ||||
|     # Modify the Series input, then the frame input. | ||||
|     ser.iloc[0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "d"), get_array(ser, "d")) | ||||
|  | ||||
|     df.iloc[0, 0] = 100 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|     # The result must still equal the snapshot taken before the writes. | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("copy", [True, None, False]) | ||||
| def test_concat_copy_keyword(using_copy_on_write, copy): | ||||
|     df = DataFrame({"a": [1, 2]}) | ||||
|     df2 = DataFrame({"b": [1.5, 2.5]}) | ||||
|  | ||||
|     result = concat([df, df2], axis=1, copy=copy) | ||||
|  | ||||
|     if using_copy_on_write or copy is False: | ||||
|         assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) | ||||
|         assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) | ||||
|         assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b")) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "func", | ||||
|     [ | ||||
|         lambda df1, df2, **kwargs: df1.merge(df2, **kwargs), | ||||
|         lambda df1, df2, **kwargs: merge(df1, df2, **kwargs), | ||||
|     ], | ||||
| ) | ||||
| def test_merge_on_key(using_copy_on_write, func): | ||||
|     # Merge on a shared "key" column, via both DataFrame.merge and the | ||||
|     # top-level merge(); both frames carry the same three keys. | ||||
|     df1 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "a": [1, 2, 3]}) | ||||
|     df2 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "b": [4, 5, 6]}) | ||||
|     df1_orig = df1.copy() | ||||
|     df2_orig = df2.copy() | ||||
|  | ||||
|     result = func(df1, df2, on="key") | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|         # The result's key column is shared with df1's, not with df2's. | ||||
|         assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) | ||||
|         assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     # Write into column "a" of the result (position 1). | ||||
|     result.iloc[0, 1] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     # Write into column "b" of the result (position 2). | ||||
|     result.iloc[0, 2] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|     # Neither input frame may have been modified. | ||||
|     tm.assert_frame_equal(df1, df1_orig) | ||||
|     tm.assert_frame_equal(df2, df2_orig) | ||||
|  | ||||
|  | ||||
| def test_merge_on_index(using_copy_on_write): | ||||
|     # Merge on the index of both frames: with CoW the result's columns share | ||||
|     # memory with the inputs until the result is written to. | ||||
|     df1 = DataFrame({"a": [1, 2, 3]}) | ||||
|     df2 = DataFrame({"b": [4, 5, 6]}) | ||||
|     df1_orig = df1.copy() | ||||
|     df2_orig = df2.copy() | ||||
|  | ||||
|     result = merge(df1, df2, left_index=True, right_index=True) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     # Write into column "a" of the result. | ||||
|     result.iloc[0, 0] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     # Write into column "b" of the result. | ||||
|     result.iloc[0, 1] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|     # Neither input frame may have been modified. | ||||
|     tm.assert_frame_equal(df1, df1_orig) | ||||
|     tm.assert_frame_equal(df2, df2_orig) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "func, how", | ||||
|     [ | ||||
|         (lambda df1, df2, **kwargs: merge(df2, df1, on="key", **kwargs), "right"), | ||||
|         (lambda df1, df2, **kwargs: merge(df1, df2, on="key", **kwargs), "left"), | ||||
|     ], | ||||
| ) | ||||
| def test_merge_on_key_enlarging_one(using_copy_on_write, func, how): | ||||
|     # df2 lacks the key "c" that df1 has. Both parametrizations keep all of | ||||
|     # df1's keys: either df1 is the left frame with how="left", or it is the | ||||
|     # right frame with how="right". | ||||
|     df1 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "a": [1, 2, 3]}) | ||||
|     df2 = DataFrame({"key": Series(["a", "b"], dtype=object), "b": [4, 5]}) | ||||
|     df1_orig = df1.copy() | ||||
|     df2_orig = df2.copy() | ||||
|  | ||||
|     result = func(df1, df2, how=how) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # Column "a" is shared with df1, while "b" is not shared with df2 and | ||||
|         # df2 must not be left with any reference at positions 0 and 1. | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|         assert df2._mgr._has_no_reference(1) | ||||
|         assert df2._mgr._has_no_reference(0) | ||||
|         # The key column is shared with df1 only in the how="left" case. | ||||
|         assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) is ( | ||||
|             how == "left" | ||||
|         ) | ||||
|         assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     # Write into one non-key column; the position written depends on ``how``. | ||||
|     # The assertion below expects "a" to be detached from df1 in both cases. | ||||
|     if how == "left": | ||||
|         result.iloc[0, 1] = 0 | ||||
|     else: | ||||
|         result.iloc[0, 2] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|     # Neither input frame may have been modified. | ||||
|     tm.assert_frame_equal(df1, df1_orig) | ||||
|     tm.assert_frame_equal(df2, df2_orig) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("copy", [True, None, False]) | ||||
| def test_merge_copy_keyword(using_copy_on_write, copy): | ||||
|     df = DataFrame({"a": [1, 2]}) | ||||
|     df2 = DataFrame({"b": [3, 4.5]}) | ||||
|  | ||||
|     result = df.merge(df2, copy=copy, left_index=True, right_index=True) | ||||
|  | ||||
|     if using_copy_on_write or copy is False: | ||||
|         assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) | ||||
|         assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) | ||||
|         assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b")) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [object, "str"]) | ||||
| def test_join_on_key(dtype, using_copy_on_write): | ||||
|     df_index = Index(["a", "b", "c"], name="key", dtype=dtype) | ||||
|  | ||||
|     df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True)) | ||||
|     df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True)) | ||||
|  | ||||
|     df1_orig = df1.copy() | ||||
|     df2_orig = df2.copy() | ||||
|  | ||||
|     result = df1.join(df2, on="key") | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|         assert tm.shares_memory(get_array(result.index), get_array(df1.index)) | ||||
|         assert not np.shares_memory(get_array(result.index), get_array(df2.index)) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     result.iloc[0, 0] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     result.iloc[0, 1] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     tm.assert_frame_equal(df1, df1_orig) | ||||
|     tm.assert_frame_equal(df2, df2_orig) | ||||
|  | ||||
|  | ||||
| def test_join_multiple_dataframes_on_key(using_copy_on_write): | ||||
|     # DataFrame.join with a list of two other frames; every frame gets its own | ||||
|     # deep copy of the same object-dtype "key" index. | ||||
|     df_index = Index(["a", "b", "c"], name="key", dtype=object) | ||||
|  | ||||
|     df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True)) | ||||
|     dfs_list = [ | ||||
|         DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True)), | ||||
|         DataFrame({"c": [7, 8, 9]}, index=df_index.copy(deep=True)), | ||||
|     ] | ||||
|  | ||||
|     df1_orig = df1.copy() | ||||
|     dfs_list_orig = [df.copy() for df in dfs_list] | ||||
|  | ||||
|     result = df1.join(dfs_list) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) | ||||
|         assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) | ||||
|         # The result's index is shared with df1's index only. | ||||
|         assert np.shares_memory(get_array(result.index), get_array(df1.index)) | ||||
|         assert not np.shares_memory( | ||||
|             get_array(result.index), get_array(dfs_list[0].index) | ||||
|         ) | ||||
|         assert not np.shares_memory( | ||||
|             get_array(result.index), get_array(dfs_list[1].index) | ||||
|         ) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) | ||||
|         assert not np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) | ||||
|  | ||||
|     # Write into each result column in turn; with CoW only the written column | ||||
|     # is detached from its source at each step. | ||||
|     result.iloc[0, 0] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) | ||||
|         assert np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) | ||||
|         assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) | ||||
|  | ||||
|     result.iloc[0, 1] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "b"), get_array(dfs_list[0], "b")) | ||||
|         assert np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) | ||||
|  | ||||
|     result.iloc[0, 2] = 0 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "c"), get_array(dfs_list[1], "c")) | ||||
|  | ||||
|     # None of the input frames may have been modified. | ||||
|     tm.assert_frame_equal(df1, df1_orig) | ||||
|     for df, df_orig in zip(dfs_list, dfs_list_orig): | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
							
								
								
									
										1266
									
								
								lib/python3.11/site-packages/pandas/tests/copy_view/test_indexing.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1266
									
								
								lib/python3.11/site-packages/pandas/tests/copy_view/test_indexing.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,154 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
| @td.skip_array_manager_invalid_test | ||||
| def test_consolidate(using_copy_on_write): | ||||
|     # Checks how reference tracking behaves when a viewing subset of an | ||||
|     # unconsolidated frame is consolidated in place. | ||||
|  | ||||
|     # create unconsolidated DataFrame | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) | ||||
|     df["c"] = [4, 5, 6] | ||||
|  | ||||
|     # take a viewing subset | ||||
|     subset = df[:] | ||||
|  | ||||
|     # each block of subset references a block of df | ||||
|     assert all(blk.refs.has_reference() for blk in subset._mgr.blocks) | ||||
|  | ||||
|     # consolidate the two int64 blocks | ||||
|     subset._consolidate_inplace() | ||||
|  | ||||
|     # the float64 block still references the parent one because it is still a view | ||||
|     assert subset._mgr.blocks[0].refs.has_reference() | ||||
|     # equivalent of assert np.shares_memory(df["b"].values, subset["b"].values) | ||||
|     # but avoids caching df["b"] | ||||
|     assert np.shares_memory(get_array(df, "b"), get_array(subset, "b")) | ||||
|  | ||||
|     # the new consolidated int64 block does not reference another | ||||
|     assert not subset._mgr.blocks[1].refs.has_reference() | ||||
|  | ||||
|     # the parent dataframe now also only is linked for the float column | ||||
|     assert not df._mgr.blocks[0].refs.has_reference() | ||||
|     assert df._mgr.blocks[1].refs.has_reference() | ||||
|     assert not df._mgr.blocks[2].refs.has_reference() | ||||
|  | ||||
|     # and modifying subset still doesn't modify parent | ||||
|     if using_copy_on_write: | ||||
|         subset.iloc[0, 1] = 0.0 | ||||
|         assert not df._mgr.blocks[1].refs.has_reference() | ||||
|         assert df.loc[0, "b"] == 0.1 | ||||
|  | ||||
|  | ||||
| @pytest.mark.single_cpu | ||||
| @td.skip_array_manager_invalid_test | ||||
| def test_switch_options(): | ||||
|     # ensure we can switch the value of the option within one session | ||||
|     # (assuming data is constructed after switching) | ||||
|  | ||||
|     # using the option_context to ensure we set back to global option value | ||||
|     # after running the test | ||||
|     with pd.option_context("mode.copy_on_write", False): | ||||
|         # Phase 1: CoW off -> a write through the subset is visible in df. | ||||
|         df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) | ||||
|         subset = df[:] | ||||
|         subset.iloc[0, 0] = 0 | ||||
|         # df updated with CoW disabled | ||||
|         assert df.iloc[0, 0] == 0 | ||||
|  | ||||
|         # Phase 2: switch CoW on and rebuild the data from scratch. | ||||
|         pd.options.mode.copy_on_write = True | ||||
|         df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) | ||||
|         subset = df[:] | ||||
|         subset.iloc[0, 0] = 0 | ||||
|         # df not updated with CoW enabled | ||||
|         assert df.iloc[0, 0] == 1 | ||||
|  | ||||
|         # Phase 3: switch CoW off again; behaviour must match phase 1. | ||||
|         pd.options.mode.copy_on_write = False | ||||
|         df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) | ||||
|         subset = df[:] | ||||
|         subset.iloc[0, 0] = 0 | ||||
|         # df updated with CoW disabled | ||||
|         assert df.iloc[0, 0] == 0 | ||||
|  | ||||
|  | ||||
| @td.skip_array_manager_invalid_test | ||||
| @pytest.mark.parametrize("dtype", [np.intp, np.int8]) | ||||
| @pytest.mark.parametrize( | ||||
|     "locs, arr", | ||||
|     [ | ||||
|         ([0], np.array([-1, -2, -3])), | ||||
|         ([1], np.array([-1, -2, -3])), | ||||
|         ([5], np.array([-1, -2, -3])), | ||||
|         ([0, 1], np.array([[-1, -2, -3], [-4, -5, -6]]).T), | ||||
|         ([0, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T), | ||||
|         ([0, 1, 2], np.array([[-1, -2, -3], [-4, -5, -6], [-4, -5, -6]]).T), | ||||
|         ([1, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T), | ||||
|         ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T), | ||||
|         ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T), | ||||
|     ], | ||||
| ) | ||||
| def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype): | ||||
|     # Nothing currently calls iset with | ||||
|     # more than 1 loc with inplace=True (only happens with inplace=False) | ||||
|     # but ensure that it works | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "a": [1, 2, 3], | ||||
|             "b": [4, 5, 6], | ||||
|             "c": [7, 8, 9], | ||||
|             "d": [10, 11, 12], | ||||
|             "e": [13, 14, 15], | ||||
|             "f": Series(["a", "b", "c"], dtype=object), | ||||
|         }, | ||||
|     ) | ||||
|     arr = arr.astype(dtype) | ||||
|     df_orig = df.copy() | ||||
|     df2 = df.copy(deep=None)  # Trigger a CoW (if enabled, otherwise makes copy) | ||||
|     df2._mgr.iset(locs, arr, inplace=True) | ||||
|  | ||||
|     tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         for i, col in enumerate(df.columns): | ||||
|             if i not in locs: | ||||
|                 assert np.shares_memory(get_array(df, col), get_array(df2, col)) | ||||
|     else: | ||||
|         for col in df.columns: | ||||
|             assert not np.shares_memory(get_array(df, col), get_array(df2, col)) | ||||
|  | ||||
|  | ||||
| def test_exponential_backoff(): | ||||
|     """Pin ``referenced_blocks`` and ``clear_counter`` across shallow copies. | ||||
|  | ||||
|     Shallow copies of ``df`` are either created without being bound to a | ||||
|     name or held in ``dfs``; after each phase the test asserts the length of | ||||
|     ``refs.referenced_blocks`` and/or the value of ``refs.clear_counter``. | ||||
|     """ | ||||
|     # GH#55518 | ||||
|     df = DataFrame({"a": [1, 2, 3]}) | ||||
|     # These copies are not bound to a name. | ||||
|     for i in range(490): | ||||
|         df.copy(deep=False) | ||||
|  | ||||
|     assert len(df._mgr.blocks[0].refs.referenced_blocks) == 491 | ||||
|  | ||||
|     df = DataFrame({"a": [1, 2, 3]}) | ||||
|     # Hold 510 shallow copies in a list so that they stay alive. | ||||
|     dfs = [df.copy(deep=False) for i in range(510)] | ||||
|  | ||||
|     for i in range(20): | ||||
|         df.copy(deep=False) | ||||
|     assert len(df._mgr.blocks[0].refs.referenced_blocks) == 531 | ||||
|     assert df._mgr.blocks[0].refs.clear_counter == 1000 | ||||
|  | ||||
|     for i in range(500): | ||||
|         df.copy(deep=False) | ||||
|  | ||||
|     # Don't reduce since we still have over 500 objects alive | ||||
|     assert df._mgr.blocks[0].refs.clear_counter == 1000 | ||||
|  | ||||
|     # Keep only 300 of the held copies. | ||||
|     dfs = dfs[:300] | ||||
|     for i in range(500): | ||||
|         df.copy(deep=False) | ||||
|  | ||||
|     # Reduce since there are less than 500 objects alive | ||||
|     assert df._mgr.blocks[0].refs.clear_counter == 500 | ||||
| @ -0,0 +1,433 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     ArrowDtype, | ||||
|     DataFrame, | ||||
|     Interval, | ||||
|     NaT, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     interval_range, | ||||
|     option_context, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("method", ["pad", "nearest", "linear"]) | ||||
| def test_interpolate_no_op(using_copy_on_write, method): | ||||
|     df = DataFrame({"a": [1, 2]}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     warn = None | ||||
|     if method == "pad": | ||||
|         warn = FutureWarning | ||||
|     msg = "DataFrame.interpolate with method=pad is deprecated" | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         result = df.interpolate(method=method) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|  | ||||
|     result.iloc[0, 0] = 100 | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|     tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", ["ffill", "bfill"]) | ||||
| def test_interp_fill_functions(using_copy_on_write, func): | ||||
|     # Check that these takes the same code paths as interpolate | ||||
|     df = DataFrame({"a": [1, 2]}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     result = getattr(df, func)() | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|  | ||||
|     result.iloc[0, 0] = 100 | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|     tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", ["ffill", "bfill"]) | ||||
| @pytest.mark.parametrize( | ||||
|     "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] | ||||
| ) | ||||
| def test_interpolate_triggers_copy(using_copy_on_write, vals, func): | ||||
|     df = DataFrame({"a": vals}) | ||||
|     result = getattr(df, func)() | ||||
|  | ||||
|     assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|     if using_copy_on_write: | ||||
|         # Check that we don't have references when triggering a copy | ||||
|         assert result._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] | ||||
| ) | ||||
| def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals): | ||||
|     df = DataFrame({"a": vals}) | ||||
|     arr = get_array(df, "a") | ||||
|     df.interpolate(method="linear", inplace=True) | ||||
|  | ||||
|     assert np.shares_memory(arr, get_array(df, "a")) | ||||
|     if using_copy_on_write: | ||||
|         # Check that we don't have references when triggering a copy | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] | ||||
| ) | ||||
| def test_interpolate_inplace_with_refs(using_copy_on_write, vals, warn_copy_on_write): | ||||
|     df = DataFrame({"a": [1, np.nan, 2]}) | ||||
|     df_orig = df.copy() | ||||
|     arr = get_array(df, "a") | ||||
|     view = df[:] | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df.interpolate(method="linear", inplace=True) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # Check that copy was triggered in interpolate and that we don't | ||||
|         # have any references left | ||||
|         assert not np.shares_memory(arr, get_array(df, "a")) | ||||
|         tm.assert_frame_equal(df_orig, view) | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         assert view._mgr._has_no_reference(0) | ||||
|     else: | ||||
|         assert np.shares_memory(arr, get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", ["ffill", "bfill"]) | ||||
| @pytest.mark.parametrize("dtype", ["float64", "Float64"]) | ||||
| def test_interp_fill_functions_inplace( | ||||
|     using_copy_on_write, func, warn_copy_on_write, dtype | ||||
| ): | ||||
|     # Check that these takes the same code paths as interpolate | ||||
|     df = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype) | ||||
|     df_orig = df.copy() | ||||
|     arr = get_array(df, "a") | ||||
|     view = df[:] | ||||
|  | ||||
|     with tm.assert_cow_warning(warn_copy_on_write and dtype == "float64"): | ||||
|         getattr(df, func)(inplace=True) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # Check that copy was triggered in interpolate and that we don't | ||||
|         # have any references left | ||||
|         assert not np.shares_memory(arr, get_array(df, "a")) | ||||
|         tm.assert_frame_equal(df_orig, view) | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         assert view._mgr._has_no_reference(0) | ||||
|     else: | ||||
|         assert np.shares_memory(arr, get_array(df, "a")) is (dtype == "float64") | ||||
|  | ||||
|  | ||||
| def test_interpolate_cannot_with_object_dtype(using_copy_on_write): | ||||
|     df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) | ||||
|     df["a"] = df["a"].astype(object) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     msg = "DataFrame.interpolate with object dtype" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = df.interpolate(method="linear") | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|  | ||||
|     result.iloc[0, 0] = Timestamp("2021-12-31") | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) | ||||
|     tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|  | ||||
| def test_interpolate_object_convert_no_op(using_copy_on_write, using_infer_string): | ||||
|     df = DataFrame({"a": ["a", "b", "c"], "b": 1}) | ||||
|     df["a"] = df["a"].astype(object) | ||||
|     arr_a = get_array(df, "a") | ||||
|     msg = "DataFrame.interpolate with method=pad is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         df.interpolate(method="pad", inplace=True) | ||||
|  | ||||
|     # Now CoW makes a copy, it should not! | ||||
|     if using_copy_on_write and not using_infer_string: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         assert np.shares_memory(arr_a, get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_interpolate_object_convert_copies(using_copy_on_write): | ||||
|     df = DataFrame({"a": Series([1, 2], dtype=object), "b": 1}) | ||||
|     arr_a = get_array(df, "a") | ||||
|     msg = "DataFrame.interpolate with method=pad is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         df.interpolate(method="pad", inplace=True) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         assert not np.shares_memory(arr_a, get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_interpolate_downcast(using_copy_on_write): | ||||
|     df = DataFrame({"a": [1, np.nan, 2.5], "b": 1}) | ||||
|     arr_a = get_array(df, "a") | ||||
|     msg = "DataFrame.interpolate with method=pad is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         df.interpolate(method="pad", inplace=True, downcast="infer") | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|     assert np.shares_memory(arr_a, get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write): | ||||
|     df = DataFrame({"a": [1, np.nan, 2.5], "b": 1}) | ||||
|     df_orig = df.copy() | ||||
|     arr_a = get_array(df, "a") | ||||
|     view = df[:] | ||||
|     msg = "DataFrame.interpolate with method=pad is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         df.interpolate(method="pad", inplace=True, downcast="infer") | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         assert not np.shares_memory(arr_a, get_array(df, "a")) | ||||
|         tm.assert_frame_equal(df_orig, view) | ||||
|     else: | ||||
|         tm.assert_frame_equal(df, view) | ||||
|  | ||||
|  | ||||
| def test_fillna(using_copy_on_write): | ||||
|     df = DataFrame({"a": [1.5, np.nan], "b": 1}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     df2 = df.fillna(5.5) | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     df2.iloc[0, 1] = 100 | ||||
|     tm.assert_frame_equal(df_orig, df) | ||||
|  | ||||
|  | ||||
| def test_fillna_dict(using_copy_on_write): | ||||
|     df = DataFrame({"a": [1.5, np.nan], "b": 1}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     df2 = df.fillna({"a": 100.5}) | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     df2.iloc[0, 1] = 100 | ||||
|     tm.assert_frame_equal(df_orig, df) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("downcast", [None, False]) | ||||
| def test_fillna_inplace(using_copy_on_write, downcast): | ||||
|     df = DataFrame({"a": [1.5, np.nan], "b": 1}) | ||||
|     arr_a = get_array(df, "a") | ||||
|     arr_b = get_array(df, "b") | ||||
|  | ||||
|     msg = "The 'downcast' keyword in fillna is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         df.fillna(5.5, inplace=True, downcast=downcast) | ||||
|     assert np.shares_memory(get_array(df, "a"), arr_a) | ||||
|     assert np.shares_memory(get_array(df, "b"), arr_b) | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         assert df._mgr._has_no_reference(1) | ||||
|  | ||||
|  | ||||
| def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write): | ||||
|     df = DataFrame({"a": [1.5, np.nan], "b": 1}) | ||||
|     df_orig = df.copy() | ||||
|     arr_a = get_array(df, "a") | ||||
|     arr_b = get_array(df, "b") | ||||
|     view = df[:] | ||||
|  | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df.fillna(5.5, inplace=True) | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(df, "a"), arr_a) | ||||
|         assert np.shares_memory(get_array(df, "b"), arr_b) | ||||
|         assert view._mgr._has_no_reference(0) | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         tm.assert_frame_equal(view, df_orig) | ||||
|     else: | ||||
|         assert np.shares_memory(get_array(df, "a"), arr_a) | ||||
|         assert np.shares_memory(get_array(df, "b"), arr_b) | ||||
|     expected = DataFrame({"a": [1.5, 5.5], "b": 1}) | ||||
|     tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|  | ||||
| def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_write): | ||||
|     # Set dtype explicitly to avoid implicit cast when setting nan | ||||
|     ser = Series( | ||||
|         interval_range(start=0, end=5), name="a", dtype="interval[float64, right]" | ||||
|     ) | ||||
|     ser.iloc[1] = np.nan | ||||
|  | ||||
|     ser_orig = ser.copy() | ||||
|     view = ser[:] | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         ser.fillna(value=Interval(left=0, right=5), inplace=True) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory( | ||||
|             get_array(ser, "a").left.values, get_array(view, "a").left.values | ||||
|         ) | ||||
|         tm.assert_series_equal(view, ser_orig) | ||||
|     else: | ||||
|         assert np.shares_memory( | ||||
|             get_array(ser, "a").left.values, get_array(view, "a").left.values | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def test_fillna_series_empty_arg(using_copy_on_write): | ||||
|     ser = Series([1, np.nan, 2]) | ||||
|     ser_orig = ser.copy() | ||||
|     result = ser.fillna({}) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(ser), get_array(result)) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(ser), get_array(result)) | ||||
|  | ||||
|     ser.iloc[0] = 100.5 | ||||
|     tm.assert_series_equal(ser_orig, result) | ||||
|  | ||||
|  | ||||
| def test_fillna_series_empty_arg_inplace(using_copy_on_write): | ||||
|     ser = Series([1, np.nan, 2]) | ||||
|     arr = get_array(ser) | ||||
|     ser.fillna({}, inplace=True) | ||||
|  | ||||
|     assert np.shares_memory(get_array(ser), arr) | ||||
|     if using_copy_on_write: | ||||
|         assert ser._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| def test_fillna_ea_noop_shares_memory( | ||||
|     using_copy_on_write, any_numeric_ea_and_arrow_dtype | ||||
| ): | ||||
|     df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) | ||||
|     df_orig = df.copy() | ||||
|     df2 = df.fillna(100) | ||||
|  | ||||
|     assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|         assert not df2._mgr._has_no_reference(1) | ||||
|     elif isinstance(df.dtypes.iloc[0], ArrowDtype): | ||||
|         # arrow is immutable, so no-ops do not need to copy underlying array | ||||
|         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|  | ||||
|     tm.assert_frame_equal(df_orig, df) | ||||
|  | ||||
|     df2.iloc[0, 1] = 100 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|         assert df2._mgr._has_no_reference(1) | ||||
|         assert df._mgr._has_no_reference(1) | ||||
|     tm.assert_frame_equal(df_orig, df) | ||||
|  | ||||
|  | ||||
| def test_fillna_inplace_ea_noop_shares_memory( | ||||
|     using_copy_on_write, warn_copy_on_write, any_numeric_ea_and_arrow_dtype | ||||
| ): | ||||
|     df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) | ||||
|     df_orig = df.copy() | ||||
|     view = df[:] | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df.fillna(100, inplace=True) | ||||
|  | ||||
|     if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) | ||||
|     else: | ||||
|         # MaskedArray can actually respect inplace=True | ||||
|         assert np.shares_memory(get_array(df, "a"), get_array(view, "a")) | ||||
|  | ||||
|     assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) | ||||
|     if using_copy_on_write: | ||||
|         assert not df._mgr._has_no_reference(1) | ||||
|         assert not view._mgr._has_no_reference(1) | ||||
|  | ||||
|     with tm.assert_cow_warning( | ||||
|         warn_copy_on_write and "pyarrow" not in any_numeric_ea_and_arrow_dtype | ||||
|     ): | ||||
|         df.iloc[0, 1] = 100 | ||||
|     if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: | ||||
|         tm.assert_frame_equal(df_orig, view) | ||||
|     else: | ||||
|         # we actually have a view | ||||
|         tm.assert_frame_equal(df, view) | ||||
|  | ||||
|  | ||||
| def test_fillna_chained_assignment(using_copy_on_write): | ||||
|     """Call ``fillna(inplace=True)`` directly on the result of indexing ``df``. | ||||
|  | ||||
|     With copy-on-write each call must raise the chained-assignment error and | ||||
|     leave ``df`` unchanged; otherwise the test pins which calls warn. | ||||
|     """ | ||||
|     # NOTE(review): the in-place calls are deliberately made on the indexing | ||||
|     # result without binding it to a name; presumably the detection depends | ||||
|     # on that object being a temporary -- keep the calls chained. | ||||
|     df = DataFrame({"a": [1, np.nan, 2], "b": 1}) | ||||
|     df_orig = df.copy() | ||||
|     if using_copy_on_write: | ||||
|         # Series selected with df["a"] | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].fillna(100, inplace=True) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|         # Single-column frame selected with df[["a"]] | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df[["a"]].fillna(100, inplace=True) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|     else: | ||||
|         # No warning is expected for the two frame selections below. | ||||
|         with tm.assert_produces_warning(None): | ||||
|             with option_context("mode.chained_assignment", None): | ||||
|                 df[["a"]].fillna(100, inplace=True) | ||||
|  | ||||
|         with tm.assert_produces_warning(None): | ||||
|             with option_context("mode.chained_assignment", None): | ||||
|                 df[df.a > 5].fillna(100, inplace=True) | ||||
|  | ||||
|         # The Series selection is expected to emit a FutureWarning. | ||||
|         with tm.assert_produces_warning(FutureWarning, match="inplace method"): | ||||
|             df["a"].fillna(100, inplace=True) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", ["interpolate", "ffill", "bfill"]) | ||||
| def test_interpolate_chained_assignment(using_copy_on_write, func): | ||||
|     """Call ``func(inplace=True)`` directly on the result of indexing ``df``. | ||||
|  | ||||
|     With copy-on-write each call must raise the chained-assignment error and | ||||
|     leave ``df`` unchanged; otherwise the test pins which calls warn. | ||||
|     """ | ||||
|     # NOTE(review): the in-place calls are deliberately made on the indexing | ||||
|     # result without binding it to a name; presumably the detection depends | ||||
|     # on that object being a temporary -- keep the calls chained. | ||||
|     df = DataFrame({"a": [1, np.nan, 2], "b": 1}) | ||||
|     df_orig = df.copy() | ||||
|     if using_copy_on_write: | ||||
|         # Series selected with df["a"] | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             getattr(df["a"], func)(inplace=True) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|         # Single-column frame selected with df[["a"]] | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             getattr(df[["a"]], func)(inplace=True) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|     else: | ||||
|         # The Series selection is expected to emit a FutureWarning. | ||||
|         with tm.assert_produces_warning(FutureWarning, match="inplace method"): | ||||
|             getattr(df["a"], func)(inplace=True) | ||||
|  | ||||
|         # No warning is expected for the two frame selections below. | ||||
|         with tm.assert_produces_warning(None): | ||||
|             with option_context("mode.chained_assignment", None): | ||||
|                 getattr(df[["a"]], func)(inplace=True) | ||||
|  | ||||
|         with tm.assert_produces_warning(None): | ||||
|             with option_context("mode.chained_assignment", None): | ||||
|                 getattr(df[df["a"] > 1], func)(inplace=True) | ||||
							
								
								
									
										2068
									
								
								lib/python3.11/site-packages/pandas/tests/copy_view/test_methods.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2068
									
								
								lib/python3.11/site-packages/pandas/tests/copy_view/test_methods.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,490 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     option_context, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "replace_kwargs", | ||||
|     [ | ||||
|         {"to_replace": {"a": 1, "b": 4}, "value": -1}, | ||||
|         # Test CoW splits blocks to avoid copying unchanged columns | ||||
|         {"to_replace": {"a": 1}, "value": -1}, | ||||
|         {"to_replace": {"b": 4}, "value": -1}, | ||||
|         {"to_replace": {"b": {4: 1}}}, | ||||
|         # TODO: Add these in a further optimization | ||||
|         # We would need to see which columns got replaced in the mask | ||||
|         # which could be expensive | ||||
|         # {"to_replace": {"b": 1}}, | ||||
|         # 1 | ||||
|     ], | ||||
| ) | ||||
| def test_replace(using_copy_on_write, replace_kwargs): | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     df_replaced = df.replace(**replace_kwargs) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         if (df_replaced["b"] == df["b"]).all(): | ||||
|             assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b")) | ||||
|         assert tm.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) | ||||
|  | ||||
|     # mutating squeezed df triggers a copy-on-write for that column/block | ||||
|     df_replaced.loc[0, "c"] = -1 | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) | ||||
|  | ||||
|     if "a" in replace_kwargs["to_replace"]: | ||||
|         arr = get_array(df_replaced, "a") | ||||
|         df_replaced.loc[0, "a"] = 100 | ||||
|         assert np.shares_memory(get_array(df_replaced, "a"), arr) | ||||
|     tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|  | ||||
| def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write): | ||||
|     df = DataFrame({"a": ["aaa", "bbb"]}) | ||||
|     df_orig = df.copy() | ||||
|     view = df[:] | ||||
|     arr = get_array(df, "a") | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) | ||||
|     if using_copy_on_write: | ||||
|         assert not tm.shares_memory(arr, get_array(df, "a")) | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         tm.assert_frame_equal(view, df_orig) | ||||
|     else: | ||||
|         assert np.shares_memory(arr, get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_replace_regex_inplace(using_copy_on_write): | ||||
|     df = DataFrame({"a": ["aaa", "bbb"]}) | ||||
|     arr = get_array(df, "a") | ||||
|     df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|     assert tm.shares_memory(arr, get_array(df, "a")) | ||||
|  | ||||
|     df_orig = df.copy() | ||||
|     df2 = df.replace(to_replace=r"^b.*$", value="new", regex=True) | ||||
|     tm.assert_frame_equal(df_orig, df) | ||||
|     assert not tm.shares_memory(get_array(df2, "a"), get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_replace_regex_inplace_no_op(using_copy_on_write): | ||||
|     df = DataFrame({"a": [1, 2]}) | ||||
|     arr = get_array(df, "a") | ||||
|     df.replace(to_replace=r"^a.$", value="new", inplace=True, regex=True) | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|     assert np.shares_memory(arr, get_array(df, "a")) | ||||
|  | ||||
|     df_orig = df.copy() | ||||
|     df2 = df.replace(to_replace=r"^x.$", value="new", regex=True) | ||||
|     tm.assert_frame_equal(df_orig, df) | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) | ||||
|  | ||||
|  | ||||
| def test_replace_mask_all_false_second_block(using_copy_on_write): | ||||
|     df = DataFrame({"a": [1.5, 2, 3], "b": 100.5, "c": 1, "d": 2}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     df2 = df.replace(to_replace=1.5, value=55.5) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         # TODO: Block splitting would allow us to avoid copying b | ||||
|         assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     df2.loc[0, "c"] = 1 | ||||
|     tm.assert_frame_equal(df, df_orig)  # Original is unchanged | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) | ||||
|         # TODO: This should split and not copy the whole block | ||||
|         # assert np.shares_memory(get_array(df, "d"), get_array(df2, "d")) | ||||
|  | ||||
|  | ||||
| def test_replace_coerce_single_column(using_copy_on_write, using_array_manager): | ||||
|     df = DataFrame({"a": [1.5, 2, 3], "b": 100.5}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     df2 = df.replace(to_replace=1.5, value="a") | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     elif not using_array_manager: | ||||
|         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         df2.loc[0, "b"] = 0.5 | ||||
|         tm.assert_frame_equal(df, df_orig)  # Original is unchanged | ||||
|         assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|  | ||||
|  | ||||
| def test_replace_to_replace_wrong_dtype(using_copy_on_write): | ||||
|     df = DataFrame({"a": [1.5, 2, 3], "b": 100.5}) | ||||
|     df_orig = df.copy() | ||||
|  | ||||
|     df2 = df.replace(to_replace="xxx", value=1.5) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|         assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     else: | ||||
|         assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|         assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) | ||||
|  | ||||
|     df2.loc[0, "b"] = 0.5 | ||||
|     tm.assert_frame_equal(df, df_orig)  # Original is unchanged | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) | ||||
|  | ||||
|  | ||||
| def test_replace_list_categorical(using_copy_on_write): | ||||
|     df = DataFrame({"a": ["a", "b", "c"]}, dtype="category") | ||||
|     arr = get_array(df, "a") | ||||
|     msg = ( | ||||
|         r"The behavior of Series\.replace \(and DataFrame.replace\) " | ||||
|         "with CategoricalDtype" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         df.replace(["c"], value="a", inplace=True) | ||||
|     assert np.shares_memory(arr.codes, get_array(df, "a").codes) | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|  | ||||
|     df_orig = df.copy() | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         df2 = df.replace(["b"], value="a") | ||||
|     assert not np.shares_memory(arr.codes, get_array(df2, "a").codes) | ||||
|  | ||||
|     tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|  | ||||
| def test_replace_list_inplace_refs_categorical(using_copy_on_write): | ||||
|     df = DataFrame({"a": ["a", "b", "c"]}, dtype="category") | ||||
|     view = df[:] | ||||
|     df_orig = df.copy() | ||||
|     msg = ( | ||||
|         r"The behavior of Series\.replace \(and DataFrame.replace\) " | ||||
|         "with CategoricalDtype" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         df.replace(["c"], value="a", inplace=True) | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory( | ||||
|             get_array(view, "a").codes, get_array(df, "a").codes | ||||
|         ) | ||||
|         tm.assert_frame_equal(df_orig, view) | ||||
|     else: | ||||
|         # This could be inplace | ||||
|         assert not np.shares_memory( | ||||
|             get_array(view, "a").codes, get_array(df, "a").codes | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("to_replace", [1.5, [1.5], []]) | ||||
| def test_replace_inplace(using_copy_on_write, to_replace): | ||||
|     df = DataFrame({"a": [1.5, 2, 3]}) | ||||
|     arr_a = get_array(df, "a") | ||||
|     df.replace(to_replace=1.5, value=15.5, inplace=True) | ||||
|  | ||||
|     assert np.shares_memory(get_array(df, "a"), arr_a) | ||||
|     if using_copy_on_write: | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("to_replace", [1.5, [1.5]]) | ||||
| def test_replace_inplace_reference(using_copy_on_write, to_replace, warn_copy_on_write): | ||||
|     df = DataFrame({"a": [1.5, 2, 3]}) | ||||
|     arr_a = get_array(df, "a") | ||||
|     view = df[:] | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         df.replace(to_replace=to_replace, value=15.5, inplace=True) | ||||
|  | ||||
|     if using_copy_on_write: | ||||
|         assert not np.shares_memory(get_array(df, "a"), arr_a) | ||||
|         assert df._mgr._has_no_reference(0) | ||||
|         assert view._mgr._has_no_reference(0) | ||||
|     else: | ||||
|         assert np.shares_memory(get_array(df, "a"), arr_a) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("to_replace", ["a", 100.5]) | ||||
| def test_replace_inplace_reference_no_op(using_copy_on_write, to_replace): | ||||
|     df = DataFrame({"a": [1.5, 2, 3]}) | ||||
|     arr_a = get_array(df, "a") | ||||
|     view = df[:] | ||||
|     df.replace(to_replace=to_replace, value=15.5, inplace=True) | ||||
|  | ||||
|     assert np.shares_memory(get_array(df, "a"), arr_a) | ||||
|     if using_copy_on_write: | ||||
|         assert not df._mgr._has_no_reference(0) | ||||
|         assert not view._mgr._has_no_reference(0) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("to_replace", [1, [1]])
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_replace):
    """In-place replace on a categorical column while a view of the frame exists."""
    frame = DataFrame({"a": Categorical([1, 2, 3])})
    snapshot = frame.copy()
    cat_before = get_array(frame, "a")
    alias = frame[:]

    # Only the 1.5 replacement value is expected to emit the FutureWarning.
    expected_warning = None
    if val == 1.5:
        expected_warning = FutureWarning
    pattern = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    with tm.assert_produces_warning(expected_warning, match=pattern):
        frame.replace(to_replace=to_replace, value=val, inplace=True)

    codes_shared = np.shares_memory(get_array(frame, "a").codes, cat_before.codes)
    if using_copy_on_write:
        # The codes must have been copied and the view left untouched.
        assert not codes_shared
        assert frame._mgr._has_no_reference(0)
        assert alias._mgr._has_no_reference(0)
        tm.assert_frame_equal(alias, snapshot)
    else:
        assert codes_shared
|  | ||||
|  | ||||
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace(using_copy_on_write, val):
    """In-place replace on an unreferenced categorical column reuses its codes."""
    frame = DataFrame({"a": Categorical([1, 2, 3])})
    cat_before = get_array(frame, "a")

    # Only the 1.5 replacement value is expected to emit the FutureWarning.
    expected_warning = None
    if val == 1.5:
        expected_warning = FutureWarning
    pattern = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    with tm.assert_produces_warning(expected_warning, match=pattern):
        frame.replace(to_replace=1, value=val, inplace=True)

    # No other object references the data, so the codes buffer is kept.
    assert np.shares_memory(get_array(frame, "a").codes, cat_before.codes)
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)

    tm.assert_frame_equal(frame, DataFrame({"a": Categorical([val, 2, 3])}))
|  | ||||
|  | ||||
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical(using_copy_on_write, val):
    """Non-inplace replace on a categorical column returns independent data."""
    source = DataFrame({"a": Categorical([1, 2, 3])})
    snapshot = source.copy()

    # Only the 1.5 replacement value is expected to emit the FutureWarning.
    expected_warning = None
    if val == 1.5:
        expected_warning = FutureWarning
    pattern = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    with tm.assert_produces_warning(expected_warning, match=pattern):
        replaced = source.replace(to_replace=1, value=val)

    if using_copy_on_write:
        assert source._mgr._has_no_reference(0)
        assert replaced._mgr._has_no_reference(0)
    source_codes = get_array(source, "a").codes
    replaced_codes = get_array(replaced, "a").codes
    assert not np.shares_memory(source_codes, replaced_codes)
    tm.assert_frame_equal(source, snapshot)

    # Writing into the result afterwards is expected to reuse its own buffer.
    codes_before_write = get_array(replaced, "a").codes
    replaced.iloc[0, 0] = 2.0
    assert np.shares_memory(get_array(replaced, "a").codes, codes_before_write)
|  | ||||
|  | ||||
@pytest.mark.parametrize("method", ["where", "mask"])
def test_masking_inplace(using_copy_on_write, method, warn_copy_on_write):
    """In-place ``where``/``mask`` on a frame that is also referenced by a view.

    With copy-on-write the masked frame must end up with its own buffer and
    the view must keep the original values; otherwise the original buffer is
    modified in place.
    """
    df = DataFrame({"a": [1.5, 2, 3]})
    df_orig = df.copy()
    arr_a = get_array(df, "a")
    view = df[:]

    # Look the bound method up under a separate name instead of rebinding the
    # parametrized ``method`` argument.
    masking_op = getattr(df, method)
    # Pass the flag straight to the helper, as the sibling tests do, rather
    # than duplicating the call in an if/else.
    with tm.assert_cow_warning(warn_copy_on_write):
        masking_op(df["a"] > 1.6, -1, inplace=True)

    if using_copy_on_write:
        assert not np.shares_memory(get_array(df, "a"), arr_a)
        assert df._mgr._has_no_reference(0)
        assert view._mgr._has_no_reference(0)
        tm.assert_frame_equal(view, df_orig)
    else:
        assert np.shares_memory(get_array(df, "a"), arr_a)
|  | ||||
|  | ||||
def test_replace_empty_list(using_copy_on_write):
    """Replacing an empty list with an empty list."""
    source = DataFrame({"a": [1, 2]})

    result = source.replace([], [])
    shared = np.shares_memory(get_array(result, "a"), get_array(source, "a"))
    if using_copy_on_write:
        # With CoW the result is expected to share the parent's data.
        assert shared
        assert not source._mgr._has_no_reference(0)
    else:
        assert not shared

    # A second call whose result is discarded must not change the parent.
    buffer_before = get_array(source, "a")
    source.replace([], [])
    if using_copy_on_write:
        assert np.shares_memory(get_array(source, "a"), buffer_before)
        assert not source._mgr._has_no_reference(0)
        assert not result._mgr._has_no_reference(0)
|  | ||||
|  | ||||
@pytest.mark.parametrize("value", ["d", None])
def test_replace_object_list_inplace(using_copy_on_write, value):
    """In-place list replace on an object column with no other references."""
    frame = DataFrame({"a": ["a", "b", "c"]}, dtype=object)
    buffer_before = get_array(frame, "a")

    frame.replace(["c"], value, inplace=True)

    if not using_copy_on_write and value is not None:
        # This could be inplace
        assert not np.shares_memory(buffer_before, get_array(frame, "a"))
    else:
        assert tm.shares_memory(buffer_before, get_array(frame, "a"))

    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
|  | ||||
|  | ||||
def test_replace_list_multiple_elements_inplace(using_copy_on_write):
    """In-place replace of several values on an unreferenced frame keeps its buffer."""
    df = DataFrame({"a": [1, 2, 3]})
    arr = get_array(df, "a")

    df.replace([1, 2], 4, inplace=True)

    # The buffer must be reused in both modes, so this is asserted once
    # instead of being duplicated in each branch of an if/else.
    assert np.shares_memory(arr, get_array(df, "a"))
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
|  | ||||
|  | ||||
def test_replace_list_none(using_copy_on_write):
    """List replace with ``value=None`` leaves the original frame unchanged."""
    source = DataFrame({"a": ["a", "b", "c"]})
    snapshot = source.copy()

    replaced = source.replace(["b"], value=None)
    tm.assert_frame_equal(source, snapshot)
    assert not np.shares_memory(get_array(source, "a"), get_array(replaced, "a"))

    # replace multiple values that don't actually replace anything with None
    # https://github.com/pandas-dev/pandas/issues/59770
    untouched = source.replace(["d", "e", "f"], value=None)
    tm.assert_frame_equal(untouched, snapshot)
    shared = tm.shares_memory(get_array(source, "a"), get_array(untouched, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared
|  | ||||
|  | ||||
def test_replace_list_none_inplace_refs(using_copy_on_write, warn_copy_on_write):
    """In-place list replace with ``value=None`` while a view of the frame exists."""
    frame = DataFrame({"a": ["a", "b", "c"]})
    buffer_before = get_array(frame, "a")
    snapshot = frame.copy()
    alias = frame[:]

    with tm.assert_cow_warning(warn_copy_on_write):
        frame.replace(["a"], value=None, inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(buffer_before, get_array(frame, "a"))
        return
    # With CoW the frame gets its own data and the view keeps the old values.
    assert frame._mgr._has_no_reference(0)
    assert not np.shares_memory(buffer_before, get_array(frame, "a"))
    tm.assert_frame_equal(snapshot, alias)
|  | ||||
|  | ||||
def test_replace_columnwise_no_op_inplace(using_copy_on_write):
    """In-place dict replace of a value absent from column "a", with a view alive."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    alias = frame[:]
    snapshot = frame.copy()

    frame.replace({"a": 10}, 100, inplace=True)

    if not using_copy_on_write:
        return
    assert np.shares_memory(get_array(alias, "a"), get_array(frame, "a"))
    # A later write through the frame must not leak into the view.
    frame.iloc[0, 0] = 100
    tm.assert_frame_equal(alias, snapshot)
|  | ||||
|  | ||||
def test_replace_columnwise_no_op(using_copy_on_write):
    """Dict replace of a value absent from column "a" keeps the parent intact."""
    source = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    snapshot = source.copy()

    result = source.replace({"a": 10}, 100)
    if using_copy_on_write:
        assert np.shares_memory(get_array(result, "a"), get_array(source, "a"))

    # Writing into the result must never modify the parent.
    result.iloc[0, 0] = 100
    tm.assert_frame_equal(source, snapshot)
|  | ||||
|  | ||||
| def test_replace_chained_assignment(using_copy_on_write): | ||||
|     df = DataFrame({"a": [1, np.nan, 2], "b": 1}) | ||||
|     df_orig = df.copy() | ||||
|     if using_copy_on_write: | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].replace(1, 100, inplace=True) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df[["a"]].replace(1, 100, inplace=True) | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|     else: | ||||
|         with tm.assert_produces_warning(None): | ||||
|             with option_context("mode.chained_assignment", None): | ||||
|                 df[["a"]].replace(1, 100, inplace=True) | ||||
|  | ||||
|         with tm.assert_produces_warning(None): | ||||
|             with option_context("mode.chained_assignment", None): | ||||
|                 df[df.a > 5].replace(1, 100, inplace=True) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match="inplace method"): | ||||
|             df["a"].replace(1, 100, inplace=True) | ||||
|  | ||||
|  | ||||
def test_replace_listlike(using_copy_on_write):
    """List-to-list replace must never modify the frame it is called on.

    Covers a call where none of the listed values occur in the frame and a
    call where one of them does.
    """
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    df_orig = df.copy()

    # None of the values to replace occur in the frame.
    result = df.replace([200, 201], [11, 11])
    if using_copy_on_write:
        assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
    else:
        assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))

    # Writing into the result must not propagate to the parent. Compare
    # against the pristine copy: comparing ``df`` with itself can never fail.
    result.iloc[0, 0] = 100
    tm.assert_frame_equal(df, df_orig)

    # One of the values (2) occurs: the result must own its data.
    result = df.replace([200, 2], [10, 10])
    assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
    tm.assert_frame_equal(df, df_orig)
|  | ||||
|  | ||||
def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write):
    """In-place list-to-list replace, first without and then with a live view."""
    frame = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    buffer_a = get_array(frame, "a")

    # No view has been taken yet: the buffer is expected to be reused.
    frame.replace([200, 2], [10, 11], inplace=True)
    assert np.shares_memory(get_array(frame, "a"), buffer_a)

    alias = frame[:]
    snapshot = frame.copy()
    with tm.assert_cow_warning(warn_copy_on_write):
        frame.replace([200, 3], [10, 11], inplace=True)

    still_shared = np.shares_memory(get_array(frame, "a"), buffer_a)
    if using_copy_on_write:
        # The frame got fresh data and the view kept the previous values.
        assert not still_shared
        tm.assert_frame_equal(alias, snapshot)
    else:
        # Without CoW the write is visible through the view as well.
        assert still_shared
        tm.assert_frame_equal(frame, alias)
| @ -0,0 +1,156 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     RangeIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
| # ----------------------------------------------------------------------------- | ||||
| # Copy/view behaviour for the values that are set in a DataFrame | ||||
|  | ||||
|  | ||||
def test_set_column_with_array():
    """Assigning a NumPy array as a new column (df[col] = arr) copies the data."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    values = np.array([1, 2, 3], dtype="int64")

    frame["c"] = values

    # The column must not be backed by the array that was passed in ...
    assert not np.shares_memory(get_array(frame, "c"), values)
    # ... so mutating that array afterwards leaves the column untouched.
    values[0] = 0
    expected = Series([1, 2, 3], name="c")
    tm.assert_series_equal(frame["c"], expected)
|  | ||||
|  | ||||
def test_set_column_with_series(using_copy_on_write):
    """Assigning a Series as a new column (df[col] = s) isolates the column.

    With CoW the copy is delayed, so the buffers are initially shared.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    rhs = Series([1, 2, 3])

    frame["c"] = rhs

    shared = np.shares_memory(get_array(frame, "c"), get_array(rhs))
    if using_copy_on_write:
        assert shared
    else:
        # the series data is copied
        assert not shared

    # Modifying the series afterwards must not modify the DataFrame.
    rhs.iloc[0] = 0
    assert rhs.iloc[0] == 0
    expected = Series([1, 2, 3], name="c")
    tm.assert_series_equal(frame["c"], expected)
|  | ||||
|  | ||||
def test_set_column_with_index(using_copy_on_write):
    """Assigning an Index as a new column (df[col] = idx) copies the data."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    plain_index = Index([1, 2, 3])
    frame["c"] = plain_index
    # The column must not be backed by the index's values.
    assert not np.shares_memory(get_array(frame, "c"), plain_index.values)

    # Same check for a RangeIndex, holding on to its values before assigning.
    range_index = RangeIndex(1, 4)
    range_values = range_index.values
    frame["d"] = range_index
    assert not np.shares_memory(get_array(frame, "d"), range_values)
|  | ||||
|  | ||||
def test_set_columns_with_dataframe(using_copy_on_write):
    """Assigning a DataFrame as new columns isolates them from that DataFrame.

    With CoW the copy is delayed, so the buffers are initially shared.
    """
    target = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    rhs = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

    target[["c", "d"]] = rhs

    shared = np.shares_memory(get_array(target, "c"), get_array(rhs, "c"))
    if using_copy_on_write:
        assert shared
    else:
        # the data is copied
        assert not shared

    # Modifying the assigned DataFrame must not modify the target.
    rhs.iloc[0, 0] = 0
    expected = Series([7, 8, 9], name="c")
    tm.assert_series_equal(target["c"], expected)
|  | ||||
|  | ||||
def test_setitem_series_no_copy(using_copy_on_write):
    """Adding a Series as a new column can delay copying the Series' data."""
    frame = DataFrame({"a": [1, 2, 3]})
    new_values = Series([4, 5, 6])
    snapshot = new_values.copy()

    # adding a new column
    frame["b"] = new_values
    if using_copy_on_write:
        assert np.shares_memory(get_array(new_values), get_array(frame, "b"))

    # Writing through the frame must leave the Series untouched.
    frame.iloc[0, 1] = 100
    tm.assert_series_equal(new_values, snapshot)
|  | ||||
|  | ||||
def test_setitem_series_no_copy_single_block(using_copy_on_write):
    """Overwriting an existing column that is a single block with a Series."""
    frame = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    new_values = Series([4, 5, 6])
    snapshot = new_values.copy()

    frame["a"] = new_values
    if using_copy_on_write:
        assert np.shares_memory(get_array(new_values), get_array(frame, "a"))

    # Writing through the frame must leave the Series untouched.
    frame.iloc[0, 0] = 100
    tm.assert_series_equal(new_values, snapshot)
|  | ||||
|  | ||||
def test_setitem_series_no_copy_split_block(using_copy_on_write):
    """Overwriting an existing column that is part of a larger block with a Series."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1})
    new_values = Series([4, 5, 6])
    snapshot = new_values.copy()

    frame["b"] = new_values
    if using_copy_on_write:
        assert np.shares_memory(get_array(new_values), get_array(frame, "b"))

    # Writing through the frame must leave the Series untouched.
    frame.iloc[0, 1] = 100
    tm.assert_series_equal(new_values, snapshot)
|  | ||||
|  | ||||
def test_setitem_series_column_midx_broadcasting(using_copy_on_write):
    """Setting a Series on a first-level MultiIndex key repeats the data.

    The data is currently copied eagerly.
    """
    columns = MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]])
    frame = DataFrame([[1, 2, 3], [3, 4, 5]], columns=columns)
    new_values = Series([10, 11])

    frame["a"] = new_values

    assert not np.shares_memory(get_array(new_values), frame._get_column_array(0))
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
|  | ||||
|  | ||||
| def test_set_column_with_inplace_operator(using_copy_on_write, warn_copy_on_write): | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) | ||||
|  | ||||
|     # this should not raise any warning | ||||
|     with tm.assert_produces_warning(None): | ||||
|         df["a"] += 1 | ||||
|  | ||||
|     # when it is not in a chain, then it should produce a warning | ||||
|     df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) | ||||
|     ser = df["a"] | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         ser += 1 | ||||
| @ -0,0 +1,14 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import DataFrame | ||||
| from pandas.tests.copy_view.util import get_array | ||||
|  | ||||
|  | ||||
def test_get_array_numpy():
    """Two ``get_array`` calls on the same int column share memory."""
    frame = DataFrame({"a": [1, 2, 3]})
    first = get_array(frame, "a")
    second = get_array(frame, "a")
    assert np.shares_memory(first, second)
|  | ||||
|  | ||||
def test_get_array_masked():
    """Two ``get_array`` calls on the same ``Int64`` column share memory."""
    frame = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
    first = get_array(frame, "a")
    second = get_array(frame, "a")
    assert np.shares_memory(first, second)
							
								
								
									
										30
									
								
								lib/python3.11/site-packages/pandas/tests/copy_view/util.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								lib/python3.11/site-packages/pandas/tests/copy_view/util.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| from pandas.core.arrays import BaseMaskedArray | ||||
|  | ||||
|  | ||||
def get_array(obj, col=None):
    """
    Return the array backing an Index, a Series or one DataFrame column.

    Equivalent of df[col].values, but without going through normal getitem,
    which triggers tracking references / CoW (and we might be testing that
    this is done by some other operation).

    For an Index, or a Series when ``col`` is None or equals its name, the
    object's own values are used. Otherwise ``obj`` is treated as a DataFrame
    and ``col`` must be a label that resolves to a single integer position.
    Masked arrays are unwrapped to ``_data``, Categoricals are returned as
    they are, and anything else is unwrapped to ``_ndarray`` when it has one.
    """
    is_index = isinstance(obj, Index)
    is_matching_series = isinstance(obj, Series) and (col is None or obj.name == col)

    if is_index or is_matching_series:
        values = obj._values
    else:
        # DataFrame path: the column label is mandatory here.
        assert col is not None
        position = obj.columns.get_loc(col)
        assert isinstance(position, int)
        values = obj._get_column_array(position)

    if isinstance(values, BaseMaskedArray):
        return values._data
    if isinstance(values, Categorical):
        return values
    return getattr(values, "_ndarray", values)
		Reference in New Issue
	
	Block a user