done
This commit is contained in:
		
							
								
								
									
										40
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/common.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/common.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,40 @@ | ||||
| """ common utilities """ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from typing import ( | ||||
|     Any, | ||||
|     Literal, | ||||
| ) | ||||
|  | ||||
|  | ||||
def _mklbl(prefix: str, n: int):
    """Return the ``n`` labels ``prefix0``, ``prefix1``, ... as a list of strings."""
    labels = []
    for position in range(n):
        labels.append("{}{}".format(prefix, position))
    return labels
|  | ||||
|  | ||||
def check_indexing_smoketest_or_raises(
    obj,
    method: Literal["iloc", "loc"],
    key: Any,
    axes: Literal[0, 1] | None = None,
    fails=None,
) -> None:
    """
    Index ``obj`` through ``getattr(obj, method)`` with ``key`` on each axis.

    For every requested axis that is smaller than ``obj.ndim`` a tuple
    accessor is built that holds ``key`` at that axis and ``slice(None)``
    everywhere else, and the lookup is performed.

    Parameters
    ----------
    obj : object exposing ``ndim`` and the attribute named by ``method``
    method : {"iloc", "loc"}
        Name of the indexer attribute to exercise.
    key : Any
        Key placed on the axis under test.
    axes : {0, 1} or None, default None
        Single axis to test; ``None`` tests axis 0 and then axis 1.
    fails : exception type or tuple of types, optional
        If the lookup raises IndexError, TypeError or KeyError and the
        exception is an instance of ``fails``, the function returns
        immediately (remaining axes are not tried).

    Raises
    ------
    IndexError, TypeError, KeyError
        Re-raised when the lookup fails and the error is not covered
        by ``fails``.
    """
    if axes is not None:
        assert axes in [0, 1]
        candidate_axes = [axes]
    else:
        candidate_axes = [0, 1]

    for axis in candidate_axes:
        if axis >= obj.ndim:
            # nothing to index on this axis
            continue

        # full slices everywhere except the axis under test
        accessor = tuple(
            key if position == axis else slice(None) for position in range(obj.ndim)
        )
        try:
            getattr(obj, method).__getitem__(accessor)
        except (IndexError, TypeError, KeyError) as err:
            # an expected failure is fine; anything else propagates
            if fails is not None and isinstance(err, fails):
                return
            raise
							
								
								
									
										127
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/conftest.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/conftest.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,127 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture
def series_ints():
    """Series of four ``default_rng(2).random`` draws on ``np.arange(0, 8, 2)``."""
    draws = np.random.default_rng(2).random(4)
    labels = np.arange(0, 8, 2)
    return Series(draws, index=labels)
|  | ||||
|  | ||||
@pytest.fixture
def frame_ints():
    """4x4 standard-normal DataFrame with integer row and column labels."""
    data = np.random.default_rng(2).standard_normal((4, 4))
    row_labels = np.arange(0, 8, 2)
    column_labels = np.arange(0, 12, 3)
    return DataFrame(data, index=row_labels, columns=column_labels)
|  | ||||
|  | ||||
@pytest.fixture
def series_uints():
    """Series of four random draws indexed by a uint64 ``arange(0, 8, 2)``."""
    draws = np.random.default_rng(2).random(4)
    labels = Index(np.arange(0, 8, 2, dtype=np.uint64))
    return Series(draws, index=labels)
|  | ||||
|  | ||||
@pytest.fixture
def frame_uints():
    """4x4 standard-normal DataFrame whose row/column Indexes use dtype uint64."""
    data = np.random.default_rng(2).standard_normal((4, 4))
    row_labels = Index(range(0, 8, 2), dtype=np.uint64)
    column_labels = Index(range(0, 12, 3), dtype=np.uint64)
    return DataFrame(data, index=row_labels, columns=column_labels)
|  | ||||
|  | ||||
@pytest.fixture
def series_labels():
    """Series of four standard-normal draws labelled ``a`` through ``d``."""
    draws = np.random.default_rng(2).standard_normal(4)
    labels = list("abcd")
    return Series(draws, index=labels)
|  | ||||
|  | ||||
@pytest.fixture
def frame_labels():
    """4x4 standard-normal DataFrame with rows ``a``-``d`` and columns ``A``-``D``."""
    data = np.random.default_rng(2).standard_normal((4, 4))
    row_labels = list("abcd")
    column_labels = list("ABCD")
    return DataFrame(data, index=row_labels, columns=column_labels)
|  | ||||
|  | ||||
@pytest.fixture
def series_ts():
    """Series of four standard-normal draws on ``date_range("20130101", periods=4)``."""
    draws = np.random.default_rng(2).standard_normal(4)
    stamps = date_range("20130101", periods=4)
    return Series(draws, index=stamps)
|  | ||||
|  | ||||
@pytest.fixture
def frame_ts():
    """4x4 standard-normal DataFrame indexed by ``date_range("20130101", periods=4)``."""
    data = np.random.default_rng(2).standard_normal((4, 4))
    stamps = date_range("20130101", periods=4)
    return DataFrame(data, index=stamps)
|  | ||||
|  | ||||
@pytest.fixture
def series_floats():
    """Series of four random draws indexed by a float64 ``range(0, 8, 2)``."""
    draws = np.random.default_rng(2).random(4)
    labels = Index(range(0, 8, 2), dtype=np.float64)
    return Series(draws, index=labels)
|  | ||||
|  | ||||
@pytest.fixture
def frame_floats():
    """4x4 standard-normal DataFrame whose row/column Indexes use dtype float64."""
    data = np.random.default_rng(2).standard_normal((4, 4))
    row_labels = Index(range(0, 8, 2), dtype=np.float64)
    column_labels = Index(range(0, 12, 3), dtype=np.float64)
    return DataFrame(data, index=row_labels, columns=column_labels)
|  | ||||
|  | ||||
@pytest.fixture
def series_mixed():
    """Series of four standard-normal draws on the mixed index ``[2, 4, "null", 8]``."""
    draws = np.random.default_rng(2).standard_normal(4)
    labels = [2, 4, "null", 8]
    return Series(draws, index=labels)
|  | ||||
|  | ||||
@pytest.fixture
def frame_mixed():
    """4x4 standard-normal DataFrame on the mixed row index ``[2, 4, "null", 8]``."""
    data = np.random.default_rng(2).standard_normal((4, 4))
    row_labels = [2, 4, "null", 8]
    return DataFrame(data, index=row_labels)
|  | ||||
|  | ||||
@pytest.fixture
def frame_empty():
    """DataFrame constructed with no arguments."""
    empty_frame = DataFrame()
    return empty_frame
|  | ||||
|  | ||||
@pytest.fixture
def series_empty():
    """Series constructed with no data and ``dtype=object``."""
    empty_series = Series(dtype=object)
    return empty_series
|  | ||||
|  | ||||
@pytest.fixture
def frame_multi():
    """4x4 standard-normal DataFrame with two-level MultiIndex rows and columns."""
    data = np.random.default_rng(2).standard_normal((4, 4))
    row_labels = MultiIndex.from_product([[1, 2], [3, 4]])
    column_labels = MultiIndex.from_product([[5, 6], [7, 8]])
    return DataFrame(data, index=row_labels, columns=column_labels)
|  | ||||
|  | ||||
@pytest.fixture
def series_multi():
    """Series of four random draws on ``MultiIndex.from_product([[1, 2], [3, 4]])``."""
    draws = np.random.default_rng(2).random(4)
    labels = MultiIndex.from_product([[1, 2], [3, 4]])
    return Series(draws, index=labels)
| @ -0,0 +1,225 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import index as libindex | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     IntervalIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestIntervalIndex:
    """Getitem / loc / setitem checks on objects indexed by an IntervalIndex."""

    @pytest.fixture
    def series_with_interval_index(self):
        """Series ``arange(5)`` indexed by intervals built from breaks ``arange(6)``."""
        breaks = np.arange(6)
        return Series(np.arange(5), IntervalIndex.from_breaks(breaks))

    def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl):
        """Scalar-bounded slices, label lists and a boolean mask agree with iloc."""
        ser = series_with_interval_index.copy()

        head = ser.iloc[:3]
        for bounds in (slice(None, 3), slice(None, 2.5), slice(0.1, 2.5)):
            tm.assert_series_equal(head, indexer_sl(ser)[bounds])
        if indexer_sl is tm.loc:
            tm.assert_series_equal(head, ser.loc[-1:3])

        middle = ser.iloc[1:4]
        for labels in ([1.5, 2.5, 3.5], [2, 3, 4], [1.5, 3, 4]):
            tm.assert_series_equal(middle, indexer_sl(ser)[labels])

        tail = ser.iloc[2:5]
        tm.assert_series_equal(tail, indexer_sl(ser)[ser >= 2])

    @pytest.mark.parametrize("direction", ["increasing", "decreasing"])
    def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl):
        """Endpoints hit only on a closed side; midpoints always hit."""
        bounds = [(0, 1), (2, 3), (4, 5)]
        if direction == "decreasing":
            bounds.reverse()

        idx = IntervalIndex.from_tuples(bounds, closed=closed)
        ser = Series(list("abc"), idx)

        # left endpoints first, then right endpoints
        endpoint_checks = (
            (idx.left, idx.closed_left),
            (idx.right, idx.closed_right),
        )
        for endpoints, side_is_closed in endpoint_checks:
            for key, expected in zip(endpoints, ser):
                if not side_is_closed:
                    with pytest.raises(KeyError, match=str(key)):
                        indexer_sl(ser)[key]
                else:
                    assert indexer_sl(ser)[key] == expected

        for key, expected in zip(idx.mid, ser):
            assert indexer_sl(ser)[key] == expected

    def test_getitem_non_matching(self, series_with_interval_index, indexer_sl):
        """A list containing a label outside every interval raises KeyError."""
        ser = series_with_interval_index.copy()

        # this is a departure from our current
        # indexing scheme, but simpler
        for labels in ([-1, 3, 4, 5], [-1, 3]):
            with pytest.raises(KeyError, match=r"\[-1\] not in index"):
                indexer_sl(ser)[labels]

    def test_loc_getitem_large_series(self, monkeypatch):
        """Equivalent slice spellings agree once ``_SIZE_CUTOFF`` is patched to 20."""
        size_cutoff = 20
        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            breaks = np.arange(size_cutoff + 1)
            ser = Series(
                np.arange(size_cutoff),
                index=IntervalIndex.from_breaks(breaks),
            )

            open_start = ser.loc[:8]
            explicit_start = ser.loc[0:8]
            explicit_step = ser.loc[0:8:1]
        tm.assert_series_equal(open_start, explicit_start)
        tm.assert_series_equal(open_start, explicit_step)

    def test_loc_getitem_frame(self):
        """``loc`` on a frame indexed by the result of ``pd.cut``."""
        # CategoricalIndex with IntervalIndex categories
        df = DataFrame({"A": range(10)})
        df["B"] = pd.cut(df.A, 5)
        df = df.set_index("B")

        rows_for_four = df.iloc[4:6]

        # scalar
        tm.assert_frame_equal(df.loc[4], rows_for_four)

        with pytest.raises(KeyError, match="10"):
            df.loc[10]

        # single list-like
        tm.assert_frame_equal(df.loc[[4]], rows_for_four)

        # non-unique
        tm.assert_frame_equal(df.loc[[4, 5]], df.take([4, 5, 4, 5]))

        none_found = (
            r"None of \[Index\(\[10\], dtype='object', name='B'\)\] "
            r"are in the \[index\]"
        )
        with pytest.raises(KeyError, match=none_found):
            df.loc[[10]]

        # partial missing
        with pytest.raises(KeyError, match=r"\[10\] not in index"):
            df.loc[[10, 4]]

    def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
        """Indexing with an all-NaN interval key returns the whole object."""
        # GH#41831

        index = IntervalIndex([np.nan, np.nan])
        key = index[:-1]

        obj = frame_or_series(range(2), index=index)
        needs_transpose = frame_or_series is DataFrame and indexer_sl is tm.setitem
        if needs_transpose:
            obj = obj.T

        tm.assert_equal(indexer_sl(obj)[key], obj)

    def test_setitem_interval_with_slice(self):
        """Setting through ``loc`` with scalar slice bounds on an IntervalIndex."""
        # GH#54722
        intervals = IntervalIndex.from_breaks(range(4, 15))
        ser = Series(range(10), index=intervals)

        reference = ser.copy()

        # This should be a no-op (used to raise)
        ser.loc[1:3] = 20
        tm.assert_series_equal(ser, reference)

        ser.loc[6:8] = 19
        reference.iloc[1:4] = 19
        tm.assert_series_equal(ser, reference)

        sparse = Series(range(5), index=intervals[::2])
        sparse_reference = sparse.copy()

        # this used to raise
        sparse.loc[6:8] = 22  # <- raises on main, sets on branch
        sparse_reference.iloc[1] = 22
        tm.assert_series_equal(sparse, sparse_reference)

        sparse.loc[5:7] = 21
        sparse_reference.iloc[:2] = 21
        tm.assert_series_equal(sparse, sparse_reference)
|  | ||||
|  | ||||
class TestIntervalIndexInsideMultiIndex:
    """Checks for an IntervalIndex used as a MultiIndex level, plus reindex."""

    def test_mi_intervalindex_slicing_with_scalar(self):
        """``loc`` with a MultiIndex of float keys against an interval level."""
        # GH#27456
        ii = IntervalIndex.from_arrays(
            [0, 1, 10, 11] * 2, [1, 2, 11, 12] * 2, name="MP"
        )
        item_level = pd.Index(["FC"] * 4 + ["OWNER"] * 4)
        rid_level = pd.Index(["RID1", "RID1", "RID2", "RID2"] * 2)
        idx = pd.MultiIndex.from_arrays([item_level, rid_level, ii])

        idx.names = ["Item", "RID", "MP"]
        df = DataFrame({"value": list(range(1, 9))})
        df.index = idx

        query_columns = {
            "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"],
            "RID": ["RID1", "RID1", "RID1", "RID2", "RID2"],
            "MP": [0.2, 1.5, 1.6, 11.1, 10.9],
        }
        query_df = DataFrame(query_columns)

        query_df = query_df.sort_index()

        idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP])
        query_df.index = idx
        result = df.value.loc[query_df.index]

        # the IntervalIndex level is indexed with floats, which map to
        #  the intervals containing them.  Matching the behavior we would get
        #  with _only_ an IntervalIndex, we get an IntervalIndex level back.
        sliced_level = ii.take([0, 1, 1, 3, 2])
        outer_levels = [idx.get_level_values(0), idx.get_level_values(1)]
        expected_index = pd.MultiIndex.from_arrays(outer_levels + [sliced_level])
        expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "base",
        [101, 1010],
    )
    def test_reindex_behavior_with_interval_index(self, base):
        """Reindexing an interval-indexed Series by ``[nan, 1.0]``."""
        # GH 51826

        unit_intervals = IntervalIndex.from_arrays(range(base), range(1, base + 1))
        ser = Series(range(base), index=unit_intervals)

        expected_result = Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)
        result = ser.reindex(index=[np.nan, 1.0])
        tm.assert_series_equal(result, expected_result)
| @ -0,0 +1,229 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestIntervalIndex:
    """Label-based indexing checks on a Series indexed by an IntervalIndex."""

    @pytest.fixture
    def series_with_interval_index(self):
        """Series ``arange(5)`` indexed by intervals built from breaks ``arange(6)``."""
        breaks = np.arange(6)
        return Series(np.arange(5), IntervalIndex.from_breaks(breaks))

    def test_loc_with_interval(self, series_with_interval_index, indexer_sl):
        """Interval keys must match an index entry exactly."""
        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        ser = series_with_interval_index.copy()

        assert indexer_sl(ser)[Interval(0, 1)] == 0

        last_two = ser.iloc[3:5]
        tm.assert_series_equal(
            last_two, indexer_sl(ser)[[Interval(3, 4), Interval(4, 5)]]
        )

        # missing or not exact
        absent = [
            (Interval(3, 5, closed="left"), "Interval(3, 5, closed='left')"),
            (Interval(3, 5), "Interval(3, 5, closed='right')"),
            (Interval(-2, 0), "Interval(-2, 0, closed='right')"),
            (Interval(5, 6), "Interval(5, 6, closed='right')"),
        ]
        for key, text in absent:
            with pytest.raises(KeyError, match=re.escape(text)):
                indexer_sl(ser)[key]

    def test_loc_with_scalar(self, series_with_interval_index, indexer_sl):
        """Scalar keys select the interval(s) containing them."""
        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        ser = series_with_interval_index.copy()

        for key, value in ((1, 0), (1.5, 1), (2, 1)):
            assert indexer_sl(ser)[key] == value

        middle = ser.iloc[1:4]
        for labels in ([1.5, 2.5, 3.5], [2, 3, 4], [1.5, 3, 4]):
            tm.assert_series_equal(middle, indexer_sl(ser)[labels])

        repeated = ser.iloc[[1, 1, 2, 1]]
        tm.assert_series_equal(repeated, indexer_sl(ser)[[1.5, 2, 2.5, 1.5]])

        tail = ser.iloc[2:5]
        tm.assert_series_equal(tail, indexer_sl(ser)[ser >= 2])

    def test_loc_with_slices(self, series_with_interval_index, indexer_sl):
        """Slices bounded by Interval objects."""
        # loc with slices:
        #   - Interval objects: only works with exact matches
        #   - scalars: only works for non-overlapping, monotonic intervals,
        #     and start/stop select location based on the interval that
        #     contains them:
        #    (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))

        ser = series_with_interval_index.copy()

        # slice of interval

        head = ser.iloc[:3]
        tm.assert_series_equal(head, indexer_sl(ser)[Interval(0, 1) : Interval(2, 3)])

        tail = ser.iloc[3:]
        tm.assert_series_equal(tail, indexer_sl(ser)[Interval(3, 4) :])

        msg = "Interval objects are not currently supported"
        for start in (Interval(3, 6), Interval(3, 4, closed="left")):
            with pytest.raises(NotImplementedError, match=msg):
                indexer_sl(ser)[start:]

    def test_slice_step_ne1(self, series_with_interval_index):
        """Integer slices with a step other than 1 on ``ser[...]``."""
        # GH#31658 slice of scalar with step != 1
        ser = series_with_interval_index.copy()
        every_other = ser.iloc[0:4:2]

        tm.assert_series_equal(ser[0:4:2], every_other)

        two_step = ser[0:4][::2]
        tm.assert_series_equal(two_step, every_other)

    def test_slice_float_start_stop(self, series_with_interval_index):
        """Float bounds combined with a step other than 1 raise ValueError."""
        # GH#31658 slicing with integers is positional, with floats is not
        #  supported
        ser = series_with_interval_index.copy()

        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            ser[1.5:9.5:2]

    def test_slice_interval_step(self, series_with_interval_index):
        """An Interval used as the slice step raises ValueError."""
        # GH#31658 allows for integer step!=1, not Interval step
        ser = series_with_interval_index.copy()
        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            ser[0 : 4 : Interval(0, 1)]

    def test_loc_with_overlap(self, indexer_sl):
        """Scalar, Interval and slice lookups on overlapping intervals."""
        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        # scalar
        for key in (4, [4]):
            tm.assert_series_equal(ser, indexer_sl(ser)[key])

        # interval
        assert 0 == indexer_sl(ser)[Interval(1, 5)]

        both = [Interval(1, 5), Interval(3, 7)]
        tm.assert_series_equal(ser, indexer_sl(ser)[both])

        not_exact = re.escape("Interval(3, 5, closed='right')")
        with pytest.raises(KeyError, match=not_exact):
            indexer_sl(ser)[Interval(3, 5)]

        none_found = (
            r"None of \[IntervalIndex\(\[\(3, 5\]\], "
            r"dtype='interval\[int64, right\]'\)\] are in the \[index\]"
        )
        with pytest.raises(KeyError, match=none_found):
            indexer_sl(ser)[[Interval(3, 5)]]

        # slices with interval (only exact matches)
        tm.assert_series_equal(ser, indexer_sl(ser)[Interval(1, 5) : Interval(3, 7)])

        overlap_msg = (
            "'can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=overlap_msg):
            indexer_sl(ser)[Interval(1, 6) : Interval(3, 8)]

        if indexer_sl is tm.loc:
            # slices with scalar raise for overlapping intervals
            # TODO KeyError is the appropriate error?
            with pytest.raises(KeyError, match=overlap_msg):
                ser.loc[1:4]

    def test_non_unique(self, indexer_sl):
        """Interval lookups on the index ``[(1, 3], (3, 7]]``."""
        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        assert indexer_sl(ser)[Interval(1, 3)] == 0

        as_list = indexer_sl(ser)[[Interval(1, 3)]]
        tm.assert_series_equal(ser.iloc[0:1], as_list)

    def test_non_unique_moar(self, indexer_sl):
        """Interval lookups when the same interval appears twice in the index."""
        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        duplicates = ser.iloc[[0, 1]]
        tm.assert_series_equal(duplicates, indexer_sl(ser)[Interval(1, 3)])

        tm.assert_series_equal(ser, indexer_sl(ser)[Interval(1, 3) :])

        duplicates = ser.iloc[[0, 1]]
        tm.assert_series_equal(duplicates, indexer_sl(ser)[[Interval(1, 3)]])

    def test_loc_getitem_missing_key_error_message(
        self, frame_or_series, series_with_interval_index
    ):
        """The KeyError for a partly missing list names the missing label."""
        # GH#27365
        obj = frame_or_series(series_with_interval_index.copy())
        with pytest.raises(KeyError, match=r"\[6\]"):
            obj.loc[[4, 5, 6]]
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "intervals",
    [
        [Interval(-np.inf, 0.0), Interval(0.0, 1.0)],
        [Interval(-np.inf, -2.0), Interval(-2.0, -1.0)],
        [Interval(-1.0, 0.0), Interval(0.0, np.inf)],
        [Interval(1.0, 2.0), Interval(2.0, np.inf)],
    ],
)
def test_repeating_interval_index_with_infs(intervals):
    """``get_indexer_for`` finds every repeat of the second interval."""
    # GH 46658

    # the pair is tiled 51 times, so the second interval sits at odd positions
    tiled = Index(intervals * 51)
    target = [intervals[1]]

    odd_positions = np.arange(1, 102, 2, dtype=np.intp)
    found = tiled.get_indexer_for(target)

    tm.assert_equal(found, odd_positions)
| @ -0,0 +1,87 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import index as libindex | ||||
| from pandas.errors import SettingWithCopyError | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write):
    """Chained in-place ``fillna`` on a MultiIndex-column frame, per copy mode."""
    # Inplace ops, originally from:
    # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    column_keys = [
        ("eyes", "left"),
        ("eyes", "right"),
        ("ears", "left"),
        ("ears", "right"),
    ]
    column_values = [
        [12, 23],
        [123, None],
        [1234, 2345],
        [12345, 23456],
    ]
    events = dict(zip(column_keys, column_values))
    multiind = MultiIndex.from_tuples(column_keys, names=["part", "side"])
    zed = DataFrame(events, index=["a", "b"], columns=multiind)

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            zed["eyes"]["right"].fillna(value=555, inplace=True)
    elif warn_copy_on_write:
        with tm.assert_produces_warning(None):
            zed["eyes"]["right"].fillna(value=555, inplace=True)
    else:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg), tm.assert_produces_warning(
            None
        ):
            zed["eyes"]["right"].fillna(value=555, inplace=True)
|  | ||||
|  | ||||
@td.skip_array_manager_invalid_test  # with ArrayManager df.loc[0] is not a view
def test_cache_updating(using_copy_on_write, warn_copy_on_write):
    """Chained assignment followed by a direct ``loc`` set on a MultiIndex frame."""
    # 5216
    # make sure that we don't try to set a dead cache
    values = np.random.default_rng(2).random((10, 3))
    df = DataFrame(values, columns=["x", "y", "z"])
    df_original = df.copy()
    pairs = [(outer, inner) for outer in range(5) for inner in range(2)]
    df.index = MultiIndex.from_tuples(pairs)

    # setting via chained assignment
    # but actually works, since everything is a view

    with tm.raises_chained_assignment_error():
        df.loc[0]["z"].iloc[0] = 1.0

    if not using_copy_on_write:
        after_chained = df.loc[(0, 0), "z"]
        assert after_chained == 1
    else:
        assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]

    # correct setting
    df.loc[(0, 0), "z"] = 2
    after_direct = df.loc[(0, 0), "z"]
    assert after_direct == 2
|  | ||||
|  | ||||
def test_indexer_caching(monkeypatch):
    """Boolean-mask setitem on a MultiIndex Series with ``_SIZE_CUTOFF`` patched."""
    # GH5727
    # make sure that indexers are in the _internal_names_set
    size_cutoff = 20
    with monkeypatch.context():
        monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        level_arrays = [np.arange(size_cutoff), np.arange(size_cutoff)]
        index = MultiIndex.from_arrays(level_arrays)
        ser = Series(np.zeros(size_cutoff), index=index)

        # setitem
        ser[ser == 0] = 1
    all_ones = Series(np.ones(size_cutoff), index=index)
    tm.assert_series_equal(ser, all_ones)
| @ -0,0 +1,50 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Period, | ||||
|     Series, | ||||
|     period_range, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_multiindex_period_datetime():
    """A Period and a datetime both select the first row via ``loc``."""
    # GH4861, using datetime in period of multiindex raises exception

    outer = Index(["a", "a", "a", "b", "b"])
    inner = period_range("2012-01", periods=len(outer), freq="M")
    draws = np.random.default_rng(2).standard_normal(len(outer))
    ser = Series(draws, [outer, inner])

    first_value = ser.iloc[0]

    # try Period as index, then datetime as index
    for second_key in (Period("2012-01"), datetime(2012, 1, 1)):
        looked_up = ser.loc["a", second_key]
        assert looked_up == first_value
|  | ||||
|  | ||||
def test_multiindex_datetime_columns():
    """Empty frames with ``from_tuples`` and ``from_arrays`` columns are equal."""
    # GH35015, using datetime as column indices raises exception

    level_a = to_datetime("02/29/2020")
    level_b = to_datetime("03/01/2020")
    level_names = ["a", "b"]

    columns_from_tuples = MultiIndex.from_tuples(
        [(level_a, level_b)], names=level_names
    )
    df = DataFrame([], columns=columns_from_tuples)

    columns_from_arrays = MultiIndex.from_arrays(
        [[level_a], [level_b]], names=level_names
    )
    expected_df = DataFrame([], columns=columns_from_arrays)

    tm.assert_frame_equal(df, expected_df)
| @ -0,0 +1,410 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.indexing import IndexingError | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # test indexing of Series with multi-level Index | ||||
| # ---------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "access_method",
    [
        lambda s, x: s[:, x],
        lambda s, x: s.loc[:, x],
        lambda s, x: s.xs(x, level=1),
    ],
)
@pytest.mark.parametrize(
    "level1_value, expected",
    [
        (0, Series([1], index=[0])),
        (1, Series([2, 3], index=[1, 2])),
    ],
)
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Selecting on the second level gives the same rows for each access method."""
    # GH 6018
    # series regression getitem with a multi-index

    level_pairs = [(0, 0), (1, 1), (2, 1)]
    ser = Series([1, 2, 3], index=MultiIndex.from_tuples(level_pairs, names=["A", "B"]))

    # the remaining level keeps the name "A" in the selection result
    expected.index.name = "A"

    selected = access_method(ser, level1_value)
    tm.assert_series_equal(selected, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("level0_value", ["D", "A"])
def test_series_getitem_duplicates_multiindex(level0_value):
    """Missing first-level labels raise KeyError; present ones select rows."""
    # GH 5725: the label 'A' happens to be a valid Timestamp, so the lookup
    # doesn't raise the appropriate error, only in PY3 of course!
    tag_level = [level0_value, "B", "C"]
    day_level = [0, 26, 27, 37, 57, 67, 75, 82]
    index = MultiIndex(
        levels=[tag_level, day_level],
        codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
        names=["tag", "day"],
    )
    arr = np.random.default_rng(2).standard_normal((len(index), 1))
    df = DataFrame(arr, index=index, columns=["val"])

    # confirm indexing on missing value raises KeyError
    # ("A" is only missing when it is not the parametrized first-level label)
    absent = [("X", r"^'X'$")]
    if level0_value != "A":
        absent.insert(0, ("A", r"^'A'$"))
    for label, pattern in absent:
        with pytest.raises(KeyError, match=pattern):
            df.val[label]

    # the first three rows carry ``level0_value`` as their tag
    expected = Series(arr[:3, 0], name="val", index=Index([26, 37, 57], name="day"))
    result = df.val[level0_value]
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl):
    """Partial key ``(2000, 3)`` selects rows 42..64 and drops two levels."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]

    result = indexer_sl(ser)[2000, 3]

    # Same rows taken positionally, with the two outer index levels removed.
    expected = ser.reindex(ser.index[42:65])
    expected.index = expected.index.droplevel([0, 1])
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_series_getitem_returns_scalar(
    multiindex_year_month_day_dataframe_random_data, indexer_sl
):
    """A full three-level key yields the scalar stored at position 49."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]

    result = indexer_sl(ser)[2000, 3, 10]

    assert result == ser.iloc[49]
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "indexer,expected_error,expected_error_msg",
    [
        # label-based lookups of a key that is not in the index
        (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        # more key elements than index levels
        (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
        # integer one past the end
        (lambda s: s.__getitem__(len(s)), KeyError, ""),  # match should include len(s)
        (lambda s: s[len(s)], KeyError, ""),  # match should include len(s)
        (
            lambda s: s.iloc[len(s)],
            IndexError,
            "single positional indexer is out-of-bounds",
        ),
    ],
)
def test_series_getitem_indexing_errors(
    multiindex_year_month_day_dataframe_random_data,
    indexer,
    expected_error,
    expected_error_msg,
):
    """Each invalid lookup raises the parametrized error type and message."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(ser)
|  | ||||
|  | ||||
def test_series_getitem_corner_generator(
    multiindex_year_month_day_dataframe_random_data,
):
    """Indexing with a generator of booleans matches boolean-mask indexing."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]

    positive_flags = (value > 0 for value in ser)
    result = ser[positive_flags]

    expected = ser[ser > 0]
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # test indexing of DataFrame with multi-level Index | ||||
| # ---------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_getitem_simple(multiindex_dataframe_random_data):
    """A full column key on the transposed frame returns its first column."""
    transposed = multiindex_dataframe_random_data.T

    result = transposed["foo", "one"].values
    expected = transposed.values[:, 0]

    tm.assert_almost_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "indexer,expected_error_msg",
    [
        (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
        (lambda df: df["foobar"], r"^'foobar'$"),
    ],
)
def test_frame_getitem_simple_key_error(
    multiindex_dataframe_random_data, indexer, expected_error_msg
):
    """Looking up a missing column key raises KeyError naming that key."""
    transposed = multiindex_dataframe_random_data.T
    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(transposed)
|  | ||||
|  | ||||
def test_tuple_string_column_names():
    """Flattened columns can be selected with a mix of tuple and str labels."""
    # GH#50372
    column_tuples = [("a", "aa"), ("a", "ab"), ("b", "ba"), ("b", "bb")]
    rows = [range(start, start + 4) for start in range(3)]
    df = DataFrame(rows, columns=MultiIndex.from_tuples(column_tuples))
    df["single_index"] = 0

    # Flatten the MultiIndex columns into plain tuples, then add a str column.
    df_flat = df.copy()
    df_flat.columns = df_flat.columns.to_flat_index()
    df_flat["new_single_index"] = 0

    wanted = [("a", "aa"), "new_single_index"]
    result = df_flat[wanted]

    expected = DataFrame([[0, 0], [1, 0], [2, 0]], columns=Index(wanted))
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_frame_getitem_multicolumn_empty_level():
    """Top-level selection when the middle column level label is empty."""
    df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
    level_1 = ["level1 item1", "level1 item2"]
    level_2 = ["", "level2 item2"]
    level_3 = ["level3 item1", "level3 item2"]
    df.columns = [level_1, level_2, level_3]

    result = df["level1 item1"]

    # Only the third-level label remains on the selected column.
    expected = DataFrame(
        [[value] for value in ("1", "2", "3")],
        index=df.index,
        columns=["level3 item1"],
    )
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "indexer,expected_slice",
    [
        (lambda df: df["foo"], slice(3)),
        (lambda df: df["bar"], slice(3, 5)),
        (lambda df: df.loc[:, "bar"], slice(3, 5)),
    ],
)
def test_frame_getitem_toplevel(
    multiindex_dataframe_random_data, indexer, expected_slice
):
    """Selecting a top-level column label returns that column slice."""
    transposed = multiindex_dataframe_random_data.T

    result = indexer(transposed)

    # Same columns taken positionally, with the outer column level removed.
    expected = transposed.reindex(columns=transposed.columns[expected_slice])
    expected.columns = expected.columns.droplevel(0)
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_frame_mixed_depth_get():
    """Partial column keys resolve to the column padded with empty labels."""
    top = ["a", "top", "top", "routine1", "routine1", "routine2"]
    middle = ["", "OD", "OD", "result1", "result2", "result1"]
    bottom = ["", "wx", "wy", "", "", ""]

    columns = MultiIndex.from_tuples(sorted(zip(top, middle, bottom)))
    df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=columns)

    # (partial key, fully padded key); the result is named after the partial key
    cases = [
        ("a", ("a", "", "")),
        (("routine1", "result1"), ("routine1", "result1", "")),
    ]
    for partial_key, padded_key in cases:
        result = df[partial_key]
        expected = df[padded_key].rename(partial_key)
        tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_frame_getitem_nan_multiindex(nulls_fixture):
    """Label slicing with ``.loc`` on a MultiIndex whose level holds nulls."""
    # GH#29751
    # loc on a multiindex containing nan values
    n = nulls_fixture  # for code readability
    cols = ["a", "b", "c"]

    def build(rows):
        # Index by ("a", "b") and pin the remaining column to int64.
        frame = DataFrame(rows, columns=cols).set_index(["a", "b"])
        frame["c"] = frame["c"].astype("int64")
        return frame

    rows = [[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]]
    df = build(rows)

    # slice up to (and including) a key
    idx = (21, n)
    result = df.loc[:idx]
    tm.assert_frame_equal(result, build(rows[:2]))

    # slice from a key onwards
    result = df.loc[idx:]
    tm.assert_frame_equal(result, build(rows[1:]))

    # slice between two keys
    idx1, idx2 = (21, n), (31, n)
    result = df.loc[idx1:idx2]
    tm.assert_frame_equal(result, build(rows[1:3]))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "indexer,expected",
    [
        (
            (["b"], ["bar", np.nan]),
            DataFrame(
                [[2, 3], [5, 6]],
                columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
                dtype="int64",
            ),
        ),
        (
            ["a", "b"],
            DataFrame(
                [[1, 2, 3], [4, 5, 6]],
                columns=MultiIndex.from_tuples(
                    [("a", "foo"), ("b", "bar"), ("b", np.nan)]
                ),
                dtype="int64",
            ),
        ),
        (
            ["b"],
            DataFrame(
                [[2, 3], [5, 6]],
                columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
                dtype="int64",
            ),
        ),
        (
            (["b"], ["bar"]),
            DataFrame(
                [[2], [5]],
                columns=MultiIndex.from_tuples([("b", "bar")]),
                dtype="int64",
            ),
        ),
        (
            (["b"], [np.nan]),
            DataFrame(
                [[3], [6]],
                columns=MultiIndex(
                    codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]]
                ),
                dtype="int64",
            ),
        ),
        (("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))),
    ],
)
def test_frame_getitem_nan_cols_multiindex(
    indexer,
    expected,
    nulls_fixture,
):
    """Column selection with ``.loc`` when a column level contains a null."""
    # Slicing MultiIndex including levels with nan values, for more information
    # see GH#25154
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("b", "bar"), ("b", nulls_fixture)]
    )
    df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=columns, dtype="int64")

    result = df.loc[:, indexer]
    tm.assert_equal(result, expected)
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # test indexing of DataFrame with multi-level Index with duplicates | ||||
| # ---------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
@pytest.fixture
def dataframe_with_duplicate_index():
    """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
    letters = ["a", "d", "e", "c", "f", "b"]
    numbers = [1, 4, 5, 3, 6, 2]
    # The codes repeat ("A", "B2"), so the column MultiIndex has a duplicate.
    columns = MultiIndex(
        levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
        names=["main", "sub"],
    )
    return DataFrame(
        [letters, numbers, numbers], index=["h1", "h3", "h5"], columns=columns
    )
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
)
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
    """A full column tuple on the duplicate-column frame returns a frame."""
    # GH 4145
    result = indexer(dataframe_with_duplicate_index)

    row_labels = Index(["h1", "h3", "h5"])
    column_labels = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
    # Built as one row and transposed into the single expected column.
    expected = DataFrame([["a", 1, 1]], index=column_labels, columns=row_labels).T

    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
    """Chained selection of a unique second-level label returns a Series."""
    # GH 4146, not returning a block manager when selecting a unique index
    # from a duplicate index
    # as of 4879, this returns a Series (which is similar to what happens
    # with a non-unique)
    result = dataframe_with_duplicate_index["A"]["A1"]

    expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
    """Chained selection of a duplicated second-level label returns a frame."""
    # selecting a non_unique from the 2nd level
    result = dataframe_with_duplicate_index["A"]["B2"]

    duplicated_columns = Index(["B2", "B2"], name="sub")
    # Built row-wise per column and transposed into the expected layout.
    expected = DataFrame(
        [["d", 4, 4], ["e", 5, 5]],
        index=duplicated_columns,
        columns=["h1", "h3", "h5"],
    ).T
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_frame_mi_empty_slice():
    """Selecting no columns keeps the rows and the column levels."""
    # GH 15454
    full_columns = MultiIndex.from_product([[1], [2]])
    df = DataFrame(0, index=range(2), columns=full_columns)

    result = df[[]]

    # Same levels as the source columns, but no codes, i.e. no columns.
    empty_columns = MultiIndex(levels=[[1], [2]], codes=[[], []])
    expected = DataFrame(index=[0, 1], columns=empty_columns)
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_loc_empty_multiindex():
    """``.loc`` with an empty MultiIndex selects nothing and sets nothing."""
    # GH#36936
    arrays = [["a", "a", "b", "a"], ["a", "a", "b", "b"]]
    index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
    df = DataFrame([1, 2, 3, 4], index=index, columns=["value"])

    # loc on empty multiindex == loc with False mask
    no_row_matches = df.loc[:, "value"] == 0
    empty_multiindex = df.loc[no_row_matches, :].index
    result = df.loc[empty_multiindex, :]
    all_false = [False] * len(df.index)
    expected = df.loc[all_false, :]
    tm.assert_frame_equal(result, expected)

    # replacing value with loc on empty multiindex
    no_row_matches = df.loc[:, "value"] == 0
    empty_rows = df.loc[no_row_matches].index
    df.loc[empty_rows, "value"] = 5
    unchanged = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
    tm.assert_frame_equal(df, unchanged)
| @ -0,0 +1,171 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture
def simple_multiindex_dataframe():
    """
    Fixture returning a 3 x 3 DataFrame of seeded random values whose
    rows and columns are both labelled by two arrays of integers.
    """
    column_arrays = [[2, 2, 4], [6, 8, 10]]
    row_arrays = [[4, 4, 8], [8, 10, 12]]
    values = np.random.default_rng(2).standard_normal((3, 3))
    return DataFrame(values, columns=column_arrays, index=row_arrays)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "indexer, expected",
    [
        # first row
        (
            lambda df: df.iloc[0],
            lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
        ),
        # last row
        (
            lambda df: df.iloc[2],
            lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
        ),
        # last column
        (
            lambda df: df.iloc[:, 2],
            lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
        ),
    ],
)
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
    """Positional row/column selection returns a Series named by its label."""
    frame = simple_multiindex_dataframe

    result = indexer(frame)

    # ``expected`` is a builder taking the frame's raw values.
    expected_series = expected(frame.values)
    tm.assert_series_equal(result, expected_series)
|  | ||||
|  | ||||
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
    """The first two positions equal the ``4`` cross-section, level kept."""
    frame = simple_multiindex_dataframe

    first_two_rows = frame.iloc[[0, 1]]
    cross_section = frame.xs(4, drop_level=False)

    tm.assert_frame_equal(first_two_rows, cross_section)
|  | ||||
|  | ||||
def test_iloc_returns_scalar(simple_multiindex_dataframe):
    """A (row, column) positional pair returns the underlying array value."""
    frame = simple_multiindex_dataframe
    raw_values = frame.values

    result = frame.iloc[2, 2]

    assert result == raw_values[2, 2]
|  | ||||
|  | ||||
def test_iloc_getitem_multiple_items():
    """A list of positions selects the same rows as the "b" cross-section."""
    # GH 5528
    outer = ["a", "a", "b", "b"]
    inner = ["x", "y", "x", "y"]
    index = MultiIndex.from_tuples(zip(outer, inner))
    df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), index=index)

    result = df.iloc[[2, 3]]

    expected = df.xs("b", drop_level=False)
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_iloc_getitem_labels():
    """Positional scalar access works when both axes repeat their labels."""
    # this is basically regular indexing
    arr = np.random.default_rng(2).standard_normal((4, 3))
    column_arrays = [["i", "i", "j"], ["A", "A", "B"]]
    row_arrays = [["i", "i", "j", "k"], ["X", "X", "Y", "Y"]]
    df = DataFrame(arr, columns=column_arrays, index=row_arrays)

    result = df.iloc[2, 2]

    assert result == arr[2, 2]
|  | ||||
|  | ||||
def test_frame_getitem_slice(multiindex_dataframe_random_data):
    """``iloc[:4]`` and plain ``[:4]`` slicing return the same rows."""
    frame = multiindex_dataframe_random_data

    via_iloc = frame.iloc[:4]
    via_getitem = frame[:4]

    tm.assert_frame_equal(via_iloc, via_getitem)
|  | ||||
|  | ||||
def test_frame_setitem_slice(multiindex_dataframe_random_data):
    """Assigning through ``iloc[:4]`` zeroes only the first four rows."""
    frame = multiindex_dataframe_random_data
    frame.iloc[:4] = 0

    head = frame.values[:4]
    tail = frame.values[4:]
    assert (head == 0).all()
    assert (tail != 0).all()
|  | ||||
|  | ||||
def test_indexing_ambiguity_bug_1678():
    """Positional column 1 is the ("Ohio", "Red") column."""
    # GH 1678
    column_tuples = [("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
    row_tuples = [("a", 1), ("a", 2), ("b", 1), ("b", 2)]
    columns = MultiIndex.from_tuples(column_tuples)
    index = MultiIndex.from_tuples(row_tuples)

    values = np.arange(12).reshape((4, 3))
    df = DataFrame(values, index=index, columns=columns)

    by_position = df.iloc[:, 1]
    by_label = df.loc[:, ("Ohio", "Red")]
    tm.assert_series_equal(by_position, by_label)
|  | ||||
|  | ||||
def test_iloc_integer_locations():
    """Cell-by-cell ``iloc`` reads rebuild the original data exactly."""
    # GH 13797
    data = [
        ["str00", "str01"],
        ["str10", "str11"],
        ["str20", "srt21"],
        ["str30", "str31"],
        ["str40", "str41"],
    ]
    # The row index is unsorted and repeats ("CC", "B").
    row_tuples = [("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
    df = DataFrame(data, index=MultiIndex.from_tuples(row_tuples))

    n_rows, n_cols = len(data), len(data[0])
    cells = [[df.iloc[r, c] for c in range(n_cols)] for r in range(n_rows)]
    result = DataFrame(cells)

    expected = DataFrame(data)
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data, indexes, values, expected_k",
    [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
    ],
)
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    """In-place additions through ``Series.iloc`` on an integer MultiIndex."""
    # GH17148
    frame = DataFrame(data=data, columns=["i", "j", "k"]).set_index(["i", "j"])

    # Work on a copy of column "k" and add each value at its position.
    series = frame.k.copy()
    for position, increment in zip(indexes, values):
        series.iloc[position] += increment

    frame["k"] = expected_k
    expected = frame.k
    tm.assert_series_equal(series, expected)
|  | ||||
|  | ||||
def test_getitem_iloc(multiindex_dataframe_random_data):
    """Row at position 2 equals the cross-section for that row's label."""
    frame = multiindex_dataframe_random_data

    by_position = frame.iloc[2]
    by_label = frame.xs(frame.index[2])

    tm.assert_series_equal(by_position, by_label)
| @ -0,0 +1,118 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture
def m():
    """Number of random keys generated by the ``keys`` fixture."""
    number_of_keys = 5
    return number_of_keys
|  | ||||
|  | ||||
@pytest.fixture
def n():
    """Number of rows generated by the ``vals`` fixture."""
    number_of_rows = 100
    return number_of_rows
|  | ||||
|  | ||||
@pytest.fixture
def cols():
    """Column names of the test frame; all but the last become index levels."""
    column_names = ["jim", "joe", "jolie", "joline", "jolia"]
    return column_names
|  | ||||
|  | ||||
@pytest.fixture
def vals(n):
    """Return ``n`` row tuples of (int, letter, timestamp, letter, float)."""

    def fresh_rng():
        # Every column draws from its own generator seeded with 2.
        return np.random.default_rng(2)

    lower_letters = list("abcdefghij")
    upper_letters = list("ZYXWVUTSRQ")
    dates = pd.date_range("20141009", periods=10).tolist()

    columns = [
        fresh_rng().integers(0, 10, n),
        fresh_rng().choice(lower_letters, n),
        fresh_rng().choice(dates, n),
        fresh_rng().choice(upper_letters, n),
        fresh_rng().standard_normal(n),
    ]
    # Transpose the column arrays into a list of row tuples.
    return list(zip(*columns))
|  | ||||
|  | ||||
@pytest.fixture
def keys(n, m, vals):
    """Return ``m`` random 4-tuples followed by keys taken from ``vals``."""
    # bunch of keys for testing

    def fresh_rng():
        # Every column draws from its own generator seeded with 2.
        return np.random.default_rng(2)

    # One more candidate per column than ``vals`` draws from.
    lower_letters = list("abcdefghijk")
    upper_letters = list("ZYXWVUTSRQP")
    dates = pd.date_range("20141009", periods=11).tolist()

    columns = [
        fresh_rng().integers(0, 11, m),
        fresh_rng().choice(lower_letters, m),
        fresh_rng().choice(dates, m),
        fresh_rng().choice(upper_letters, m),
    ]
    random_keys = list(zip(*columns))

    # Every (n // m)-th row of ``vals`` without its last element.
    existing_keys = [row[:-1] for row in vals[:: n // m]]
    return random_keys + existing_keys
|  | ||||
|  | ||||
# covers both unique index and non-unique index
@pytest.fixture
def df(vals, cols):
    """Frame built from the ``vals`` rows, labelled with ``cols``."""
    frame = DataFrame(vals, columns=cols)
    return frame
|  | ||||
|  | ||||
@pytest.fixture
def a(df):
    """``df`` stacked on top of itself, so every row appears twice."""
    doubled = pd.concat([df, df])
    return doubled
|  | ||||
|  | ||||
@pytest.fixture
def b(df, cols):
    """``df`` with rows that repeat in all but the last column removed."""
    key_columns = cols[:-1]
    return df.drop_duplicates(subset=key_columns)
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("frame_fixture", ["a", "b"])
def test_multiindex_get_loc(request, lexsort_depth, keys, frame_fixture, cols):
    """
    Compare ``.loc`` on a MultiIndex frame with boolean-mask selection.

    For every key and every prefix of that key, the rows picked from the
    flat frame by an equality mask must match ``mi.loc[prefix]``.
    """
    # GH7724, GH2646

    frame = request.getfixturevalue(frame_fixture)
    if lexsort_depth == 0:
        df = frame.copy(deep=False)
    else:
        df = frame.sort_values(by=cols[:lexsort_depth])

    index_cols = cols[:-1]
    mi = df.set_index(index_cols)
    assert mi.index._lexsort_depth >= lexsort_depth

    for key in keys:
        mask = np.ones(len(df), dtype=bool)

        # test for all partials of this key
        for i, k in enumerate(key):
            depth = i + 1
            prefix = key[:depth]
            mask &= df.iloc[:, i] == k

            if not mask.any():
                assert prefix not in mi.index
                continue

            assert prefix in mi.index
            right = df[mask].copy(deep=False)

            if depth == len(key):  # full key
                return_value = right.set_index(index_cols, inplace=True)
                assert return_value is None
                if len(right) == 1:  # single hit
                    right = Series(
                        right["jolia"].values, name=right.index[0], index=["jolia"]
                    )
                    tm.assert_series_equal(mi.loc[prefix], right)
                else:  # multi hit
                    tm.assert_frame_equal(mi.loc[prefix], right)

            else:  # partial key
                # drop the matched columns, index by the remaining key columns
                return_value = right.drop(cols[:depth], axis=1, inplace=True)
                assert return_value is None
                return_value = right.set_index(cols[depth:-1], inplace=True)
                assert return_value is None
                tm.assert_frame_equal(mi.loc[prefix], right)
| @ -0,0 +1,992 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import ( | ||||
|     IndexingError, | ||||
|     PerformanceWarning, | ||||
| ) | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture
def single_level_multiindex():
    """single level MultiIndex"""
    labels = ["foo", "bar", "baz", "qux"]
    positions = [0, 1, 2, 3]
    return MultiIndex(levels=[labels], codes=[positions], names=["first"])
|  | ||||
|  | ||||
@pytest.fixture
def frame_random_data_integer_multi_index():
    """6 x 2 frame of seeded random values on a two-level integer index."""
    index = MultiIndex(
        levels=[[0, 1], [0, 1, 2]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
    )
    values = np.random.default_rng(2).standard_normal((6, 2))
    return DataFrame(values, index=index)
|  | ||||
|  | ||||
| class TestMultiIndexLoc: | ||||
|     def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data): | ||||
|         frame = multiindex_dataframe_random_data | ||||
|         frame.loc[("bar", "two"), "B"] = 5 | ||||
|         assert frame.loc[("bar", "two"), "B"] == 5 | ||||
|  | ||||
|         # with integer labels | ||||
|         df = frame.copy() | ||||
|         df.columns = list(range(3)) | ||||
|         df.loc[("bar", "two"), 1] = 7 | ||||
|         assert df.loc[("bar", "two"), 1] == 7 | ||||
|  | ||||
|     def test_loc_getitem_general(self, any_real_numpy_dtype): | ||||
|         # GH#2817 | ||||
|         dtype = any_real_numpy_dtype | ||||
|         data = { | ||||
|             "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, | ||||
|             "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, | ||||
|             "num": {0: 12, 1: 11, 2: 12, 3: 12, 4: 12}, | ||||
|         } | ||||
|         df = DataFrame(data) | ||||
|         df = df.astype({"col": dtype, "num": dtype}) | ||||
|         df = df.set_index(keys=["col", "num"]) | ||||
|         key = 4.0, 12 | ||||
|  | ||||
|         # emits a PerformanceWarning, ok | ||||
|         with tm.assert_produces_warning(PerformanceWarning): | ||||
|             tm.assert_frame_equal(df.loc[key], df.iloc[2:]) | ||||
|  | ||||
|         # this is ok | ||||
|         return_value = df.sort_index(inplace=True) | ||||
|         assert return_value is None | ||||
|         res = df.loc[key] | ||||
|  | ||||
|         # col has float dtype, result should be float64 Index | ||||
|         col_arr = np.array([4.0] * 3, dtype=dtype) | ||||
|         year_arr = np.array([12] * 3, dtype=dtype) | ||||
|         index = MultiIndex.from_arrays([col_arr, year_arr], names=["col", "num"]) | ||||
|         expected = DataFrame({"amount": [222, 333, 444]}, index=index) | ||||
|         tm.assert_frame_equal(res, expected) | ||||
|  | ||||
|     def test_loc_getitem_multiindex_missing_label_raises(self): | ||||
|         # GH#21593 | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((3, 3)), | ||||
|             columns=[[2, 2, 4], [6, 8, 10]], | ||||
|             index=[[4, 4, 8], [8, 10, 12]], | ||||
|         ) | ||||
|  | ||||
|         with pytest.raises(KeyError, match=r"^2$"): | ||||
|             df.loc[2] | ||||
|  | ||||
|     def test_loc_getitem_list_of_tuples_with_multiindex( | ||||
|         self, multiindex_year_month_day_dataframe_random_data | ||||
|     ): | ||||
|         ser = multiindex_year_month_day_dataframe_random_data["A"] | ||||
|         expected = ser.reindex(ser.index[49:51]) | ||||
|         result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_loc_getitem_series(self): | ||||
|         """``.loc`` on a MultiIndex Series accepts another Series as the key.""" | ||||
|         # GH14730 | ||||
|         # passing a series as a key with a MultiIndex | ||||
|         index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]]) | ||||
|         x = Series(index=index, data=range(9), dtype=np.float64) | ||||
|         y = Series([1, 3]) | ||||
|         expected = Series( | ||||
|             data=[0, 1, 2, 6, 7, 8], | ||||
|             index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]), | ||||
|             dtype=np.float64, | ||||
|         ) | ||||
|         result = x.loc[y] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # a plain list holding the same values must give the same result | ||||
|         result = x.loc[[1, 3]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # GH15424 | ||||
|         # key Series with a non-default index; the same result is expected | ||||
|         y1 = Series([1, 3], index=[1, 2]) | ||||
|         result = x.loc[y1] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # empty key Series -> empty result whose index keeps the original levels | ||||
|         empty = Series(data=[], dtype=np.float64) | ||||
|         expected = Series( | ||||
|             [], | ||||
|             index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), | ||||
|             dtype=np.float64, | ||||
|         ) | ||||
|         result = x.loc[empty] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_loc_getitem_array(self): | ||||
|         """``.loc`` on a MultiIndex Series accepts ndarray and NumPy scalar keys.""" | ||||
|         # GH15434 | ||||
|         # passing an array as a key with a MultiIndex | ||||
|         index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]]) | ||||
|         x = Series(index=index, data=range(9), dtype=np.float64) | ||||
|         y = np.array([1, 3]) | ||||
|         expected = Series( | ||||
|             data=[0, 1, 2, 6, 7, 8], | ||||
|             index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]), | ||||
|             dtype=np.float64, | ||||
|         ) | ||||
|         result = x.loc[y] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # empty array: | ||||
|         # expected result is empty but its index keeps the original levels | ||||
|         empty = np.array([]) | ||||
|         expected = Series( | ||||
|             [], | ||||
|             index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), | ||||
|             dtype="float64", | ||||
|         ) | ||||
|         result = x.loc[empty] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # 0-dim array (scalar): | ||||
|         # a scalar key selects on the first level and that level is dropped | ||||
|         scalar = np.int64(1) | ||||
|         expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64) | ||||
|         result = x.loc[scalar] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_loc_multiindex_labels(self): | ||||
|         """String-label ``.loc`` lookups on MultiIndexes containing duplicate keys.""" | ||||
|         # both axes repeat their first entry: ("i", "X") rows and ("i", "A") columns | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((3, 3)), | ||||
|             columns=[["i", "i", "j"], ["A", "A", "B"]], | ||||
|             index=[["i", "i", "j"], ["X", "X", "Y"]], | ||||
|         ) | ||||
|  | ||||
|         # the first 2 rows | ||||
|         expected = df.iloc[[0, 1]].droplevel(0) | ||||
|         result = df.loc["i"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # 2nd (last) column | ||||
|         expected = df.iloc[:, [2]].droplevel(0, axis=1) | ||||
|         result = df.loc[:, "j"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # bottom right corner | ||||
|         expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1) | ||||
|         result = df.loc["j"].loc[:, "j"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # with a tuple | ||||
|         # a full key that matches two rows: both are expected, with no level dropped | ||||
|         expected = df.iloc[[0, 1]] | ||||
|         result = df.loc[("i", "X")] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_multiindex_ints(self): | ||||
|         """An integer scalar key selects on the first row level and drops it.""" | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((3, 3)), | ||||
|             columns=[[2, 2, 4], [6, 8, 10]], | ||||
|             index=[[4, 4, 8], [8, 10, 12]], | ||||
|         ) | ||||
|         # label 4 covers the first two rows | ||||
|         expected = df.iloc[[0, 1]].droplevel(0) | ||||
|         result = df.loc[4] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_multiindex_missing_label_raises(self): | ||||
|         """A scalar ``.loc`` key missing from the row MultiIndex raises KeyError.""" | ||||
|         # NOTE(review): same frame and assertion as | ||||
|         # test_loc_getitem_multiindex_missing_label_raises (GH#21593) | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((3, 3)), | ||||
|             columns=[[2, 2, 4], [6, 8, 10]], | ||||
|             index=[[4, 4, 8], [8, 10, 12]], | ||||
|         ) | ||||
|  | ||||
|         # 2 is a first-level *column* label; the first row level only holds 4 and 8 | ||||
|         with pytest.raises(KeyError, match=r"^2$"): | ||||
|             df.loc[2] | ||||
|  | ||||
|     # NOTE(review): ``pos`` is parametrized but never read by the test body | ||||
|     @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])]) | ||||
|     def test_loc_multiindex_list_missing_label(self, key, pos): | ||||
|         """A list key containing any label missing from the row index raises.""" | ||||
|         # GH 27148 - lists with missing labels _do_ raise | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((3, 3)), | ||||
|             columns=[[2, 2, 4], [6, 8, 10]], | ||||
|             index=[[4, 4, 8], [8, 10, 12]], | ||||
|         ) | ||||
|  | ||||
|         # every parametrized key contains 2, which is not a first-level row label | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             df.loc[key] | ||||
|  | ||||
|     def test_loc_multiindex_too_many_dims_raises(self): | ||||
|         """Missing multi-part keys raise KeyError; excess keys raise IndexingError.""" | ||||
|         # GH 14885 | ||||
|         s = Series( | ||||
|             range(8), | ||||
|             index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]), | ||||
|         ) | ||||
|  | ||||
|         # "b" is not a label of the second level | ||||
|         with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"): | ||||
|             s.loc["a", "b"] | ||||
|         # "g" is not a label of the third level | ||||
|         with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"): | ||||
|             s.loc["a", "d", "g"] | ||||
|         # four keys for a three-level index | ||||
|         with pytest.raises(IndexingError, match="Too many indexers"): | ||||
|             s.loc["a", "d", "g", "j"] | ||||
|  | ||||
|     def test_loc_multiindex_indexer_none(self): | ||||
|         """List keys covering only the first level select all matching entries.""" | ||||
|         # GH6788 | ||||
|         # multi-index indexer is None (meaning take all) | ||||
|         attributes = ["Attribute" + str(i) for i in range(1)] | ||||
|         attribute_values = ["Value" + str(i) for i in range(5)] | ||||
|  | ||||
|         index = MultiIndex.from_product([attributes, attribute_values]) | ||||
|         # ``df`` is a plain ndarray here and is wrapped into a DataFrame just below | ||||
|         df = 0.1 * np.random.default_rng(2).standard_normal((10, 1 * 5)) + 0.5 | ||||
|         df = DataFrame(df, columns=index) | ||||
|         # the single first-level label covers every column, so the frame is unchanged | ||||
|         result = df[attributes] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         # GH 7349 | ||||
|         # loc with a multi-index seems to be doing fallback | ||||
|         df = DataFrame( | ||||
|             np.arange(12).reshape(-1, 1), | ||||
|             index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]), | ||||
|         ) | ||||
|  | ||||
|         # the explicit tuple form and the bare list form must agree | ||||
|         expected = df.loc[([1, 2],), :] | ||||
|         result = df.loc[[1, 2]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_multiindex_incomplete(self): | ||||
|         """Open-ended and stepped label slices on the first level of a MultiIndex.""" | ||||
|         # GH 7399 | ||||
|         # incomplete indexers | ||||
|         s = Series( | ||||
|             np.arange(15, dtype="int64"), | ||||
|             MultiIndex.from_product([range(5), ["a", "b", "c"]]), | ||||
|         ) | ||||
|         # reference result: full slice on level 0, "a".."c" on level 1 | ||||
|         expected = s.loc[:, "a":"c"] | ||||
|  | ||||
|         # the three slices below span all of level 0 (0..4) and must match it | ||||
|         result = s.loc[0:4, "a":"c"] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.loc[:4, "a":"c"] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.loc[0:, "a":"c"] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # GH 7400 | ||||
|         # multiindexer getitem with list of indexers skips wrong element | ||||
|         s = Series( | ||||
|             np.arange(15, dtype="int64"), | ||||
|             MultiIndex.from_product([range(5), ["a", "b", "c"]]), | ||||
|         ) | ||||
|         # step 2 over labels 2..4 keeps labels 2 and 4, i.e. positions 6-8 and 12-14 | ||||
|         expected = s.iloc[[6, 7, 8, 12, 13, 14]] | ||||
|         result = s.loc[2:4:2, "a":"c"] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_get_loc_single_level(self, single_level_multiindex): | ||||
|         """Smoke test: every key of the fixture index can be looked up in a Series.""" | ||||
|         single_level = single_level_multiindex | ||||
|         s = Series( | ||||
|             np.random.default_rng(2).standard_normal(len(single_level)), | ||||
|             index=single_level, | ||||
|         ) | ||||
|         # no assertion: each lookup only has to complete without raising | ||||
|         for k in single_level.values: | ||||
|             s[k] | ||||
|  | ||||
|     def test_loc_getitem_int_slice(self): | ||||
|         """Integer slices and scalars passed to ``.loc`` are treated as labels.""" | ||||
|         # GH 3053 | ||||
|         # loc should treat integer slices like label slices | ||||
|  | ||||
|         index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]]) | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index) | ||||
|         # 6:8 as labels covers the whole first level (6, 7, 8) | ||||
|         result = df.loc[6:8, :] | ||||
|         expected = df | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]]) | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index) | ||||
|         # labels 20..30 are the last four rows | ||||
|         result = df.loc[20:30, :] | ||||
|         expected = df.iloc[2:] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # doc examples | ||||
|         # scalar row key: the first level is dropped, leaving "a"/"b" as the index | ||||
|         result = df.loc[10, :] | ||||
|         expected = df.iloc[0:2] | ||||
|         expected.index = ["a", "b"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # scalar column key must agree with plain ``df[10]`` | ||||
|         result = df.loc[:, 10] | ||||
|         expected = df[10] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index) | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index) | ||||
|     ) | ||||
|     def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2): | ||||
|         """``.loc`` with a tuple of per-level indexers of every supported type.""" | ||||
|         # GH #19686 | ||||
|         # .loc should work with nested indexers which can be | ||||
|         # any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices | ||||
|  | ||||
|         def convert_nested_indexer(indexer_type, keys): | ||||
|             # ndarray and slice cannot be built by calling the type on a list | ||||
|             if indexer_type == np.ndarray: | ||||
|                 return np.array(keys) | ||||
|             if indexer_type == slice: | ||||
|                 return slice(*keys) | ||||
|             return indexer_type(keys) | ||||
|  | ||||
|         a = [10, 20, 30] | ||||
|         b = [1, 2, 3] | ||||
|         index = MultiIndex.from_product([a, b]) | ||||
|         df = DataFrame( | ||||
|             np.arange(len(index), dtype="int64"), index=index, columns=["Data"] | ||||
|         ) | ||||
|  | ||||
|         keys = ([10, 20], [2, 3]) | ||||
|         types = (indexer_type_1, indexer_type_2) | ||||
|  | ||||
|         # check indexers with all the combinations of nested objects | ||||
|         # of all the valid types | ||||
|         indexer = tuple( | ||||
|             convert_nested_indexer(indexer_type, k) | ||||
|             for indexer_type, k in zip(types, keys) | ||||
|         ) | ||||
|         # a set on either level is expected to be rejected with a TypeError | ||||
|         if indexer_type_1 is set or indexer_type_2 is set: | ||||
|             with pytest.raises(TypeError, match="as an indexer is not supported"): | ||||
|                 df.loc[indexer, "Data"] | ||||
|  | ||||
|             return | ||||
|         else: | ||||
|             result = df.loc[indexer, "Data"] | ||||
|         # values at (10, 2), (10, 3), (20, 2), (20, 3) of the arange-filled frame | ||||
|         expected = Series( | ||||
|             [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys) | ||||
|         ) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series): | ||||
|         """Setting via ``.loc`` with a one-element tuple key updates matching rows.""" | ||||
|         # GH#37711 | ||||
|         mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")]) | ||||
|         obj = frame_or_series([1, 2], index=mi) | ||||
|         # ("a",) names only the first level; only the ("a", "A") row should change | ||||
|         obj.loc[("a",)] = 0 | ||||
|         expected = frame_or_series([0, 2], index=mi) | ||||
|         tm.assert_equal(obj, expected) | ||||
|  | ||||
|     # ("a",) is a one-element tuple, while ("a") is just the string "a" | ||||
|     @pytest.mark.parametrize("indexer", [("a",), ("a")]) | ||||
|     def test_multiindex_one_dimensional_tuple_columns(self, indexer): | ||||
|         """Row key as a 1-tuple or plain string, combined with a full column slice.""" | ||||
|         # GH#37711 | ||||
|         mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")]) | ||||
|         obj = DataFrame([1, 2], index=mi) | ||||
|         obj.loc[indexer, :] = 0 | ||||
|         # only the ("a", "A") row is expected to be overwritten | ||||
|         expected = DataFrame([0, 2], index=mi) | ||||
|         tm.assert_frame_equal(obj, expected) | ||||
|  | ||||
|     # slice(None) targets every row; (1, 2) targets only the first row, so the | ||||
|     # second row of the new columns is expected to be NaN in that case | ||||
|     @pytest.mark.parametrize( | ||||
|         "indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)] | ||||
|     ) | ||||
|     def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value): | ||||
|         """Assigning to new columns through ``.loc`` adds them to the frame.""" | ||||
|         # GH#39147 | ||||
|         mi = MultiIndex.from_tuples([(1, 2), (3, 4)]) | ||||
|         df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"]) | ||||
|         # "c" and "d" do not exist yet | ||||
|         df.loc[indexer, ["c", "d"]] = 1.0 | ||||
|         expected = DataFrame( | ||||
|             [[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]], | ||||
|             index=mi, | ||||
|             columns=["a", "b", "c", "d"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_sorted_multiindex_after_union(self): | ||||
|         """Date-string slicing works on frames built by combining unequal indexes.""" | ||||
|         # GH#44752 | ||||
|         midx = MultiIndex.from_product( | ||||
|             [pd.date_range("20110101", periods=2), Index(["a", "b"])] | ||||
|         ) | ||||
|         ser1 = Series(1, index=midx) | ||||
|         # ser2 covers only the first two entries of the index | ||||
|         ser2 = Series(1, index=midx[:2]) | ||||
|         df = pd.concat([ser1, ser2], axis=1) | ||||
|         # the slice spans both dates, so the whole frame is expected back | ||||
|         expected = df.copy() | ||||
|         result = df.loc["2011-01-01":"2011-01-02"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # same data assembled through the dict constructor | ||||
|         df = DataFrame({0: ser1, 1: ser2}) | ||||
|         result = df.loc["2011-01-01":"2011-01-02"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # same data with ser2 explicitly reindexed before concatenation | ||||
|         df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1) | ||||
|         result = df.loc["2011-01-01":"2011-01-02"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_no_second_level_index(self): | ||||
|         """A scalar key on the middle level drops that level from the result.""" | ||||
|         # GH#43599 | ||||
|         df = DataFrame( | ||||
|             index=MultiIndex.from_product([list("ab"), list("cd"), list("e")]), | ||||
|             columns=["Val"], | ||||
|         ) | ||||
|         # np.s_ builds the tuple (slice(None), "c", slice(None)) | ||||
|         res = df.loc[np.s_[:, "c", :]] | ||||
|         expected = DataFrame( | ||||
|             index=MultiIndex.from_product([list("ab"), list("e")]), columns=["Val"] | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, expected) | ||||
|  | ||||
|     def test_loc_multi_index_key_error(self): | ||||
|         # GH 51892 | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 (1, 2): ["a", "b", "c"], | ||||
|                 (1, 3): ["d", "e", "f"], | ||||
|                 (2, 2): ["g", "h", "i"], | ||||
|                 (2, 4): ["j", "k", "l"], | ||||
|             } | ||||
|         ) | ||||
|         with pytest.raises(KeyError, match=r"(1, 4)"): | ||||
|             df.loc[0, (1, 4)] | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer, pos", | ||||
|     [ | ||||
|         ([], []),  # empty ok | ||||
|         (["A"], slice(3)), | ||||
|         (["A", "D"], []),  # "D" isn't present -> raise | ||||
|         (["D", "E"], []),  # no values found -> raise | ||||
|         (["D"], []),  # same, with single item list: GH 27148 | ||||
|         (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)), | ||||
|         (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)), | ||||
|     ], | ||||
| ) | ||||
| def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos): | ||||
|     """List and IndexSlice keys with missing labels on a sorted MultiIndex Series. | ||||
|  | ||||
|     ``pos`` gives the positional selection of the expected result; an empty | ||||
|     ``pos`` paired with a non-empty ``indexer`` marks a case that must raise. | ||||
|     """ | ||||
|     # GH 7866 | ||||
|     # multi-index slicing with missing indexers | ||||
|     idx = MultiIndex.from_product( | ||||
|         [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"] | ||||
|     ) | ||||
|     ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index() | ||||
|     expected = ser.iloc[pos] | ||||
|  | ||||
|     if expected.size == 0 and indexer != []: | ||||
|         # NOTE(review): str(indexer) is used as a regex without escaping, so the | ||||
|         # square brackets act as a character class rather than literal text | ||||
|         with pytest.raises(KeyError, match=str(indexer)): | ||||
|             ser.loc[indexer] | ||||
|     elif indexer == (slice(None), ["foo", "bah"]): | ||||
|         # "bah" is not in idx.levels[1], raising KeyError enforced in 2.0 | ||||
|         with pytest.raises(KeyError, match="'bah'"): | ||||
|             ser.loc[indexer] | ||||
|     else: | ||||
|         result = ser.loc[indexer] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])]) | ||||
| def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer): | ||||
|     """A column key with an empty list on either level selects no columns.""" | ||||
|     # GH 8737 | ||||
|     # empty indexer | ||||
|     multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"])) | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((5, 6)), | ||||
|         index=range(5), | ||||
|         columns=multi_index, | ||||
|     ) | ||||
|     df = df.sort_index(level=0, axis=1) | ||||
|  | ||||
|     # expected: same rows, with columns taken from reindexing multi_index to [] | ||||
|     expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) | ||||
|     result = df.loc[:, columns_indexer] | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_getitem_duplicates_multiindex_non_scalar_type_object(): | ||||
|     """A cell holding a function object is returned unchanged by ``.loc``.""" | ||||
|     # regression from < 0.14.0 | ||||
|     # GH 7914 | ||||
|     # first row stores the callables themselves, second row their names | ||||
|     df = DataFrame( | ||||
|         [[np.mean, np.median], ["mean", "median"]], | ||||
|         columns=MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")]), | ||||
|         index=["function", "name"], | ||||
|     ) | ||||
|     result = df.loc["function", ("functs", "mean")] | ||||
|     expected = np.mean | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_loc_getitem_tuple_plus_slice(): | ||||
|     """``df.loc[(k0, k1), :]`` must equal ``df.loc[k0, k1]`` on a 2-level index.""" | ||||
|     # GH 671 | ||||
|     # both random columns are drawn from identically seeded generators | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "a": np.arange(10), | ||||
|             "b": np.arange(10), | ||||
|             "c": np.random.default_rng(2).standard_normal(10), | ||||
|             "d": np.random.default_rng(2).standard_normal(10), | ||||
|         } | ||||
|     ).set_index(["a", "b"]) | ||||
|     expected = df.loc[0, 0] | ||||
|     result = df.loc[(0, 0), :] | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_getitem_int(frame_random_data_integer_multi_index): | ||||
|     """An integer ``.loc`` key selects on the first level and drops that level.""" | ||||
|     df = frame_random_data_integer_multi_index | ||||
|     result = df.loc[1] | ||||
|     # NOTE(review): assumes the fixture's last three rows carry first-level | ||||
|     # label 1; the fixture is defined elsewhere - confirm | ||||
|     expected = df[-3:] | ||||
|     expected.index = expected.index.droplevel(0) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index): | ||||
|     """An integer ``.loc`` key that is not a label raises KeyError.""" | ||||
|     df = frame_random_data_integer_multi_index | ||||
|     # NOTE(review): presumably 3 is not a first-level label of the fixture - confirm | ||||
|     with pytest.raises(KeyError, match=r"^3$"): | ||||
|         df.loc[3] | ||||
|  | ||||
|  | ||||
| def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data): | ||||
|     """Setting a missing (row tuple, column) cell makes it readable afterwards.""" | ||||
|     df = multiindex_dataframe_random_data | ||||
|  | ||||
|     # test setup - check key not in dataframe | ||||
|     with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"): | ||||
|         df.loc[("bar", "three"), "B"] | ||||
|  | ||||
|     # in theory should be inserting in a sorted space???? | ||||
|     df.loc[("bar", "three"), "B"] = 0 | ||||
|     expected = 0 | ||||
|     # the frame is sorted before the new key is read back | ||||
|     result = df.sort_index().loc[("bar", "three"), "B"] | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_loc_setitem_single_column_slice(): | ||||
|     """``df.loc[:, key] = values`` with a first-level key on MultiIndex columns.""" | ||||
|     # case from https://github.com/pandas-dev/pandas/issues/27841 | ||||
|     df = DataFrame( | ||||
|         "string", | ||||
|         index=list("abcd"), | ||||
|         columns=MultiIndex.from_product([["Main"], ("another", "one")]), | ||||
|     ) | ||||
|     # add a column keyed by a plain string, then overwrite it through .loc | ||||
|     df["labels"] = "a" | ||||
|     df.loc[:, "labels"] = df.index | ||||
|     tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index)) | ||||
|  | ||||
|     # test with non-object block | ||||
|     df = DataFrame( | ||||
|         np.nan, | ||||
|         index=range(4), | ||||
|         columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]), | ||||
|     ) | ||||
|     expected = df.copy() | ||||
|     # "B" covers only the third column, so it must match a positional write there | ||||
|     df.loc[:, "B"] = np.arange(4) | ||||
|     expected.iloc[:, 2] = np.arange(4) | ||||
|     tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_nan_multiindex(using_infer_string): | ||||
|     """Chained ``.loc`` lookups on a MultiIndex whose last level contains NaN.""" | ||||
|     # GH 5286 | ||||
|     tups = [ | ||||
|         ("Good Things", "C", np.nan), | ||||
|         ("Good Things", "R", np.nan), | ||||
|         ("Bad Things", "C", np.nan), | ||||
|         ("Bad Things", "T", np.nan), | ||||
|         ("Okay Things", "N", "B"), | ||||
|         ("Okay Things", "N", "D"), | ||||
|         ("Okay Things", "B", np.nan), | ||||
|         ("Okay Things", "D", np.nan), | ||||
|     ] | ||||
|     df = DataFrame( | ||||
|         np.ones((8, 4)), | ||||
|         columns=Index(["d1", "d2", "d3", "d4"]), | ||||
|         index=MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"]), | ||||
|     ) | ||||
|     # two successive scalar lookups leave only the "u3" level, whose value is NaN | ||||
|     result = df.loc["Good Things"].loc["C"] | ||||
|     expected = DataFrame( | ||||
|         np.ones((1, 4)), | ||||
|         index=Index( | ||||
|             [np.nan], | ||||
|             # expected index dtype depends on the using_infer_string fixture | ||||
|             dtype="object" if not using_infer_string else "str", | ||||
|             name="u3", | ||||
|         ), | ||||
|         columns=Index(["d1", "d2", "d3", "d4"]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_period_string_indexing(): | ||||
|     """A period string in a tuple key performs an exact scalar lookup.""" | ||||
|     # GH 9892 | ||||
|     a = pd.period_range("2013Q1", "2013Q4", freq="Q") | ||||
|     i = (1111, 2222, 3333) | ||||
|     idx = MultiIndex.from_product((a, i), names=("Period", "CVR")) | ||||
|     # no data is supplied, so the looked-up cells are expected to be NaN | ||||
|     df = DataFrame( | ||||
|         index=idx, | ||||
|         columns=( | ||||
|             "OMS", | ||||
|             "OMK", | ||||
|             "RES", | ||||
|             "DRIFT_IND", | ||||
|             "OEVRIG_IND", | ||||
|             "FIN_IND", | ||||
|             "VARE_UD", | ||||
|             "LOEN_UD", | ||||
|             "FIN_UD", | ||||
|         ), | ||||
|     ) | ||||
|     result = df.loc[("2013Q1", 1111), "OMS"] | ||||
|  | ||||
|     # same lookup using the Period object instead of its string form | ||||
|     alt = df.loc[(a[0], 1111), "OMS"] | ||||
|     assert np.isnan(alt) | ||||
|  | ||||
|     # Because the resolution of the string matches, it is an exact lookup, | ||||
|     #  not a slice | ||||
|     assert np.isnan(result) | ||||
|  | ||||
|     # NOTE(review): repeats the exact expression already used for ``result`` | ||||
|     alt = df.loc[("2013Q1", 1111), "OMS"] | ||||
|     assert np.isnan(alt) | ||||
|  | ||||
|  | ||||
| def test_loc_datetime_mask_slicing(): | ||||
|     """A tuple key may combine a scalar timestamp with a boolean mask.""" | ||||
|     # GH 16699 | ||||
|     dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"]) | ||||
|     m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"]) | ||||
|     df = DataFrame( | ||||
|         data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"] | ||||
|     ) | ||||
|     # first level: scalar timestamp; second level: mask built from a string compare | ||||
|     result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"] | ||||
|     # only the (2017-05-04, 2017-05-05) row qualifies, and both levels are kept | ||||
|     expected = Series( | ||||
|         [3], | ||||
|         name="C1", | ||||
|         index=MultiIndex.from_tuples( | ||||
|             [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))], | ||||
|             names=["Idx1", "Idx2"], | ||||
|         ), | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_datetime_series_tuple_slicing(): | ||||
|     """``ser.loc[:, [timestamp]]`` on a (str, Timestamp) MultiIndex Series.""" | ||||
|     # https://github.com/pandas-dev/pandas/issues/35858 | ||||
|     date = pd.Timestamp("2000") | ||||
|     ser = Series( | ||||
|         1, | ||||
|         index=MultiIndex.from_tuples([("a", date)], names=["a", "b"]), | ||||
|         name="c", | ||||
|     ) | ||||
|     # the only row matches, so the Series is expected back unchanged | ||||
|     result = ser.loc[:, [date]] | ||||
|     tm.assert_series_equal(result, ser) | ||||
|  | ||||
|  | ||||
| def test_loc_with_mi_indexer(): | ||||
|     """Indexing rows with a MultiIndex key returns every matching row.""" | ||||
|     # https://github.com/pandas-dev/pandas/issues/35351 | ||||
|     # the frame's index contains (1, 1) twice | ||||
|     df = DataFrame( | ||||
|         data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]], | ||||
|         index=MultiIndex.from_tuples( | ||||
|             [(0, 1), (1, 0), (1, 1), (1, 1)], names=["index", "date"] | ||||
|         ), | ||||
|         columns=["author", "price"], | ||||
|     ) | ||||
|     idx = MultiIndex.from_tuples([(0, 1), (1, 1)], names=["index", "date"]) | ||||
|     result = df.loc[idx, :] | ||||
|     # both duplicate (1, 1) rows are expected in the result | ||||
|     expected = DataFrame( | ||||
|         [["a", 1], ["b", 1], ["c", 2]], | ||||
|         index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=["index", "date"]), | ||||
|         columns=["author", "price"], | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_mi_with_level1_named_0(): | ||||
|     """Scalar lookup on a MultiIndex whose second level is named ``0``.""" | ||||
|     # GH#37194 | ||||
|     dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") | ||||
|  | ||||
|     ser = Series(range(3), index=dti) | ||||
|     df = ser.to_frame() | ||||
|     df[1] = dti | ||||
|  | ||||
|     # moving column 0 into the index yields level names (None, 0) | ||||
|     df2 = df.set_index(0, append=True) | ||||
|     assert df2.index.names == (None, 0) | ||||
|     df2.index.get_loc(dti[0])  # smoke test | ||||
|  | ||||
|     # the unnamed first level is consumed by the scalar key and dropped | ||||
|     result = df2.loc[dti[0]] | ||||
|     expected = df2.iloc[[0]].droplevel(None) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # same check on a Series taken from the frame | ||||
|     ser2 = df2[1] | ||||
|     assert ser2.index.names == (None, 0) | ||||
|  | ||||
|     result = ser2.loc[dti[0]] | ||||
|     expected = ser2.iloc[[0]].droplevel(None) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_getitem_str_slice(): | ||||
|     """A string slice inside a tuple key matches the chained two-step lookup.""" | ||||
|     # GH#15928 | ||||
|     # every cell, including the "time" column, is a string literal | ||||
|     df = DataFrame( | ||||
|         [ | ||||
|             ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"], | ||||
|             ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"], | ||||
|             ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"], | ||||
|             ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"], | ||||
|             ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"], | ||||
|             ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"], | ||||
|         ], | ||||
|         columns="time,ticker,bid,ask".split(","), | ||||
|     ) | ||||
|     df2 = df.set_index(["ticker", "time"]).sort_index() | ||||
|  | ||||
|     # single-step tuple lookup versus selecting "AAPL" first, then slicing | ||||
|     res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0) | ||||
|     expected = df2.loc["AAPL"].loc[slice("2016-05-25 13:30:00"), :] | ||||
|     tm.assert_frame_equal(res, expected) | ||||
|  | ||||
|  | ||||
| def test_3levels_leading_period_index(): | ||||
|     """Full-key lookup on a 3-level MultiIndex whose first level is a PeriodIndex.""" | ||||
|     # GH#24091 | ||||
|     # the strings carry hours and minutes while the requested frequency is daily | ||||
|     pi = pd.PeriodIndex( | ||||
|         ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"], | ||||
|         name="datetime", | ||||
|         freq="D", | ||||
|     ) | ||||
|     lev2 = ["A", "A", "Z", "W"] | ||||
|     lev3 = ["B", "C", "Q", "F"] | ||||
|     mi = MultiIndex.from_arrays([pi, lev2, lev3]) | ||||
|  | ||||
|     ser = Series(range(4), index=mi, dtype=np.float64) | ||||
|     # the full key addresses the first element, whose value is 0.0 | ||||
|     result = ser.loc[(pi[0], "A", "B")] | ||||
|     assert result == 0.0 | ||||
|  | ||||
|  | ||||
| class TestKeyErrorsWithMultiIndex: | ||||
|     """Missing keys on a MultiIndex must surface as KeyError.""" | ||||
|  | ||||
|     def test_missing_keys_raises_keyerror(self): | ||||
|         """A missing tuple key on a 2-level row index raises KeyError.""" | ||||
|         # GH#27420 KeyError, not TypeError | ||||
|         df = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"]) | ||||
|         df2 = df.set_index(["A", "B"]) | ||||
|  | ||||
|         # level "A" holds 0, 3, 6, 9, so 1 is not present | ||||
|         with pytest.raises(KeyError, match="1"): | ||||
|             df2.loc[(1, 6)] | ||||
|  | ||||
|     def test_missing_key_raises_keyerror2(self): | ||||
|         """A two-part key with a missing second label raises KeyError.""" | ||||
|         # GH#21168 KeyError, not "IndexingError: Too many indexers" | ||||
|         ser = Series(-1, index=MultiIndex.from_product([[0, 1]] * 2)) | ||||
|  | ||||
|         # the second level only holds 0 and 1 | ||||
|         with pytest.raises(KeyError, match=r"\(0, 3\)"): | ||||
|             ser.loc[0, 3] | ||||
|  | ||||
|     def test_missing_key_combination(self): | ||||
|         """Labels that exist per level but never together raise KeyError.""" | ||||
|         # GH: 19556 | ||||
|         mi = MultiIndex.from_arrays( | ||||
|             [ | ||||
|                 np.array(["a", "a", "b", "b"]), | ||||
|                 np.array(["1", "2", "2", "3"]), | ||||
|                 np.array(["c", "d", "c", "d"]), | ||||
|             ], | ||||
|             names=["one", "two", "three"], | ||||
|         ) | ||||
|         df = DataFrame(np.random.default_rng(2).random((4, 3)), index=mi) | ||||
|         # "b" and "1" each occur in their level, but no entry pairs them | ||||
|         msg = r"\('b', '1', slice\(None, None, None\)\)" | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             df.loc[("b", "1", slice(None)), :] | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             df.index.get_locs(("b", "1", slice(None))) | ||||
|         with pytest.raises(KeyError, match=r"\('b', '1'\)"): | ||||
|             df.loc[("b", "1"), :] | ||||
|  | ||||
|  | ||||
| def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data): | ||||
|     """Column-then-rows and rows-then-column selection must give the same Series.""" | ||||
|     df = multiindex_year_month_day_dataframe_random_data | ||||
|     ser = df["A"] | ||||
|     # partial key (2000, 5) applied to the Series directly ... | ||||
|     result = ser[2000, 5] | ||||
|     # ... versus applied to the frame through .loc before picking column "A" | ||||
|     expected = df.loc[2000, 5]["A"] | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_with_nan(): | ||||
|     """Label lookups still work when a MultiIndex level contains NaN.""" | ||||
|     # GH: 27104 | ||||
|     df = DataFrame( | ||||
|         {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]} | ||||
|     ).set_index(["ind1", "ind2"]) | ||||
|     # list key: both index levels are kept | ||||
|     result = df.loc[["a"]] | ||||
|     expected = DataFrame( | ||||
|         {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"]) | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # scalar key: the first level is dropped | ||||
|     result = df.loc["a"] | ||||
|     expected = DataFrame({"col": [1]}, index=Index([1], name="ind2")) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_getitem_non_found_tuple(): | ||||
|     """A missing full-length tuple key raises KeyError showing the tuple.""" | ||||
|     # GH: 25236 | ||||
|     df = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"]).set_index( | ||||
|         ["a", "b", "c"] | ||||
|     ) | ||||
|     # the only index entry is (1, 2, 3); the key is given as floats | ||||
|     with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"): | ||||
|         df.loc[(2.0, 2.0, 3.0)] | ||||
|  | ||||
|  | ||||
| def test_get_loc_datetime_index(): | ||||
|     """``get_loc`` with a month string agrees between Index and 1-level MultiIndex.""" | ||||
|     # GH#24263 | ||||
|     index = pd.date_range("2001-01-01", periods=100) | ||||
|     mi = MultiIndex.from_arrays([index]) | ||||
|     # Check if get_loc matches for Index and MultiIndex | ||||
|     assert mi.get_loc("2001-01") == slice(0, 31, None) | ||||
|     assert index.get_loc("2001-01") == slice(0, 31, None) | ||||
|  | ||||
|     # every second element | ||||
|     loc = mi[::2].get_loc("2001-01") | ||||
|     expected = index[::2].get_loc("2001-01") | ||||
|     assert loc == expected | ||||
|  | ||||
|     # each element repeated twice | ||||
|     loc = mi.repeat(2).get_loc("2001-01") | ||||
|     expected = index.repeat(2).get_loc("2001-01") | ||||
|     assert loc == expected | ||||
|  | ||||
|     # index appended to itself | ||||
|     loc = mi.append(mi).get_loc("2001-01") | ||||
|     expected = index.append(index).get_loc("2001-01") | ||||
|     # TODO: standardize return type for MultiIndex.get_loc | ||||
|     # results are compared via the nonzero positions of the MultiIndex result | ||||
|     tm.assert_numpy_array_equal(loc.nonzero()[0], expected) | ||||
|  | ||||
|  | ||||
| def test_loc_setitem_indexer_differently_ordered(): | ||||
|     """Setting with a list key in non-index order assigns values by key order.""" | ||||
|     # GH#34603 | ||||
|     mi = MultiIndex.from_product([["a", "b"], [0, 1]]) | ||||
|     df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi) | ||||
|  | ||||
|     # second-level labels are requested as [1, 0], the reverse of the index order | ||||
|     indexer = ("a", [1, 0]) | ||||
|     df.loc[indexer, :] = np.array([[9, 10], [11, 12]]) | ||||
|     # [9, 10] goes to ("a", 1) and [11, 12] to ("a", 0) | ||||
|     expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi) | ||||
|     tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|  | ||||
| def test_loc_getitem_index_differently_ordered_slice_none(): | ||||
|     """A list key on the second level determines the order of returned rows.""" | ||||
|     # GH#31330 | ||||
|     df = DataFrame( | ||||
|         [[1, 2], [3, 4], [5, 6], [7, 8]], | ||||
|         index=[["a", "a", "b", "b"], [1, 2, 1, 2]], | ||||
|         columns=["a", "b"], | ||||
|     ) | ||||
|     # second-level labels requested as [2, 1]: all 2-rows come first, then 1-rows | ||||
|     result = df.loc[(slice(None), [2, 1]), :] | ||||
|     expected = DataFrame( | ||||
|         [[3, 4], [7, 8], [1, 2], [5, 6]], | ||||
|         index=[["a", "b", "a", "b"], [2, 2, 1, 1]], | ||||
|         columns=["a", "b"], | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| # the first indexer repeats 2 and 7; both variants share the same expected result | ||||
| @pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]]) | ||||
| def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer): | ||||
|     """Second-level list keys, with or without repeats, return each row once.""" | ||||
|     # GH#40978 | ||||
|     df = DataFrame( | ||||
|         [1] * 8, | ||||
|         index=MultiIndex.from_tuples( | ||||
|             [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)] | ||||
|         ), | ||||
|         columns=["a"], | ||||
|     ) | ||||
|     result = df.loc[(slice(None), indexer), :] | ||||
|     # eight rows expected, ordered by first appearance of each label in the key | ||||
|     expected = DataFrame( | ||||
|         [1] * 8, | ||||
|         index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]], | ||||
|         columns=["a"], | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # a boolean mask from isin keeps the frame's own row order | ||||
|     result = df.loc[df.index.isin(indexer, level=1), :] | ||||
|     tm.assert_frame_equal(result, df) | ||||
|  | ||||
|  | ||||
def test_loc_getitem_drops_levels_for_one_row_dataframe():
    """Scalar keys on levels "a" and "c" drop those levels from the result.

    GH#10521: "x" and "z" are both scalar indexers, so only level "b" remains.
    """
    index = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])
    key = ("x", slice(None), "z")

    # DataFrame: same as dropping the first and last index levels.
    frame = DataFrame({"d": [0]}, index=index)
    frame_expected = frame.droplevel([0, 2])
    tm.assert_frame_equal(frame.loc[key], frame_expected)

    # Series: the remaining index is the single level named "b".
    ser = Series([0], index=index)
    ser_result = ser.loc[key]
    ser_expected = Series([0], index=Index(["y"], name="b"))
    tm.assert_series_equal(ser_result, ser_expected)
|  | ||||
|  | ||||
def test_mi_columns_loc_list_label_order():
    """Column selection with a first-level label list keeps the list's order.

    Regression test for GH 10710.
    """
    columns = MultiIndex.from_product([["A", "B", "C"], [1, 2]])
    frame = DataFrame(np.zeros((5, 6)), columns=columns)

    result = frame.loc[:, ["B", "A"]]

    # "B" columns are expected before "A" columns, matching the key.
    expected_columns = MultiIndex.from_tuples(
        [("B", 1), ("B", 2), ("A", 1), ("A", 2)]
    )
    expected = DataFrame(np.zeros((5, 4)), columns=expected_columns)
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_mi_partial_indexing_list_raises():
    """A list row key containing a label absent from the index raises ``KeyError``.

    Regression test for GH 13501.
    """
    values = np.arange(12).reshape((4, 3))
    frame = DataFrame(
        values,
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
    )
    frame.index.names = ["key1", "key2"]
    frame.columns.names = ["state", "color"]

    # The expected message reports only ``2`` as missing from the index.
    expected_msg = "\\[2\\] not in index"
    with pytest.raises(KeyError, match=expected_msg):
        frame.loc[["b", 2], "Colorado"]
|  | ||||
|  | ||||
def test_mi_indexing_list_nonexistent_raises():
    """A list of labels that are not in the ``MultiIndex`` raises ``KeyError``.

    Regression test for GH 15452.
    """
    index = MultiIndex.from_product([[1, 2], ["a", "b"]])
    ser = Series(range(4), index=index)

    missing_labels = ["not", "found"]
    with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"):
        ser.loc[missing_labels]
|  | ||||
|  | ||||
def test_mi_add_cell_missing_row_non_unique():
    """Enlarging a frame with a non-unique row index and ``MultiIndex`` columns.

    Regression test for GH 16018.
    """
    columns = MultiIndex.from_product([[1, 2], ["A", "B"]])
    frame = DataFrame(
        [[1, 2, 5, 6], [3, 4, 7, 8]],
        index=["a", "a"],
        columns=columns,
    )

    # Add a whole new row, overwrite one of its cells, then add a second new
    # row by setting a single cell only.
    frame.loc["c"] = -1
    frame.loc["c", (1, "A")] = 3
    frame.loc["d", (1, "A")] = 3

    expected_rows = [
        [1.0, 2.0, 5.0, 6.0],
        [3.0, 4.0, 7.0, 8.0],
        [3.0, -1.0, -1, -1],
        [3.0, np.nan, np.nan, np.nan],
    ]
    expected = DataFrame(
        expected_rows,
        index=["a", "a", "c", "d"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    tm.assert_frame_equal(frame, expected)
|  | ||||
|  | ||||
def test_loc_get_scalar_casting_to_float():
    """Reading an integer cell next to a float column yields ``np.int64``.

    Regression test for GH#41369.
    """

    def _assert_is_int64_two(value):
        # The looked-up cell must equal 2 and be an np.int64 instance.
        assert value == 2
        assert isinstance(value, np.int64)

    index = MultiIndex.from_arrays([[3], [4]], names=["c", "d"])
    frame = DataFrame({"a": 1.0, "b": 2}, index=index)

    # Scalar access with a full tuple key.
    _assert_is_int64_two(frame.loc[(3, 4), "b"])
    # Same cell reached through a list-of-tuples key and positional access.
    _assert_is_int64_two(frame.loc[[(3, 4)], "b"].iloc[0])
|  | ||||
|  | ||||
def test_loc_empty_single_selector_with_names():
    """Selecting one outer label keeps the inner level's integer name.

    Regression test for GH 19517; the levels are named ``1`` and ``0``.
    """
    index = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0])
    ser = Series(index=index, dtype=np.float64)

    result = ser.loc["a"]

    remaining_level = Index(["A", "B"], name=0)
    expected = Series([np.nan, np.nan], index=remaining_level)
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_loc_keyerror_rightmost_key_missing():
    """The ``KeyError`` message names only the missing right-most key part.

    Regression test for GH 20951.
    """
    data = {
        "A": [100, 100, 200, 200, 300, 300],
        "B": [10, 10, 20, 21, 31, 33],
        "C": range(6),
    }
    frame = DataFrame(data).set_index(["A", "B"])

    # 100 exists in level "A"; 1 does not exist in level "B".
    missing_key = (100, 1)
    with pytest.raises(KeyError, match="^1$"):
        frame.loc[missing_key]
|  | ||||
|  | ||||
def test_multindex_series_loc_with_tuple_label():
    """A level value that is itself a tuple can be looked up with ``.loc``.

    Regression test for GH#43908.
    """
    index = MultiIndex.from_tuples([(1, 2), (3, (4, 5))])
    ser = Series([1, 2], index=index)

    # The second-level label of the second row is the tuple (4, 5).
    key = (3, (4, 5))
    assert ser.loc[key] == 2
| @ -0,0 +1,235 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas._libs.index as libindex | ||||
| from pandas.errors import PerformanceWarning | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalDtype, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.boolean import BooleanDtype | ||||
|  | ||||
|  | ||||
class TestMultiIndexBasic:
    """Assorted regression tests for objects indexed by a ``MultiIndex``."""

    def test_multiindex_perf_warn(self):
        """Tuple lookups with ``.loc`` on this frame emit ``PerformanceWarning``."""
        frame = DataFrame(
            {
                "jim": [0, 0, 1, 1],
                "joe": ["x", "x", "z", "y"],
                "jolie": np.random.default_rng(2).random(4),
            }
        ).set_index(["jim", "joe"])

        # Full-depth key on the frame as built.
        with tm.assert_produces_warning(PerformanceWarning):
            frame.loc[(1, "z")]

        # One-element key after reordering the rows positionally.
        reordered = frame.iloc[[2, 1, 3, 0]]
        with tm.assert_produces_warning(PerformanceWarning):
            reordered.loc[(0,)]

    @pytest.mark.parametrize("offset", [-5, 5])
    def test_indexing_over_hashtable_size_cutoff(self, monkeypatch, offset):
        """Tuple lookups work for lengths on either side of ``_SIZE_CUTOFF``.

        ``offset`` makes the series 5 shorter or 5 longer than the patched
        cutoff value.
        """
        size_cutoff = 20
        n = size_cutoff + offset

        # ``context()`` yields its own MonkeyPatch object; patch through that
        # object so the override is undone as soon as the block exits.
        with monkeypatch.context() as m:
            m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            index = MultiIndex.from_arrays((["a"] * n, np.arange(n)))
            ser = Series(np.arange(n), index)

            # Each value equals its second-level label.
            for label in (5, 6, 7):
                assert ser[("a", label)] == label

    def test_multi_nan_indexing(self):
        """``set_index(..., drop=False)`` with a NaN label in a key column.

        Regression test for GH 3588.
        """
        data = {
            "a": ["R1", "R2", np.nan, "R4"],
            "b": ["C1", "C2", "C3", "C4"],
            "c": [10, 15, np.nan, 20],
        }
        result = DataFrame(data).set_index(["a", "b"], drop=False)

        # drop=False: the key columns stay in the frame as well as in the index.
        expected = DataFrame(
            data,
            index=[
                Index(["R1", "R2", np.nan, "R4"], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(result, expected)

    def test_exclusive_nat_column_indexing(self):
        """``set_index`` when one key column contains only ``NaT`` (GH 38025)."""
        frame = DataFrame(
            {
                "a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            }
        ).set_index(["a", "b"])

        expected = DataFrame(
            {"c": [10, 15, np.nan, 20]},
            index=[
                Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(frame, expected)

    def test_nested_tuples_duplicates(self):
        """Setting with a tuple key, or a list holding it, hits duplicated rows.

        Regression test for GH#30892.
        """
        dti = pd.to_datetime(["20190101", "20190101", "20190102"])
        labels = Index(["a", "a", "c"])
        mi = MultiIndex.from_arrays([dti, labels], names=["index1", "index2"])

        base = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)
        # Both rows sharing (dti[0], "a") are expected to be updated.
        expected = DataFrame({"c1": base["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)

        duplicated_key = (dti[0], "a")
        for row_key in (duplicated_key, [duplicated_key]):
            target = base.copy(deep=True)
            target.loc[row_key, "c2"] = 1.0
            tm.assert_frame_equal(target, expected)

    def test_multiindex_with_datatime_level_preserves_freq(self):
        """Selecting one outer label returns the datetime level with its freq.

        See https://github.com/pandas-dev/pandas/issues/35563
        """
        outer = Index(range(2), name="A")
        dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
        mi = MultiIndex.from_product([outer, dti])
        frame = DataFrame(
            np.random.default_rng(2).standard_normal((14, 2)), index=mi
        )

        result = frame.loc[0].index

        tm.assert_index_equal(result, dti)
        assert result.freq == dti.freq

    def test_multiindex_complex(self):
        """A complex-valued column can become a ``MultiIndex`` level (GH#42145)."""
        complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
        non_complex_data = [3, 4, 5]

        result = DataFrame(
            {
                "x": complex_data,
                "y": non_complex_data,
                "z": non_complex_data,
            }
        )
        result.set_index(["x", "y"], inplace=True)

        expected_index = MultiIndex.from_arrays(
            [complex_data, non_complex_data],
            names=("x", "y"),
        )
        expected = DataFrame({"z": non_complex_data}, index=expected_index)
        tm.assert_frame_equal(result, expected)

    def test_rename_multiindex_with_duplicates(self):
        """Level-wise ``rename`` on an index containing duplicates (GH 38015)."""
        mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
        renamed = DataFrame(index=mi).rename(index={"A": "Apple"}, level=0)

        expected_mi = MultiIndex.from_tuples(
            [("Apple", "cat"), ("B", "cat"), ("B", "cat")]
        )
        expected = DataFrame(index=expected_mi)
        tm.assert_frame_equal(renamed, expected)

    def test_series_align_multiindex_with_nan_overlap_only(self):
        """``Series.align`` when the two indexes share only an all-NaN entry.

        Regression test for GH 38439.
        """
        mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        mi2 = MultiIndex.from_arrays([[np.nan, 82.0], [np.nan, np.nan]])
        ser1 = Series([1, 2], index=mi1)
        ser2 = Series([1, 2], index=mi2)

        result1, result2 = ser1.align(ser2)

        # Expected union of both indexes; labels missing on one side give NaN.
        union = MultiIndex.from_arrays(
            [[81.0, 82.0, np.nan], [np.nan, np.nan, np.nan]]
        )
        expected1 = Series([1.0, np.nan, 2.0], index=union)
        expected2 = Series([np.nan, 2.0, 1.0], index=union)

        tm.assert_series_equal(result1, expected1)
        tm.assert_series_equal(result2, expected2)

    def test_series_align_multiindex_with_nan(self):
        """``Series.align`` when both indexes hold the same entries, reordered.

        Regression test for GH 38439.
        """
        mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        mi2 = MultiIndex.from_arrays([[np.nan, 81.0], [np.nan, np.nan]])
        ser1 = Series([1, 2], index=mi1)
        ser2 = Series([1, 2], index=mi2)

        result1, result2 = ser1.align(ser2)

        # Both results are expected on the first series' ordering.
        aligned = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        expected1 = Series([1, 2], index=aligned)
        expected2 = Series([2, 1], index=aligned)

        tm.assert_series_equal(result1, expected1)
        tm.assert_series_equal(result2, expected2)

    def test_nunique_smoke(self):
        """``MultiIndex.nunique`` counts two identical rows once (GH 34019)."""
        frame = DataFrame([[1, 2], [1, 2]]).set_index([0, 1])
        assert frame.index.nunique() == 1

    def test_multiindex_repeated_keys(self):
        """Repeated labels in a ``.loc`` list key repeat the rows (GH19414)."""
        ser = Series([1, 2], MultiIndex.from_arrays([["a", "b"]]))
        result = ser.loc[["a", "a", "b", "b"]]

        expected = Series(
            [1, 1, 2, 2], MultiIndex.from_arrays([["a", "a", "b", "b"]])
        )
        tm.assert_series_equal(result, expected)

    def test_multiindex_with_na_missing_key(self):
        """A missing key raises ``KeyError`` when a column key is ``None``.

        Regression test for GH46173.
        """
        frame = DataFrame.from_dict(
            {
                ("foo",): [1, 2, 3],
                ("bar",): [5, 6, 7],
                (None,): [8, 9, 0],
            }
        )
        with pytest.raises(KeyError, match="missing_key"):
            frame[[("missing_key",)]]

    def test_multiindex_dtype_preservation(self):
        """Extension dtypes are kept under ``MultiIndex`` columns (GH51261)."""
        columns = MultiIndex.from_tuples([("A", "B")], names=["lvl1", "lvl2"])
        categorical = DataFrame(["value"], columns=columns).astype("category")
        first_level_selected = categorical["A"]
        assert isinstance(first_level_selected["B"].dtype, CategoricalDtype)

        # geopandas 1763 analogue: assign a boolean-dtype column to a frame
        # whose columns have three levels.
        three_level = DataFrame(
            [[1, 0], [0, 1]],
            columns=[
                ["foo", "foo"],
                ["location", "location"],
                ["x", "y"],
            ],
        ).assign(bools=Series([True, False], dtype="boolean"))
        assert isinstance(three_level["bools"].dtype, BooleanDtype)

    def test_multiindex_from_tuples_with_nan(self):
        """A bare NaN among the tuples equals an all-NaN tuple (GH#23578)."""
        result = MultiIndex.from_tuples([("a", "b", "c"), np.nan, ("d", "", "")])
        expected = MultiIndex.from_tuples(
            [("a", "b", "c"), (np.nan, np.nan, np.nan), ("d", "", "")]
        )
        tm.assert_index_equal(result, expected)
| @ -0,0 +1,269 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     MultiIndex, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestMultiIndexPartial:
    """Partial-key selection and assignment on ``MultiIndex``-ed objects."""

    def test_getitem_partial_int(self):
        """Integer first-level keys select columns by label (GH 12416)."""
        outer = [10, 20]
        inner = ["a", "b"]
        rows = range(2)
        frame = DataFrame(index=rows, columns=MultiIndex.from_product([outer, inner]))

        # Scalar key: the result's columns are the inner labels only.
        scalar_expected = DataFrame(index=rows, columns=inner)
        tm.assert_frame_equal(frame[20], scalar_expected)

        # List key: the result keeps both column levels.
        list_expected = DataFrame(
            index=rows, columns=MultiIndex.from_product([outer[1:], inner])
        )
        tm.assert_frame_equal(frame[[20]], list_expected)

        # 1 is not among the first-level labels, so both forms raise.
        with pytest.raises(KeyError, match="1"):
            frame[1]
        with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
            frame[[1]]

    def test_series_slice_partial(self):
        """Placeholder: no assertions are made here."""
        pass

    def test_xs_partial(
        self,
        multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data,
    ):
        """``xs`` with a partial key agrees with the equivalent ``.loc`` lookup."""
        frame = multiindex_dataframe_random_data
        ymd = multiindex_year_month_day_dataframe_random_data

        # Single first-level label.
        via_xs = frame.xs("foo")
        via_loc = frame.loc["foo"]
        via_transpose = frame.T["foo"].T
        tm.assert_frame_equal(via_xs, via_transpose)
        tm.assert_frame_equal(via_xs, via_loc)

        # Two-element key on the year/month/day fixture.
        year_month = (2000, 4)
        tm.assert_frame_equal(ymd.xs(year_month), ymd.loc[year_month])

        # ex from #1796
        index = MultiIndex(
            levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
            codes=[
                [0, 0, 0, 0, 1, 1, 1, 1],
                [0, 0, 1, 1, 0, 0, 1, 1],
                [0, 1, 0, 1, 0, 1, 0, 1],
            ],
        )
        three_level = DataFrame(
            np.random.default_rng(2).standard_normal((8, 4)),
            index=index,
            columns=list("abcd"),
        )
        two_of_three = ("foo", "one")
        tm.assert_frame_equal(
            three_level.xs(two_of_three), three_level.loc[two_of_three]
        )

    def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
        """``frame[2000, 2]`` on the transposed fixture matches a manual reindex."""
        transposed = multiindex_year_month_day_dataframe_random_data.T
        result = transposed[(2000, 2)]

        # Build the expectation by hand: keep columns whose second-level code
        # is 1, then drop the two leading column levels.
        wanted = transposed.columns[transposed.columns.codes[1] == 1]
        expected = transposed.reindex(columns=wanted)
        expected.columns = expected.columns.droplevel(0).droplevel(0)
        tm.assert_frame_equal(result, expected)

    def test_fancy_slice_partial(
        self,
        multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data,
    ):
        """Label slices with partial keys match positional / mask selection."""
        frame = multiindex_dataframe_random_data
        by_label = frame.loc[slice("bar", "baz")]
        by_position = frame[slice(3, 7)]
        tm.assert_frame_equal(by_label, by_position)

        ymd = multiindex_year_month_day_dataframe_random_data
        by_label = ymd.loc[slice((2000, 2), (2000, 4))]
        # Same rows picked through the second level's integer codes.
        second_level_codes = ymd.index.codes[1]
        in_range = (second_level_codes >= 1) & (second_level_codes <= 3)
        tm.assert_frame_equal(by_label, ymd[in_range])

    def test_getitem_partial_column_select(self):
        """A partial row key combined with a column selector in ``.loc``."""
        index = MultiIndex(
            codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
            levels=[["a", "b"], ["x", "y"], ["p", "q"]],
        )
        frame = DataFrame(np.random.default_rng(2).random((3, 2)), index=index)
        row_key = ("a", "y")

        # All columns: same as the row-only lookup.
        all_columns = frame.loc[row_key, :]
        tm.assert_frame_equal(all_columns, frame.loc[row_key])

        # Reordered columns: same as selecting the columns afterwards.
        swapped = frame.loc[row_key, [1, 0]]
        tm.assert_frame_equal(swapped, frame.loc[row_key][[1, 0]])

        # "foo" is not a second-level label.
        with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
            frame.loc[("a", "foo"), :]

    # TODO(ArrayManager) rewrite test to not use .values
    # exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
    @td.skip_array_manager_invalid_test
    def test_partial_set(
        self,
        multiindex_year_month_day_dataframe_random_data,
        using_copy_on_write,
        warn_copy_on_write,
    ):
        """Partial-key assignment compared against positional assignment.

        Regression test for GH #397.
        """
        ymd = multiindex_year_month_day_dataframe_random_data
        df = ymd.copy()
        exp = ymd.copy()

        # Whole rows for (2000, 4).
        df.loc[2000, 4] = 0
        exp.iloc[65:85] = 0
        tm.assert_frame_equal(df, exp)

        # Column "A" for (2000, 4): the chained form is attempted in both
        # modes; with copy-on-write the value is then also set directly.
        with tm.raises_chained_assignment_error():
            df["A"].loc[2000, 4] = 1
        if using_copy_on_write:
            df.loc[(2000, 4), "A"] = 1
        exp.iloc[65:85, 0] = 1
        tm.assert_frame_equal(df, exp)

        # Whole rows for year 2000.
        df.loc[2000] = 5
        exp.iloc[:100] = 5
        tm.assert_frame_equal(df, exp)

        # this works...for now
        with tm.raises_chained_assignment_error():
            df["A"].iloc[14] = 5
        wanted = exp["A"].iloc[14] if using_copy_on_write else 5
        assert df["A"].iloc[14] == wanted

    @pytest.mark.parametrize("dtype", [int, float])
    def test_getitem_intkey_leading_level(
        self, multiindex_year_month_day_dataframe_random_data, dtype
    ):
        """An integer key is looked up by label, not position (GH#33355)."""
        # GH#33355 dont fall-back to positional when leading level is int
        ymd = multiindex_year_month_day_dataframe_random_data
        levels = ymd.index.levels
        recast_levels = [levels[0].astype(dtype)] + levels[1:]
        ymd.index = ymd.index.set_levels(recast_levels)

        ser = ymd["A"]
        mi = ser.index
        assert isinstance(mi, MultiIndex)
        expected_dtype = np.dtype(int) if dtype is int else np.float64
        assert mi.levels[0].dtype == expected_dtype

        leading = mi.levels[0]
        assert 14 not in leading
        assert not leading._should_fallback_to_positional
        assert not mi._should_fallback_to_positional

        with pytest.raises(KeyError, match="14"):
            ser[14]

    # ---------------------------------------------------------------------

    def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
        """Setting by list or slice of outer labels equals setting them one by one.

        Checked for the whole frame first and then for its "A" column.
        """
        frame = multiindex_dataframe_random_data
        combined_keys = (["foo", "bar"], slice("foo", "bar"))

        for source, assert_same in (
            (frame, tm.assert_frame_equal),
            (frame["A"], tm.assert_series_equal),
        ):
            for key in combined_keys:
                expected = source.copy()
                result = source.copy()
                result.loc[key] = 0
                expected.loc["foo"] = 0
                expected.loc["bar"] = 0
                assert_same(result, expected)

    @pytest.mark.parametrize(
        "indexer, exp_idx, exp_values",
        [
            (
                slice("2019-2", None),
                DatetimeIndex(["2019-02-01"], dtype="M8[ns]"),
                [2, 3],
            ),
            (
                slice(None, "2019-2"),
                date_range("2019", periods=2, freq="MS"),
                [0, 1, 2, 3],
            ),
        ],
    )
    def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
        """Partial date-string slices on a datetime level (GH: 25165)."""
        level_names = ["x", "y"]
        dates = date_range("2019", periods=2, freq="MS")
        frame = DataFrame(
            list(range(4)),
            index=MultiIndex.from_product([dates, [0, 1]], names=level_names),
        )
        expected = DataFrame(
            exp_values,
            index=MultiIndex.from_product([exp_idx, [0, 1]], names=level_names),
        )

        # The same slice through every row-selection entry point.
        tm.assert_frame_equal(frame[indexer], expected)
        tm.assert_frame_equal(frame.loc[indexer], expected)
        tm.assert_frame_equal(frame.loc(axis=0)[indexer], expected)
        tm.assert_frame_equal(frame.loc[indexer, :], expected)

        # With the datetime level moved to second position.
        swapped = frame.swaplevel(0, 1).sort_index()
        swapped_expected = expected.swaplevel(0, 1).sort_index()
        tm.assert_frame_equal(swapped.loc[:, indexer, :], swapped_expected)
|  | ||||
|  | ||||
def test_loc_getitem_partial_both_axis():
    """``.loc["a", "b"]`` applies one partial key per axis (gh-12660)."""
    level_values = [["a", "b"], [2, 1]]
    row_index = MultiIndex.from_product(level_values, names=["row1", "row2"])
    col_index = MultiIndex.from_product(level_values, names=["col1", "col2"])
    values = np.random.default_rng(2).standard_normal((4, 4))
    frame = DataFrame(values, index=row_index, columns=col_index)

    # Positional equivalent: first two rows, last two columns, with the
    # matched outer level removed on each axis.
    block = frame.iloc[:2, 2:]
    expected = block.droplevel("row1").droplevel("col1", axis=1)

    result = frame.loc["a", "b"]
    tm.assert_frame_equal(result, expected)
| @ -0,0 +1,589 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import SettingWithCopyError | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
|     isna, | ||||
|     notna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def assert_equal(a, b):
    """Assert that ``a == b`` is truthy."""
    are_equal = a == b
    assert are_equal
|  | ||||
|  | ||||
| class TestMultiIndexSetItem: | ||||
|     def check(self, target, indexers, value, compare_fn=assert_equal, expected=None): | ||||
|         target.loc[indexers] = value | ||||
|         result = target.loc[indexers] | ||||
|         if expected is None: | ||||
|             expected = value | ||||
|         compare_fn(result, expected) | ||||
|  | ||||
|     def test_setitem_multiindex(self): | ||||
|         # GH#7190 | ||||
|         cols = ["A", "w", "l", "a", "x", "X", "d", "profit"] | ||||
|         index = MultiIndex.from_product( | ||||
|             [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"] | ||||
|         ) | ||||
|         t, n = 0, 2 | ||||
|  | ||||
|         df = DataFrame( | ||||
|             np.nan, | ||||
|             columns=cols, | ||||
|             index=index, | ||||
|         ) | ||||
|         self.check(target=df, indexers=((t, n), "X"), value=0) | ||||
|  | ||||
|         df = DataFrame(-999, columns=cols, index=index) | ||||
|         self.check(target=df, indexers=((t, n), "X"), value=1) | ||||
|  | ||||
|         df = DataFrame(columns=cols, index=index) | ||||
|         self.check(target=df, indexers=((t, n), "X"), value=2) | ||||
|  | ||||
|         # gh-7218: assigning with 0-dim arrays | ||||
|         df = DataFrame(-999, columns=cols, index=index) | ||||
|         self.check( | ||||
|             target=df, | ||||
|             indexers=((t, n), "X"), | ||||
|             value=np.array(3), | ||||
|             expected=3, | ||||
|         ) | ||||
|  | ||||
|     def test_setitem_multiindex2(self): | ||||
|         # GH#5206 | ||||
|         df = DataFrame( | ||||
|             np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float | ||||
|         ) | ||||
|         df["F"] = 99 | ||||
|         row_selection = df["A"] % 2 == 0 | ||||
|         col_selection = ["B", "C"] | ||||
|         df.loc[row_selection, col_selection] = df["F"] | ||||
|         output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"]) | ||||
|         tm.assert_frame_equal(df.loc[row_selection, col_selection], output) | ||||
|         self.check( | ||||
|             target=df, | ||||
|             indexers=(row_selection, col_selection), | ||||
|             value=df["F"], | ||||
|             compare_fn=tm.assert_frame_equal, | ||||
|             expected=output, | ||||
|         ) | ||||
|  | ||||
|     def test_setitem_multiindex3(self): | ||||
|         # GH#11372 | ||||
|         idx = MultiIndex.from_product( | ||||
|             [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")] | ||||
|         ) | ||||
|         cols = MultiIndex.from_product( | ||||
|             [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] | ||||
|         ) | ||||
|  | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).random((12, 4)), index=idx, columns=cols | ||||
|         ) | ||||
|  | ||||
|         subidx = MultiIndex.from_arrays( | ||||
|             [["A", "A"], date_range("2015-01-01", "2015-02-01", freq="MS")] | ||||
|         ) | ||||
|         subcols = MultiIndex.from_arrays( | ||||
|             [["foo", "foo"], date_range("2016-01-01", "2016-02-01", freq="MS")] | ||||
|         ) | ||||
|  | ||||
|         vals = DataFrame( | ||||
|             np.random.default_rng(2).random((2, 2)), index=subidx, columns=subcols | ||||
|         ) | ||||
|         self.check( | ||||
|             target=df, | ||||
|             indexers=(subidx, subcols), | ||||
|             value=vals, | ||||
|             compare_fn=tm.assert_frame_equal, | ||||
|         ) | ||||
|         # set all columns | ||||
|         vals = DataFrame( | ||||
|             np.random.default_rng(2).random((2, 4)), index=subidx, columns=cols | ||||
|         ) | ||||
|         self.check( | ||||
|             target=df, | ||||
|             indexers=(subidx, slice(None, None, None)), | ||||
|             value=vals, | ||||
|             compare_fn=tm.assert_frame_equal, | ||||
|         ) | ||||
|         # identity | ||||
|         copy = df.copy() | ||||
|         self.check( | ||||
|             target=df, | ||||
|             indexers=(df.index, df.columns), | ||||
|             value=df, | ||||
|             compare_fn=tm.assert_frame_equal, | ||||
|             expected=copy, | ||||
|         ) | ||||
|  | ||||
|     # TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in | ||||
|     # all NaNs -> doesn't work in the "split" path (also for BlockManager actually) | ||||
|     @td.skip_array_manager_not_yet_implemented | ||||
|     def test_multiindex_setitem(self): | ||||
|         # GH 3738 | ||||
|         # in-place arithmetic through .loc where the right hand side | ||||
|         # carries a multi-index | ||||
|         level_arrays = [ | ||||
|             np.array(["bar", "bar", "baz", "qux", "qux", "bar"]), | ||||
|             np.array(["one", "two", "one", "one", "two", "one"]), | ||||
|             np.arange(0, 6, 1), | ||||
|         ] | ||||
|         unsorted = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((6, 3)), | ||||
|             index=level_arrays, | ||||
|             columns=["A", "B", "C"], | ||||
|         ) | ||||
|         df_orig = unsorted.sort_index() | ||||
|  | ||||
|         # list-like key: the in-place multiplication succeeds | ||||
|         df = df_orig.copy() | ||||
|         doubled = df_orig.loc[["bar"]] * 2 | ||||
|         df.loc[["bar"]] *= 2 | ||||
|         tm.assert_frame_equal(df.loc[["bar"]], doubled) | ||||
|  | ||||
|         # scalar key: raises because the two sides have differing levels | ||||
|         err_msg = "cannot align on a multi-index with out specifying the join levels" | ||||
|         with pytest.raises(TypeError, match=err_msg): | ||||
|             df.loc["bar"] *= 2 | ||||
|  | ||||
|     def test_multiindex_setitem2(self): | ||||
|         # from SO | ||||
|         # https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation | ||||
|         df_orig = DataFrame.from_dict( | ||||
|             { | ||||
|                 "price": { | ||||
|                     ("DE", "Coal", "Stock"): 2, | ||||
|                     ("DE", "Gas", "Stock"): 4, | ||||
|                     ("DE", "Elec", "Demand"): 1, | ||||
|                     ("FR", "Gas", "Stock"): 5, | ||||
|                     ("FR", "Solar", "SupIm"): 0, | ||||
|                     ("FR", "Wind", "SupIm"): 0, | ||||
|                 } | ||||
|             } | ||||
|         ) | ||||
|         df_orig.index = MultiIndex.from_tuples( | ||||
|             df_orig.index, names=["Sit", "Com", "Type"] | ||||
|         ) | ||||
|  | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 1, 3]] *= 2 | ||||
|  | ||||
|         idx = pd.IndexSlice | ||||
|         df = df_orig.copy() | ||||
|         df.loc[idx[:, :, "Stock"], :] *= 2 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc[idx[:, :, "Stock"], "price"] *= 2 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_multiindex_assignment(self): | ||||
|         # GH3777 part 2 | ||||
|  | ||||
|         # mixed dtype | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3), | ||||
|             columns=list("abc"), | ||||
|             index=[[4, 4, 8], [8, 10, 12]], | ||||
|         ) | ||||
|         df["d"] = np.nan | ||||
|         arr = np.array([0.0, 1.0]) | ||||
|  | ||||
|         df.loc[4, "d"] = arr | ||||
|         tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) | ||||
|  | ||||
|     def test_multiindex_assignment_single_dtype( | ||||
|         self, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         # GH3777 part 2b | ||||
|         # single dtype: every column of the frame is int64 | ||||
|         arr = np.array([0.0, 1.0]) | ||||
|  | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3), | ||||
|             columns=list("abc"), | ||||
|             index=[[4, 4, 8], [8, 10, 12]], | ||||
|             dtype=np.int64, | ||||
|         ) | ||||
|         # raw ndarray over the first two rows of "c", taken before any setitem; | ||||
|         # used further down to check whether the assignment happened in place | ||||
|         view = df["c"].iloc[:2].values | ||||
|  | ||||
|         # arr can be losslessly cast to int, so this setitem is inplace | ||||
|         # INFO(CoW-warn) this does not warn because we directly took .values | ||||
|         # above, so no reference to a pandas object is alive for `view` | ||||
|         df.loc[4, "c"] = arr | ||||
|         exp = Series(arr, index=[8, 10], name="c", dtype="int64") | ||||
|         result = df.loc[4, "c"] | ||||
|         tm.assert_series_equal(result, exp) | ||||
|  | ||||
|         # extra check for inplace-ness (only checked when CoW is disabled) | ||||
|         if not using_copy_on_write: | ||||
|             tm.assert_numpy_array_equal(view, exp.values) | ||||
|  | ||||
|         # arr + 0.5 cannot be cast losslessly to int, so we upcast | ||||
|         with tm.assert_produces_warning( | ||||
|             FutureWarning, match="item of incompatible dtype" | ||||
|         ): | ||||
|             df.loc[4, "c"] = arr + 0.5 | ||||
|         result = df.loc[4, "c"] | ||||
|         exp = exp + 0.5 | ||||
|         tm.assert_series_equal(result, exp) | ||||
|  | ||||
|         # scalar ok; the column is expected to be float64 at this point | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             df.loc[4, "c"] = 10 | ||||
|         exp = Series(10, index=[8, 10], name="c", dtype="float64") | ||||
|         tm.assert_series_equal(df.loc[4, "c"], exp) | ||||
|  | ||||
|         # invalid assignments: the key selects two rows, so list-likes of | ||||
|         # length 4 and length 1 must both be rejected | ||||
|         msg = "Must have equal len keys and value when setting with an iterable" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[4, "c"] = [0, 1, 2, 3] | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[4, "c"] = [0] | ||||
|  | ||||
|         # But with a length-1 listlike column indexer this behaves like | ||||
|         #  `df.loc[4, "c"] = 0` | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             df.loc[4, ["c"]] = [0] | ||||
|         assert (df.loc[4, "c"] == 0).all() | ||||
|  | ||||
|     def test_groupby_example(self): | ||||
|         # groupby example | ||||
|         NUM_ROWS = 100 | ||||
|         NUM_COLS = 10 | ||||
|         col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())] | ||||
|         index_cols = col_names[:5] | ||||
|  | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).integers(5, size=(NUM_ROWS, NUM_COLS)), | ||||
|             dtype=np.int64, | ||||
|             columns=col_names, | ||||
|         ) | ||||
|         df = df.set_index(index_cols).sort_index() | ||||
|         grp = df.groupby(level=index_cols[:4]) | ||||
|         df["new_col"] = np.nan | ||||
|  | ||||
|         # we are actually operating on a copy here | ||||
|         # but in this case, that's ok | ||||
|         for name, df2 in grp: | ||||
|             new_vals = np.arange(df2.shape[0]) | ||||
|             df.loc[name, "new_col"] = new_vals | ||||
|  | ||||
|     def test_series_setitem( | ||||
|         self, multiindex_year_month_day_dataframe_random_data, warn_copy_on_write | ||||
|     ): | ||||
|         ymd = multiindex_year_month_day_dataframe_random_data | ||||
|         s = ymd["A"] | ||||
|  | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             s[2000, 3] = np.nan | ||||
|         assert isna(s.values[42:65]).all() | ||||
|         assert notna(s.values[:42]).all() | ||||
|         assert notna(s.values[65:]).all() | ||||
|  | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             s[2000, 3, 10] = np.nan | ||||
|         assert isna(s.iloc[49]) | ||||
|  | ||||
|         with pytest.raises(KeyError, match="49"): | ||||
|             # GH#33355 dont fall-back to positional when leading level is int | ||||
|             s[49] | ||||
|  | ||||
|     def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data): | ||||
|         frame = multiindex_dataframe_random_data | ||||
|         df = frame.T.copy() | ||||
|         values = df.values.copy() | ||||
|  | ||||
|         result = df[df > 0] | ||||
|         expected = df.where(df > 0) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         df[df > 0] = 5 | ||||
|         values[values > 0] = 5 | ||||
|         tm.assert_almost_equal(df.values, values) | ||||
|  | ||||
|         df[df == 5] = 0 | ||||
|         values[values == 5] = 0 | ||||
|         tm.assert_almost_equal(df.values, values) | ||||
|  | ||||
|         # a df that needs alignment first | ||||
|         df[df[:-1] < 0] = 2 | ||||
|         np.putmask(values[:-1], values[:-1] < 0, 2) | ||||
|         tm.assert_almost_equal(df.values, values) | ||||
|  | ||||
|         with pytest.raises(TypeError, match="boolean values only"): | ||||
|             df[df * 0] = 2 | ||||
|  | ||||
|     def test_frame_getitem_setitem_multislice(self): | ||||
|         levels = [["t1", "t2"], ["a", "b", "c"]] | ||||
|         codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] | ||||
|         midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"]) | ||||
|         df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx) | ||||
|  | ||||
|         result = df.loc[:, "value"] | ||||
|         tm.assert_series_equal(df["value"], result) | ||||
|  | ||||
|         result = df.loc[df.index[1:3], "value"] | ||||
|         tm.assert_series_equal(df["value"][1:3], result) | ||||
|  | ||||
|         result = df.loc[:, :] | ||||
|         tm.assert_frame_equal(df, result) | ||||
|  | ||||
|         result = df | ||||
|         df.loc[:, "value"] = 10 | ||||
|         result["value"] = 10 | ||||
|         tm.assert_frame_equal(df, result) | ||||
|  | ||||
|         df.loc[:, :] = 10 | ||||
|         tm.assert_frame_equal(df, result) | ||||
|  | ||||
|     def test_frame_setitem_multi_column(self): | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((10, 4)), | ||||
|             columns=[["a", "a", "b", "b"], [0, 1, 0, 1]], | ||||
|         ) | ||||
|  | ||||
|         cp = df.copy() | ||||
|         cp["a"] = cp["b"] | ||||
|         tm.assert_frame_equal(cp["a"], cp["b"]) | ||||
|  | ||||
|         # set with ndarray | ||||
|         cp = df.copy() | ||||
|         cp["a"] = cp["b"].values | ||||
|         tm.assert_frame_equal(cp["a"], cp["b"]) | ||||
|  | ||||
|     def test_frame_setitem_multi_column2(self): | ||||
|         # --------------------------------------- | ||||
|         # GH#1803 | ||||
|         columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]) | ||||
|         df = DataFrame(index=[1, 3, 5], columns=columns) | ||||
|  | ||||
|         # Works, but adds a column instead of updating the two existing ones | ||||
|         df["A"] = 0.0  # Doesn't work | ||||
|         assert (df["A"].values == 0).all() | ||||
|  | ||||
|         # it broadcasts | ||||
|         df["B", "1"] = [1, 2, 3] | ||||
|         df["A"] = df["B", "1"] | ||||
|  | ||||
|         sliced_a1 = df["A", "1"] | ||||
|         sliced_a2 = df["A", "2"] | ||||
|         sliced_b1 = df["B", "1"] | ||||
|         tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False) | ||||
|         tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False) | ||||
|         assert sliced_a1.name == ("A", "1") | ||||
|         assert sliced_a2.name == ("A", "2") | ||||
|         assert sliced_b1.name == ("B", "1") | ||||
|  | ||||
|     def test_loc_getitem_tuple_plus_columns( | ||||
|         self, multiindex_year_month_day_dataframe_random_data | ||||
|     ): | ||||
|         # GH #1013 | ||||
|         ymd = multiindex_year_month_day_dataframe_random_data | ||||
|         df = ymd[:5] | ||||
|  | ||||
|         result = df.loc[(2000, 1, 6), ["A", "B", "C"]] | ||||
|         expected = df.loc[2000, 1, 6][["A", "B", "C"]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") | ||||
|     def test_loc_getitem_setitem_slice_integers(self, frame_or_series): | ||||
|         index = MultiIndex( | ||||
|             levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]] | ||||
|         ) | ||||
|  | ||||
|         obj = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((len(index), 4)), | ||||
|             index=index, | ||||
|             columns=["a", "b", "c", "d"], | ||||
|         ) | ||||
|         obj = tm.get_obj(obj, frame_or_series) | ||||
|  | ||||
|         res = obj.loc[1:2] | ||||
|         exp = obj.reindex(obj.index[2:]) | ||||
|         tm.assert_equal(res, exp) | ||||
|  | ||||
|         obj.loc[1:2] = 7 | ||||
|         assert (obj.loc[1:2] == 7).values.all() | ||||
|  | ||||
|     def test_setitem_change_dtype(self, multiindex_dataframe_random_data): | ||||
|         frame = multiindex_dataframe_random_data | ||||
|         dft = frame.T | ||||
|         s = dft["foo", "two"] | ||||
|         dft["foo", "two"] = s > s.median() | ||||
|         tm.assert_series_equal(dft["foo", "two"], s > s.median()) | ||||
|         # assert isinstance(dft._data.blocks[1].items, MultiIndex) | ||||
|  | ||||
|         reindexed = dft.reindex(columns=[("foo", "two")]) | ||||
|         tm.assert_series_equal(reindexed["foo", "two"], s > s.median()) | ||||
|  | ||||
|     def test_set_column_scalar_with_loc( | ||||
|         self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         frame = multiindex_dataframe_random_data | ||||
|         subset = frame.index[[1, 4, 5]] | ||||
|  | ||||
|         frame.loc[subset] = 99 | ||||
|         assert (frame.loc[subset].values == 99).all() | ||||
|  | ||||
|         frame_original = frame.copy() | ||||
|         col = frame["B"] | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             col[subset] = 97 | ||||
|         if using_copy_on_write: | ||||
|             # chained setitem doesn't work with CoW | ||||
|             tm.assert_frame_equal(frame, frame_original) | ||||
|         else: | ||||
|             assert (frame.loc[subset, "B"] == 97).all() | ||||
|  | ||||
|     def test_nonunique_assignment_1750(self): | ||||
|         df = DataFrame( | ||||
|             [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD") | ||||
|         ) | ||||
|  | ||||
|         df = df.set_index(["A", "B"]) | ||||
|         mi = MultiIndex.from_tuples([(1, 1)]) | ||||
|  | ||||
|         df.loc[mi, "C"] = "_" | ||||
|  | ||||
|         assert (df.xs((1, 1))["C"] == "_").all() | ||||
|  | ||||
|     def test_astype_assignment_with_dups(self): | ||||
|         # GH 4686 | ||||
|         # assignment with dups that has a dtype change | ||||
|         cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")]) | ||||
|         df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object) | ||||
|         index = df.index.copy() | ||||
|  | ||||
|         df["A"] = df["A"].astype(np.float64) | ||||
|         tm.assert_index_equal(df.index, index) | ||||
|  | ||||
|     def test_setitem_nonmonotonic(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/31449 | ||||
|         index = MultiIndex.from_tuples( | ||||
|             [("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"] | ||||
|         ) | ||||
|         df = DataFrame(data=[0, 1, 2], index=index, columns=["e"]) | ||||
|         df.loc["a", "e"] = np.arange(99, 101, dtype="int64") | ||||
|         expected = DataFrame({"e": [99, 1, 100]}, index=index) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|  | ||||
| class TestSetitemWithExpansionMultiIndex: | ||||
|     def test_setitem_new_column_mixed_depth(self): | ||||
|         arrays = [ | ||||
|             ["a", "top", "top", "routine1", "routine1", "routine2"], | ||||
|             ["", "OD", "OD", "result1", "result2", "result1"], | ||||
|             ["", "wx", "wy", "", "", ""], | ||||
|         ] | ||||
|  | ||||
|         tuples = sorted(zip(*arrays)) | ||||
|         index = MultiIndex.from_tuples(tuples) | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index) | ||||
|  | ||||
|         result = df.copy() | ||||
|         expected = df.copy() | ||||
|         result["b"] = [1, 2, 3, 4] | ||||
|         expected["b", "", ""] = [1, 2, 3, 4] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_setitem_new_column_all_na(self): | ||||
|         # GH#1534 | ||||
|         mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")]) | ||||
|         df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) | ||||
|         s = Series({(1, 1): 1, (1, 2): 2}) | ||||
|         df["new"] = s | ||||
|         assert df["new"].isna().all() | ||||
|  | ||||
|     def test_setitem_enlargement_keep_index_names(self): | ||||
|         # GH#53053 | ||||
|         mi = MultiIndex.from_tuples([(1, 2, 3)], names=["i1", "i2", "i3"]) | ||||
|         df = DataFrame(data=[[10, 20, 30]], index=mi, columns=["A", "B", "C"]) | ||||
|         df.loc[(0, 0, 0)] = df.loc[(1, 2, 3)] | ||||
|         mi_expected = MultiIndex.from_tuples( | ||||
|             [(1, 2, 3), (0, 0, 0)], names=["i1", "i2", "i3"] | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             data=[[10, 20, 30], [10, 20, 30]], | ||||
|             index=mi_expected, | ||||
|             columns=["A", "B", "C"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|  | ||||
| @td.skip_array_manager_invalid_test  # df["foo"] select multiple columns -> .values | ||||
| # is not a view | ||||
| def test_frame_setitem_view_direct( | ||||
|     multiindex_dataframe_random_data, using_copy_on_write | ||||
| ): | ||||
|     # this works because we are modifying the underlying array | ||||
|     # really a no-no | ||||
|     df = multiindex_dataframe_random_data.T | ||||
|     if using_copy_on_write: | ||||
|         with pytest.raises(ValueError, match="read-only"): | ||||
|             df["foo"].values[:] = 0 | ||||
|         assert (df["foo"].values != 0).all() | ||||
|     else: | ||||
|         df["foo"].values[:] = 0 | ||||
|         assert (df["foo"].values == 0).all() | ||||
|  | ||||
|  | ||||
| def test_frame_setitem_copy_raises( | ||||
|     multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write | ||||
| ): | ||||
|     # will raise/warn as its chained assignment | ||||
|     df = multiindex_dataframe_random_data.T | ||||
|     if using_copy_on_write or warn_copy_on_write: | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["foo"]["one"] = 2 | ||||
|     else: | ||||
|         msg = "A value is trying to be set on a copy of a slice from a DataFrame" | ||||
|         with pytest.raises(SettingWithCopyError, match=msg): | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["foo"]["one"] = 2 | ||||
|  | ||||
|  | ||||
| def test_frame_setitem_copy_no_write( | ||||
|     multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write | ||||
| ): | ||||
|     frame = multiindex_dataframe_random_data.T | ||||
|     expected = frame | ||||
|     df = frame.copy() | ||||
|     if using_copy_on_write or warn_copy_on_write: | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["foo"]["one"] = 2 | ||||
|     else: | ||||
|         msg = "A value is trying to be set on a copy of a slice from a DataFrame" | ||||
|         with pytest.raises(SettingWithCopyError, match=msg): | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["foo"]["one"] = 2 | ||||
|  | ||||
|     result = df | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_frame_setitem_partial_multiindex(): | ||||
|     # GH 54875 | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "a": [1, 2, 3], | ||||
|             "b": [3, 4, 5], | ||||
|             "c": 6, | ||||
|             "d": 7, | ||||
|         } | ||||
|     ).set_index(["a", "b", "c"]) | ||||
|     ser = Series(8, index=df.index.droplevel("c")) | ||||
|     result = df.copy() | ||||
|     result["d"] = ser | ||||
|     expected = df.copy() | ||||
|     expected["d"] = 8 | ||||
|     tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,796 @@ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import UnsortedIndexError | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.indexing.common import _mklbl | ||||
|  | ||||
|  | ||||
| class TestMultiIndexSlicers: | ||||
|     def test_per_axis_per_level_getitem(self): | ||||
|         # GH6134 | ||||
|         # example test case | ||||
|         ix = MultiIndex.from_product( | ||||
|             [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)] | ||||
|         ) | ||||
|         df = DataFrame(np.arange(len(ix.to_numpy())), index=ix) | ||||
|  | ||||
|         result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] | ||||
|         expected = df.loc[ | ||||
|             [ | ||||
|                 ( | ||||
|                     a, | ||||
|                     b, | ||||
|                     c, | ||||
|                     d, | ||||
|                 ) | ||||
|                 for a, b, c, d in df.index.values | ||||
|                 if a in ("A1", "A2", "A3") and c in ("C1", "C3") | ||||
|             ] | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = df.loc[ | ||||
|             [ | ||||
|                 ( | ||||
|                     a, | ||||
|                     b, | ||||
|                     c, | ||||
|                     d, | ||||
|                 ) | ||||
|                 for a, b, c, d in df.index.values | ||||
|                 if a in ("A1", "A2", "A3") and c in ("C1", "C2", "C3") | ||||
|             ] | ||||
|         ] | ||||
|         result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # test multi-index slicing with per axis and per index controls | ||||
|         index = MultiIndex.from_tuples( | ||||
|             [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"] | ||||
|         ) | ||||
|         columns = MultiIndex.from_tuples( | ||||
|             [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], | ||||
|             names=["lvl0", "lvl1"], | ||||
|         ) | ||||
|  | ||||
|         df = DataFrame( | ||||
|             np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns | ||||
|         ) | ||||
|         df = df.sort_index(axis=0).sort_index(axis=1) | ||||
|  | ||||
|         # identity | ||||
|         result = df.loc[(slice(None), slice(None)), :] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|         result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|         result = df.loc[:, (slice(None), slice(None))] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         # index | ||||
|         result = df.loc[(slice(None), [1]), :] | ||||
|         expected = df.iloc[[0, 3]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc[(slice(None), 1), :] | ||||
|         expected = df.iloc[[0, 3]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # columns | ||||
|         result = df.loc[:, (slice(None), ["foo"])] | ||||
|         expected = df.iloc[:, [1, 3]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # both | ||||
|         result = df.loc[(slice(None), 1), (slice(None), ["foo"])] | ||||
|         expected = df.iloc[[0, 3], [1, 3]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc["A", "a"] | ||||
|         expected = DataFrame( | ||||
|             {"bar": [1, 5, 9], "foo": [0, 4, 8]}, | ||||
|             index=Index([1, 2, 3], name="two"), | ||||
|             columns=Index(["bar", "foo"], name="lvl1"), | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc[(slice(None), [1, 2]), :] | ||||
|         expected = df.iloc[[0, 1, 3]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # multi-level series | ||||
|         s = Series(np.arange(len(ix.to_numpy())), index=ix) | ||||
|         result = s.loc["A1":"A3", :, ["C1", "C3"]] | ||||
|         expected = s.loc[ | ||||
|             [ | ||||
|                 ( | ||||
|                     a, | ||||
|                     b, | ||||
|                     c, | ||||
|                     d, | ||||
|                 ) | ||||
|                 for a, b, c, d in s.index.values | ||||
|                 if a in ("A1", "A2", "A3") and c in ("C1", "C3") | ||||
|             ] | ||||
|         ] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # boolean indexers | ||||
|         result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :] | ||||
|         expected = df.iloc[[2, 3]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         msg = ( | ||||
|             "cannot index with a boolean indexer " | ||||
|             "that is not the same length as the index" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[(slice(None), np.array([True, False])), :] | ||||
|  | ||||
|         with pytest.raises(KeyError, match=r"\[1\] not in index"): | ||||
|             # slice(None) is on the index, [1] is on the columns, but 1 is | ||||
|             #  not in the columns, so we raise | ||||
|             #  This used to treat [1] as positional GH#16396 | ||||
|             df.loc[slice(None), [1]] | ||||
|  | ||||
|         # not lexsorted | ||||
|         assert df.index._lexsort_depth == 2 | ||||
|         df = df.sort_index(level=1, axis=0) | ||||
|         assert df.index._lexsort_depth == 0 | ||||
|  | ||||
|         msg = ( | ||||
|             "MultiIndex slicing requires the index to be " | ||||
|             r"lexsorted: slicing on levels \[1\], lexsort depth 0" | ||||
|         ) | ||||
|         with pytest.raises(UnsortedIndexError, match=msg): | ||||
|             df.loc[(slice(None), slice("bar")), :] | ||||
|  | ||||
|         # GH 16734: not sorted, but no real slicing | ||||
|         result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :] | ||||
|         tm.assert_frame_equal(result, df.iloc[[1, 3], :]) | ||||
|  | ||||
|     def test_multiindex_slicers_non_unique(self): | ||||
|         # GH 7106 | ||||
|         # non-unique mi index support | ||||
|         df = ( | ||||
|             DataFrame( | ||||
|                 { | ||||
|                     "A": ["foo", "foo", "foo", "foo"], | ||||
|                     "B": ["a", "a", "a", "a"], | ||||
|                     "C": [1, 2, 1, 3], | ||||
|                     "D": [1, 2, 3, 4], | ||||
|                 } | ||||
|             ) | ||||
|             .set_index(["A", "B", "C"]) | ||||
|             .sort_index() | ||||
|         ) | ||||
|         assert not df.index.is_unique | ||||
|         expected = ( | ||||
|             DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]}) | ||||
|             .set_index(["A", "B", "C"]) | ||||
|             .sort_index() | ||||
|         ) | ||||
|         result = df.loc[(slice(None), slice(None), 1), :] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # this is equivalent of an xs expression | ||||
|         result = df.xs(1, level=2, drop_level=False) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         df = ( | ||||
|             DataFrame( | ||||
|                 { | ||||
|                     "A": ["foo", "foo", "foo", "foo"], | ||||
|                     "B": ["a", "a", "a", "a"], | ||||
|                     "C": [1, 2, 1, 2], | ||||
|                     "D": [1, 2, 3, 4], | ||||
|                 } | ||||
|             ) | ||||
|             .set_index(["A", "B", "C"]) | ||||
|             .sort_index() | ||||
|         ) | ||||
|         assert not df.index.is_unique | ||||
|         expected = ( | ||||
|             DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]}) | ||||
|             .set_index(["A", "B", "C"]) | ||||
|             .sort_index() | ||||
|         ) | ||||
|         result = df.loc[(slice(None), slice(None), 1), :] | ||||
|         assert not result.index.is_unique | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # GH12896 | ||||
|         # numpy-implementation dependent bug | ||||
|         ints = [ | ||||
|             1, | ||||
|             2, | ||||
|             3, | ||||
|             4, | ||||
|             5, | ||||
|             6, | ||||
|             7, | ||||
|             8, | ||||
|             9, | ||||
|             10, | ||||
|             11, | ||||
|             12, | ||||
|             12, | ||||
|             13, | ||||
|             14, | ||||
|             14, | ||||
|             16, | ||||
|             17, | ||||
|             18, | ||||
|             19, | ||||
|             200000, | ||||
|             200000, | ||||
|         ] | ||||
|         n = len(ints) | ||||
|         idx = MultiIndex.from_arrays([["a"] * n, ints]) | ||||
|         result = Series([1] * n, index=idx) | ||||
|         result = result.sort_index() | ||||
|         result = result.loc[(slice(None), slice(100000))] | ||||
|         expected = Series([1] * (n - 2), index=idx[:-2]).sort_index() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_multiindex_slicers_datetimelike(self): | ||||
|         # GH 7429 | ||||
|         # buggy/inconsistent behavior when slicing with datetime-like | ||||
|         dates = [datetime(2012, 1, 1, 12, 12, 12) + timedelta(days=i) for i in range(6)] | ||||
|         freq = [1, 2] | ||||
|         index = MultiIndex.from_product([dates, freq], names=["date", "frequency"]) | ||||
|  | ||||
|         df = DataFrame( | ||||
|             np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4), | ||||
|             index=index, | ||||
|             columns=list("ABCD"), | ||||
|         ) | ||||
|  | ||||
|         # multi-axis slicing | ||||
|         idx = pd.IndexSlice | ||||
|         expected = df.iloc[[0, 2, 4], [0, 1]] | ||||
|         result = df.loc[ | ||||
|             ( | ||||
|                 slice( | ||||
|                     Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12") | ||||
|                 ), | ||||
|                 slice(1, 1), | ||||
|             ), | ||||
|             slice("A", "B"), | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc[ | ||||
|             ( | ||||
|                 idx[ | ||||
|                     Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12") | ||||
|                 ], | ||||
|                 idx[1:1], | ||||
|             ), | ||||
|             slice("A", "B"), | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc[ | ||||
|             ( | ||||
|                 slice( | ||||
|                     Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12") | ||||
|                 ), | ||||
|                 1, | ||||
|             ), | ||||
|             slice("A", "B"), | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # with strings | ||||
|         result = df.loc[ | ||||
|             (slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)), | ||||
|             slice("A", "B"), | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc[ | ||||
|             (idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"] | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_multiindex_slicers_edges(self): | ||||
|         # GH 8132 | ||||
|         # various edge cases | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5, | ||||
|                 "B": ["B0", "B0", "B1", "B1", "B2"] * 3, | ||||
|                 "DATE": [ | ||||
|                     "2013-06-11", | ||||
|                     "2013-07-02", | ||||
|                     "2013-07-09", | ||||
|                     "2013-07-30", | ||||
|                     "2013-08-06", | ||||
|                     "2013-06-11", | ||||
|                     "2013-07-02", | ||||
|                     "2013-07-09", | ||||
|                     "2013-07-30", | ||||
|                     "2013-08-06", | ||||
|                     "2013-09-03", | ||||
|                     "2013-10-01", | ||||
|                     "2013-07-09", | ||||
|                     "2013-08-06", | ||||
|                     "2013-09-03", | ||||
|                 ], | ||||
|                 "VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2], | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         df["DATE"] = pd.to_datetime(df["DATE"]) | ||||
|         df1 = df.set_index(["A", "B", "DATE"]) | ||||
|         df1 = df1.sort_index() | ||||
|  | ||||
|         # A1 - Get all values under "A0" and "A1" | ||||
|         result = df1.loc[(slice("A1")), :] | ||||
|         expected = df1.iloc[0:10] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # A2 - Get all values from the start to "A2" | ||||
|         result = df1.loc[(slice("A2")), :] | ||||
|         expected = df1 | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # A3 - Get all values under "B1" or "B2" | ||||
|         result = df1.loc[(slice(None), slice("B1", "B2")), :] | ||||
|         expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # A4 - Get all values between 2013-07-02 and 2013-07-09 | ||||
|         result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :] | ||||
|         expected = df1.iloc[[1, 2, 6, 7, 12]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # B1 - Get all values in B0 that are also under A0, A1 and A2 | ||||
|         result = df1.loc[(slice("A2"), slice("B0")), :] | ||||
|         expected = df1.iloc[[0, 1, 5, 6, 10, 11]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for | ||||
|         # the As) | ||||
|         result = df1.loc[(slice(None), slice("B2")), :] | ||||
|         expected = df1 | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # B3 - Get all values from B1 to B2 and up to 2013-08-06 | ||||
|         result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :] | ||||
|         expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # B4 - Same as A4 but the start of the date slice is not a key. | ||||
|         #      shows indexing on a partial selection slice | ||||
|         result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :] | ||||
|         expected = df1.iloc[[1, 2, 6, 7, 12]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_per_axis_per_level_doc_examples(self): | ||||
|         # test index maker | ||||
|         idx = pd.IndexSlice | ||||
|  | ||||
|         # from indexing.rst / advanced | ||||
|         index = MultiIndex.from_product( | ||||
|             [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)] | ||||
|         ) | ||||
|         columns = MultiIndex.from_tuples( | ||||
|             [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], | ||||
|             names=["lvl0", "lvl1"], | ||||
|         ) | ||||
|         df = DataFrame( | ||||
|             np.arange(len(index) * len(columns), dtype="int64").reshape( | ||||
|                 (len(index), len(columns)) | ||||
|             ), | ||||
|             index=index, | ||||
|             columns=columns, | ||||
|         ) | ||||
|         result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] | ||||
|         expected = df.loc[ | ||||
|             [ | ||||
|                 ( | ||||
|                     a, | ||||
|                     b, | ||||
|                     c, | ||||
|                     d, | ||||
|                 ) | ||||
|                 for a, b, c, d in df.index.values | ||||
|                 if a in ("A1", "A2", "A3") and c in ("C1", "C3") | ||||
|             ] | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :] | ||||
|         expected = df.loc[ | ||||
|             [ | ||||
|                 ( | ||||
|                     a, | ||||
|                     b, | ||||
|                     c, | ||||
|                     d, | ||||
|                 ) | ||||
|                 for a, b, c, d in df.index.values | ||||
|                 if c in ("C1", "C3") | ||||
|             ] | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         result = df.loc[idx[:, :, ["C1", "C3"]], :] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # not sorted | ||||
|         msg = ( | ||||
|             "MultiIndex slicing requires the index to be lexsorted: " | ||||
|             r"slicing on levels \[1\], lexsort depth 1" | ||||
|         ) | ||||
|         with pytest.raises(UnsortedIndexError, match=msg): | ||||
|             df.loc["A1", ("a", slice("foo"))] | ||||
|  | ||||
|         # GH 16734: not sorted, but no real slicing | ||||
|         tm.assert_frame_equal( | ||||
|             df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]] | ||||
|         ) | ||||
|  | ||||
|         df = df.sort_index(axis=1) | ||||
|  | ||||
|         # slicing | ||||
|         df.loc["A1", (slice(None), "foo")] | ||||
|         df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")] | ||||
|  | ||||
|         # setitem | ||||
|         df.loc(axis=0)[:, :, ["C1", "C3"]] = -10 | ||||
|  | ||||
|     def test_loc_axis_arguments(self): | ||||
|         index = MultiIndex.from_product( | ||||
|             [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)] | ||||
|         ) | ||||
|         columns = MultiIndex.from_tuples( | ||||
|             [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], | ||||
|             names=["lvl0", "lvl1"], | ||||
|         ) | ||||
|         df = ( | ||||
|             DataFrame( | ||||
|                 np.arange(len(index) * len(columns), dtype="int64").reshape( | ||||
|                     (len(index), len(columns)) | ||||
|                 ), | ||||
|                 index=index, | ||||
|                 columns=columns, | ||||
|             ) | ||||
|             .sort_index() | ||||
|             .sort_index(axis=1) | ||||
|         ) | ||||
|  | ||||
|         # axis 0 | ||||
|         result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]] | ||||
|         expected = df.loc[ | ||||
|             [ | ||||
|                 ( | ||||
|                     a, | ||||
|                     b, | ||||
|                     c, | ||||
|                     d, | ||||
|                 ) | ||||
|                 for a, b, c, d in df.index.values | ||||
|                 if a in ("A1", "A2", "A3") and c in ("C1", "C3") | ||||
|             ] | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc(axis="index")[:, :, ["C1", "C3"]] | ||||
|         expected = df.loc[ | ||||
|             [ | ||||
|                 ( | ||||
|                     a, | ||||
|                     b, | ||||
|                     c, | ||||
|                     d, | ||||
|                 ) | ||||
|                 for a, b, c, d in df.index.values | ||||
|                 if c in ("C1", "C3") | ||||
|             ] | ||||
|         ] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # axis 1 | ||||
|         result = df.loc(axis=1)[:, "foo"] | ||||
|         expected = df.loc[:, (slice(None), "foo")] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.loc(axis="columns")[:, "foo"] | ||||
|         expected = df.loc[:, (slice(None), "foo")] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # invalid axis | ||||
|         for i in [-1, 2, "foo"]: | ||||
|             msg = f"No axis named {i} for object type DataFrame" | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 df.loc(axis=i)[:, :, ["C1", "C3"]] | ||||
|  | ||||
|     def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self): | ||||
|         # GH29519 | ||||
|         df = DataFrame( | ||||
|             np.arange(27).reshape(3, 9), | ||||
|             columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]), | ||||
|         ) | ||||
|         result = df.loc(axis=1)["a1":"a2"] | ||||
|         expected = df.iloc[:, :-3] | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self): | ||||
|         # GH29519 | ||||
|         df = DataFrame( | ||||
|             np.arange(27).reshape(3, 9), | ||||
|             columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]), | ||||
|         ) | ||||
|         result = df.loc(axis=1)["a1"] | ||||
|         expected = df.iloc[:, :3] | ||||
|         expected.columns = ["b1", "b2", "b3"] | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_ax_single_level_indexer_simple_df(self): | ||||
|         # GH29519 | ||||
|         # test single level indexing on single index column data frame | ||||
|         df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"]) | ||||
|         result = df.loc(axis=1)["a"] | ||||
|         expected = Series(np.array([0, 3, 6]), name="a") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_per_axis_per_level_setitem(self): | ||||
|         # test index maker | ||||
|         idx = pd.IndexSlice | ||||
|  | ||||
|         # test multi-index slicing with per axis and per index controls | ||||
|         index = MultiIndex.from_tuples( | ||||
|             [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"] | ||||
|         ) | ||||
|         columns = MultiIndex.from_tuples( | ||||
|             [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], | ||||
|             names=["lvl0", "lvl1"], | ||||
|         ) | ||||
|  | ||||
|         df_orig = DataFrame( | ||||
|             np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns | ||||
|         ) | ||||
|         df_orig = df_orig.sort_index(axis=0).sort_index(axis=1) | ||||
|  | ||||
|         # identity | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), slice(None)), :] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[:, :] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc(axis=0)[:, :] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[:, :] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[:, :] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc[:, (slice(None), slice(None))] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[:, :] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # index | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), [1]), :] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3]] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), 1), :] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3]] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc(axis=0)[:, 1] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3]] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # columns | ||||
|         df = df_orig.copy() | ||||
|         df.loc[:, (slice(None), ["foo"])] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[:, [1, 3]] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # both | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3], [1, 3]] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc[idx[:, 1], idx[:, ["foo"]]] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3], [1, 3]] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc["A", "a"] = 100 | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[0:3, 0:2] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # setting with a list-like | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( | ||||
|             [[100, 100], [100, 100]], dtype="int64" | ||||
|         ) | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3], [1, 3]] = 100 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # not enough values | ||||
|         df = df_orig.copy() | ||||
|  | ||||
|         msg = "setting an array element with a sequence." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( | ||||
|                 [[100], [100, 100]], dtype="int64" | ||||
|             ) | ||||
|  | ||||
|         msg = "Must have equal len keys and value when setting with an iterable" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( | ||||
|                 [100, 100, 100, 100], dtype="int64" | ||||
|             ) | ||||
|  | ||||
|         # with an alignable rhs | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), 1), (slice(None), ["foo"])] = ( | ||||
|             df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5 | ||||
|         ) | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[ | ||||
|             (slice(None), 1), (slice(None), ["foo"]) | ||||
|         ] | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy() | ||||
|         rhs.loc[:, ("c", "bah")] = 10 | ||||
|         df = df_orig.copy() | ||||
|         df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs | ||||
|         expected = df_orig.copy() | ||||
|         expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_multiindex_label_slicing_with_negative_step(self): | ||||
|         ser = Series( | ||||
|             np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)]) | ||||
|         ) | ||||
|         SLC = pd.IndexSlice | ||||
|  | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1]) | ||||
|  | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1]) | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1]) | ||||
|  | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1]) | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1]) | ||||
|  | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1]) | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1]) | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1]) | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1]) | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0]) | ||||
|  | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1]) | ||||
|         tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1]) | ||||
|         tm.assert_indexing_slices_equivalent( | ||||
|             ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1] | ||||
|         ) | ||||
|  | ||||
|     def test_multiindex_slice_first_level(self): | ||||
|         # GH 12697 | ||||
|         freq = ["a", "b", "c", "d"] | ||||
|         idx = MultiIndex.from_product([freq, range(500)]) | ||||
|         df = DataFrame(list(range(2000)), index=idx, columns=["Test"]) | ||||
|         df_slice = df.loc[pd.IndexSlice[:, 30:70], :] | ||||
|         result = df_slice.loc["a"] | ||||
|         expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71)) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         result = df_slice.loc["d"] | ||||
|         expected = DataFrame( | ||||
|             list(range(1530, 1571)), columns=["Test"], index=range(30, 71) | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data): | ||||
|         ymd = multiindex_year_month_day_dataframe_random_data | ||||
|         s = ymd["A"] | ||||
|         result = s[5:] | ||||
|         expected = s.reindex(s.index[5:]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         s = ymd["A"].copy() | ||||
|         exp = ymd["A"].copy() | ||||
|         s[5:] = 0 | ||||
|         exp.iloc[5:] = 0 | ||||
|         tm.assert_numpy_array_equal(s.values, exp.values) | ||||
|  | ||||
|         result = ymd[5:] | ||||
|         expected = ymd.reindex(s.index[5:]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype, loc, iloc", | ||||
|         [ | ||||
|             # dtype = int, step = -1 | ||||
|             ("int", slice(None, None, -1), slice(None, None, -1)), | ||||
|             ("int", slice(3, None, -1), slice(3, None, -1)), | ||||
|             ("int", slice(None, 1, -1), slice(None, 0, -1)), | ||||
|             ("int", slice(3, 1, -1), slice(3, 0, -1)), | ||||
|             # dtype = int, step = -2 | ||||
|             ("int", slice(None, None, -2), slice(None, None, -2)), | ||||
|             ("int", slice(3, None, -2), slice(3, None, -2)), | ||||
|             ("int", slice(None, 1, -2), slice(None, 0, -2)), | ||||
|             ("int", slice(3, 1, -2), slice(3, 0, -2)), | ||||
|             # dtype = str, step = -1 | ||||
|             ("str", slice(None, None, -1), slice(None, None, -1)), | ||||
|             ("str", slice("d", None, -1), slice(3, None, -1)), | ||||
|             ("str", slice(None, "b", -1), slice(None, 0, -1)), | ||||
|             ("str", slice("d", "b", -1), slice(3, 0, -1)), | ||||
|             # dtype = str, step = -2 | ||||
|             ("str", slice(None, None, -2), slice(None, None, -2)), | ||||
|             ("str", slice("d", None, -2), slice(3, None, -2)), | ||||
|             ("str", slice(None, "b", -2), slice(None, 0, -2)), | ||||
|             ("str", slice("d", "b", -2), slice(3, 0, -2)), | ||||
|         ], | ||||
|     ) | ||||
|     def test_loc_slice_negative_stepsize(self, dtype, loc, iloc): | ||||
|         # GH#38071 | ||||
|         labels = { | ||||
|             "str": list("abcde"), | ||||
|             "int": range(5), | ||||
|         }[dtype] | ||||
|  | ||||
|         mi = MultiIndex.from_arrays([labels] * 2) | ||||
|         df = DataFrame(1.0, index=mi, columns=["A"]) | ||||
|  | ||||
|         SLC = pd.IndexSlice | ||||
|  | ||||
|         expected = df.iloc[iloc, :] | ||||
|         result_get_loc = df.loc[SLC[loc], :] | ||||
|         result_get_locs_level_0 = df.loc[SLC[loc, :], :] | ||||
|         result_get_locs_level_1 = df.loc[SLC[:, loc], :] | ||||
|  | ||||
|         tm.assert_frame_equal(result_get_loc, expected) | ||||
|         tm.assert_frame_equal(result_get_locs_level_0, expected) | ||||
|         tm.assert_frame_equal(result_get_locs_level_1, expected) | ||||
| @ -0,0 +1,153 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     array, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestMultiIndexSorted: | ||||
|     def test_getitem_multilevel_index_tuple_not_sorted(self): | ||||
|         index_columns = list("abc") | ||||
|         df = DataFrame( | ||||
|             [[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"] | ||||
|         ) | ||||
|         df = df.set_index(index_columns) | ||||
|         query_index = df.index[:1] | ||||
|         rs = df.loc[query_index, "data"] | ||||
|  | ||||
|         xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"]) | ||||
|         xp = Series(["x"], index=xp_idx, name="data") | ||||
|         tm.assert_series_equal(rs, xp) | ||||
|  | ||||
|     def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data): | ||||
|         frame = multiindex_dataframe_random_data | ||||
|         df = frame.sort_index(level=1).T | ||||
|  | ||||
|         # buglet with int typechecking | ||||
|         result = df.iloc[:, : np.int32(3)] | ||||
|         expected = df.reindex(columns=df.columns[:3]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("key", [None, lambda x: x]) | ||||
|     def test_frame_getitem_not_sorted2(self, key): | ||||
|         # 13431 | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "col1": ["b", "d", "b", "a"], | ||||
|                 "col2": [3, 1, 1, 2], | ||||
|                 "data": ["one", "two", "three", "four"], | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         df2 = df.set_index(["col1", "col2"]) | ||||
|         df2_original = df2.copy() | ||||
|  | ||||
|         df2.index = df2.index.set_levels(["b", "d", "a"], level="col1") | ||||
|         df2.index = df2.index.set_codes([0, 1, 0, 2], level="col1") | ||||
|         assert not df2.index.is_monotonic_increasing | ||||
|  | ||||
|         assert df2_original.index.equals(df2.index) | ||||
|         expected = df2.sort_index(key=key) | ||||
|         assert expected.index.is_monotonic_increasing | ||||
|  | ||||
|         result = df2.sort_index(level=0, key=key) | ||||
|         assert result.index.is_monotonic_increasing | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_sort_values_key(self): | ||||
|         arrays = [ | ||||
|             ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], | ||||
|             ["one", "two", "one", "two", "one", "two", "one", "two"], | ||||
|         ] | ||||
|         tuples = zip(*arrays) | ||||
|         index = MultiIndex.from_tuples(tuples) | ||||
|         index = index.sort_values(  # sort by third letter | ||||
|             key=lambda x: x.map(lambda entry: entry[2]) | ||||
|         ) | ||||
|         result = DataFrame(range(8), index=index) | ||||
|  | ||||
|         arrays = [ | ||||
|             ["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"], | ||||
|             ["one", "two", "one", "two", "one", "two", "one", "two"], | ||||
|         ] | ||||
|         tuples = zip(*arrays) | ||||
|         index = MultiIndex.from_tuples(tuples) | ||||
|         expected = DataFrame(range(8), index=index) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_argsort_with_na(self): | ||||
|         # GH48495 | ||||
|         arrays = [ | ||||
|             array([2, NA, 1], dtype="Int64"), | ||||
|             array([1, 2, 3], dtype="Int64"), | ||||
|         ] | ||||
|         index = MultiIndex.from_arrays(arrays) | ||||
|         result = index.argsort() | ||||
|         expected = np.array([2, 0, 1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_sort_values_with_na(self): | ||||
|         # GH48495 | ||||
|         arrays = [ | ||||
|             array([2, NA, 1], dtype="Int64"), | ||||
|             array([1, 2, 3], dtype="Int64"), | ||||
|         ] | ||||
|         index = MultiIndex.from_arrays(arrays) | ||||
|         index = index.sort_values() | ||||
|         result = DataFrame(range(3), index=index) | ||||
|  | ||||
|         arrays = [ | ||||
|             array([1, 2, NA], dtype="Int64"), | ||||
|             array([3, 1, 2], dtype="Int64"), | ||||
|         ] | ||||
|         index = MultiIndex.from_arrays(arrays) | ||||
|         expected = DataFrame(range(3), index=index) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data): | ||||
|         frame = multiindex_dataframe_random_data | ||||
|         df = frame.T | ||||
|         df["foo", "four"] = "foo" | ||||
|  | ||||
|         arrays = [np.array(x) for x in zip(*df.columns.values)] | ||||
|  | ||||
|         result = df["foo"] | ||||
|         result2 = df.loc[:, "foo"] | ||||
|         expected = df.reindex(columns=df.columns[arrays[0] == "foo"]) | ||||
|         expected.columns = expected.columns.droplevel(0) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         tm.assert_frame_equal(result2, expected) | ||||
|  | ||||
|         df = df.T | ||||
|         result = df.xs("foo") | ||||
|         result2 = df.loc["foo"] | ||||
|         expected = df.reindex(df.index[arrays[0] == "foo"]) | ||||
|         expected.index = expected.index.droplevel(0) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         tm.assert_frame_equal(result2, expected) | ||||
|  | ||||
|     def test_series_getitem_not_sorted(self): | ||||
|         arrays = [ | ||||
|             ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], | ||||
|             ["one", "two", "one", "two", "one", "two", "one", "two"], | ||||
|         ] | ||||
|         tuples = zip(*arrays) | ||||
|         index = MultiIndex.from_tuples(tuples) | ||||
|         s = Series(np.random.default_rng(2).standard_normal(8), index=index) | ||||
|  | ||||
|         arrays = [np.array(x) for x in zip(*index.values)] | ||||
|  | ||||
|         result = s["qux"] | ||||
|         result2 = s.loc["qux"] | ||||
|         expected = s[arrays[0] == "qux"] | ||||
|         expected.index = expected.index.droplevel(0) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         tm.assert_series_equal(result2, expected) | ||||
							
								
								
									
										252
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/test_at.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										252
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/test_at.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,252 @@ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timezone, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import InvalidIndexError | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_at_timezone(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/33544 | ||||
|     result = DataFrame({"foo": [datetime(2000, 1, 1)]}) | ||||
|     with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): | ||||
|         result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) | ||||
|     expected = DataFrame( | ||||
|         {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_selection_methods_of_assigned_col(): | ||||
|     # GH 29282 | ||||
|     df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) | ||||
|     df2 = DataFrame(data={"c": [7, 8, 9]}, index=[2, 1, 0]) | ||||
|     df["c"] = df2["c"] | ||||
|     df.at[1, "c"] = 11 | ||||
|     result = df | ||||
|     expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [9, 11, 7]}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|     result = df.at[1, "c"] | ||||
|     assert result == 11 | ||||
|  | ||||
|     result = df["c"] | ||||
|     expected = Series([9, 11, 7], name="c") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = df[["c"]] | ||||
|     expected = DataFrame({"c": [9, 11, 7]}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestAtSetItem: | ||||
|     """Tests for assigning values through the ``.at`` accessor.""" | ||||
|  | ||||
|     def test_at_setitem_item_cache_cleared(self): | ||||
|         """Values set with ``.at`` must be visible through later column lookups.""" | ||||
|         # GH#22372 Note the multi-step construction is necessary to trigger | ||||
|         #  the original bug. pandas/issues/22372#issuecomment-413345309 | ||||
|         df = DataFrame(index=[0]) | ||||
|         df["x"] = 1 | ||||
|         df["cost"] = 2 | ||||
|  | ||||
|         # accessing df["cost"] adds "cost" to the _item_cache | ||||
|         df["cost"] | ||||
|  | ||||
|         # This loc[[0]] lookup used to call _consolidate_inplace at the | ||||
|         #  BlockManager level, which failed to clear the _item_cache | ||||
|         df.loc[[0]] | ||||
|  | ||||
|         df.at[0, "x"] = 4 | ||||
|         df.at[0, "cost"] = 789 | ||||
|  | ||||
|         expected = DataFrame({"x": [4], "cost": 789}, index=[0]) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # And in particular, check that the _item_cache has updated correctly. | ||||
|         tm.assert_series_equal(df["cost"], expected["cost"]) | ||||
|  | ||||
|     def test_at_setitem_mixed_index_assignment(self): | ||||
|         """``.at`` sets by label on an index that mixes str and int labels.""" | ||||
|         # GH#19860 | ||||
|         ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) | ||||
|         ser.at["a"] = 11 | ||||
|         # label "a" is at position 0 | ||||
|         assert ser.iat[0] == 11 | ||||
|         ser.at[1] = 22 | ||||
|         # label 1 is at position 3, so the int key was treated as a label | ||||
|         assert ser.iat[3] == 22 | ||||
|  | ||||
|     def test_at_setitem_categorical_missing(self): | ||||
|         """Setting one cell of an empty categorical frame leaves the rest missing.""" | ||||
|         df = DataFrame( | ||||
|             index=range(3), columns=range(3), dtype=CategoricalDtype(["foo", "bar"]) | ||||
|         ) | ||||
|         df.at[1, 1] = "foo" | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             [ | ||||
|                 [np.nan, np.nan, np.nan], | ||||
|                 [np.nan, "foo", np.nan], | ||||
|                 [np.nan, np.nan, np.nan], | ||||
|             ], | ||||
|             dtype=CategoricalDtype(["foo", "bar"]), | ||||
|         ) | ||||
|  | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_at_setitem_multiindex(self): | ||||
|         """A first-level column key sets every matching MultiIndex column in the row.""" | ||||
|         df = DataFrame( | ||||
|             np.zeros((3, 2), dtype="int64"), | ||||
|             columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]), | ||||
|         ) | ||||
|         # "a" matches both ("a", 0) and ("a", 1) | ||||
|         df.at[0, "a"] = 10 | ||||
|         expected = DataFrame( | ||||
|             [[10, 10], [0, 0], [0, 0]], | ||||
|             columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]), | ||||
|         ) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("row", (Timestamp("2019-01-01"), "2019-01-01")) | ||||
|     def test_at_datetime_index(self, row): | ||||
|         """``.at`` setting accepts the row as a Timestamp or as its string form.""" | ||||
|         # Set float64 dtype to avoid upcast when setting .5 | ||||
|         df = DataFrame( | ||||
|             data=[[1] * 2], index=DatetimeIndex(data=["2019-01-01", "2019-01-02"]) | ||||
|         ).astype({0: "float64"}) | ||||
|         expected = DataFrame( | ||||
|             data=[[0.5, 1], [1.0, 1]], | ||||
|             index=DatetimeIndex(data=["2019-01-01", "2019-01-02"]), | ||||
|         ) | ||||
|  | ||||
|         df.at[row, 0] = 0.5 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|  | ||||
| class TestAtSetItemWithExpansion: | ||||
|     def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture): | ||||
|         # GH#25506 | ||||
|         ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture) | ||||
|         result = Series(ts) | ||||
|         result.at[1] = ts | ||||
|         expected = Series([ts, ts]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestAtWithDuplicates: | ||||
|     def test_at_with_duplicate_axes_requires_scalar_lookup(self): | ||||
|         # GH#33041 check that falling back to loc doesn't allow non-scalar | ||||
|         #  args to slip in | ||||
|  | ||||
|         arr = np.random.default_rng(2).standard_normal(6).reshape(3, 2) | ||||
|         df = DataFrame(arr, columns=["A", "A"]) | ||||
|  | ||||
|         msg = "Invalid call for scalar access" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.at[[1, 2]] | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.at[1, ["A"]] | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.at[:, "A"] | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.at[[1, 2]] = 1 | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.at[1, ["A"]] = 1 | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.at[:, "A"] = 1 | ||||
|  | ||||
|  | ||||
| class TestAtErrors: | ||||
|     # TODO: De-duplicate/parametrize | ||||
|     #  test_at_series_raises_key_error2, test_at_frame_raises_key_error2 | ||||
|  | ||||
|     def test_at_series_raises_key_error(self, indexer_al): | ||||
|         # GH#31724 .at should match .loc | ||||
|  | ||||
|         ser = Series([1, 2, 3], index=[3, 2, 1]) | ||||
|         result = indexer_al(ser)[1] | ||||
|         assert result == 3 | ||||
|  | ||||
|         with pytest.raises(KeyError, match="a"): | ||||
|             indexer_al(ser)["a"] | ||||
|  | ||||
|     def test_at_frame_raises_key_error(self, indexer_al): | ||||
|         # GH#31724 .at should match .loc | ||||
|  | ||||
|         df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) | ||||
|  | ||||
|         result = indexer_al(df)[1, 0] | ||||
|         assert result == 3 | ||||
|  | ||||
|         with pytest.raises(KeyError, match="a"): | ||||
|             indexer_al(df)["a", 0] | ||||
|  | ||||
|         with pytest.raises(KeyError, match="a"): | ||||
|             indexer_al(df)[1, "a"] | ||||
|  | ||||
|     def test_at_series_raises_key_error2(self, indexer_al): | ||||
|         # at should not fallback | ||||
|         # GH#7814 | ||||
|         # GH#31724 .at should match .loc | ||||
|         ser = Series([1, 2, 3], index=list("abc")) | ||||
|         result = indexer_al(ser)["a"] | ||||
|         assert result == 1 | ||||
|  | ||||
|         with pytest.raises(KeyError, match="^0$"): | ||||
|             indexer_al(ser)[0] | ||||
|  | ||||
|     def test_at_frame_raises_key_error2(self, indexer_al): | ||||
|         # GH#31724 .at should match .loc | ||||
|         df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) | ||||
|         result = indexer_al(df)["a", "A"] | ||||
|         assert result == 1 | ||||
|  | ||||
|         with pytest.raises(KeyError, match="^0$"): | ||||
|             indexer_al(df)["a", 0] | ||||
|  | ||||
|     def test_at_frame_multiple_columns(self): | ||||
|         # GH#48296 - at shouldn't modify multiple columns | ||||
|         df = DataFrame({"a": [1, 2], "b": [3, 4]}) | ||||
|         new_row = [6, 7] | ||||
|         with pytest.raises( | ||||
|             InvalidIndexError, | ||||
|             match=f"You can only assign a scalar value not a \\{type(new_row)}", | ||||
|         ): | ||||
|             df.at[5] = new_row | ||||
|  | ||||
|     def test_at_getitem_mixed_index_no_fallback(self): | ||||
|         # GH#19860 | ||||
|         ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) | ||||
|         with pytest.raises(KeyError, match="^0$"): | ||||
|             ser.at[0] | ||||
|         with pytest.raises(KeyError, match="^4$"): | ||||
|             ser.at[4] | ||||
|  | ||||
|     def test_at_categorical_integers(self): | ||||
|         # CategoricalIndex with integer categories that don't happen to match | ||||
|         #  the Categorical's codes | ||||
|         ci = CategoricalIndex([3, 4]) | ||||
|  | ||||
|         arr = np.arange(4).reshape(2, 2) | ||||
|         frame = DataFrame(arr, index=ci) | ||||
|  | ||||
|         for df in [frame, frame.T]: | ||||
|             for key in [0, 1]: | ||||
|                 with pytest.raises(KeyError, match=str(key)): | ||||
|                     df.at[key, key] | ||||
|  | ||||
|     def test_at_applied_for_rows(self): | ||||
|         # GH#48729 .at should raise InvalidIndexError when assigning rows | ||||
|         df = DataFrame(index=["a"], columns=["col1", "col2"]) | ||||
|         new_row = [123, 15] | ||||
|         with pytest.raises( | ||||
|             InvalidIndexError, | ||||
|             match=f"You can only assign a scalar value not a \\{type(new_row)}", | ||||
|         ): | ||||
|             df.at["a"] = new_row | ||||
| @ -0,0 +1,573 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Interval, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     option_context, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def df(): | ||||
|     return DataFrame( | ||||
|         { | ||||
|             "A": np.arange(6, dtype="int64"), | ||||
|         }, | ||||
|         index=CategoricalIndex( | ||||
|             list("aabbca"), dtype=CategoricalDtype(list("cab")), name="B" | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def df2(): | ||||
|     return DataFrame( | ||||
|         { | ||||
|             "A": np.arange(6, dtype="int64"), | ||||
|         }, | ||||
|         index=CategoricalIndex( | ||||
|             list("aabbca"), dtype=CategoricalDtype(list("cabe")), name="B" | ||||
|         ), | ||||
|     ) | ||||
|  | ||||
|  | ||||
| class TestCategoricalIndex: | ||||
|     def test_loc_scalar(self, df): | ||||
|         dtype = CategoricalDtype(list("cab")) | ||||
|         result = df.loc["a"] | ||||
|         bidx = Series(list("aaa"), name="B").astype(dtype) | ||||
|         assert bidx.dtype == dtype | ||||
|  | ||||
|         expected = DataFrame({"A": [0, 1, 5]}, index=Index(bidx)) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         df = df.copy() | ||||
|         df.loc["a"] = 20 | ||||
|         bidx2 = Series(list("aabbca"), name="B").astype(dtype) | ||||
|         assert bidx2.dtype == dtype | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "A": [20, 20, 2, 3, 4, 20], | ||||
|             }, | ||||
|             index=Index(bidx2), | ||||
|         ) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # value not in the categories | ||||
|         with pytest.raises(KeyError, match=r"^'d'$"): | ||||
|             df.loc["d"] | ||||
|  | ||||
|         df2 = df.copy() | ||||
|         expected = df2.copy() | ||||
|         expected.index = expected.index.astype(object) | ||||
|         expected.loc["d"] = 10 | ||||
|         df2.loc["d"] = 10 | ||||
|         tm.assert_frame_equal(df2, expected) | ||||
|  | ||||
|     def test_loc_setitem_with_expansion_non_category(self, df): | ||||
|         # Setting-with-expansion with a new key "d" that is not among caegories | ||||
|         df.loc["a"] = 20 | ||||
|  | ||||
|         # Setting a new row on an existing column | ||||
|         df3 = df.copy() | ||||
|         df3.loc["d", "A"] = 10 | ||||
|         bidx3 = Index(list("aabbcad"), name="B") | ||||
|         expected3 = DataFrame( | ||||
|             { | ||||
|                 "A": [20, 20, 2, 3, 4, 20, 10.0], | ||||
|             }, | ||||
|             index=Index(bidx3), | ||||
|         ) | ||||
|         tm.assert_frame_equal(df3, expected3) | ||||
|  | ||||
|         # Setting a new row _and_ new column | ||||
|         df4 = df.copy() | ||||
|         df4.loc["d", "C"] = 10 | ||||
|         expected3 = DataFrame( | ||||
|             { | ||||
|                 "A": [20, 20, 2, 3, 4, 20, np.nan], | ||||
|                 "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10], | ||||
|             }, | ||||
|             index=Index(bidx3), | ||||
|         ) | ||||
|         tm.assert_frame_equal(df4, expected3) | ||||
|  | ||||
|     def test_loc_getitem_scalar_non_category(self, df): | ||||
|         with pytest.raises(KeyError, match="^1$"): | ||||
|             df.loc[1] | ||||
|  | ||||
|     def test_slicing(self): | ||||
|         cat = Series(Categorical([1, 2, 3, 4])) | ||||
|         reverse = cat[::-1] | ||||
|         exp = np.array([4, 3, 2, 1], dtype=np.int64) | ||||
|         tm.assert_numpy_array_equal(reverse.__array__(), exp) | ||||
|  | ||||
|         df = DataFrame({"value": (np.arange(100) + 1).astype("int64")}) | ||||
|         df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) | ||||
|  | ||||
|         expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10) | ||||
|         result = df.iloc[10] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             {"value": np.arange(11, 21).astype("int64")}, | ||||
|             index=np.arange(10, 20).astype("int64"), | ||||
|         ) | ||||
|         expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) | ||||
|         result = df.iloc[10:20] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8) | ||||
|         result = df.loc[8] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_slicing_and_getting_ops(self): | ||||
|         """Check iloc/loc/iat/at/boolean-mask access on a frame with a categorical column. | ||||
|  | ||||
|         Each access form is compared against the same four expected objects | ||||
|         (a two-row frame, the "cats" column, row "j", and the scalar "b"). | ||||
|         """ | ||||
|         # systematically test the slicing operations: | ||||
|         #  for all slicing ops: | ||||
|         #   - returning a dataframe | ||||
|         #   - returning a column | ||||
|         #   - returning a row | ||||
|         #   - returning a single value | ||||
|  | ||||
|         cats = Categorical( | ||||
|             ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"] | ||||
|         ) | ||||
|         idx = Index(["h", "i", "j", "k", "l", "m", "n"]) | ||||
|         values = [1, 2, 3, 4, 5, 6, 7] | ||||
|         df = DataFrame({"cats": cats, "values": values}, index=idx) | ||||
|  | ||||
|         # the expected values | ||||
|         cats2 = Categorical(["b", "c"], categories=["a", "b", "c"]) | ||||
|         idx2 = Index(["j", "k"]) | ||||
|         values2 = [3, 4] | ||||
|  | ||||
|         # 2:4,: | "j":"k",: | ||||
|         exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2) | ||||
|  | ||||
|         # :,"cats" | :,0 | ||||
|         exp_col = Series(cats, index=idx, name="cats") | ||||
|  | ||||
|         # "j",: | 2,: | ||||
|         exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j") | ||||
|  | ||||
|         # "j","cats | 2,0 | ||||
|         exp_val = "b" | ||||
|  | ||||
|         # iloc | ||||
|         # frame | ||||
|         res_df = df.iloc[2:4, :] | ||||
|         tm.assert_frame_equal(res_df, exp_df) | ||||
|         assert isinstance(res_df["cats"].dtype, CategoricalDtype) | ||||
|  | ||||
|         # row | ||||
|         res_row = df.iloc[2, :] | ||||
|         tm.assert_series_equal(res_row, exp_row) | ||||
|         # a row is object dtype, so the categorical value comes back as a plain str | ||||
|         assert isinstance(res_row["cats"], str) | ||||
|  | ||||
|         # col | ||||
|         res_col = df.iloc[:, 0] | ||||
|         tm.assert_series_equal(res_col, exp_col) | ||||
|         assert isinstance(res_col.dtype, CategoricalDtype) | ||||
|  | ||||
|         # single value | ||||
|         res_val = df.iloc[2, 0] | ||||
|         assert res_val == exp_val | ||||
|  | ||||
|         # loc | ||||
|         # frame | ||||
|         res_df = df.loc["j":"k", :] | ||||
|         tm.assert_frame_equal(res_df, exp_df) | ||||
|         assert isinstance(res_df["cats"].dtype, CategoricalDtype) | ||||
|  | ||||
|         # row | ||||
|         res_row = df.loc["j", :] | ||||
|         tm.assert_series_equal(res_row, exp_row) | ||||
|         assert isinstance(res_row["cats"], str) | ||||
|  | ||||
|         # col | ||||
|         res_col = df.loc[:, "cats"] | ||||
|         tm.assert_series_equal(res_col, exp_col) | ||||
|         assert isinstance(res_col.dtype, CategoricalDtype) | ||||
|  | ||||
|         # single value | ||||
|         res_val = df.loc["j", "cats"] | ||||
|         assert res_val == exp_val | ||||
|  | ||||
|         # single value | ||||
|         res_val = df.loc["j", df.columns[0]] | ||||
|         assert res_val == exp_val | ||||
|  | ||||
|         # iat | ||||
|         res_val = df.iat[2, 0] | ||||
|         assert res_val == exp_val | ||||
|  | ||||
|         # at | ||||
|         res_val = df.at["j", "cats"] | ||||
|         assert res_val == exp_val | ||||
|  | ||||
|         # fancy indexing | ||||
|         exp_fancy = df.iloc[[2]] | ||||
|  | ||||
|         res_fancy = df[df["cats"] == "b"] | ||||
|         tm.assert_frame_equal(res_fancy, exp_fancy) | ||||
|         res_fancy = df[df["values"] == 3] | ||||
|         tm.assert_frame_equal(res_fancy, exp_fancy) | ||||
|  | ||||
|         # get_value | ||||
|         #  NOTE(review): this repeats the ``.at`` lookup above; the heading | ||||
|         #  presumably dates from an older ``get_value`` accessor - confirm | ||||
|         res_val = df.at["j", "cats"] | ||||
|         assert res_val == exp_val | ||||
|  | ||||
|         # i : int, slice, or sequence of integers | ||||
|         res_row = df.iloc[2] | ||||
|         tm.assert_series_equal(res_row, exp_row) | ||||
|         assert isinstance(res_row["cats"], str) | ||||
|  | ||||
|         res_df = df.iloc[slice(2, 4)] | ||||
|         tm.assert_frame_equal(res_df, exp_df) | ||||
|         assert isinstance(res_df["cats"].dtype, CategoricalDtype) | ||||
|  | ||||
|         res_df = df.iloc[[2, 3]] | ||||
|         tm.assert_frame_equal(res_df, exp_df) | ||||
|         assert isinstance(res_df["cats"].dtype, CategoricalDtype) | ||||
|  | ||||
|         res_col = df.iloc[:, 0] | ||||
|         tm.assert_series_equal(res_col, exp_col) | ||||
|         assert isinstance(res_col.dtype, CategoricalDtype) | ||||
|  | ||||
|         # selecting both columns must give back the whole frame | ||||
|         res_df = df.iloc[:, slice(0, 2)] | ||||
|         tm.assert_frame_equal(res_df, df) | ||||
|         assert isinstance(res_df["cats"].dtype, CategoricalDtype) | ||||
|  | ||||
|         res_df = df.iloc[:, [0, 1]] | ||||
|         tm.assert_frame_equal(res_df, df) | ||||
|         assert isinstance(res_df["cats"].dtype, CategoricalDtype) | ||||
|  | ||||
|     def test_slicing_doc_examples(self): | ||||
|         # GH 7918 | ||||
|         cats = Categorical( | ||||
|             ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"] | ||||
|         ) | ||||
|         idx = Index(["h", "i", "j", "k", "l", "m", "n"]) | ||||
|         values = [1, 2, 2, 2, 3, 4, 5] | ||||
|         df = DataFrame({"cats": cats, "values": values}, index=idx) | ||||
|  | ||||
|         result = df.iloc[2:4, :] | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "cats": Categorical(["b", "b"], categories=["a", "b", "c"]), | ||||
|                 "values": [2, 2], | ||||
|             }, | ||||
|             index=["j", "k"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.iloc[2:4, :].dtypes | ||||
|         expected = Series(["category", "int64"], ["cats", "values"], dtype=object) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = df.loc["h":"j", "cats"] | ||||
|         expected = Series( | ||||
|             Categorical(["a", "b", "b"], categories=["a", "b", "c"]), | ||||
|             index=["h", "i", "j"], | ||||
|             name="cats", | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = df.loc["h":"j", df.columns[0:1]] | ||||
|         expected = DataFrame( | ||||
|             {"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])}, | ||||
|             index=["h", "i", "j"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_getitem_listlike_labels(self, df): | ||||
|         # list of labels | ||||
|         result = df.loc[["c", "a"]] | ||||
|         expected = df.iloc[[4, 0, 1, 5]] | ||||
|         tm.assert_frame_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|     def test_loc_getitem_listlike_unused_category(self, df2): | ||||
|         # GH#37901 a label that is in index.categories but not in index | ||||
|         # listlike containing an element in the categories but not in the values | ||||
|         with pytest.raises(KeyError, match=re.escape("['e'] not in index")): | ||||
|             df2.loc[["a", "b", "e"]] | ||||
|  | ||||
|     def test_loc_getitem_label_unused_category(self, df2): | ||||
|         # element in the categories but not in the values | ||||
|         with pytest.raises(KeyError, match=r"^'e'$"): | ||||
|             df2.loc["e"] | ||||
|  | ||||
|     def test_loc_getitem_non_category(self, df2): | ||||
|         # not all labels in the categories | ||||
|         with pytest.raises(KeyError, match=re.escape("['d'] not in index")): | ||||
|             df2.loc[["a", "d"]] | ||||
|  | ||||
|     def test_loc_setitem_expansion_label_unused_category(self, df2): | ||||
|         # assigning with a label that is in the categories but not in the index | ||||
|         df = df2.copy() | ||||
|         df.loc["e"] = 20 | ||||
|         result = df.loc[["a", "b", "e"]] | ||||
|         exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B") | ||||
|         expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_listlike_dtypes(self): | ||||
|         # GH 11586 | ||||
|  | ||||
|         # unique categories and codes | ||||
|         index = CategoricalIndex(["a", "b", "c"]) | ||||
|         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) | ||||
|  | ||||
|         # unique slice | ||||
|         res = df.loc[["a", "b"]] | ||||
|         exp_index = CategoricalIndex(["a", "b"], categories=index.categories) | ||||
|         exp = DataFrame({"A": [1, 2], "B": [4, 5]}, index=exp_index) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # duplicated slice | ||||
|         res = df.loc[["a", "a", "b"]] | ||||
|  | ||||
|         exp_index = CategoricalIndex(["a", "a", "b"], categories=index.categories) | ||||
|         exp = DataFrame({"A": [1, 1, 2], "B": [4, 4, 5]}, index=exp_index) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         with pytest.raises(KeyError, match=re.escape("['x'] not in index")): | ||||
|             df.loc[["a", "x"]] | ||||
|  | ||||
|     def test_loc_listlike_dtypes_duplicated_categories_and_codes(self): | ||||
|         # duplicated categories and codes | ||||
|         index = CategoricalIndex(["a", "b", "a"]) | ||||
|         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) | ||||
|  | ||||
|         # unique slice | ||||
|         res = df.loc[["a", "b"]] | ||||
|         exp = DataFrame( | ||||
|             {"A": [1, 3, 2], "B": [4, 6, 5]}, index=CategoricalIndex(["a", "a", "b"]) | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # duplicated slice | ||||
|         res = df.loc[["a", "a", "b"]] | ||||
|         exp = DataFrame( | ||||
|             {"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]}, | ||||
|             index=CategoricalIndex(["a", "a", "a", "a", "b"]), | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         with pytest.raises(KeyError, match=re.escape("['x'] not in index")): | ||||
|             df.loc[["a", "x"]] | ||||
|  | ||||
|     def test_loc_listlike_dtypes_unused_category(self): | ||||
|         # contains unused category | ||||
|         index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) | ||||
|         df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) | ||||
|  | ||||
|         res = df.loc[["a", "b"]] | ||||
|         exp = DataFrame( | ||||
|             {"A": [1, 3, 2], "B": [5, 7, 6]}, | ||||
|             index=CategoricalIndex(["a", "a", "b"], categories=list("abcde")), | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         # duplicated slice | ||||
|         res = df.loc[["a", "a", "b"]] | ||||
|         exp = DataFrame( | ||||
|             {"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]}, | ||||
|             index=CategoricalIndex(["a", "a", "a", "a", "b"], categories=list("abcde")), | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp, check_index_type=True) | ||||
|  | ||||
|         with pytest.raises(KeyError, match=re.escape("['x'] not in index")): | ||||
|             df.loc[["a", "x"]] | ||||
|  | ||||
|     def test_loc_getitem_listlike_unused_category_raises_keyerror(self): | ||||
|         # key that is an *unused* category raises | ||||
|         index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) | ||||
|         df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) | ||||
|  | ||||
|         with pytest.raises(KeyError, match="e"): | ||||
|             # For comparison, check the scalar behavior | ||||
|             df.loc["e"] | ||||
|  | ||||
|         with pytest.raises(KeyError, match=re.escape("['e'] not in index")): | ||||
|             df.loc[["a", "e"]] | ||||
|  | ||||
|     def test_ix_categorical_index(self): | ||||
|         # GH 12531 | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((3, 3)), | ||||
|             index=list("ABC"), | ||||
|             columns=list("XYZ"), | ||||
|         ) | ||||
|         cdf = df.copy() | ||||
|         cdf.index = CategoricalIndex(df.index) | ||||
|         cdf.columns = CategoricalIndex(df.columns) | ||||
|  | ||||
|         expect = Series(df.loc["A", :], index=cdf.columns, name="A") | ||||
|         tm.assert_series_equal(cdf.loc["A", :], expect) | ||||
|  | ||||
|         expect = Series(df.loc[:, "X"], index=cdf.index, name="X") | ||||
|         tm.assert_series_equal(cdf.loc[:, "X"], expect) | ||||
|  | ||||
|         exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"]) | ||||
|         expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index) | ||||
|         tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect) | ||||
|  | ||||
|         exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"]) | ||||
|         expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns) | ||||
|         tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))] | ||||
|     ) | ||||
|     def test_ix_categorical_index_non_unique(self, infer_string): | ||||
|         # non-unique | ||||
|         with option_context("future.infer_string", infer_string): | ||||
|             df = DataFrame( | ||||
|                 np.random.default_rng(2).standard_normal((3, 3)), | ||||
|                 index=list("ABA"), | ||||
|                 columns=list("XYX"), | ||||
|             ) | ||||
|             cdf = df.copy() | ||||
|             cdf.index = CategoricalIndex(df.index) | ||||
|             cdf.columns = CategoricalIndex(df.columns) | ||||
|  | ||||
|             exp_index = CategoricalIndex(list("AA"), categories=["A", "B"]) | ||||
|             expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index) | ||||
|             tm.assert_frame_equal(cdf.loc["A", :], expect) | ||||
|  | ||||
|             exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"]) | ||||
|             expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns) | ||||
|             tm.assert_frame_equal(cdf.loc[:, "X"], expect) | ||||
|  | ||||
|             expect = DataFrame( | ||||
|                 df.loc[["A", "B"], :], | ||||
|                 columns=cdf.columns, | ||||
|                 index=CategoricalIndex(list("AAB")), | ||||
|             ) | ||||
|             tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect) | ||||
|  | ||||
|             expect = DataFrame( | ||||
|                 df.loc[:, ["X", "Y"]], | ||||
|                 index=cdf.index, | ||||
|                 columns=CategoricalIndex(list("XXY")), | ||||
|             ) | ||||
|             tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) | ||||
|  | ||||
|     def test_loc_slice(self, df): | ||||
|         # GH9748 | ||||
|         msg = ( | ||||
|             "cannot do slice indexing on CategoricalIndex with these " | ||||
|             r"indexers \[1\] of type int" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             df.loc[1:5] | ||||
|  | ||||
|         result = df.loc["b":"c"] | ||||
|         expected = df.iloc[[2, 3, 4]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_loc_and_at_with_categorical_index(self): | ||||
|         # GH 20629 | ||||
|         df = DataFrame( | ||||
|             [[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"]) | ||||
|         ) | ||||
|  | ||||
|         s = df[0] | ||||
|         assert s.loc["A"] == 1 | ||||
|         assert s.at["A"] == 1 | ||||
|  | ||||
|         assert df.loc["B", 1] == 4 | ||||
|         assert df.at["B", 1] == 4 | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx_values", | ||||
|         [ | ||||
|             # python types | ||||
|             [1, 2, 3], | ||||
|             [-1, -2, -3], | ||||
|             [1.5, 2.5, 3.5], | ||||
|             [-1.5, -2.5, -3.5], | ||||
|             # numpy int/uint | ||||
|             *(np.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_NUMPY_DTYPES), | ||||
|             # numpy floats | ||||
|             *(np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in tm.FLOAT_NUMPY_DTYPES), | ||||
|             # numpy object | ||||
|             np.array([1, "b", 3.5], dtype=object), | ||||
|             # pandas scalars | ||||
|             [Interval(1, 4), Interval(4, 6), Interval(6, 9)], | ||||
|             [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)], | ||||
|             [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")], | ||||
|             # pandas Integer arrays | ||||
|             *(pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES), | ||||
|             # other pandas arrays | ||||
|             pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array, | ||||
|             pd.date_range("2019-01-01", periods=3).array, | ||||
|             pd.timedelta_range(start="1d", periods=3).array, | ||||
|         ], | ||||
|     ) | ||||
|     def test_loc_getitem_with_non_string_categories(self, idx_values, ordered): | ||||
|         """``loc`` get/set with scalar, list and slice keys on non-string categories. | ||||
|  | ||||
|         ``idx_values`` holds three distinct values used as the index labels. | ||||
|         ``ordered`` is not defined in this file's visible part - presumably a | ||||
|         shared fixture yielding True/False; confirm against conftest. | ||||
|         """ | ||||
|         # GH-17569 | ||||
|         cat_idx = CategoricalIndex(idx_values, ordered=ordered) | ||||
|         df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx) | ||||
|         # slice from the first label to the second; the slice checks below | ||||
|         #  expect it to cover both of those rows | ||||
|         sl = slice(idx_values[0], idx_values[1]) | ||||
|  | ||||
|         # scalar selection | ||||
|         result = df.loc[idx_values[0]] | ||||
|         expected = Series(["foo"], index=["A"], name=idx_values[0]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # list selection | ||||
|         result = df.loc[idx_values[:2]] | ||||
|         expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # slice selection | ||||
|         result = df.loc[sl] | ||||
|         expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # scalar assignment | ||||
|         result = df.copy() | ||||
|         result.loc[idx_values[0]] = "qux" | ||||
|         expected = DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # list assignment | ||||
|         result = df.copy() | ||||
|         result.loc[idx_values[:2], "A"] = ["qux", "qux2"] | ||||
|         expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # slice assignment | ||||
|         result = df.copy() | ||||
|         result.loc[sl, "A"] = ["qux", "qux2"] | ||||
|         expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_getitem_categorical_with_nan(self): | ||||
|         # GH#41933 | ||||
|         ci = CategoricalIndex(["A", "B", np.nan]) | ||||
|  | ||||
|         ser = Series(range(3), index=ci) | ||||
|  | ||||
|         assert ser[np.nan] == 2 | ||||
|         assert ser.loc[np.nan] == 2 | ||||
|  | ||||
|         df = DataFrame(ser) | ||||
|         assert df.loc[np.nan, 0] == 2 | ||||
|         assert df.loc[np.nan][0] == 2 | ||||
| @ -0,0 +1,647 @@ | ||||
| from string import ascii_letters | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import ( | ||||
|     SettingWithCopyError, | ||||
|     SettingWithCopyWarning, | ||||
| ) | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     option_context, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| # Pattern passed as ``match=`` to ``pytest.raises(SettingWithCopyError, ...)`` | ||||
| # by the tests in this module. | ||||
| msg = "A value is trying to be set on a copy of a slice from a DataFrame" | ||||
|  | ||||
|  | ||||
| def random_text(nobs=100): | ||||
|     # Construct a DataFrame where each row is a random slice from 'letters' | ||||
|     idxs = np.random.default_rng(2).integers(len(ascii_letters), size=(nobs, 2)) | ||||
|     idxs.sort(axis=1) | ||||
|     strings = [ascii_letters[x[0] : x[1]] for x in idxs] | ||||
|  | ||||
|     return DataFrame(strings, columns=["letters"]) | ||||
|  | ||||
|  | ||||
| class TestCaching: | ||||
|     """Tests of setitem behaviour around the DataFrame item cache.""" | ||||
|  | ||||
|     def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): | ||||
|         """ | ||||
|         Chained ``df["bb"].iloc[0] = 0.17`` after the "bb" column was accessed. | ||||
|  | ||||
|         Without copy-on-write the parent frame shows 0.17 afterwards; with | ||||
|         copy-on-write it still shows the original 2.2. | ||||
|         """ | ||||
|         # this is chained assignment, but will 'work' | ||||
|         with option_context("chained_assignment", None): | ||||
|             # #3970 | ||||
|             df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5}) | ||||
|  | ||||
|             # Creates a second float block | ||||
|             df["cc"] = 0.0 | ||||
|  | ||||
|             # caches a reference to the 'bb' series | ||||
|             df["bb"] | ||||
|  | ||||
|             # Assignment to wrong series | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["bb"].iloc[0] = 0.17 | ||||
|             df._clear_item_cache() | ||||
|             if not using_copy_on_write: | ||||
|                 tm.assert_almost_equal(df["bb"][0], 0.17) | ||||
|             else: | ||||
|                 # with copy-on-write, parent is not mutated with chained assignment | ||||
|                 tm.assert_almost_equal(df["bb"][0], 2.2) | ||||
|  | ||||
|     @pytest.mark.parametrize("do_ref", [True, False]) | ||||
|     def test_setitem_cache_updating(self, do_ref): | ||||
|         """ | ||||
|         Setting ``df.loc[7, "c"]`` on a 7-row frame, optionally after a read. | ||||
|  | ||||
|         Both the pre-existing value at row 0 and the newly set value at row 7 | ||||
|         must be readable afterwards, whether or not ``df.loc[0, "c"]`` was | ||||
|         accessed first (``do_ref``). | ||||
|         """ | ||||
|         # GH 5424 | ||||
|         cont = ["one", "two", "three", "four", "five", "six", "seven"] | ||||
|  | ||||
|         df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)}) | ||||
|  | ||||
|         # ref the cache | ||||
|         if do_ref: | ||||
|             df.loc[0, "c"] | ||||
|  | ||||
|         # set it | ||||
|         df.loc[7, "c"] = 1 | ||||
|  | ||||
|         assert df.loc[0, "c"] == 0.0 | ||||
|         assert df.loc[7, "c"] == 1.0 | ||||
|  | ||||
|     def test_setitem_cache_updating_slices( | ||||
|         self, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         Accumulate values into a date-sliced column in three different ways. | ||||
|  | ||||
|         The ``.loc`` read/add/write form and the ``.loc ... +=`` form must both | ||||
|         produce ``expected``. The chained ``out[col][six:eix] = v`` form must | ||||
|         produce ``expected`` without copy-on-write and leave ``out`` unchanged | ||||
|         with copy-on-write. | ||||
|         """ | ||||
|         # GH 7084 | ||||
|         # not updating cache on series setting with slices | ||||
|         expected = DataFrame( | ||||
|             {"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014") | ||||
|         ) | ||||
|         out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) | ||||
|         df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]}) | ||||
|  | ||||
|         # loop through df to update out | ||||
|         six = Timestamp("5/7/2014") | ||||
|         eix = Timestamp("5/9/2014") | ||||
|         for ix, row in df.iterrows(): | ||||
|             out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"] | ||||
|  | ||||
|         tm.assert_frame_equal(out, expected) | ||||
|         tm.assert_series_equal(out["A"], expected["A"]) | ||||
|  | ||||
|         # try via a chain indexing | ||||
|         # this actually works | ||||
|         out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) | ||||
|         out_original = out.copy() | ||||
|         for ix, row in df.iterrows(): | ||||
|             v = out[row["C"]][six:eix] + row["D"] | ||||
|             # NOTE(review): the positional flag presumably says whether a | ||||
|             # warning is expected on this iteration - confirm against | ||||
|             # tm.raises_chained_assignment_error | ||||
|             with tm.raises_chained_assignment_error( | ||||
|                 (ix == 0) or warn_copy_on_write or using_copy_on_write | ||||
|             ): | ||||
|                 out[row["C"]][six:eix] = v | ||||
|  | ||||
|         if not using_copy_on_write: | ||||
|             tm.assert_frame_equal(out, expected) | ||||
|             tm.assert_series_equal(out["A"], expected["A"]) | ||||
|         else: | ||||
|             tm.assert_frame_equal(out, out_original) | ||||
|             tm.assert_series_equal(out["A"], out_original["A"]) | ||||
|  | ||||
|         # same accumulation again, this time with in-place ``+=`` through .loc | ||||
|         out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) | ||||
|         for ix, row in df.iterrows(): | ||||
|             out.loc[six:eix, row["C"]] += row["D"] | ||||
|  | ||||
|         tm.assert_frame_equal(out, expected) | ||||
|         tm.assert_series_equal(out["A"], expected["A"]) | ||||
|  | ||||
|     def test_altering_series_clears_parent_cache( | ||||
|         self, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         Enlarging a Series obtained via ``df["A"]`` must not leak into ``df``. | ||||
|  | ||||
|         After ``ser["c"] = 5`` the column must be absent from | ||||
|         ``df._item_cache``, ``df["A"]`` must be a different object from | ||||
|         ``ser``, and it must still have its original length of 2. | ||||
|         """ | ||||
|         # GH #33675 | ||||
|         df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) | ||||
|         ser = df["A"] | ||||
|  | ||||
|         if using_copy_on_write or warn_copy_on_write: | ||||
|             assert "A" not in df._item_cache | ||||
|         else: | ||||
|             assert "A" in df._item_cache | ||||
|  | ||||
|         # Adding a new entry to ser swaps in a new array, so "A" needs to | ||||
|         #  be removed from df._item_cache | ||||
|         ser["c"] = 5 | ||||
|         assert len(ser) == 3 | ||||
|         assert "A" not in df._item_cache | ||||
|         assert df["A"] is not ser | ||||
|         assert len(df["A"]) == 2 | ||||
|  | ||||
|  | ||||
| class TestChaining: | ||||
|     def test_setitem_chained_setfault(self, using_copy_on_write): | ||||
|         """ | ||||
|         Chained assignment (mask setitem and ``.iloc``) on a string column. | ||||
|  | ||||
|         With copy-on-write the parent frame must be left unchanged; otherwise | ||||
|         the assigned value must show up in the parent. | ||||
|         """ | ||||
|         # GH6026 | ||||
|         data = ["right", "left", "left", "left", "right", "left", "timeout"] | ||||
|         # same as `data`, with the final "timeout" replaced by "none" | ||||
|         mdata = ["right", "left", "left", "left", "right", "left", "none"] | ||||
|  | ||||
|         # frame built from an ndarray | ||||
|         df = DataFrame({"response": np.array(data)}) | ||||
|         mask = df.response == "timeout" | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df.response[mask] = "none" | ||||
|         if using_copy_on_write: | ||||
|             tm.assert_frame_equal(df, DataFrame({"response": data})) | ||||
|         else: | ||||
|             tm.assert_frame_equal(df, DataFrame({"response": mdata})) | ||||
|  | ||||
|         # frame built from a numpy record array | ||||
|         recarray = np.rec.fromarrays([data], names=["response"]) | ||||
|         df = DataFrame(recarray) | ||||
|         mask = df.response == "timeout" | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df.response[mask] = "none" | ||||
|         if using_copy_on_write: | ||||
|             tm.assert_frame_equal(df, DataFrame({"response": data})) | ||||
|         else: | ||||
|             tm.assert_frame_equal(df, DataFrame({"response": mdata})) | ||||
|  | ||||
|         # two-column frame; only "response" may change | ||||
|         df = DataFrame({"response": data, "response1": data}) | ||||
|         df_original = df.copy() | ||||
|         mask = df.response == "timeout" | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df.response[mask] = "none" | ||||
|         if using_copy_on_write: | ||||
|             tm.assert_frame_equal(df, df_original) | ||||
|         else: | ||||
|             tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) | ||||
|  | ||||
|         # GH 6056 | ||||
|         # NOTE: this `expected` is reassigned in both branches below before use | ||||
|         expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) | ||||
|         df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["A"].iloc[0] = np.nan | ||||
|         if using_copy_on_write: | ||||
|             expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) | ||||
|         else: | ||||
|             expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) | ||||
|         result = df.head() | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # same check with attribute access; reuses `expected` chosen above | ||||
|         df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df.A.iloc[0] = np.nan | ||||
|         result = df.head() | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment(self, using_copy_on_write): | ||||
|         """ | ||||
|         ``df["A"][i] = value`` on an all-int64 frame with the "raise" option. | ||||
|  | ||||
|         With copy-on-write the frame must stay equal to its original copy; | ||||
|         otherwise both writes must be visible in the frame. | ||||
|         """ | ||||
|         with option_context("chained_assignment", "raise"): | ||||
|             # work with the chain | ||||
|             expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB")) | ||||
|             df = DataFrame( | ||||
|                 np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64" | ||||
|             ) | ||||
|             df_original = df.copy() | ||||
|             assert df._is_copy is None | ||||
|  | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["A"][0] = -5 | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["A"][1] = -6 | ||||
|             if using_copy_on_write: | ||||
|                 tm.assert_frame_equal(df, df_original) | ||||
|             else: | ||||
|                 tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_raises( | ||||
|         self, using_array_manager, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         ``df["A"][i] = value`` on a frame with one int64 and one float64 column. | ||||
|  | ||||
|         Four branches, selected by the fixtures: copy-on-write (frame stays | ||||
|         unchanged), the ``warn_copy_on_write`` mode, the default mode without | ||||
|         ArrayManager (``SettingWithCopyError`` expected) and ArrayManager | ||||
|         (both writes reach the frame). | ||||
|         """ | ||||
|         # test with the chaining | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "A": Series(range(2), dtype="int64"), | ||||
|                 "B": np.array(np.arange(2, 4), dtype=np.float64), | ||||
|             } | ||||
|         ) | ||||
|         df_original = df.copy() | ||||
|         assert df._is_copy is None | ||||
|  | ||||
|         if using_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["A"][0] = -5 | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["A"][1] = -6 | ||||
|             tm.assert_frame_equal(df, df_original) | ||||
|         elif warn_copy_on_write: | ||||
|             # no check of the resulting frame in this branch | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["A"][0] = -5 | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["A"][1] = np.nan | ||||
|         elif not using_array_manager: | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 with tm.raises_chained_assignment_error(): | ||||
|                     df["A"][0] = -5 | ||||
|  | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 with tm.raises_chained_assignment_error(): | ||||
|                     df["A"][1] = np.nan | ||||
|  | ||||
|             assert df["A"]._is_copy is None | ||||
|         else: | ||||
|             # INFO(ArrayManager) for ArrayManager it doesn't matter that it's | ||||
|             # a mixed dataframe | ||||
|             df["A"][0] = -5 | ||||
|             df["A"][1] = -6 | ||||
|             expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB")) | ||||
|             expected["B"] = expected["B"].astype("float64") | ||||
|             tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_fails( | ||||
|         self, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         ``df.loc[0]["A"] = -5`` must be flagged as chained assignment. | ||||
|  | ||||
|         Under the copy-on-write fixtures this is checked with | ||||
|         ``tm.raises_chained_assignment_error``; otherwise a | ||||
|         ``SettingWithCopyError`` matching ``msg`` is expected. | ||||
|         """ | ||||
|         # Using a copy (the chain), fails | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "A": Series(range(2), dtype="int64"), | ||||
|                 "B": np.array(np.arange(2, 4), dtype=np.float64), | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         if using_copy_on_write or warn_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df.loc[0]["A"] = -5 | ||||
|         else: | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 df.loc[0]["A"] = -5 | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_doc_example( | ||||
|         self, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         ``df[mask]["c"] = 42`` (boolean-mask selection, then column setitem). | ||||
|  | ||||
|         Must be flagged as chained assignment: via | ||||
|         ``tm.raises_chained_assignment_error`` under the copy-on-write | ||||
|         fixtures, via ``SettingWithCopyError`` otherwise. | ||||
|         """ | ||||
|         # Doc example | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "a": ["one", "one", "two", "three", "two", "one", "six"], | ||||
|                 "c": Series(range(7), dtype="int64"), | ||||
|             } | ||||
|         ) | ||||
|         assert df._is_copy is None | ||||
|  | ||||
|         # boolean mask of the rows whose "a" value starts with "o" | ||||
|         indexer = df.a.str.startswith("o") | ||||
|         if using_copy_on_write or warn_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df[indexer]["c"] = 42 | ||||
|         else: | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 df[indexer]["c"] = 42 | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_object_dtype( | ||||
|         self, using_array_manager, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         Chained assignment of an int into an object-dtype column. | ||||
|  | ||||
|         ``expected`` is the frame with ``"A"[0]`` replaced by 111. With | ||||
|         copy-on-write the frame must stay unchanged; in the other branches the | ||||
|         frame must end up equal to ``expected`` (in the default branch only | ||||
|         after the non-chained ``df.loc[0, "A"] = 111``). | ||||
|         """ | ||||
|         expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) | ||||
|         df = DataFrame( | ||||
|             {"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]} | ||||
|         ) | ||||
|         df_original = df.copy() | ||||
|  | ||||
|         # row-then-column chain: only checked outside the copy-on-write modes | ||||
|         if not using_copy_on_write and not warn_copy_on_write: | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 df.loc[0]["A"] = 111 | ||||
|  | ||||
|         if using_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["A"][0] = 111 | ||||
|             tm.assert_frame_equal(df, df_original) | ||||
|         elif warn_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["A"][0] = 111 | ||||
|             tm.assert_frame_equal(df, expected) | ||||
|         elif not using_array_manager: | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 with tm.raises_chained_assignment_error(): | ||||
|                     df["A"][0] = 111 | ||||
|  | ||||
|             # the single-call .loc form is the non-chained way to set the value | ||||
|             df.loc[0, "A"] = 111 | ||||
|             tm.assert_frame_equal(df, expected) | ||||
|         else: | ||||
|             # INFO(ArrayManager) for ArrayManager it doesn't matter that it's | ||||
|             # a mixed dataframe | ||||
|             df["A"][0] = 111 | ||||
|             tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_is_copy_pickle(self): | ||||
|         """ | ||||
|         A frame round-tripped through pickle accepts column assignment. | ||||
|  | ||||
|         There is no explicit assertion on ``df2``; the test passes when the | ||||
|         two column assignments complete without raising. | ||||
|         """ | ||||
|         # gh-5475: Make sure that is_copy is picked up on reconstruction | ||||
|         df = DataFrame({"A": [1, 2]}) | ||||
|         assert df._is_copy is None | ||||
|  | ||||
|         with tm.ensure_clean("__tmp__pickle") as path: | ||||
|             df.to_pickle(path) | ||||
|             df2 = pd.read_pickle(path) | ||||
|             # the same assignment is performed twice | ||||
|             df2["B"] = df2["A"] | ||||
|             df2["B"] = df2["A"] | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_setting_entire_column(self): | ||||
|         """ | ||||
|         Replacing a whole column of an explicitly copied frame must not raise. | ||||
|  | ||||
|         Also checks that list-based ``iloc`` selections carry a non-None | ||||
|         ``_is_copy`` while an explicit ``.copy()`` resets it to None. | ||||
|         """ | ||||
|         # gh-5597: a spurious raise as we are setting the entire column here | ||||
|  | ||||
|         df = random_text(100000) | ||||
|  | ||||
|         # Always a copy | ||||
|         x = df.iloc[[0, 1, 2]] | ||||
|         assert x._is_copy is not None | ||||
|  | ||||
|         x = df.iloc[[0, 1, 2, 4]] | ||||
|         assert x._is_copy is not None | ||||
|  | ||||
|         # Explicitly copy | ||||
|         indexer = df.letters.apply(lambda x: len(x) > 10) | ||||
|         df = df.loc[indexer].copy() | ||||
|  | ||||
|         assert df._is_copy is None | ||||
|         df["letters"] = df["letters"].apply(str.lower) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_implicit_take(self): | ||||
|         """ | ||||
|         Replacing a whole column of a mask-filtered frame must not raise. | ||||
|  | ||||
|         Unlike the explicit-copy case, ``_is_copy`` is expected to be set on | ||||
|         the filtered frame before the assignment. | ||||
|         """ | ||||
|         # Implicitly take | ||||
|         df = random_text(100000) | ||||
|         indexer = df.letters.apply(lambda x: len(x) > 10) | ||||
|         df = df.loc[indexer] | ||||
|  | ||||
|         assert df._is_copy is not None | ||||
|         df["letters"] = df["letters"].apply(str.lower) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_implicit_take2( | ||||
|         self, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         Track ``_is_copy`` on a mask-filtered frame across two kinds of setitem. | ||||
|  | ||||
|         ``df.loc[:, "letters"] = ...`` must leave ``_is_copy`` set, while | ||||
|         ``df["letters"] = ...`` must reset it to None. Skipped under the | ||||
|         copy-on-write fixtures. | ||||
|         """ | ||||
|         if using_copy_on_write or warn_copy_on_write: | ||||
|             pytest.skip("_is_copy is not always set for CoW") | ||||
|         # Implicitly take 2 | ||||
|         df = random_text(100000) | ||||
|         indexer = df.letters.apply(lambda x: len(x) > 10) | ||||
|  | ||||
|         df = df.loc[indexer] | ||||
|         assert df._is_copy is not None | ||||
|         df.loc[:, "letters"] = df["letters"].apply(str.lower) | ||||
|  | ||||
|         # with the enforcement of #45333 in 2.0, the .loc[:, letters] setting | ||||
|         #  is inplace, so df._is_copy remains non-None. | ||||
|         assert df._is_copy is not None | ||||
|  | ||||
|         df["letters"] = df["letters"].apply(str.lower) | ||||
|         assert df._is_copy is None | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_str(self): | ||||
|         """ | ||||
|         Masked assignment through a single ``.loc[mask, column]`` call. | ||||
|  | ||||
|         There is no explicit assertion; the test passes when the assignment | ||||
|         completes without raising. | ||||
|         """ | ||||
|         df = random_text(100000) | ||||
|         indexer = df.letters.apply(lambda x: len(x) > 10) | ||||
|         df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_is_copy(self): | ||||
|         """``dropna()`` on a frame without missing values leaves ``_is_copy`` None.""" | ||||
|         # an identical take, so no copy | ||||
|         df = DataFrame({"a": [1]}).dropna() | ||||
|         assert df._is_copy is None | ||||
|         # in-place update of the column must run without raising | ||||
|         df["a"] += 1 | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_sorting(self): | ||||
|         """ | ||||
|         Sorting a column selected from a frame gives a reproducible result. | ||||
|  | ||||
|         The sorted first column must compare equal whether it is selected | ||||
|         again with ``iloc[:, 0]`` or with ``df[0]``. | ||||
|         """ | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) | ||||
|         ser = df.iloc[:, 0].sort_values() | ||||
|  | ||||
|         tm.assert_series_equal(ser, df.iloc[:, 0].sort_values()) | ||||
|         tm.assert_series_equal(ser, df[0].sort_values()) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_false_positives(self): | ||||
|         """ | ||||
|         Whole-column reassignment interleaved with ``str(df)`` must not raise. | ||||
|  | ||||
|         The sequence also includes a boolean-mask filter between the two | ||||
|         column assignments; there is no explicit assertion. | ||||
|         """ | ||||
|         # see gh-6025: false positives | ||||
|         df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]}) | ||||
|         str(df) | ||||
|  | ||||
|         df["column1"] = df["column1"] + "b" | ||||
|         str(df) | ||||
|  | ||||
|         # rebind `df` to a filtered selection before assigning again | ||||
|         df = df[df["column2"] != 8] | ||||
|         str(df) | ||||
|  | ||||
|         df["column1"] = df["column1"] + "c" | ||||
|         str(df) | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_undefined_column( | ||||
|         self, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         ``df.iloc[0:5]["group"] = "a"`` must be flagged as chained assignment. | ||||
|  | ||||
|         With copy-on-write the frame must additionally stay unchanged; in the | ||||
|         default mode a ``SettingWithCopyError`` is expected. | ||||
|         """ | ||||
|         # from SO: | ||||
|         # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc | ||||
|         df = DataFrame(np.arange(0, 9), columns=["count"]) | ||||
|         df["group"] = "b" | ||||
|         df_original = df.copy() | ||||
|  | ||||
|         if using_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df.iloc[0:5]["group"] = "a" | ||||
|             tm.assert_frame_equal(df, df_original) | ||||
|         elif warn_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df.iloc[0:5]["group"] = "a" | ||||
|         else: | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 with tm.raises_chained_assignment_error(): | ||||
|                     df.iloc[0:5]["group"] = "a" | ||||
|  | ||||
|     @pytest.mark.arm_slow | ||||
|     def test_detect_chained_assignment_changing_dtype( | ||||
|         self, using_array_manager, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         Chained assignment of a string into string ("D") and int64 ("C") columns. | ||||
|  | ||||
|         Covers the row-then-column chain ``df.loc[2][col] = ...`` and the | ||||
|         column-then-row chain ``df["C"][2] = ...``. | ||||
|         """ | ||||
|         # Mixed type setting but same dtype & changing dtype | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "A": date_range("20130101", periods=5), | ||||
|                 "B": np.random.default_rng(2).standard_normal(5), | ||||
|                 "C": np.arange(5, dtype="int64"), | ||||
|                 "D": ["a", "b", "c", "d", "e"], | ||||
|             } | ||||
|         ) | ||||
|         df_original = df.copy() | ||||
|  | ||||
|         if using_copy_on_write or warn_copy_on_write: | ||||
|             # row-then-column chain: the frame must stay unchanged | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df.loc[2]["D"] = "foo" | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df.loc[2]["C"] = "foo" | ||||
|             tm.assert_frame_equal(df, df_original) | ||||
|             # column-then-row chain: a FutureWarning is additionally allowed | ||||
|             with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)): | ||||
|                 df["C"][2] = "foo" | ||||
|             if using_copy_on_write: | ||||
|                 tm.assert_frame_equal(df, df_original) | ||||
|             else: | ||||
|                 assert df.loc[2, "C"] == "foo" | ||||
|         else: | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 df.loc[2]["D"] = "foo" | ||||
|  | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 df.loc[2]["C"] = "foo" | ||||
|  | ||||
|             if not using_array_manager: | ||||
|                 with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                     with tm.raises_chained_assignment_error(): | ||||
|                         df["C"][2] = "foo" | ||||
|             else: | ||||
|                 # INFO(ArrayManager) for ArrayManager it doesn't matter if it's | ||||
|                 # changing the dtype or not | ||||
|                 df["C"][2] = "foo" | ||||
|                 assert df.loc[2, "C"] == "foo" | ||||
|  | ||||
|     def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write): | ||||
|         """ | ||||
|         ``df[["c"]][mask] = ...`` assigns into a list-selected sub-frame. | ||||
|  | ||||
|         Must be flagged as chained assignment; with copy-on-write the parent | ||||
|         frame must additionally stay unchanged. | ||||
|         """ | ||||
|         # operating on a copy | ||||
|         df = DataFrame( | ||||
|             {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} | ||||
|         ) | ||||
|         df_original = df.copy() | ||||
|         # True only for the row whose "c" value is missing | ||||
|         mask = pd.isna(df.c) | ||||
|  | ||||
|         if using_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df[["c"]][mask] = df[["b"]][mask] | ||||
|             tm.assert_frame_equal(df, df_original) | ||||
|         elif warn_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df[["c"]][mask] = df[["b"]][mask] | ||||
|         else: | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 df[["c"]][mask] = df[["b"]][mask] | ||||
|  | ||||
|     def test_setting_with_copy_bug_no_warning(self): | ||||
|         """ | ||||
|         Adding a column to a frame obtained with ``df1[["x"]]`` must not raise. | ||||
|  | ||||
|         There is no explicit assertion; the test passes when the assignment | ||||
|         completes. | ||||
|         """ | ||||
|         # invalid warning as we are returning a new object | ||||
|         # GH 8730 | ||||
|         df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])}) | ||||
|         df2 = df1[["x"]] | ||||
|  | ||||
|         # this should not raise | ||||
|         df2["y"] = ["g", "h", "i"] | ||||
|  | ||||
|     def test_detect_chained_assignment_warnings_errors( | ||||
|         self, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         The ``chained_assignment`` option selects warning versus exception. | ||||
|  | ||||
|         Outside the copy-on-write modes, "warn" must produce a | ||||
|         ``SettingWithCopyWarning`` and "raise" a ``SettingWithCopyError`` for | ||||
|         ``df.loc[0]["A"] = 111``. | ||||
|         """ | ||||
|         df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) | ||||
|         if using_copy_on_write or warn_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df.loc[0]["A"] = 111 | ||||
|             # the option-based checks below are not run in these modes | ||||
|             return | ||||
|  | ||||
|         with option_context("chained_assignment", "warn"): | ||||
|             with tm.assert_produces_warning(SettingWithCopyWarning): | ||||
|                 df.loc[0]["A"] = 111 | ||||
|  | ||||
|         with option_context("chained_assignment", "raise"): | ||||
|             with pytest.raises(SettingWithCopyError, match=msg): | ||||
|                 df.loc[0]["A"] = 111 | ||||
|  | ||||
|     @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})]) | ||||
|     def test_detect_chained_assignment_warning_stacklevel( | ||||
|         self, rhs, using_copy_on_write, warn_copy_on_write | ||||
|     ): | ||||
|         """ | ||||
|         The ``SettingWithCopyWarning`` must be attributed to this test file. | ||||
|  | ||||
|         ``rhs`` is either a scalar or a DataFrame. Under the copy-on-write | ||||
|         fixtures the assignment runs outside any warning check and the | ||||
|         original frame must stay unchanged. | ||||
|         """ | ||||
|         # GH#42570 | ||||
|         df = DataFrame(np.arange(25).reshape(5, 5)) | ||||
|         df_original = df.copy() | ||||
|         chained = df.loc[:3] | ||||
|         with option_context("chained_assignment", "warn"): | ||||
|             if not using_copy_on_write and not warn_copy_on_write: | ||||
|                 with tm.assert_produces_warning(SettingWithCopyWarning) as t: | ||||
|                     chained[2] = rhs | ||||
|                     # the first recorded warning must point at this file | ||||
|                     assert t[0].filename == __file__ | ||||
|             else: | ||||
|                 # INFO(CoW) no warning, and original dataframe not changed | ||||
|                 chained[2] = rhs | ||||
|                 tm.assert_frame_equal(df, df_original) | ||||
|  | ||||
|     # TODO(ArrayManager) fast_xs with array-like scalars is not yet working | ||||
|     @td.skip_array_manager_not_yet_implemented | ||||
|     def test_chained_getitem_with_lists(self): | ||||
|         """ | ||||
|         Four ways of reading one cell that holds an ndarray return that array. | ||||
|  | ||||
|         Compares ``df.loc[2, "A"]``, ``df.iloc[2]["A"]``, ``df["A"].loc[2]`` | ||||
|         and ``df["A"].iloc[2]`` against ``df["A"].iloc[2]``. | ||||
|         """ | ||||
|         # GH6394 | ||||
|         # Regression in chained getitem indexing with embedded list-like from | ||||
|         # 0.12 | ||||
|  | ||||
|         df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]}) | ||||
|         expected = df["A"].iloc[2] | ||||
|         result = df.loc[2, "A"] | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|         result2 = df.iloc[2]["A"] | ||||
|         tm.assert_numpy_array_equal(result2, expected) | ||||
|         result3 = df["A"].loc[2] | ||||
|         tm.assert_numpy_array_equal(result3, expected) | ||||
|         result4 = df["A"].iloc[2] | ||||
|         tm.assert_numpy_array_equal(result4, expected) | ||||
|  | ||||
|     def test_cache_updating(self): | ||||
|         """ | ||||
|         A row added with ``df.loc[new_label] = ...`` shows up in column Series. | ||||
|  | ||||
|         Column "A" is accessed before the row is added and column "B" is not; | ||||
|         both must contain the new label afterwards. | ||||
|         """ | ||||
|         # GH 4939, make sure to update the cache on setitem | ||||
|  | ||||
|         df = DataFrame( | ||||
|             np.zeros((10, 4)), | ||||
|             columns=Index(list("ABCD"), dtype=object), | ||||
|         ) | ||||
|         df["A"]  # cache series | ||||
|         df.loc["Hello Friend"] = df.iloc[0] | ||||
|         assert "Hello Friend" in df["A"].index | ||||
|         assert "Hello Friend" in df["B"].index | ||||
|  | ||||
|     def test_cache_updating2(self, using_copy_on_write): | ||||
|         """ | ||||
|         Writing through ``df.f.values[3]`` and reading the column back. | ||||
|  | ||||
|         With copy-on-write the write must raise ``ValueError`` ("read-only") | ||||
|         and the frame must stay unchanged. Otherwise the last written value | ||||
|         must be visible both in the frame and in ``df.f``. | ||||
|         """ | ||||
|         # 10264 | ||||
|         df = DataFrame( | ||||
|             np.zeros((5, 5), dtype="int64"), | ||||
|             columns=["a", "b", "c", "d", "e"], | ||||
|             index=range(5), | ||||
|         ) | ||||
|         df["f"] = 0 | ||||
|         df_orig = df.copy() | ||||
|         if using_copy_on_write: | ||||
|             with pytest.raises(ValueError, match="read-only"): | ||||
|                 df.f.values[3] = 1 | ||||
|             tm.assert_frame_equal(df, df_orig) | ||||
|             return | ||||
|  | ||||
|         df.f.values[3] = 1 | ||||
|  | ||||
|         # a second write to the same position; this is the value checked below | ||||
|         df.f.values[3] = 2 | ||||
|         expected = DataFrame( | ||||
|             np.zeros((5, 6), dtype="int64"), | ||||
|             columns=["a", "b", "c", "d", "e", "f"], | ||||
|             index=range(5), | ||||
|         ) | ||||
|         expected.at[3, "f"] = 2 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|         expected = Series([0, 0, 0, 2, 0], name="f") | ||||
|         tm.assert_series_equal(df.f, expected) | ||||
|  | ||||
|     def test_iloc_setitem_chained_assignment(self, using_copy_on_write): | ||||
|         """ | ||||
|         Two chained ``df["bb"].iloc[0] = ...`` writes around a ``df.iloc`` read. | ||||
|  | ||||
|         Without copy-on-write the second written value (0.15) must be visible | ||||
|         in the frame; with copy-on-write the original 2.2 must remain. | ||||
|         """ | ||||
|         # GH#3970 | ||||
|         with option_context("chained_assignment", None): | ||||
|             df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) | ||||
|             df["cc"] = 0.0 | ||||
|  | ||||
|             # all-True boolean list, one entry per row | ||||
|             ck = [True] * len(df) | ||||
|  | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["bb"].iloc[0] = 0.13 | ||||
|  | ||||
|             # GH#3970 this lookup used to break the chained setting to 0.15 | ||||
|             df.iloc[ck] | ||||
|  | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["bb"].iloc[0] = 0.15 | ||||
|  | ||||
|             if not using_copy_on_write: | ||||
|                 assert df["bb"].iloc[0] == 0.15 | ||||
|             else: | ||||
|                 assert df["bb"].iloc[0] == 2.2 | ||||
|  | ||||
|     def test_getitem_loc_assignment_slice_state(self): | ||||
|         """ | ||||
|         ``df["a"].loc[4] = 40`` uses a label that is not in the 3-row frame. | ||||
|  | ||||
|         Afterwards both the frame and ``df["a"]`` must still hold only the | ||||
|         original three values. | ||||
|         """ | ||||
|         # GH 13569 | ||||
|         df = DataFrame({"a": [10, 20, 30]}) | ||||
|         with tm.raises_chained_assignment_error(): | ||||
|             df["a"].loc[4] = 40 | ||||
|         tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]})) | ||||
|         tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a")) | ||||
| @ -0,0 +1,105 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.api.indexers import check_array_indexer | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer, expected", | ||||
|     [ | ||||
|         # integer | ||||
|         ([1, 2], np.array([1, 2], dtype=np.intp)), | ||||
|         (np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)), | ||||
|         (pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)), | ||||
|         (pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)), | ||||
|         # boolean | ||||
|         ([True, False, True], np.array([True, False, True], dtype=np.bool_)), | ||||
|         (np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)), | ||||
|         ( | ||||
|             pd.array([True, False, True], dtype="boolean"), | ||||
|             np.array([True, False, True], dtype=np.bool_), | ||||
|         ), | ||||
|         # other | ||||
|         ([], np.array([], dtype=np.intp)), | ||||
|     ], | ||||
| ) | ||||
| def test_valid_input(indexer, expected): | ||||
|     arr = np.array([1, 2, 3]) | ||||
|     result = check_array_indexer(arr, indexer) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")] | ||||
| ) | ||||
| def test_boolean_na_returns_indexer(indexer): | ||||
|     # https://github.com/pandas-dev/pandas/issues/31503 | ||||
|     arr = np.array([1, 2, 3]) | ||||
|  | ||||
|     result = check_array_indexer(arr, indexer) | ||||
|     expected = np.array([True, False, False], dtype=bool) | ||||
|  | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", | ||||
|     [ | ||||
|         [True, False], | ||||
|         pd.array([True, False], dtype="boolean"), | ||||
|         np.array([True, False], dtype=np.bool_), | ||||
|     ], | ||||
| ) | ||||
| def test_bool_raise_length(indexer): | ||||
|     arr = np.array([1, 2, 3]) | ||||
|  | ||||
|     msg = "Boolean index has wrong length" | ||||
|     with pytest.raises(IndexError, match=msg): | ||||
|         check_array_indexer(arr, indexer) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")] | ||||
| ) | ||||
| def test_int_raise_missing_values(indexer): | ||||
|     arr = np.array([1, 2, 3]) | ||||
|  | ||||
|     msg = "Cannot index with an integer indexer containing NA values" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         check_array_indexer(arr, indexer) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "indexer", | ||||
|     [ | ||||
|         [0.0, 1.0], | ||||
|         np.array([1.0, 2.0], dtype="float64"), | ||||
|         np.array([True, False], dtype=object), | ||||
|         pd.Index([True, False], dtype=object), | ||||
|     ], | ||||
| ) | ||||
| def test_raise_invalid_array_dtypes(indexer): | ||||
|     arr = np.array([1, 2, 3]) | ||||
|  | ||||
|     msg = "arrays used as indices must be of integer or boolean type" | ||||
|     with pytest.raises(IndexError, match=msg): | ||||
|         check_array_indexer(arr, indexer) | ||||
|  | ||||
|  | ||||
| def test_raise_nullable_string_dtype(nullable_string_dtype): | ||||
|     indexer = pd.array(["a", "b"], dtype=nullable_string_dtype) | ||||
|     arr = np.array([1, 2, 3]) | ||||
|  | ||||
|     msg = "arrays used as indices must be of integer or boolean type" | ||||
|     with pytest.raises(IndexError, match=msg): | ||||
|         check_array_indexer(arr, indexer) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)]) | ||||
| def test_pass_through_non_array_likes(indexer): | ||||
|     arr = np.array([1, 2, 3]) | ||||
|  | ||||
|     result = check_array_indexer(arr, indexer) | ||||
|     assert result == indexer | ||||
| @ -0,0 +1,941 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
| import itertools | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import ( | ||||
|     IS64, | ||||
|     is_platform_windows, | ||||
| ) | ||||
| from pandas.compat.numpy import np_version_gt2 | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
| ############################################################### | ||||
| # Index / Series common tests which may trigger dtype coercions | ||||
| ############################################################### | ||||
|  | ||||
|  | ||||
| @pytest.fixture(autouse=True, scope="class") | ||||
| def check_comprehensiveness(request): | ||||
|     # Iterate over combination of dtype, method and klass | ||||
|     # and ensure that each are contained within a collected test | ||||
|     cls = request.cls | ||||
|     combos = itertools.product(cls.klasses, cls.dtypes, [cls.method]) | ||||
|  | ||||
|     def has_test(combo): | ||||
|         klass, dtype, method = combo | ||||
|         cls_funcs = request.node.session.items | ||||
|         return any( | ||||
|             klass in x.name and dtype in x.name and method in x.name for x in cls_funcs | ||||
|         ) | ||||
|  | ||||
|     opts = request.config.option | ||||
|     if opts.lf or opts.keyword: | ||||
|         # If we are running with "last-failed" or -k foo, we expect to only | ||||
|         #  run a subset of tests. | ||||
|         yield | ||||
|  | ||||
|     else: | ||||
|         for combo in combos: | ||||
|             if not has_test(combo): | ||||
|                 raise AssertionError( | ||||
|                     f"test method is not defined: {cls.__name__}, {combo}" | ||||
|                 ) | ||||
|  | ||||
|         yield | ||||
|  | ||||
|  | ||||
| class CoercionBase: | ||||
|     klasses = ["index", "series"] | ||||
|     dtypes = [ | ||||
|         "object", | ||||
|         "int64", | ||||
|         "float64", | ||||
|         "complex128", | ||||
|         "bool", | ||||
|         "datetime64", | ||||
|         "datetime64tz", | ||||
|         "timedelta64", | ||||
|         "period", | ||||
|     ] | ||||
|  | ||||
|     @property | ||||
|     def method(self): | ||||
|         raise NotImplementedError(self) | ||||
|  | ||||
|  | ||||
| class TestSetitemCoercion(CoercionBase): | ||||
|     method = "setitem" | ||||
|  | ||||
|     # disable comprehensiveness tests, as most of these have been moved to | ||||
|     #  tests.series.indexing.test_setitem in SetitemCastingEquivalents subclasses. | ||||
|     klasses: list[str] = [] | ||||
|  | ||||
|     def test_setitem_series_no_coercion_from_values_list(self): | ||||
|         # GH35865 - int casted to str when internally calling np.array(ser.values) | ||||
|         ser = pd.Series(["a", 1]) | ||||
|         ser[:] = list(ser.values) | ||||
|  | ||||
|         expected = pd.Series(["a", 1]) | ||||
|  | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     def _assert_setitem_index_conversion( | ||||
|         self, original_series, loc_key, expected_index, expected_dtype | ||||
|     ): | ||||
|         """test index's coercion triggered by assign key""" | ||||
|         temp = original_series.copy() | ||||
|         # GH#33469 pre-2.0 with int loc_key and temp.index.dtype == np.float64 | ||||
|         #  `temp[loc_key] = 5` treated loc_key as positional | ||||
|         temp[loc_key] = 5 | ||||
|         exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) | ||||
|         tm.assert_series_equal(temp, exp) | ||||
|         # check dtype explicitly for sure | ||||
|         assert temp.index.dtype == expected_dtype | ||||
|  | ||||
|         temp = original_series.copy() | ||||
|         temp.loc[loc_key] = 5 | ||||
|         exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) | ||||
|         tm.assert_series_equal(temp, exp) | ||||
|         # check dtype explicitly for sure | ||||
|         assert temp.index.dtype == expected_dtype | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)] | ||||
|     ) | ||||
|     def test_setitem_index_object(self, val, exp_dtype): | ||||
|         obj = pd.Series([1, 2, 3, 4], index=pd.Index(list("abcd"), dtype=object)) | ||||
|         assert obj.index.dtype == object | ||||
|  | ||||
|         if exp_dtype is IndexError: | ||||
|             temp = obj.copy() | ||||
|             warn_msg = "Series.__setitem__ treating keys as positions is deprecated" | ||||
|             msg = "index 5 is out of bounds for axis 0 with size 4" | ||||
|             with pytest.raises(exp_dtype, match=msg): | ||||
|                 with tm.assert_produces_warning(FutureWarning, match=warn_msg): | ||||
|                     temp[5] = 5 | ||||
|         else: | ||||
|             exp_index = pd.Index(list("abcd") + [val], dtype=object) | ||||
|             self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)] | ||||
|     ) | ||||
|     def test_setitem_index_int64(self, val, exp_dtype): | ||||
|         obj = pd.Series([1, 2, 3, 4]) | ||||
|         assert obj.index.dtype == np.int64 | ||||
|  | ||||
|         exp_index = pd.Index([0, 1, 2, 3, val]) | ||||
|         self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "val,exp_dtype", [(5, np.float64), (5.1, np.float64), ("x", object)] | ||||
|     ) | ||||
|     def test_setitem_index_float64(self, val, exp_dtype, request): | ||||
|         obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1]) | ||||
|         assert obj.index.dtype == np.float64 | ||||
|  | ||||
|         exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val]) | ||||
|         self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_setitem_series_period(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_setitem_index_complex128(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_setitem_index_bool(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_setitem_index_datetime64(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_setitem_index_datetime64tz(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_setitem_index_timedelta64(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_setitem_index_period(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|  | ||||
| class TestInsertIndexCoercion(CoercionBase): | ||||
|     klasses = ["index"] | ||||
|     method = "insert" | ||||
|  | ||||
|     def _assert_insert_conversion(self, original, value, expected, expected_dtype): | ||||
|         """test coercion triggered by insert""" | ||||
|         target = original.copy() | ||||
|         res = target.insert(1, value) | ||||
|         tm.assert_index_equal(res, expected) | ||||
|         assert res.dtype == expected_dtype | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "insert, coerced_val, coerced_dtype", | ||||
|         [ | ||||
|             (1, 1, object), | ||||
|             (1.1, 1.1, object), | ||||
|             (False, False, object), | ||||
|             ("x", "x", object), | ||||
|         ], | ||||
|     ) | ||||
|     def test_insert_index_object(self, insert, coerced_val, coerced_dtype): | ||||
|         obj = pd.Index(list("abcd"), dtype=object) | ||||
|         assert obj.dtype == object | ||||
|  | ||||
|         exp = pd.Index(["a", coerced_val, "b", "c", "d"], dtype=object) | ||||
|         self._assert_insert_conversion(obj, insert, exp, coerced_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "insert, coerced_val, coerced_dtype", | ||||
|         [ | ||||
|             (1, 1, None), | ||||
|             (1.1, 1.1, np.float64), | ||||
|             (False, False, object),  # GH#36319 | ||||
|             ("x", "x", object), | ||||
|         ], | ||||
|     ) | ||||
|     def test_insert_int_index( | ||||
|         self, any_int_numpy_dtype, insert, coerced_val, coerced_dtype | ||||
|     ): | ||||
|         dtype = any_int_numpy_dtype | ||||
|         obj = pd.Index([1, 2, 3, 4], dtype=dtype) | ||||
|         coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype | ||||
|  | ||||
|         exp = pd.Index([1, coerced_val, 2, 3, 4], dtype=coerced_dtype) | ||||
|         self._assert_insert_conversion(obj, insert, exp, coerced_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "insert, coerced_val, coerced_dtype", | ||||
|         [ | ||||
|             (1, 1.0, None), | ||||
|             # When float_numpy_dtype=float32, this is not the case | ||||
|             # see the correction below | ||||
|             (1.1, 1.1, np.float64), | ||||
|             (False, False, object),  # GH#36319 | ||||
|             ("x", "x", object), | ||||
|         ], | ||||
|     ) | ||||
|     def test_insert_float_index( | ||||
|         self, float_numpy_dtype, insert, coerced_val, coerced_dtype | ||||
|     ): | ||||
|         dtype = float_numpy_dtype | ||||
|         obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype) | ||||
|         coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype | ||||
|  | ||||
|         if np_version_gt2 and dtype == "float32" and coerced_val == 1.1: | ||||
|             # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32 | ||||
|             # the expected dtype will be float32 if the original dtype was float32 | ||||
|             coerced_dtype = np.float32 | ||||
|         exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype) | ||||
|         self._assert_insert_conversion(obj, insert, exp, coerced_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,exp_dtype", | ||||
|         [ | ||||
|             (pd.Timestamp("2012-01-01"), "datetime64[ns]"), | ||||
|             (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), | ||||
|         ], | ||||
|         ids=["datetime64", "datetime64tz"], | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "insert_value", | ||||
|         [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1], | ||||
|     ) | ||||
|     def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value): | ||||
|         obj = pd.DatetimeIndex( | ||||
|             ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz | ||||
|         ).as_unit("ns") | ||||
|         assert obj.dtype == exp_dtype | ||||
|  | ||||
|         exp = pd.DatetimeIndex( | ||||
|             ["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"], | ||||
|             tz=fill_val.tz, | ||||
|         ).as_unit("ns") | ||||
|         self._assert_insert_conversion(obj, fill_val, exp, exp_dtype) | ||||
|  | ||||
|         if fill_val.tz: | ||||
|             # mismatched tzawareness | ||||
|             ts = pd.Timestamp("2012-01-01") | ||||
|             result = obj.insert(1, ts) | ||||
|             expected = obj.astype(object).insert(1, ts) | ||||
|             assert expected.dtype == object | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|             ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") | ||||
|             result = obj.insert(1, ts) | ||||
|             # once deprecation is enforced: | ||||
|             expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) | ||||
|             assert expected.dtype == obj.dtype | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         else: | ||||
|             # mismatched tzawareness | ||||
|             ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") | ||||
|             result = obj.insert(1, ts) | ||||
|             expected = obj.astype(object).insert(1, ts) | ||||
|             assert expected.dtype == object | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         item = 1 | ||||
|         result = obj.insert(1, item) | ||||
|         expected = obj.astype(object).insert(1, item) | ||||
|         assert expected[1] == item | ||||
|         assert expected.dtype == object | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_insert_index_timedelta64(self): | ||||
|         obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"]) | ||||
|         assert obj.dtype == "timedelta64[ns]" | ||||
|  | ||||
|         # timedelta64 + timedelta64 => timedelta64 | ||||
|         exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"]) | ||||
|         self._assert_insert_conversion( | ||||
|             obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]" | ||||
|         ) | ||||
|  | ||||
|         for item in [pd.Timestamp("2012-01-01"), 1]: | ||||
|             result = obj.insert(1, item) | ||||
|             expected = obj.astype(object).insert(1, item) | ||||
|             assert expected.dtype == object | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "insert, coerced_val, coerced_dtype", | ||||
|         [ | ||||
|             (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"), | ||||
|             (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object), | ||||
|             (1, 1, object), | ||||
|             ("x", "x", object), | ||||
|         ], | ||||
|     ) | ||||
|     def test_insert_index_period(self, insert, coerced_val, coerced_dtype): | ||||
|         obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M") | ||||
|         assert obj.dtype == "period[M]" | ||||
|  | ||||
|         data = [ | ||||
|             pd.Period("2011-01", freq="M"), | ||||
|             coerced_val, | ||||
|             pd.Period("2011-02", freq="M"), | ||||
|             pd.Period("2011-03", freq="M"), | ||||
|             pd.Period("2011-04", freq="M"), | ||||
|         ] | ||||
|         if isinstance(insert, pd.Period): | ||||
|             exp = pd.PeriodIndex(data, freq="M") | ||||
|             self._assert_insert_conversion(obj, insert, exp, coerced_dtype) | ||||
|  | ||||
|             # string that can be parsed to appropriate PeriodDtype | ||||
|             self._assert_insert_conversion(obj, str(insert), exp, coerced_dtype) | ||||
|  | ||||
|         else: | ||||
|             result = obj.insert(0, insert) | ||||
|             expected = obj.astype(object).insert(0, insert) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|             # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M" | ||||
|             #  casts that string to Period[M], not clear that is desirable | ||||
|             if not isinstance(insert, pd.Timestamp): | ||||
|                 # non-castable string | ||||
|                 result = obj.insert(0, str(insert)) | ||||
|                 expected = obj.astype(object).insert(0, str(insert)) | ||||
|                 tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_insert_index_complex128(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_insert_index_bool(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|  | ||||
| class TestWhereCoercion(CoercionBase): | ||||
|     method = "where" | ||||
|     _cond = np.array([True, False, True, False]) | ||||
|  | ||||
|     def _assert_where_conversion( | ||||
|         self, original, cond, values, expected, expected_dtype | ||||
|     ): | ||||
|         """test coercion triggered by where""" | ||||
|         target = original.copy() | ||||
|         res = target.where(cond, values) | ||||
|         tm.assert_equal(res, expected) | ||||
|         assert res.dtype == expected_dtype | ||||
|  | ||||
|     def _construct_exp(self, obj, klass, fill_val, exp_dtype): | ||||
|         if fill_val is True: | ||||
|             values = klass([True, False, True, True]) | ||||
|         elif isinstance(fill_val, (datetime, np.datetime64)): | ||||
|             values = pd.date_range(fill_val, periods=4) | ||||
|         else: | ||||
|             values = klass(x * fill_val for x in [5, 6, 7, 8]) | ||||
|  | ||||
|         exp = klass([obj[0], values[1], obj[2], values[3]], dtype=exp_dtype) | ||||
|         return values, exp | ||||
|  | ||||
|     def _run_test(self, obj, fill_val, klass, exp_dtype): | ||||
|         cond = klass(self._cond) | ||||
|  | ||||
|         exp = klass([obj[0], fill_val, obj[2], fill_val], dtype=exp_dtype) | ||||
|         self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) | ||||
|  | ||||
|         values, exp = self._construct_exp(obj, klass, fill_val, exp_dtype) | ||||
|         self._assert_where_conversion(obj, cond, values, exp, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,exp_dtype", | ||||
|         [(1, object), (1.1, object), (1 + 1j, object), (True, object)], | ||||
|     ) | ||||
|     def test_where_object(self, index_or_series, fill_val, exp_dtype): | ||||
|         klass = index_or_series | ||||
|         obj = klass(list("abcd"), dtype=object) | ||||
|         assert obj.dtype == object | ||||
|         self._run_test(obj, fill_val, klass, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,exp_dtype", | ||||
|         [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], | ||||
|     ) | ||||
|     def test_where_int64(self, index_or_series, fill_val, exp_dtype, request): | ||||
|         klass = index_or_series | ||||
|  | ||||
|         obj = klass([1, 2, 3, 4]) | ||||
|         assert obj.dtype == np.int64 | ||||
|         self._run_test(obj, fill_val, klass, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val, exp_dtype", | ||||
|         [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], | ||||
|     ) | ||||
|     def test_where_float64(self, index_or_series, fill_val, exp_dtype, request): | ||||
|         klass = index_or_series | ||||
|  | ||||
|         obj = klass([1.1, 2.2, 3.3, 4.4]) | ||||
|         assert obj.dtype == np.float64 | ||||
|         self._run_test(obj, fill_val, klass, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,exp_dtype", | ||||
|         [ | ||||
|             (1, np.complex128), | ||||
|             (1.1, np.complex128), | ||||
|             (1 + 1j, np.complex128), | ||||
|             (True, object), | ||||
|         ], | ||||
|     ) | ||||
|     def test_where_complex128(self, index_or_series, fill_val, exp_dtype): | ||||
|         klass = index_or_series | ||||
|         obj = klass([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j], dtype=np.complex128) | ||||
|         assert obj.dtype == np.complex128 | ||||
|         self._run_test(obj, fill_val, klass, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,exp_dtype", | ||||
|         [(1, object), (1.1, object), (1 + 1j, object), (True, np.bool_)], | ||||
|     ) | ||||
|     def test_where_series_bool(self, index_or_series, fill_val, exp_dtype): | ||||
|         klass = index_or_series | ||||
|  | ||||
|         obj = klass([True, False, True, False]) | ||||
|         assert obj.dtype == np.bool_ | ||||
|         self._run_test(obj, fill_val, klass, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,exp_dtype", | ||||
|         [ | ||||
|             (pd.Timestamp("2012-01-01"), "datetime64[ns]"), | ||||
|             (pd.Timestamp("2012-01-01", tz="US/Eastern"), object), | ||||
|         ], | ||||
|         ids=["datetime64", "datetime64tz"], | ||||
|     ) | ||||
|     def test_where_datetime64(self, index_or_series, fill_val, exp_dtype): | ||||
|         klass = index_or_series | ||||
|  | ||||
|         obj = klass(pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None)) | ||||
|         assert obj.dtype == "datetime64[ns]" | ||||
|  | ||||
|         fv = fill_val | ||||
|         # do the check with each of the available datetime scalars | ||||
|         if exp_dtype == "datetime64[ns]": | ||||
|             for scalar in [fv, fv.to_pydatetime(), fv.to_datetime64()]: | ||||
|                 self._run_test(obj, scalar, klass, exp_dtype) | ||||
|         else: | ||||
|             for scalar in [fv, fv.to_pydatetime()]: | ||||
|                 self._run_test(obj, fill_val, klass, exp_dtype) | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_where_index_complex128(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_where_index_bool(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_where_series_timedelta64(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_where_series_period(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "value", [pd.Timedelta(days=9), timedelta(days=9), np.timedelta64(9, "D")] | ||||
|     ) | ||||
|     def test_where_index_timedelta64(self, value): | ||||
|         tdi = pd.timedelta_range("1 Day", periods=4) | ||||
|         cond = np.array([True, False, False, True]) | ||||
|  | ||||
|         expected = pd.TimedeltaIndex(["1 Day", value, value, "4 Days"]) | ||||
|         result = tdi.where(cond, value) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # wrong-dtyped NaT | ||||
|         dtnat = np.datetime64("NaT", "ns") | ||||
|         expected = pd.Index([tdi[0], dtnat, dtnat, tdi[3]], dtype=object) | ||||
|         assert expected[1] is dtnat | ||||
|  | ||||
|         result = tdi.where(cond, dtnat) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_where_index_period(self): | ||||
|         dti = pd.date_range("2016-01-01", periods=3, freq="QS") | ||||
|         pi = dti.to_period("Q") | ||||
|  | ||||
|         cond = np.array([False, True, False]) | ||||
|  | ||||
|         # Passing a valid scalar | ||||
|         value = pi[-1] + pi.freq * 10 | ||||
|         expected = pd.PeriodIndex([value, pi[1], value]) | ||||
|         result = pi.where(cond, value) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # Case passing ndarray[object] of Periods | ||||
|         other = np.asarray(pi + pi.freq * 10, dtype=object) | ||||
|         result = pi.where(cond, other) | ||||
|         expected = pd.PeriodIndex([other[0], pi[1], other[2]]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # Passing a mismatched scalar -> casts to object | ||||
|         td = pd.Timedelta(days=4) | ||||
|         expected = pd.Index([td, pi[1], td], dtype=object) | ||||
|         result = pi.where(cond, td) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         per = pd.Period("2020-04-21", "D") | ||||
|         expected = pd.Index([per, pi[1], per], dtype=object) | ||||
|         result = pi.where(cond, per) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestFillnaSeriesCoercion(CoercionBase): | ||||
|     # not indexing, but place here for consistency | ||||
|  | ||||
|     method = "fillna" | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_has_comprehensive_tests(self): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     def _assert_fillna_conversion(self, original, value, expected, expected_dtype): | ||||
|         """test coercion triggered by fillna""" | ||||
|         target = original.copy() | ||||
|         res = target.fillna(value) | ||||
|         tm.assert_equal(res, expected) | ||||
|         assert res.dtype == expected_dtype | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val, fill_dtype", | ||||
|         [(1, object), (1.1, object), (1 + 1j, object), (True, object)], | ||||
|     ) | ||||
|     def test_fillna_object(self, index_or_series, fill_val, fill_dtype): | ||||
|         klass = index_or_series | ||||
|         obj = klass(["a", np.nan, "c", "d"], dtype=object) | ||||
|         assert obj.dtype == object | ||||
|  | ||||
|         exp = klass(["a", fill_val, "c", "d"], dtype=object) | ||||
|         self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,fill_dtype", | ||||
|         [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], | ||||
|     ) | ||||
|     def test_fillna_float64(self, index_or_series, fill_val, fill_dtype): | ||||
|         klass = index_or_series | ||||
|         obj = klass([1.1, np.nan, 3.3, 4.4]) | ||||
|         assert obj.dtype == np.float64 | ||||
|  | ||||
|         exp = klass([1.1, fill_val, 3.3, 4.4]) | ||||
|         self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,fill_dtype", | ||||
|         [ | ||||
|             (1, np.complex128), | ||||
|             (1.1, np.complex128), | ||||
|             (1 + 1j, np.complex128), | ||||
|             (True, object), | ||||
|         ], | ||||
|     ) | ||||
|     def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype): | ||||
|         klass = index_or_series | ||||
|         obj = klass([1 + 1j, np.nan, 3 + 3j, 4 + 4j], dtype=np.complex128) | ||||
|         assert obj.dtype == np.complex128 | ||||
|  | ||||
|         exp = klass([1 + 1j, fill_val, 3 + 3j, 4 + 4j]) | ||||
|         self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,fill_dtype", | ||||
|         [ | ||||
|             (pd.Timestamp("2012-01-01"), "datetime64[ns]"), | ||||
|             (pd.Timestamp("2012-01-01", tz="US/Eastern"), object), | ||||
|             (1, object), | ||||
|             ("x", object), | ||||
|         ], | ||||
|         ids=["datetime64", "datetime64tz", "object", "object"], | ||||
|     ) | ||||
|     def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): | ||||
|         klass = index_or_series | ||||
|         obj = klass( | ||||
|             [ | ||||
|                 pd.Timestamp("2011-01-01"), | ||||
|                 pd.NaT, | ||||
|                 pd.Timestamp("2011-01-03"), | ||||
|                 pd.Timestamp("2011-01-04"), | ||||
|             ] | ||||
|         ) | ||||
|         assert obj.dtype == "datetime64[ns]" | ||||
|  | ||||
|         exp = klass( | ||||
|             [ | ||||
|                 pd.Timestamp("2011-01-01"), | ||||
|                 fill_val, | ||||
|                 pd.Timestamp("2011-01-03"), | ||||
|                 pd.Timestamp("2011-01-04"), | ||||
|             ] | ||||
|         ) | ||||
|         self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val,fill_dtype", | ||||
|         [ | ||||
|             (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), | ||||
|             (pd.Timestamp("2012-01-01"), object), | ||||
|             # pre-2.0 with a mismatched tz we would get object result | ||||
|             (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[ns, US/Eastern]"), | ||||
|             (1, object), | ||||
|             ("x", object), | ||||
|         ], | ||||
|     ) | ||||
|     def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): | ||||
|         klass = index_or_series | ||||
|         tz = "US/Eastern" | ||||
|  | ||||
|         obj = klass( | ||||
|             [ | ||||
|                 pd.Timestamp("2011-01-01", tz=tz), | ||||
|                 pd.NaT, | ||||
|                 pd.Timestamp("2011-01-03", tz=tz), | ||||
|                 pd.Timestamp("2011-01-04", tz=tz), | ||||
|             ] | ||||
|         ) | ||||
|         assert obj.dtype == "datetime64[ns, US/Eastern]" | ||||
|  | ||||
|         if getattr(fill_val, "tz", None) is None: | ||||
|             fv = fill_val | ||||
|         else: | ||||
|             fv = fill_val.tz_convert(tz) | ||||
|         exp = klass( | ||||
|             [ | ||||
|                 pd.Timestamp("2011-01-01", tz=tz), | ||||
|                 fv, | ||||
|                 pd.Timestamp("2011-01-03", tz=tz), | ||||
|                 pd.Timestamp("2011-01-04", tz=tz), | ||||
|             ] | ||||
|         ) | ||||
|         self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val", | ||||
|         [ | ||||
|             1, | ||||
|             1.1, | ||||
|             1 + 1j, | ||||
|             True, | ||||
|             pd.Interval(1, 2, closed="left"), | ||||
|             pd.Timestamp("2012-01-01", tz="US/Eastern"), | ||||
|             pd.Timestamp("2012-01-01"), | ||||
|             pd.Timedelta(days=1), | ||||
|             pd.Period("2016-01-01", "D"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_fillna_interval(self, index_or_series, fill_val): | ||||
|         ii = pd.interval_range(1.0, 5.0, closed="right").insert(1, np.nan) | ||||
|         assert isinstance(ii.dtype, pd.IntervalDtype) | ||||
|         obj = index_or_series(ii) | ||||
|  | ||||
|         exp = index_or_series([ii[0], fill_val, ii[2], ii[3], ii[4]], dtype=object) | ||||
|  | ||||
|         fill_dtype = object | ||||
|         self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_fillna_series_int64(self): | ||||
|         """Placeholder for the int64 Series case; raises until a test is written.""" | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_fillna_index_int64(self): | ||||
|         """Placeholder for the int64 Index case; raises until a test is written.""" | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_fillna_series_bool(self): | ||||
|         """Placeholder for the bool Series case; raises until a test is written.""" | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_fillna_index_bool(self): | ||||
|         """Placeholder for the bool Index case; raises until a test is written.""" | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_fillna_series_timedelta64(self): | ||||
|         """Placeholder for the timedelta64 Series case; raises until a test is written.""" | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fill_val", | ||||
|         [ | ||||
|             1, | ||||
|             1.1, | ||||
|             1 + 1j, | ||||
|             True, | ||||
|             pd.Interval(1, 2, closed="left"), | ||||
|             pd.Timestamp("2012-01-01", tz="US/Eastern"), | ||||
|             pd.Timestamp("2012-01-01"), | ||||
|             pd.Timedelta(days=1), | ||||
|             pd.Period("2016-01-01", "W"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_fillna_series_period(self, index_or_series, fill_val): | ||||
|         pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT) | ||||
|         assert isinstance(pi.dtype, pd.PeriodDtype) | ||||
|         obj = index_or_series(pi) | ||||
|  | ||||
|         exp = index_or_series([pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object) | ||||
|  | ||||
|         fill_dtype = object | ||||
|         self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_fillna_index_timedelta64(self): | ||||
|         """Placeholder for the timedelta64 Index case; raises until a test is written.""" | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_fillna_index_period(self): | ||||
|         """Placeholder for the period Index case; raises until a test is written.""" | ||||
|         raise NotImplementedError | ||||
|  | ||||
|  | ||||
| class TestReplaceSeriesCoercion(CoercionBase): | ||||
|     klasses = ["series"] | ||||
|     method = "replace" | ||||
|  | ||||
|     rep: dict[str, list] = {} | ||||
|     rep["object"] = ["a", "b"] | ||||
|     rep["int64"] = [4, 5] | ||||
|     rep["float64"] = [1.1, 2.2] | ||||
|     rep["complex128"] = [1 + 1j, 2 + 2j] | ||||
|     rep["bool"] = [True, False] | ||||
|     rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")] | ||||
|  | ||||
|     for tz in ["UTC", "US/Eastern"]: | ||||
|         # to test tz => different tz replacement | ||||
|         key = f"datetime64[ns, {tz}]" | ||||
|         rep[key] = [ | ||||
|             pd.Timestamp("2011-01-01", tz=tz), | ||||
|             pd.Timestamp("2011-01-03", tz=tz), | ||||
|         ] | ||||
|  | ||||
|     rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")] | ||||
|  | ||||
|     @pytest.fixture(params=["dict", "series"]) | ||||
|     def how(self, request): | ||||
|         return request.param | ||||
|  | ||||
|     @pytest.fixture( | ||||
|         params=[ | ||||
|             "object", | ||||
|             "int64", | ||||
|             "float64", | ||||
|             "complex128", | ||||
|             "bool", | ||||
|             "datetime64[ns]", | ||||
|             "datetime64[ns, UTC]", | ||||
|             "datetime64[ns, US/Eastern]", | ||||
|             "timedelta64[ns]", | ||||
|         ] | ||||
|     ) | ||||
|     def from_key(self, request): | ||||
|         return request.param | ||||
|  | ||||
|     @pytest.fixture( | ||||
|         params=[ | ||||
|             "object", | ||||
|             "int64", | ||||
|             "float64", | ||||
|             "complex128", | ||||
|             "bool", | ||||
|             "datetime64[ns]", | ||||
|             "datetime64[ns, UTC]", | ||||
|             "datetime64[ns, US/Eastern]", | ||||
|             "timedelta64[ns]", | ||||
|         ], | ||||
|         ids=[ | ||||
|             "object", | ||||
|             "int64", | ||||
|             "float64", | ||||
|             "complex128", | ||||
|             "bool", | ||||
|             "datetime64", | ||||
|             "datetime64tz", | ||||
|             "datetime64tz", | ||||
|             "timedelta64", | ||||
|         ], | ||||
|     ) | ||||
|     def to_key(self, request): | ||||
|         return request.param | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def replacer(self, how, from_key, to_key): | ||||
|         """ | ||||
|         Object we will pass to `Series.replace` | ||||
|         """ | ||||
|         if how == "dict": | ||||
|             replacer = dict(zip(self.rep[from_key], self.rep[to_key])) | ||||
|         elif how == "series": | ||||
|             replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) | ||||
|         else: | ||||
|             raise ValueError | ||||
|         return replacer | ||||
|  | ||||
|     def test_replace_series(self, how, to_key, from_key, replacer, using_infer_string): | ||||
|         index = pd.Index([3, 4], name="xxx") | ||||
|         obj = pd.Series(self.rep[from_key], index=index, name="yyy") | ||||
|         obj = obj.astype(from_key) | ||||
|         assert obj.dtype == from_key | ||||
|  | ||||
|         if from_key.startswith("datetime") and to_key.startswith("datetime"): | ||||
|             # tested below | ||||
|             return | ||||
|         elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]: | ||||
|             # tested below | ||||
|             return | ||||
|  | ||||
|         if (from_key == "float64" and to_key in ("int64")) or ( | ||||
|             from_key == "complex128" and to_key in ("int64", "float64") | ||||
|         ): | ||||
|             if not IS64 or is_platform_windows(): | ||||
|                 pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}") | ||||
|  | ||||
|             # Expected: do not downcast by replacement | ||||
|             exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key) | ||||
|  | ||||
|         else: | ||||
|             exp = pd.Series(self.rep[to_key], index=index, name="yyy") | ||||
|  | ||||
|         if using_infer_string and exp.dtype == "string": | ||||
|             # with infer_string, we disable the deprecated downcasting behavior | ||||
|             exp = exp.astype(object) | ||||
|  | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         warn = FutureWarning | ||||
|         if ( | ||||
|             exp.dtype == obj.dtype | ||||
|             or exp.dtype == object | ||||
|             or (exp.dtype.kind in "iufc" and obj.dtype.kind in "iufc") | ||||
|         ): | ||||
|             warn = None | ||||
|         with tm.assert_produces_warning(warn, match=msg): | ||||
|             result = obj.replace(replacer) | ||||
|  | ||||
|         tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "to_key", | ||||
|         ["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"], | ||||
|         indirect=True, | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True | ||||
|     ) | ||||
|     def test_replace_series_datetime_tz( | ||||
|         self, how, to_key, from_key, replacer, using_infer_string | ||||
|     ): | ||||
|         index = pd.Index([3, 4], name="xyz") | ||||
|         obj = pd.Series(self.rep[from_key], index=index, name="yyy") | ||||
|         assert obj.dtype == from_key | ||||
|  | ||||
|         exp = pd.Series(self.rep[to_key], index=index, name="yyy") | ||||
|         if using_infer_string and exp.dtype == "string": | ||||
|             # with infer_string, we disable the deprecated downcasting behavior | ||||
|             exp = exp.astype(object) | ||||
|         else: | ||||
|             assert exp.dtype == to_key | ||||
|  | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         warn = FutureWarning if exp.dtype != object else None | ||||
|         with tm.assert_produces_warning(warn, match=msg): | ||||
|             result = obj.replace(replacer) | ||||
|  | ||||
|         tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "to_key", | ||||
|         ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], | ||||
|         indirect=True, | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "from_key", | ||||
|         ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], | ||||
|         indirect=True, | ||||
|     ) | ||||
|     def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer): | ||||
|         index = pd.Index([3, 4], name="xyz") | ||||
|         obj = pd.Series(self.rep[from_key], index=index, name="yyy") | ||||
|         assert obj.dtype == from_key | ||||
|  | ||||
|         exp = pd.Series(self.rep[to_key], index=index, name="yyy") | ||||
|         warn = FutureWarning | ||||
|         if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance( | ||||
|             exp.dtype, pd.DatetimeTZDtype | ||||
|         ): | ||||
|             # with mismatched tzs, we retain the original dtype as of 2.0 | ||||
|             exp = exp.astype(obj.dtype) | ||||
|             warn = None | ||||
|         else: | ||||
|             assert exp.dtype == to_key | ||||
|             if to_key == from_key: | ||||
|                 warn = None | ||||
|  | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         with tm.assert_produces_warning(warn, match=msg): | ||||
|             result = obj.replace(replacer) | ||||
|  | ||||
|         tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     @pytest.mark.xfail(reason="Test not implemented") | ||||
|     def test_replace_series_period(self): | ||||
|         raise NotImplementedError | ||||
| @ -0,0 +1,191 @@ | ||||
| import re | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestDatetimeIndex: | ||||
|     """Indexing tests for objects that hold or are labelled by datetime data.""" | ||||
|  | ||||
|     def test_get_loc_naive_dti_aware_str_deprecated(self): | ||||
|         """An offset-bearing string key raises KeyError on a tz-naive index.""" | ||||
|         # GH#46903 | ||||
|         ts = Timestamp("20130101")._value | ||||
|         dti = pd.DatetimeIndex([ts + 50 + i for i in range(100)]) | ||||
|         ser = Series(range(100), index=dti) | ||||
|  | ||||
|         key = "2013-01-01 00:00:00.000000050+0000" | ||||
|         # the KeyError message is expected to contain the repr of the key | ||||
|         msg = re.escape(repr(key)) | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             ser[key] | ||||
|  | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             dti.get_loc(key) | ||||
|  | ||||
|     def test_indexing_with_datetime_tz(self): | ||||
|         """Row selection via iloc/loc on a frame with tz-aware and naive columns.""" | ||||
|         # GH#8260 | ||||
|         # support datetime64 with tz | ||||
|  | ||||
|         idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") | ||||
|         dr = date_range("20130110", periods=3) | ||||
|         df = DataFrame({"A": idx, "B": dr}) | ||||
|         df["C"] = idx | ||||
|         df.iloc[1, 1] = pd.NaT | ||||
|         df.iloc[1, 2] = pd.NaT | ||||
|  | ||||
|         # row 1 mixes a tz-aware timestamp with the two NaT values set above, | ||||
|         # and is expected back as an object-dtype Series | ||||
|         expected = Series( | ||||
|             [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], | ||||
|             index=list("ABC"), | ||||
|             dtype="object", | ||||
|             name=1, | ||||
|         ) | ||||
|  | ||||
|         # indexing | ||||
|         result = df.iloc[1] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         result = df.loc[1] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_indexing_fast_xs(self): | ||||
|         """Row selection and boolean masking on a single tz-aware (UTC) column.""" | ||||
|         # indexing - fast_xs | ||||
|         df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")}) | ||||
|         result = df.iloc[5] | ||||
|         expected = Series( | ||||
|             [Timestamp("2014-01-06 00:00:00+0000", tz="UTC")], | ||||
|             index=["a"], | ||||
|             name=5, | ||||
|             dtype="M8[ns, UTC]", | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = df.loc[5] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # indexing - boolean | ||||
|         result = df[df.a > df.a[3]] | ||||
|         expected = df.iloc[4:] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_consistency_with_tz_aware_scalar(self): | ||||
|         """Every scalar accessor returns the same tz-aware Timestamp.""" | ||||
|         # xref gh-12938 | ||||
|         # various ways of indexing the same tz-aware scalar | ||||
|         df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame() | ||||
|  | ||||
|         df = pd.concat([df, df]).reset_index(drop=True) | ||||
|         expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels") | ||||
|  | ||||
|         result = df[0][0] | ||||
|         assert result == expected | ||||
|  | ||||
|         result = df.iloc[0, 0] | ||||
|         assert result == expected | ||||
|  | ||||
|         result = df.loc[0, 0] | ||||
|         assert result == expected | ||||
|  | ||||
|         result = df.iat[0, 0] | ||||
|         assert result == expected | ||||
|  | ||||
|         result = df.at[0, 0] | ||||
|         assert result == expected | ||||
|  | ||||
|         result = df[0].loc[0] | ||||
|         assert result == expected | ||||
|  | ||||
|         result = df[0].at[0] | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_indexing_with_datetimeindex_tz(self, indexer_sl): | ||||
|         """List-like and scalar get/set on a Series with a tz-aware index.""" | ||||
|         # GH 12050 | ||||
|         # indexing on a series with a datetimeindex with tz | ||||
|         index = date_range("2015-01-01", periods=2, tz="utc") | ||||
|  | ||||
|         ser = Series(range(2), index=index, dtype="int64") | ||||
|  | ||||
|         # list-like indexing | ||||
|  | ||||
|         for sel in (index, list(index)): | ||||
|             # getitem | ||||
|             result = indexer_sl(ser)[sel] | ||||
|             expected = ser.copy() | ||||
|             if sel is not index: | ||||
|                 # for the plain-list selector the expected index has its | ||||
|                 # freq cleared | ||||
|                 expected.index = expected.index._with_freq(None) | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|             # setitem | ||||
|             result = ser.copy() | ||||
|             indexer_sl(result)[sel] = 1 | ||||
|             expected = Series(1, index=index) | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # single element indexing | ||||
|  | ||||
|         # getitem | ||||
|         assert indexer_sl(ser)[index[1]] == 1 | ||||
|  | ||||
|         # setitem | ||||
|         result = ser.copy() | ||||
|         indexer_sl(result)[index[1]] = 5 | ||||
|         expected = Series([0, 5], index=index) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_nanosecond_getitem_setitem_with_tz(self): | ||||
|         """loc get/set with a tz-aware label given to nanosecond precision.""" | ||||
|         # GH 11679 | ||||
|         data = ["2016-06-28 08:30:00.123456789"] | ||||
|         index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]") | ||||
|         df = DataFrame({"a": [10]}, index=index) | ||||
|         result = df.loc[df.index[0]] | ||||
|         expected = Series(10, index=["a"], name=df.index[0]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = df.copy() | ||||
|         result.loc[df.index[0], "a"] = -1 | ||||
|         expected = DataFrame(-1, index=index, columns=["a"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_getitem_str_slice_millisecond_resolution(self, frame_or_series): | ||||
|         """A string slice with millisecond keys includes both endpoint rows.""" | ||||
|         # GH#33589 | ||||
|  | ||||
|         keys = [ | ||||
|             "2017-10-25T16:25:04.151", | ||||
|             "2017-10-25T16:25:04.252", | ||||
|             "2017-10-25T16:50:05.237", | ||||
|             "2017-10-25T16:50:05.238", | ||||
|         ] | ||||
|         obj = frame_or_series( | ||||
|             [1, 2, 3, 4], | ||||
|             index=[Timestamp(x) for x in keys], | ||||
|         ) | ||||
|         result = obj[keys[1] : keys[2]] | ||||
|         expected = frame_or_series( | ||||
|             [2, 3], | ||||
|             index=[ | ||||
|                 Timestamp(keys[1]), | ||||
|                 Timestamp(keys[2]), | ||||
|             ], | ||||
|         ) | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     def test_getitem_pyarrow_index(self, frame_or_series): | ||||
|         """loc with a slice of a pyarrow-timestamp index selects those rows.""" | ||||
|         # GH 53644 | ||||
|         pytest.importorskip("pyarrow") | ||||
|         obj = frame_or_series( | ||||
|             range(5), | ||||
|             index=date_range("2020", freq="D", periods=5).astype( | ||||
|                 "timestamp[us][pyarrow]" | ||||
|             ), | ||||
|         ) | ||||
|         # obj.index[:-3] keeps the first two of the five labels | ||||
|         result = obj.loc[obj.index[:-3]] | ||||
|         expected = frame_or_series( | ||||
|             range(2), | ||||
|             index=date_range("2020", freq="D", periods=2).astype( | ||||
|                 "timestamp[us][pyarrow]" | ||||
|             ), | ||||
|         ) | ||||
|         tm.assert_equal(result, expected) | ||||
| @ -0,0 +1,689 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     RangeIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def gen_obj(klass, index): | ||||
|     if klass is Series: | ||||
|         obj = Series(np.arange(len(index)), index=index) | ||||
|     else: | ||||
|         obj = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((len(index), len(index))), | ||||
|             index=index, | ||||
|             columns=index, | ||||
|         ) | ||||
|     return obj | ||||
|  | ||||
|  | ||||
| class TestFloatIndexers: | ||||
|     def check(self, result, original, indexer, getitem): | ||||
|         """ | ||||
|         comparator for results | ||||
|         we need to take care if we are indexing on a | ||||
|         Series or a frame | ||||
|         """ | ||||
|         if isinstance(original, Series): | ||||
|             expected = original.iloc[indexer] | ||||
|         elif getitem: | ||||
|             expected = original.iloc[:, indexer] | ||||
|         else: | ||||
|             expected = original.iloc[indexer] | ||||
|  | ||||
|         tm.assert_almost_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", | ||||
|         [ | ||||
|             Index(list("abcde")), | ||||
|             Index(list("abcde"), dtype="category"), | ||||
|             date_range("2020-01-01", periods=5), | ||||
|             timedelta_range("1 day", periods=5), | ||||
|             period_range("2020-01-01", periods=5), | ||||
|         ], | ||||
|     ) | ||||
|     def test_scalar_non_numeric(self, index, frame_or_series, indexer_sl): | ||||
|         # GH 4892 | ||||
|         # float_indexers should raise exceptions | ||||
|         # on appropriate Index types & accessors | ||||
|  | ||||
|         s = gen_obj(frame_or_series, index) | ||||
|  | ||||
|         # getting | ||||
|         with pytest.raises(KeyError, match="^3.0$"): | ||||
|             indexer_sl(s)[3.0] | ||||
|  | ||||
|         # contains | ||||
|         assert 3.0 not in s | ||||
|  | ||||
|         s2 = s.copy() | ||||
|         indexer_sl(s2)[3.0] = 10 | ||||
|  | ||||
|         if indexer_sl is tm.setitem: | ||||
|             assert 3.0 in s2.axes[-1] | ||||
|         elif indexer_sl is tm.loc: | ||||
|             assert 3.0 in s2.axes[0] | ||||
|         else: | ||||
|             assert 3.0 not in s2.axes[0] | ||||
|             assert 3.0 not in s2.axes[-1] | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", | ||||
|         [ | ||||
|             Index(list("abcde")), | ||||
|             Index(list("abcde"), dtype="category"), | ||||
|             date_range("2020-01-01", periods=5), | ||||
|             timedelta_range("1 day", periods=5), | ||||
|             period_range("2020-01-01", periods=5), | ||||
|         ], | ||||
|     ) | ||||
|     def test_scalar_non_numeric_series_fallback(self, index): | ||||
|         # fallsback to position selection, series only | ||||
|         s = Series(np.arange(len(index)), index=index) | ||||
|  | ||||
|         msg = "Series.__getitem__ treating keys as positions is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             s[3] | ||||
|         with pytest.raises(KeyError, match="^3.0$"): | ||||
|             s[3.0] | ||||
|  | ||||
|     def test_scalar_with_mixed(self, indexer_sl): | ||||
|         s2 = Series([1, 2, 3], index=["a", "b", "c"]) | ||||
|         s3 = Series([1, 2, 3], index=["a", "b", 1.5]) | ||||
|  | ||||
|         # lookup in a pure string index with an invalid indexer | ||||
|  | ||||
|         with pytest.raises(KeyError, match="^1.0$"): | ||||
|             indexer_sl(s2)[1.0] | ||||
|  | ||||
|         with pytest.raises(KeyError, match=r"^1\.0$"): | ||||
|             indexer_sl(s2)[1.0] | ||||
|  | ||||
|         result = indexer_sl(s2)["b"] | ||||
|         expected = 2 | ||||
|         assert result == expected | ||||
|  | ||||
|         # mixed index so we have label | ||||
|         # indexing | ||||
|         with pytest.raises(KeyError, match="^1.0$"): | ||||
|             indexer_sl(s3)[1.0] | ||||
|  | ||||
|         if indexer_sl is not tm.loc: | ||||
|             # __getitem__ falls back to positional | ||||
|             msg = "Series.__getitem__ treating keys as positions is deprecated" | ||||
|             with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|                 result = s3[1] | ||||
|             expected = 2 | ||||
|             assert result == expected | ||||
|  | ||||
|         with pytest.raises(KeyError, match=r"^1\.0$"): | ||||
|             indexer_sl(s3)[1.0] | ||||
|  | ||||
|         result = indexer_sl(s3)[1.5] | ||||
|         expected = 3 | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)] | ||||
|     ) | ||||
|     def test_scalar_integer(self, index, frame_or_series, indexer_sl): | ||||
|         """ | ||||
|         A float scalar equal to an integer label works on an integer index. | ||||
|  | ||||
|         Reading with ``3.0`` is checked against positional selection at 3, and | ||||
|         after assigning 100 through ``3.0`` both ``3.0`` and ``3`` read it back. | ||||
|         """ | ||||
|         getitem = indexer_sl is not tm.loc | ||||
|  | ||||
|         # test how scalar float indexers work on int indexes | ||||
|  | ||||
|         # integer index | ||||
|         i = index | ||||
|         obj = gen_obj(frame_or_series, i) | ||||
|  | ||||
|         # coerce to equal int | ||||
|  | ||||
|         result = indexer_sl(obj)[3.0] | ||||
|         self.check(result, obj, 3, getitem) | ||||
|  | ||||
|         # pick the comparator and expected value for the read-back below: | ||||
|         # a scalar for Series, a full row/column Series for DataFrame | ||||
|         if isinstance(obj, Series): | ||||
|  | ||||
|             def compare(x, y): | ||||
|                 assert x == y | ||||
|  | ||||
|             expected = 100 | ||||
|         else: | ||||
|             compare = tm.assert_series_equal | ||||
|             if getitem: | ||||
|                 expected = Series(100, index=range(len(obj)), name=3) | ||||
|             else: | ||||
|                 expected = Series(100.0, index=range(len(obj)), name=3) | ||||
|  | ||||
|         s2 = obj.copy() | ||||
|         indexer_sl(s2)[3.0] = 100 | ||||
|  | ||||
|         result = indexer_sl(s2)[3.0] | ||||
|         compare(result, expected) | ||||
|  | ||||
|         result = indexer_sl(s2)[3] | ||||
|         compare(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)] | ||||
|     ) | ||||
|     def test_scalar_integer_contains_float(self, index, frame_or_series): | ||||
|         # contains | ||||
|         # integer index | ||||
|         obj = gen_obj(frame_or_series, index) | ||||
|  | ||||
|         # coerce to equal int | ||||
|         assert 3.0 in obj | ||||
|  | ||||
|     def test_scalar_float(self, frame_or_series): | ||||
|         """ | ||||
|         Float scalars index a float index by label through loc and setitem. | ||||
|  | ||||
|         A float that is not a label (``3.5``) raises KeyError; ``iloc`` is | ||||
|         exercised separately with an integer position. | ||||
|         """ | ||||
|         # scalar float indexers work on a float index | ||||
|         index = Index(np.arange(5.0)) | ||||
|         s = gen_obj(frame_or_series, index) | ||||
|  | ||||
|         # assert all operations except for iloc are ok | ||||
|         indexer = index[3] | ||||
|         for idxr in [tm.loc, tm.setitem]: | ||||
|             getitem = idxr is not tm.loc | ||||
|  | ||||
|             # getting | ||||
|             result = idxr(s)[indexer] | ||||
|             self.check(result, s, 3, getitem) | ||||
|  | ||||
|             # setting | ||||
|             # NOTE(review): nothing is assigned through idxr here; the copy is | ||||
|             # only read back. Confirm whether a setitem call was intended. | ||||
|             s2 = s.copy() | ||||
|  | ||||
|             result = idxr(s2)[indexer] | ||||
|             self.check(result, s, 3, getitem) | ||||
|  | ||||
|             # random float is a KeyError | ||||
|             with pytest.raises(KeyError, match=r"^3\.5$"): | ||||
|                 idxr(s)[3.5] | ||||
|  | ||||
|         # contains | ||||
|         assert 3.0 in s | ||||
|  | ||||
|         # iloc succeeds with an integer | ||||
|         expected = s.iloc[3] | ||||
|         s2 = s.copy() | ||||
|  | ||||
|         # writes back the value already at position 3, then re-reads it | ||||
|         s2.iloc[3] = expected | ||||
|         result = s2.iloc[3] | ||||
|         self.check(result, s, 3, False) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", | ||||
|         [ | ||||
|             Index(list("abcde"), dtype=object), | ||||
|             date_range("2020-01-01", periods=5), | ||||
|             timedelta_range("1 day", periods=5), | ||||
|             period_range("2020-01-01", periods=5), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) | ||||
|     def test_slice_non_numeric(self, index, idx, frame_or_series, indexer_sli): | ||||
|         """ | ||||
|         Slices with a float bound raise TypeError on non-numeric indexes. | ||||
|  | ||||
|         Both reading and assigning through the slice are checked; the expected | ||||
|         message depends on whether the indexer is ``iloc``. | ||||
|         """ | ||||
|         # GH 4892 | ||||
|         # float_indexers should raise exceptions | ||||
|         # on appropriate Index types & accessors | ||||
|  | ||||
|         s = gen_obj(frame_or_series, index) | ||||
|  | ||||
|         # getitem | ||||
|         if indexer_sli is tm.iloc: | ||||
|             msg = ( | ||||
|                 "cannot do positional indexing " | ||||
|                 rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " | ||||
|                 "type float" | ||||
|             ) | ||||
|         else: | ||||
|             msg = ( | ||||
|                 "cannot do slice indexing " | ||||
|                 rf"on {type(index).__name__} with these indexers " | ||||
|                 r"\[(3|4)(\.0)?\] " | ||||
|                 r"of type (float|int)" | ||||
|             ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             indexer_sli(s)[idx] | ||||
|  | ||||
|         # setitem | ||||
|         if indexer_sli is tm.iloc: | ||||
|             # otherwise we keep the same message as above | ||||
|             msg = "slice indices must be integers or None or have an __index__ method" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             indexer_sli(s)[idx] = 0 | ||||
|  | ||||
|     def test_slice_integer(self): | ||||
|         """ | ||||
|         Float-bounded slices on integer indexes: ``loc`` accepts, ``[]`` raises. | ||||
|  | ||||
|         Runs over three indexes; for the one flagged ``oob`` (labels 10..14) | ||||
|         every ``loc`` slice is expected to come back empty. | ||||
|         """ | ||||
|         # same as above, but for Integer based indexes | ||||
|         # these coerce to a like integer | ||||
|         # oob indicates if we are out of bounds | ||||
|         # of positional indexing | ||||
|         for index, oob in [ | ||||
|             (Index(np.arange(5, dtype=np.int64)), False), | ||||
|             (RangeIndex(5), False), | ||||
|             (Index(np.arange(5, dtype=np.int64) + 10), True), | ||||
|         ]: | ||||
|             # s is an in-range index | ||||
|             s = Series(range(5), index=index) | ||||
|  | ||||
|             # getitem | ||||
|             for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: | ||||
|                 result = s.loc[idx] | ||||
|  | ||||
|                 # these are all label indexing | ||||
|                 # except getitem which is positional | ||||
|                 # empty | ||||
|                 if oob: | ||||
|                     indexer = slice(0, 0) | ||||
|                 else: | ||||
|                     indexer = slice(3, 5) | ||||
|                 self.check(result, s, indexer, False) | ||||
|  | ||||
|             # getitem out-of-bounds | ||||
|             for idx in [slice(-6, 6), slice(-6.0, 6.0)]: | ||||
|                 result = s.loc[idx] | ||||
|  | ||||
|                 # these are all label indexing | ||||
|                 # except getitem which is positional | ||||
|                 # empty | ||||
|                 if oob: | ||||
|                     indexer = slice(0, 0) | ||||
|                 else: | ||||
|                     indexer = slice(-6, 6) | ||||
|                 self.check(result, s, indexer, False) | ||||
|  | ||||
|             # positional indexing | ||||
|             msg = ( | ||||
|                 "cannot do slice indexing " | ||||
|                 rf"on {type(index).__name__} with these indexers \[-6\.0\] of " | ||||
|                 "type float" | ||||
|             ) | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 s[slice(-6.0, 6.0)] | ||||
|  | ||||
|             # getitem odd floats | ||||
|             # each pair is (label slice with a fractional bound, the | ||||
|             # positional slice the loc result is compared against) | ||||
|             for idx, res1 in [ | ||||
|                 (slice(2.5, 4), slice(3, 5)), | ||||
|                 (slice(2, 3.5), slice(2, 4)), | ||||
|                 (slice(2.5, 3.5), slice(3, 4)), | ||||
|             ]: | ||||
|                 result = s.loc[idx] | ||||
|                 if oob: | ||||
|                     res = slice(0, 0) | ||||
|                 else: | ||||
|                     res = res1 | ||||
|  | ||||
|                 self.check(result, s, res, False) | ||||
|  | ||||
|                 # positional indexing | ||||
|                 msg = ( | ||||
|                     "cannot do slice indexing " | ||||
|                     rf"on {type(index).__name__} with these indexers \[(2|3)\.5\] of " | ||||
|                     "type float" | ||||
|                 ) | ||||
|                 with pytest.raises(TypeError, match=msg): | ||||
|                     s[idx] | ||||
|  | ||||
|     @pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]) | ||||
|     def test_integer_positional_indexing(self, idx): | ||||
|         """make sure that we are raising on positional indexing | ||||
|         w.r.t. an integer index | ||||
|         """ | ||||
|         s = Series(range(2, 6), index=range(2, 6)) | ||||
|  | ||||
|         result = s[2:4] | ||||
|         expected = s.iloc[2:4] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         klass = RangeIndex | ||||
|         msg = ( | ||||
|             "cannot do (slice|positional) indexing " | ||||
|             rf"on {klass.__name__} with these indexers \[(2|4)\.0\] of " | ||||
|             "type float" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s[idx] | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s.iloc[idx] | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)] | ||||
|     ) | ||||
|     def test_slice_integer_frame_getitem(self, index): | ||||
|         # similar to above, but on the getitem dim (of a DataFrame) | ||||
|         s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index) | ||||
|  | ||||
|         # getitem | ||||
|         for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: | ||||
|             result = s.loc[idx] | ||||
|             indexer = slice(0, 2) | ||||
|             self.check(result, s, indexer, False) | ||||
|  | ||||
|             # positional indexing | ||||
|             msg = ( | ||||
|                 "cannot do slice indexing " | ||||
|                 rf"on {type(index).__name__} with these indexers \[(0|1)\.0\] of " | ||||
|                 "type float" | ||||
|             ) | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 s[idx] | ||||
|  | ||||
|         # getitem out-of-bounds | ||||
|         for idx in [slice(-10, 10), slice(-10.0, 10.0)]: | ||||
|             result = s.loc[idx] | ||||
|             self.check(result, s, slice(-10, 10), True) | ||||
|  | ||||
|         # positional indexing | ||||
|         msg = ( | ||||
|             "cannot do slice indexing " | ||||
|             rf"on {type(index).__name__} with these indexers \[-10\.0\] of " | ||||
|             "type float" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s[slice(-10.0, 10.0)] | ||||
|  | ||||
|         # getitem odd floats | ||||
|         for idx, res in [ | ||||
|             (slice(0.5, 1), slice(1, 2)), | ||||
|             (slice(0, 0.5), slice(0, 1)), | ||||
|             (slice(0.5, 1.5), slice(1, 2)), | ||||
|         ]: | ||||
|             result = s.loc[idx] | ||||
|             self.check(result, s, res, False) | ||||
|  | ||||
|             # positional indexing | ||||
|             msg = ( | ||||
|                 "cannot do slice indexing " | ||||
|                 rf"on {type(index).__name__} with these indexers \[0\.5\] of " | ||||
|                 "type float" | ||||
|             ) | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 s[idx] | ||||
|  | ||||
|     @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)] | ||||
|     ) | ||||
|     def test_float_slice_getitem_with_integer_index_raises(self, idx, index): | ||||
|         # similar to above, but on the getitem dim (of a DataFrame) | ||||
|         s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index) | ||||
|  | ||||
|         # setitem | ||||
|         sc = s.copy() | ||||
|         sc.loc[idx] = 0 | ||||
|         result = sc.loc[idx].values.ravel() | ||||
|         assert (result == 0).all() | ||||
|  | ||||
|         # positional indexing | ||||
|         msg = ( | ||||
|             "cannot do slice indexing " | ||||
|             rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " | ||||
|             "type float" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s[idx] = 0 | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s[idx] | ||||
|  | ||||
|     @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) | ||||
|     def test_slice_float(self, idx, frame_or_series, indexer_sl): | ||||
|         # same as above, but for floats | ||||
|         index = Index(np.arange(5.0)) + 0.1 | ||||
|         s = gen_obj(frame_or_series, index) | ||||
|  | ||||
|         expected = s.iloc[3:4] | ||||
|  | ||||
|         # getitem | ||||
|         result = indexer_sl(s)[idx] | ||||
|         assert isinstance(result, type(s)) | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|         # setitem | ||||
|         s2 = s.copy() | ||||
|         indexer_sl(s2)[idx] = 0 | ||||
|         result = indexer_sl(s2)[idx].values.ravel() | ||||
|         assert (result == 0).all() | ||||
|  | ||||
|     def test_floating_index_doc_example(self): | ||||
|         # Series values are 0..4, so each value equals its position | ||||
|         index = Index([1.5, 2, 3, 4.5, 5]) | ||||
|         s = Series(range(5), index=index) | ||||
|         # [] and .loc look up the label 3, which sits at position 2 | ||||
|         assert s[3] == 2 | ||||
|         assert s.loc[3] == 2 | ||||
|         # .iloc is positional: position 3 holds the value 3 | ||||
|         assert s.iloc[3] == 3 | ||||
|  | ||||
|     def test_floating_misc(self, indexer_sl): | ||||
|         # related 236 (presumably a GitHub issue number -- TODO confirm) | ||||
|         # scalar/slicing of a float index | ||||
|         s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64) | ||||
|  | ||||
|         # label based slicing | ||||
|         result = indexer_sl(s)[1.0:3.0] | ||||
|         expected = Series(1, index=[2.5]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # exact indexing when found | ||||
|  | ||||
|         result = indexer_sl(s)[5.0] | ||||
|         assert result == 2 | ||||
|  | ||||
|         result = indexer_sl(s)[5] | ||||
|         assert result == 2 | ||||
|  | ||||
|         # value not found (and no fallback to positional indexing) | ||||
|  | ||||
|         # scalar integers | ||||
|         with pytest.raises(KeyError, match=r"^4$"): | ||||
|             indexer_sl(s)[4] | ||||
|  | ||||
|         # list-like float keys (list or ndarray) select the matching labels | ||||
|         # and the result keeps a float64 index | ||||
|         expected = Series([2, 0], index=Index([5.0, 0.0], dtype=np.float64)) | ||||
|         for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float | ||||
|             tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) | ||||
|  | ||||
|         expected = Series([2, 0], index=Index([5, 0], dtype="float64")) | ||||
|         for fancy_idx in [[5, 0], np.array([5, 0])]: | ||||
|             tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) | ||||
|  | ||||
|         # only the [] indexer (tm.setitem) is expected to emit the FutureWarning | ||||
|         # for a slice with integer bounds on this float index | ||||
|         warn = FutureWarning if indexer_sl is tm.setitem else None | ||||
|         msg = r"The behavior of obj\[i:j\] with a float-dtype index" | ||||
|  | ||||
|         # all should return the same as we are slicing 'the same' | ||||
|         with tm.assert_produces_warning(warn, match=msg): | ||||
|             result1 = indexer_sl(s)[2:5] | ||||
|         result2 = indexer_sl(s)[2.0:5.0] | ||||
|         result3 = indexer_sl(s)[2.0:5] | ||||
|         result4 = indexer_sl(s)[2.1:5] | ||||
|         tm.assert_series_equal(result1, result2) | ||||
|         tm.assert_series_equal(result1, result3) | ||||
|         tm.assert_series_equal(result1, result4) | ||||
|  | ||||
|         expected = Series([1, 2], index=[2.5, 5.0]) | ||||
|         with tm.assert_produces_warning(warn, match=msg): | ||||
|             result = indexer_sl(s)[2:5] | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # list selection | ||||
|         result1 = indexer_sl(s)[[0.0, 5, 10]] | ||||
|         result2 = s.iloc[[0, 2, 4]] | ||||
|         tm.assert_series_equal(result1, result2) | ||||
|  | ||||
|         # a list containing any label that is missing from the index raises | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             indexer_sl(s)[[1.6, 5, 10]] | ||||
|  | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             indexer_sl(s)[[0, 1, 2]] | ||||
|  | ||||
|         result = indexer_sl(s)[[2.5, 5]] | ||||
|         tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0])) | ||||
|  | ||||
|         result = indexer_sl(s)[[2.5]] | ||||
|         tm.assert_series_equal(result, Series([1], index=[2.5])) | ||||
|  | ||||
|     def test_floatindex_slicing_bug(self, float_numpy_dtype): | ||||
|         # GH 5557, related to slicing a float index | ||||
|         dtype = float_numpy_dtype | ||||
|         ser = { | ||||
|             256: 2321.0, | ||||
|             1: 78.0, | ||||
|             2: 2716.0, | ||||
|             3: 0.0, | ||||
|             4: 369.0, | ||||
|             5: 0.0, | ||||
|             6: 269.0, | ||||
|             7: 0.0, | ||||
|             8: 0.0, | ||||
|             9: 0.0, | ||||
|             10: 3536.0, | ||||
|             11: 0.0, | ||||
|             12: 24.0, | ||||
|             13: 0.0, | ||||
|             14: 931.0, | ||||
|             15: 0.0, | ||||
|             16: 101.0, | ||||
|             17: 78.0, | ||||
|             18: 9643.0, | ||||
|             19: 0.0, | ||||
|             20: 0.0, | ||||
|             21: 0.0, | ||||
|             22: 63761.0, | ||||
|             23: 0.0, | ||||
|             24: 446.0, | ||||
|             25: 0.0, | ||||
|             26: 34773.0, | ||||
|             27: 0.0, | ||||
|             28: 729.0, | ||||
|             29: 78.0, | ||||
|             30: 0.0, | ||||
|             31: 0.0, | ||||
|             32: 3374.0, | ||||
|             33: 0.0, | ||||
|             34: 1391.0, | ||||
|             35: 0.0, | ||||
|             36: 361.0, | ||||
|             37: 0.0, | ||||
|             38: 61808.0, | ||||
|             39: 0.0, | ||||
|             40: 0.0, | ||||
|             41: 0.0, | ||||
|             42: 6677.0, | ||||
|             43: 0.0, | ||||
|             44: 802.0, | ||||
|             45: 0.0, | ||||
|             46: 2691.0, | ||||
|             47: 0.0, | ||||
|             48: 3582.0, | ||||
|             49: 0.0, | ||||
|             50: 734.0, | ||||
|             51: 0.0, | ||||
|             52: 627.0, | ||||
|             53: 70.0, | ||||
|             54: 2584.0, | ||||
|             55: 0.0, | ||||
|             56: 324.0, | ||||
|             57: 0.0, | ||||
|             58: 605.0, | ||||
|             59: 0.0, | ||||
|             60: 0.0, | ||||
|             61: 0.0, | ||||
|             62: 3989.0, | ||||
|             63: 10.0, | ||||
|             64: 42.0, | ||||
|             65: 0.0, | ||||
|             66: 904.0, | ||||
|             67: 0.0, | ||||
|             68: 88.0, | ||||
|             69: 70.0, | ||||
|             70: 8172.0, | ||||
|             71: 0.0, | ||||
|             72: 0.0, | ||||
|             73: 0.0, | ||||
|             74: 64902.0, | ||||
|             75: 0.0, | ||||
|             76: 347.0, | ||||
|             77: 0.0, | ||||
|             78: 36605.0, | ||||
|             79: 0.0, | ||||
|             80: 379.0, | ||||
|             81: 70.0, | ||||
|             82: 0.0, | ||||
|             83: 0.0, | ||||
|             84: 3001.0, | ||||
|             85: 0.0, | ||||
|             86: 1630.0, | ||||
|             87: 7.0, | ||||
|             88: 364.0, | ||||
|             89: 0.0, | ||||
|             90: 67404.0, | ||||
|             91: 9.0, | ||||
|             92: 0.0, | ||||
|             93: 0.0, | ||||
|             94: 7685.0, | ||||
|             95: 0.0, | ||||
|             96: 1017.0, | ||||
|             97: 0.0, | ||||
|             98: 2831.0, | ||||
|             99: 0.0, | ||||
|             100: 2963.0, | ||||
|             101: 0.0, | ||||
|             102: 854.0, | ||||
|             103: 0.0, | ||||
|             104: 0.0, | ||||
|             105: 0.0, | ||||
|             106: 0.0, | ||||
|             107: 0.0, | ||||
|             108: 0.0, | ||||
|             109: 0.0, | ||||
|             110: 0.0, | ||||
|             111: 0.0, | ||||
|             112: 0.0, | ||||
|             113: 0.0, | ||||
|             114: 0.0, | ||||
|             115: 0.0, | ||||
|             116: 0.0, | ||||
|             117: 0.0, | ||||
|             118: 0.0, | ||||
|             119: 0.0, | ||||
|             120: 0.0, | ||||
|             121: 0.0, | ||||
|             122: 0.0, | ||||
|             123: 0.0, | ||||
|             124: 0.0, | ||||
|             125: 0.0, | ||||
|             126: 67744.0, | ||||
|             127: 22.0, | ||||
|             128: 264.0, | ||||
|             129: 0.0, | ||||
|             260: 197.0, | ||||
|             268: 0.0, | ||||
|             265: 0.0, | ||||
|             269: 0.0, | ||||
|             261: 0.0, | ||||
|             266: 1198.0, | ||||
|             267: 0.0, | ||||
|             262: 2629.0, | ||||
|             258: 775.0, | ||||
|             257: 0.0, | ||||
|             263: 0.0, | ||||
|             259: 0.0, | ||||
|             264: 163.0, | ||||
|             250: 10326.0, | ||||
|             251: 0.0, | ||||
|             252: 1228.0, | ||||
|             253: 0.0, | ||||
|             254: 2769.0, | ||||
|             255: 0.0, | ||||
|         } | ||||
|  | ||||
|         # smoke test for the repr | ||||
|         s = Series(ser, dtype=dtype) | ||||
|         result = s.value_counts() | ||||
|         assert result.index.dtype == dtype | ||||
|         str(result) | ||||
| @ -0,0 +1,53 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_iat(float_frame): | ||||
|     # positional access via iat must agree with label access via at | ||||
|     # for every (row, column) cell of the frame | ||||
|     for i, row in enumerate(float_frame.index): | ||||
|         for j, col in enumerate(float_frame.columns): | ||||
|             result = float_frame.iat[i, j] | ||||
|             expected = float_frame.at[row, col] | ||||
|             assert result == expected | ||||
|  | ||||
|  | ||||
| def test_iat_duplicate_columns(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/11754 | ||||
|     df = DataFrame([[1, 2]], columns=["x", "x"]) | ||||
|     assert df.iat[0, 0] == 1 | ||||
|  | ||||
|  | ||||
| def test_iat_getitem_series_with_period_index(): | ||||
|     # GH#4390, iat incorrectly indexing | ||||
|     index = period_range("1/1/2001", periods=10) | ||||
|     ser = Series(np.random.default_rng(2).standard_normal(10), index=index) | ||||
|     expected = ser[index[0]] | ||||
|     result = ser.iat[0] | ||||
|     assert expected == result | ||||
|  | ||||
|  | ||||
| def test_iat_setitem_item_cache_cleared( | ||||
|     indexer_ial, using_copy_on_write, warn_copy_on_write | ||||
| ): | ||||
|     # GH#45684 | ||||
|     data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)} | ||||
|     df = DataFrame(data).copy() | ||||
|     # keep a reference to column "y" that is taken before the writes below | ||||
|     ser = df["y"] | ||||
|  | ||||
|     # previously this iat setting would split the block and fail to clear | ||||
|     #  the item_cache. | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         indexer_ial(df)[7, 0] = 9999 | ||||
|  | ||||
|     with tm.assert_cow_warning(warn_copy_on_write): | ||||
|         indexer_ial(df)[7, 1] = 1234 | ||||
|  | ||||
|     # the value written to the last row of column 1 must be visible through | ||||
|     # iat and iloc; without copy-on-write it must also show up in `ser` | ||||
|     assert df.iat[7, 1] == 1234 | ||||
|     if not using_copy_on_write: | ||||
|         assert ser.iloc[-1] == 1234 | ||||
|     assert df.iloc[-1, -1] == 1234 | ||||
|     assert df.iloc[-1, -1] == 1234 | ||||
							
								
								
									
										1484
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/test_iloc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1484
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/test_iloc.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,61 @@ | ||||
| # Tests aimed at pandas.core.indexers | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.indexers import ( | ||||
|     is_scalar_indexer, | ||||
|     length_of_indexer, | ||||
|     validate_indices, | ||||
| ) | ||||
|  | ||||
|  | ||||
| def test_length_of_indexer(): | ||||
|     arr = np.zeros(4, dtype=bool) | ||||
|     arr[0] = 1 | ||||
|     result = length_of_indexer(arr) | ||||
|     assert result == 1 | ||||
|  | ||||
|  | ||||
| def test_is_scalar_indexer(): | ||||
|     # a tuple of two ints is a scalar indexer for ndim=2; a bare int is not | ||||
|     indexer = (0, 1) | ||||
|     assert is_scalar_indexer(indexer, 2) | ||||
|     assert not is_scalar_indexer(indexer[0], 2) | ||||
|  | ||||
|     # any ndarray component (even of length 1) makes the indexer non-scalar | ||||
|     indexer = (np.array([2]), 1) | ||||
|     assert not is_scalar_indexer(indexer, 2) | ||||
|  | ||||
|     indexer = (np.array([2]), np.array([3])) | ||||
|     assert not is_scalar_indexer(indexer, 2) | ||||
|  | ||||
|     indexer = (np.array([2]), np.array([3, 4])) | ||||
|     assert not is_scalar_indexer(indexer, 2) | ||||
|  | ||||
|     # a slice is not a scalar indexer | ||||
|     assert not is_scalar_indexer(slice(None), 1) | ||||
|  | ||||
|     # for ndim=1 both a bare int and a 1-tuple of int count as scalar | ||||
|     indexer = 0 | ||||
|     assert is_scalar_indexer(indexer, 1) | ||||
|  | ||||
|     indexer = (0,) | ||||
|     assert is_scalar_indexer(indexer, 1) | ||||
|  | ||||
|  | ||||
| class TestValidateIndices: | ||||
|     # Tests for validate_indices(indices, n) as exercised below: | ||||
|     # accepted inputs, values below -1, and values that are too large for n. | ||||
|     def test_validate_indices_ok(self): | ||||
|         # in-range indices, an empty selection, and -1 (even with n=0) | ||||
|         # must all pass without raising | ||||
|         indices = np.asarray([0, 1]) | ||||
|         validate_indices(indices, 2) | ||||
|         validate_indices(indices[:0], 0) | ||||
|         validate_indices(np.array([-1, -1]), 0) | ||||
|  | ||||
|     def test_validate_indices_low(self): | ||||
|         # a value below -1 raises ValueError | ||||
|         indices = np.asarray([0, -2]) | ||||
|         with pytest.raises(ValueError, match="'indices' contains"): | ||||
|             validate_indices(indices, 2) | ||||
|  | ||||
|     def test_validate_indices_high(self): | ||||
|         # an index equal to n is out of bounds and raises IndexError | ||||
|         indices = np.asarray([0, 1, 2]) | ||||
|         with pytest.raises(IndexError, match="indices are out"): | ||||
|             validate_indices(indices, 2) | ||||
|  | ||||
|     def test_validate_indices_empty(self): | ||||
|         # with n=0 any non-negative index is out of bounds | ||||
|         with pytest.raises(IndexError, match="indices are out"): | ||||
|             validate_indices(np.array([0, 1]), 0) | ||||
							
								
								
									
										1157
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/test_indexing.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1157
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/test_indexing.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										3392
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/test_loc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3392
									
								
								lib/python3.11/site-packages/pandas/tests/indexing/test_loc.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,75 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "values, dtype", | ||||
|     [ | ||||
|         ([], "object"), | ||||
|         ([1, 2, 3], "int64"), | ||||
|         ([1.0, 2.0, 3.0], "float64"), | ||||
|         (["a", "b", "c"], "object"), | ||||
|         (["a", "b", "c"], "string"), | ||||
|         ([1, 2, 3], "datetime64[ns]"), | ||||
|         ([1, 2, 3], "datetime64[ns, CET]"), | ||||
|         ([1, 2, 3], "timedelta64[ns]"), | ||||
|         (["2000", "2001", "2002"], "Period[D]"), | ||||
|         ([1, 0, 3], "Sparse"), | ||||
|         ([pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(3, 4)], "interval"), | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize( | ||||
|     "mask", [[True, False, False], [True, True, True], [False, False, False]] | ||||
| ) | ||||
| @pytest.mark.parametrize("indexer_class", [list, pd.array, pd.Index, pd.Series]) | ||||
| @pytest.mark.parametrize("frame", [True, False]) | ||||
| def test_series_mask_boolean(values, dtype, mask, indexer_class, frame): | ||||
|     # In case len(values) < 3 | ||||
|     index = ["a", "b", "c"][: len(values)] | ||||
|     mask = mask[: len(values)] | ||||
|  | ||||
|     obj = pd.Series(values, dtype=dtype, index=index) | ||||
|     if frame: | ||||
|         if len(values) == 0: | ||||
|             # Otherwise obj is an empty DataFrame with shape (0, 1) | ||||
|             obj = pd.DataFrame(dtype=dtype, index=index) | ||||
|         else: | ||||
|             obj = obj.to_frame() | ||||
|  | ||||
|     if indexer_class is pd.array: | ||||
|         mask = pd.array(mask, dtype="boolean") | ||||
|     elif indexer_class is pd.Series: | ||||
|         mask = pd.Series(mask, index=obj.index, dtype="boolean") | ||||
|     else: | ||||
|         mask = indexer_class(mask) | ||||
|  | ||||
|     expected = obj[mask] | ||||
|  | ||||
|     result = obj[mask] | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|     if indexer_class is pd.Series: | ||||
|         msg = "iLocation based boolean indexing cannot use an indexable as a mask" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             result = obj.iloc[mask] | ||||
|             tm.assert_equal(result, expected) | ||||
|     else: | ||||
|         result = obj.iloc[mask] | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     result = obj.loc[mask] | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_na_treated_as_false(frame_or_series, indexer_sli): | ||||
|     # https://github.com/pandas-dev/pandas/issues/31503 | ||||
|     obj = frame_or_series([1, 2, 3]) | ||||
|  | ||||
|     # nullable boolean mask whose last entry is missing | ||||
|     mask = pd.array([True, False, None], dtype="boolean") | ||||
|  | ||||
|     # indexing with the NA-containing mask must match indexing with the | ||||
|     # same mask after the NA has been replaced by False | ||||
|     result = indexer_sli(obj)[mask] | ||||
|     expected = indexer_sli(obj)[mask.fillna(False)] | ||||
|  | ||||
|     tm.assert_equal(result, expected) | ||||
| @ -0,0 +1,696 @@ | ||||
| """ | ||||
| test setting *parts* of objects both positionally and label based | ||||
|  | ||||
| TODO: these should be split among the indexer tests | ||||
| """ | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Period, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestEmptyFrameSetitemExpansion: | ||||
|     def test_empty_frame_setitem_index_name_retained(self): | ||||
|         # GH#31368 empty frame has non-None index.name -> retained | ||||
|         df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) | ||||
|         series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) | ||||
|  | ||||
|         df["series"] = series | ||||
|         expected = DataFrame( | ||||
|             {"series": [1.23] * 4}, | ||||
|             index=pd.RangeIndex(4, name="df_index"), | ||||
|             columns=Index(["series"]), | ||||
|         ) | ||||
|  | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_empty_frame_setitem_index_name_inherited(self): | ||||
|         # GH#36527 empty frame has None index.name -> the index name of the | ||||
|         # assigned Series ("series_index") is inherited by the frame | ||||
|         df = DataFrame() | ||||
|         series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) | ||||
|         df["series"] = series | ||||
|         expected = DataFrame( | ||||
|             {"series": [1.23] * 4}, | ||||
|             index=pd.RangeIndex(4, name="series_index"), | ||||
|             columns=Index(["series"]), | ||||
|         ) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_loc_setitem_zerolen_series_columns_align(self): | ||||
|         # columns will align | ||||
|         df = DataFrame(columns=["A", "B"]) | ||||
|         df.loc[0] = Series(1, index=range(4)) | ||||
|         expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # columns will align | ||||
|         df = DataFrame(columns=["A", "B"]) | ||||
|         df.loc[0] = Series(1, index=["B"]) | ||||
|  | ||||
|         exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") | ||||
|         tm.assert_frame_equal(df, exp) | ||||
|  | ||||
|     def test_loc_setitem_zerolen_list_length_must_match_columns(self): | ||||
|         # list-like must conform | ||||
|         df = DataFrame(columns=["A", "B"]) | ||||
|  | ||||
|         msg = "cannot set a row with mismatched columns" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[0] = [1, 2, 3] | ||||
|  | ||||
|         df = DataFrame(columns=["A", "B"]) | ||||
|         df.loc[3] = [6, 7]  # length matches len(df.columns) --> OK! | ||||
|  | ||||
|         exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64) | ||||
|         tm.assert_frame_equal(df, exp) | ||||
|  | ||||
|     def test_partial_set_empty_frame(self): | ||||
|         # partially set with an empty object | ||||
|         # frame | ||||
|         df = DataFrame() | ||||
|  | ||||
|         msg = "cannot set a frame with no defined columns" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[1] = 1 | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[1] = Series([1], index=["foo"]) | ||||
|  | ||||
|         msg = "cannot set a frame with no defined index and a scalar" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             df.loc[:, 1] = 1 | ||||
|  | ||||
|     def test_partial_set_empty_frame2(self): | ||||
|         # these work as they don't really change | ||||
|         # anything but the index | ||||
|         # GH#5632 | ||||
|         expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="object")) | ||||
|  | ||||
|         df = DataFrame(index=Index([], dtype="object")) | ||||
|         df["foo"] = Series([], dtype="object") | ||||
|  | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = DataFrame(index=Index([])) | ||||
|         df["foo"] = Series(df.index) | ||||
|  | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = DataFrame(index=Index([])) | ||||
|         df["foo"] = df.index | ||||
|  | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_set_empty_frame3(self): | ||||
|         expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64")) | ||||
|         expected["foo"] = expected["foo"].astype("float64") | ||||
|  | ||||
|         df = DataFrame(index=Index([], dtype="int64")) | ||||
|         df["foo"] = [] | ||||
|  | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = DataFrame(index=Index([], dtype="int64")) | ||||
|         df["foo"] = Series(np.arange(len(df)), dtype="float64") | ||||
|  | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_set_empty_frame4(self): | ||||
|         df = DataFrame(index=Index([], dtype="int64")) | ||||
|         df["foo"] = range(len(df)) | ||||
|  | ||||
|         expected = DataFrame(columns=Index(["foo"]), index=Index([], dtype="int64")) | ||||
|         # range is int-dtype-like, so we get int64 dtype | ||||
|         expected["foo"] = expected["foo"].astype("int64") | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_set_empty_frame5(self): | ||||
|         df = DataFrame() | ||||
|         tm.assert_index_equal(df.columns, pd.RangeIndex(0)) | ||||
|         df2 = DataFrame() | ||||
|         df2[1] = Series([1], index=["foo"]) | ||||
|         df.loc[:, 1] = Series([1], index=["foo"]) | ||||
|         tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) | ||||
|         tm.assert_frame_equal(df, df2) | ||||
|  | ||||
|     def test_partial_set_empty_frame_no_index(self): | ||||
|         # no index to start | ||||
|         expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0]) | ||||
|  | ||||
|         df = DataFrame(columns=["A", "B"]) | ||||
|         df[0] = Series(1, index=range(4)) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = DataFrame(columns=["A", "B"]) | ||||
|         df.loc[:, 0] = Series(1, index=range(4)) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_set_empty_frame_row(self): | ||||
|         # GH#5720, GH#5744 | ||||
|         # don't create rows when empty | ||||
|         expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64")) | ||||
|         expected["A"] = expected["A"].astype("int64") | ||||
|         expected["B"] = expected["B"].astype("float64") | ||||
|         expected["New"] = expected["New"].astype("float64") | ||||
|  | ||||
|         df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) | ||||
|         y = df[df.A > 5] | ||||
|         y["New"] = np.nan | ||||
|         tm.assert_frame_equal(y, expected) | ||||
|  | ||||
|         expected = DataFrame(columns=["a", "b", "c c", "d"]) | ||||
|         expected["d"] = expected["d"].astype("int64") | ||||
|         df = DataFrame(columns=["a", "b", "c c"]) | ||||
|         df["d"] = 3 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|         tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object)) | ||||
|  | ||||
|         # reindex columns is ok | ||||
|         df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) | ||||
|         y = df[df.A > 5] | ||||
|         result = y.reindex(columns=["A", "B", "C"]) | ||||
|         expected = DataFrame(columns=["A", "B", "C"]) | ||||
|         expected["A"] = expected["A"].astype("int64") | ||||
|         expected["B"] = expected["B"].astype("float64") | ||||
|         expected["C"] = expected["C"].astype("float64") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_partial_set_empty_frame_set_series(self): | ||||
|         # GH#5756 | ||||
|         # setting with empty Series | ||||
|         df = DataFrame(Series(dtype=object)) | ||||
|         expected = DataFrame({0: Series(dtype=object)}) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = DataFrame(Series(name="foo", dtype=object)) | ||||
|         expected = DataFrame({"foo": Series(dtype=object)}) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_set_empty_frame_empty_copy_assignment(self): | ||||
|         # GH#5932 | ||||
|         # copy on empty with assignment fails | ||||
|         df = DataFrame(index=[0]) | ||||
|         df = df.copy() | ||||
|         df["a"] = 0 | ||||
|         expected = DataFrame(0, index=[0], columns=Index(["a"])) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string): | ||||
|         # GH#6171 | ||||
|         # consistency on empty frames | ||||
|         df = DataFrame(columns=["x", "y"]) | ||||
|         df["x"] = [1, 2] | ||||
|         expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]}) | ||||
|         tm.assert_frame_equal(df, expected, check_dtype=False) | ||||
|  | ||||
|         df = DataFrame(columns=["x", "y"]) | ||||
|         df["x"] = ["1", "2"] | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "x": Series( | ||||
|                     ["1", "2"], | ||||
|                     dtype=object if not using_infer_string else "str", | ||||
|                 ), | ||||
|                 "y": Series([np.nan, np.nan], dtype=object), | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = DataFrame(columns=["x", "y"]) | ||||
|         df.loc[0, "x"] = 1 | ||||
|         expected = DataFrame({"x": [1], "y": [np.nan]}) | ||||
|         tm.assert_frame_equal(df, expected, check_dtype=False) | ||||
|  | ||||
|  | ||||
| class TestPartialSetting: | ||||
|     def test_partial_setting(self): | ||||
|         # GH2578, allow ix and friends to partially set | ||||
|  | ||||
|         # series | ||||
|         s_orig = Series([1, 2, 3]) | ||||
|  | ||||
|         s = s_orig.copy() | ||||
|         s[5] = 5 | ||||
|         expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) | ||||
|         tm.assert_series_equal(s, expected) | ||||
|  | ||||
|         s = s_orig.copy() | ||||
|         s.loc[5] = 5 | ||||
|         expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) | ||||
|         tm.assert_series_equal(s, expected) | ||||
|  | ||||
|         s = s_orig.copy() | ||||
|         s[5] = 5.0 | ||||
|         expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) | ||||
|         tm.assert_series_equal(s, expected) | ||||
|  | ||||
|         s = s_orig.copy() | ||||
|         s.loc[5] = 5.0 | ||||
|         expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) | ||||
|         tm.assert_series_equal(s, expected) | ||||
|  | ||||
|         # iloc/iat raise | ||||
|         s = s_orig.copy() | ||||
|  | ||||
|         msg = "iloc cannot enlarge its target object" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             s.iloc[3] = 5.0 | ||||
|  | ||||
|         msg = "index 3 is out of bounds for axis 0 with size 3" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             s.iat[3] = 5.0 | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") | ||||
|     def test_partial_setting_frame(self, using_array_manager): | ||||
|         # Frame counterpart of test_partial_setting: positional setters | ||||
|         # (iloc/iat) raise when out of bounds, while .loc may overwrite | ||||
|         # existing rows/columns or enlarge the frame. | ||||
|         # NOTE: sections below reuse `df` from the section before them, so the | ||||
|         # statement order matters. | ||||
|         df_orig = DataFrame( | ||||
|             np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64" | ||||
|         ) | ||||
|  | ||||
|         # iloc/iat raise | ||||
|         df = df_orig.copy() | ||||
|  | ||||
|         msg = "iloc cannot enlarge its target object" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             df.iloc[4, 2] = 5.0 | ||||
|  | ||||
|         # the expected iat message depends on the using_array_manager fixture | ||||
|         msg = "index 2 is out of bounds for axis 0 with size 2" | ||||
|         if using_array_manager: | ||||
|             msg = "list index out of range" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             df.iat[4, 2] = 5.0 | ||||
|  | ||||
|         # row setting where it exists | ||||
|         expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]}) | ||||
|         df = df_orig.copy() | ||||
|         df.iloc[1] = df.iloc[2] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # same overwrite, label-based | ||||
|         expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]}) | ||||
|         df = df_orig.copy() | ||||
|         df.loc[1] = df.loc[2] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # like 2578, partial setting with dtype preservation | ||||
|         # (row label 3 does not exist yet, so the frame grows by one row) | ||||
|         expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]}) | ||||
|         df = df_orig.copy() | ||||
|         df.loc[3] = df.loc[2] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # single dtype frame, overwrite | ||||
|         expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]}) | ||||
|         df = df_orig.copy() | ||||
|         df.loc[:, "B"] = df.loc[:, "A"] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # mixed dtype frame, overwrite | ||||
|         expected = DataFrame({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])}) | ||||
|         df = df_orig.copy() | ||||
|         df["B"] = df["B"].astype(np.float64) | ||||
|         # as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at | ||||
|         #  setting inplace | ||||
|         df.loc[:, "B"] = df.loc[:, "A"] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # single dtype frame, partial setting | ||||
|         # (`df` on the right-hand side is still the frame from the section above) | ||||
|         expected = df_orig.copy() | ||||
|         expected["C"] = df["A"] | ||||
|         df = df_orig.copy() | ||||
|         df.loc[:, "C"] = df.loc[:, "A"] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # mixed frame, partial setting | ||||
|         # NOTE(review): this section repeats the previous one statement for | ||||
|         # statement; no mixed-dtype frame is built here. | ||||
|         expected = df_orig.copy() | ||||
|         expected["C"] = df["A"] | ||||
|         df = df_orig.copy() | ||||
|         df.loc[:, "C"] = df.loc[:, "A"] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_setting2(self): | ||||
|         # GH 8473 | ||||
|         dates = date_range("1/1/2000", periods=8) | ||||
|         df_orig = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((8, 4)), | ||||
|             index=dates, | ||||
|             columns=["A", "B", "C", "D"], | ||||
|         ) | ||||
|  | ||||
|         expected = pd.concat( | ||||
|             [df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True | ||||
|         ) | ||||
|         df = df_orig.copy() | ||||
|         df.loc[dates[-1] + dates.freq, "A"] = 7 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|         df = df_orig.copy() | ||||
|         df.at[dates[-1] + dates.freq, "A"] = 7 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq) | ||||
|         expected = pd.concat([df_orig, exp_other], axis=1) | ||||
|  | ||||
|         df = df_orig.copy() | ||||
|         df.loc[dates[-1] + dates.freq, 0] = 7 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|         df = df_orig.copy() | ||||
|         df.at[dates[-1] + dates.freq, 0] = 7 | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_setting_mixed_dtype(self): | ||||
|         # in a mixed dtype environment, try to preserve dtypes | ||||
|         # by appending | ||||
|         df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) | ||||
|  | ||||
|         s = df.loc[1].copy() | ||||
|         s.name = 2 | ||||
|         expected = pd.concat([df, DataFrame(s).T.infer_objects()]) | ||||
|  | ||||
|         df.loc[2] = df.loc[1] | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_series_partial_set(self): | ||||
|         # partial set with new index | ||||
|         # Regression from GH4825 | ||||
|         # | ||||
|         # Pattern used throughout: selecting a list of labels with .loc raises | ||||
|         # KeyError as soon as one label is missing, while .reindex with the | ||||
|         # same labels returns NaN for the missing ones. | ||||
|         ser = Series([0.1, 0.2], index=[1, 2]) | ||||
|  | ||||
|         # loc equiv to .reindex | ||||
|         expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) | ||||
|         with pytest.raises(KeyError, match=r"not in index"): | ||||
|             ser.loc[[3, 2, 3]] | ||||
|  | ||||
|         result = ser.reindex([3, 2, 3]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         # mixed int/str labels | ||||
|         expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"]) | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             ser.loc[[3, 2, 3, "x"]] | ||||
|  | ||||
|         result = ser.reindex([3, 2, 3, "x"]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         # every label present (with repeats): .loc succeeds | ||||
|         expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) | ||||
|         result = ser.loc[[2, 2, 1]] | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             ser.loc[[2, 2, "x", 1]] | ||||
|  | ||||
|         result = ser.reindex([2, 2, "x", 1]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         # raises as nothing is in the index | ||||
|         # (np.dtype(int) is interpolated into the expected message) | ||||
|         msg = ( | ||||
|             rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}'\)\] " | ||||
|             r"are in the \[index\]\"" | ||||
|         ) | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             ser.loc[[3, 3, 3]] | ||||
|  | ||||
|         expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             ser.loc[[2, 2, 3]] | ||||
|  | ||||
|         result = ser.reindex([2, 2, 3]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         # the remaining cases repeat the pattern on fresh series `s` with | ||||
|         # different indexes and positions of the missing labels | ||||
|         s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) | ||||
|         expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             s.loc[[3, 4, 4]] | ||||
|  | ||||
|         result = s.reindex([3, 4, 4]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) | ||||
|         expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             s.loc[[5, 3, 3]] | ||||
|  | ||||
|         result = s.reindex([5, 3, 3]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) | ||||
|         expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             s.loc[[5, 4, 4]] | ||||
|  | ||||
|         result = s.reindex([5, 4, 4]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]) | ||||
|         expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             s.loc[[7, 2, 2]] | ||||
|  | ||||
|         result = s.reindex([7, 2, 2]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) | ||||
|         expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             s.loc[[4, 5, 5]] | ||||
|  | ||||
|         result = s.reindex([4, 5, 5]) | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         # iloc | ||||
|         # (positional selection on the original two-element `ser`) | ||||
|         expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) | ||||
|         result = ser.iloc[[1, 1, 0, 0]] | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|     def test_series_partial_set_with_name(self): | ||||
|         # GH 11497 | ||||
|         # Same .loc checks as test_series_partial_set, but on a named Series | ||||
|         # whose index is named too; successful selections must keep both names. | ||||
|  | ||||
|         idx = Index([1, 2], dtype="int64", name="idx") | ||||
|         ser = Series([0.1, 0.2], index=idx, name="s") | ||||
|  | ||||
|         # loc | ||||
|         with pytest.raises(KeyError, match=r"\[3\] not in index"): | ||||
|             ser.loc[[3, 2, 3]] | ||||
|  | ||||
|         with pytest.raises(KeyError, match=r"not in index"): | ||||
|             ser.loc[[3, 2, 3, "x"]] | ||||
|  | ||||
|         # every label present: the result keeps the index name and series name | ||||
|         exp_idx = Index([2, 2, 1], dtype="int64", name="idx") | ||||
|         expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s") | ||||
|         result = ser.loc[[2, 2, 1]] | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|         with pytest.raises(KeyError, match=r"\['x'\] not in index"): | ||||
|             ser.loc[[2, 2, "x", 1]] | ||||
|  | ||||
|         # raises as nothing is in the index | ||||
|         # (the expected message includes the index name) | ||||
|         msg = ( | ||||
|             rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}', " | ||||
|             r"name='idx'\)\] are in the \[index\]\"" | ||||
|         ) | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             ser.loc[[3, 3, 3]] | ||||
|  | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             ser.loc[[2, 2, 3]] | ||||
|  | ||||
|         # same check on fresh named series with other indexes | ||||
|         idx = Index([1, 2, 3], dtype="int64", name="idx") | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] | ||||
|  | ||||
|         idx = Index([1, 2, 3, 4], dtype="int64", name="idx") | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] | ||||
|  | ||||
|         idx = Index([1, 2, 3, 4], dtype="int64", name="idx") | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] | ||||
|  | ||||
|         idx = Index([4, 5, 6, 7], dtype="int64", name="idx") | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] | ||||
|  | ||||
|         idx = Index([1, 2, 3, 4], dtype="int64", name="idx") | ||||
|         with pytest.raises(KeyError, match="not in index"): | ||||
|             Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] | ||||
|  | ||||
|         # iloc | ||||
|         # (positional selection on the original `ser`; names are kept) | ||||
|         exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx") | ||||
|         expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s") | ||||
|         result = ser.iloc[[1, 1, 0, 0]] | ||||
|         tm.assert_series_equal(result, expected, check_index_type=True) | ||||
|  | ||||
|     @pytest.mark.parametrize("key", [100, 100.0]) | ||||
|     def test_setitem_with_expansion_numeric_into_datetimeindex(self, key): | ||||
|         # GH#4940 inserting non-strings | ||||
|         orig = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((10, 4)), | ||||
|             columns=Index(list("ABCD"), dtype=object), | ||||
|             index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|         ) | ||||
|         df = orig.copy() | ||||
|  | ||||
|         df.loc[key, :] = df.iloc[0] | ||||
|         ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name) | ||||
|         ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0) | ||||
|         expected = DataFrame(ex_data, index=ex_index, columns=orig.columns) | ||||
|  | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_partial_set_invalid(self): | ||||
|         # GH 4940 | ||||
|         # allow only setting of 'valid' values | ||||
|  | ||||
|         orig = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((10, 4)), | ||||
|             columns=Index(list("ABCD"), dtype=object), | ||||
|             index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|         ) | ||||
|  | ||||
|         # allow object conversion here | ||||
|         df = orig.copy() | ||||
|         df.loc["a", :] = df.iloc[0] | ||||
|         ser = Series(df.iloc[0], name="a") | ||||
|         exp = pd.concat([orig, DataFrame(ser).T.infer_objects()]) | ||||
|         tm.assert_frame_equal(df, exp) | ||||
|         tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) | ||||
|         assert df.index.dtype == "object" | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx,labels,expected_idx", | ||||
|         [ | ||||
|             ( | ||||
|                 period_range(start="2000", periods=20, freq="D"), | ||||
|                 ["2000-01-04", "2000-01-08", "2000-01-12"], | ||||
|                 [ | ||||
|                     Period("2000-01-04", freq="D"), | ||||
|                     Period("2000-01-08", freq="D"), | ||||
|                     Period("2000-01-12", freq="D"), | ||||
|                 ], | ||||
|             ), | ||||
|             ( | ||||
|                 date_range(start="2000", periods=20, freq="D"), | ||||
|                 ["2000-01-04", "2000-01-08", "2000-01-12"], | ||||
|                 [ | ||||
|                     Timestamp("2000-01-04"), | ||||
|                     Timestamp("2000-01-08"), | ||||
|                     Timestamp("2000-01-12"), | ||||
|                 ], | ||||
|             ), | ||||
|             ( | ||||
|                 pd.timedelta_range(start="1 day", periods=20), | ||||
|                 ["4D", "8D", "12D"], | ||||
|                 [pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")], | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_loc_with_list_of_strings_representing_datetimes( | ||||
|         self, idx, labels, expected_idx, frame_or_series | ||||
|     ): | ||||
|         # GH 11278 | ||||
|         obj = frame_or_series(range(20), index=idx) | ||||
|  | ||||
|         expected_value = [3, 7, 11] | ||||
|         expected = frame_or_series(expected_value, expected_idx) | ||||
|  | ||||
|         tm.assert_equal(expected, obj.loc[labels]) | ||||
|         if frame_or_series is Series: | ||||
|             tm.assert_series_equal(expected, obj[labels]) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx,labels", | ||||
|         [ | ||||
|             ( | ||||
|                 period_range(start="2000", periods=20, freq="D"), | ||||
|                 ["2000-01-04", "2000-01-30"], | ||||
|             ), | ||||
|             ( | ||||
|                 date_range(start="2000", periods=20, freq="D"), | ||||
|                 ["2000-01-04", "2000-01-30"], | ||||
|             ), | ||||
|             (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_loc_with_list_of_strings_representing_datetimes_missing_value( | ||||
|         self, idx, labels | ||||
|     ): | ||||
|         # GH 11278 | ||||
|         ser = Series(range(20), index=idx) | ||||
|         df = DataFrame(range(20), index=idx) | ||||
|         msg = r"not in index" | ||||
|  | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             ser.loc[labels] | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             ser[labels] | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             df.loc[labels] | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx,labels,msg", | ||||
|         [ | ||||
|             ( | ||||
|                 period_range(start="2000", periods=20, freq="D"), | ||||
|                 Index(["4D", "8D"], dtype=object), | ||||
|                 ( | ||||
|                     r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " | ||||
|                     r"are in the \[index\]" | ||||
|                 ), | ||||
|             ), | ||||
|             ( | ||||
|                 date_range(start="2000", periods=20, freq="D"), | ||||
|                 Index(["4D", "8D"], dtype=object), | ||||
|                 ( | ||||
|                     r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " | ||||
|                     r"are in the \[index\]" | ||||
|                 ), | ||||
|             ), | ||||
|             ( | ||||
|                 pd.timedelta_range(start="1 day", periods=20), | ||||
|                 Index(["2000-01-04", "2000-01-08"], dtype=object), | ||||
|                 ( | ||||
|                     r"None of \[Index\(\['2000-01-04', '2000-01-08'\], " | ||||
|                     r"dtype='object'\)\] are in the \[index\]" | ||||
|                 ), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_loc_with_list_of_strings_representing_datetimes_not_matched_type( | ||||
|         self, idx, labels, msg | ||||
|     ): | ||||
|         # GH 11278 | ||||
|         ser = Series(range(20), index=idx) | ||||
|         df = DataFrame(range(20), index=idx) | ||||
|  | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             ser.loc[labels] | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             ser[labels] | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             df.loc[labels] | ||||
|  | ||||
|  | ||||
| class TestStringSlicing: | ||||
|     def test_slice_irregular_datetime_index_with_nan(self): | ||||
|         # GH36953 | ||||
|         index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None]) | ||||
|         df = DataFrame(range(len(index)), index=index) | ||||
|         expected = DataFrame(range(len(index[:3])), index=index[:3]) | ||||
|         with pytest.raises(KeyError, match="non-existing keys is not allowed"): | ||||
|             # Upper bound is not in index (which is unordered) | ||||
|             # GH53983 | ||||
|             # GH37819 | ||||
|             df["2012-01-01":"2012-01-04"] | ||||
|         # Need this precision for right bound since the right slice | ||||
|         # bound is "rounded" up to the largest timepoint smaller than | ||||
|         # the next "resolution"-step of the provided point. | ||||
|         # e.g. 2012-01-03 is rounded up to 2012-01-04 - 1ns | ||||
|         result = df["2012-01-01":"2012-01-03 00:00:00.000000000"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,303 @@ | ||||
| """ test scalar indexing, including at and iat """ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
| import itertools | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def generate_indices(f, values=False): | ||||
|     """ | ||||
|     Generate every index tuple of ``f`` as the product over its axes. | ||||
|  | ||||
|     If ``values`` is True, each axis contributes its integer positions | ||||
|     ``0 .. len(axis) - 1``; if False (the default), each axis contributes | ||||
|     its own labels. | ||||
|     """ | ||||
|     axes = f.axes | ||||
|     if values: | ||||
|         # swap every axis for its positional range | ||||
|         axes = (list(range(len(ax))) for ax in axes) | ||||
|  | ||||
|     return itertools.product(*axes) | ||||
|  | ||||
|  | ||||
| class TestScalar: | ||||
|     @pytest.mark.parametrize("kind", ["series", "frame"]) | ||||
|     @pytest.mark.parametrize("col", ["ints", "uints"]) | ||||
|     def test_iat_set_ints(self, kind, col, request): | ||||
|         f = request.getfixturevalue(f"{kind}_{col}") | ||||
|         indices = generate_indices(f, True) | ||||
|         for i in indices: | ||||
|             f.iat[i] = 1 | ||||
|             expected = f.values[i] | ||||
|             tm.assert_almost_equal(expected, 1) | ||||
|  | ||||
|     @pytest.mark.parametrize("kind", ["series", "frame"]) | ||||
|     @pytest.mark.parametrize("col", ["labels", "ts", "floats"]) | ||||
|     def test_iat_set_other(self, kind, col, request): | ||||
|         f = request.getfixturevalue(f"{kind}_{col}") | ||||
|         msg = "iAt based indexing can only have integer indexers" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             idx = next(generate_indices(f, False)) | ||||
|             f.iat[idx] = 1 | ||||
|  | ||||
|     @pytest.mark.parametrize("kind", ["series", "frame"]) | ||||
|     @pytest.mark.parametrize("col", ["ints", "uints", "labels", "ts", "floats"]) | ||||
|     def test_at_set_ints_other(self, kind, col, request): | ||||
|         f = request.getfixturevalue(f"{kind}_{col}") | ||||
|         indices = generate_indices(f, False) | ||||
|         for i in indices: | ||||
|             f.at[i] = 1 | ||||
|             expected = f.loc[i] | ||||
|             tm.assert_almost_equal(expected, 1) | ||||
|  | ||||
|  | ||||
| class TestAtAndiAT: | ||||
|     # at and iat tests that don't need Base class | ||||
|  | ||||
|     def test_float_index_at_iat(self): | ||||
|         ser = Series([1, 2, 3], index=[0.1, 0.2, 0.3]) | ||||
|         for el, item in ser.items(): | ||||
|             assert ser.at[el] == item | ||||
|         for i in range(len(ser)): | ||||
|             assert ser.iat[i] == i + 1 | ||||
|  | ||||
|     def test_at_iat_coercion(self): | ||||
|         # as timestamp is not a tuple! | ||||
|         dates = date_range("1/1/2000", periods=8) | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((8, 4)), | ||||
|             index=dates, | ||||
|             columns=["A", "B", "C", "D"], | ||||
|         ) | ||||
|         s = df["A"] | ||||
|  | ||||
|         result = s.at[dates[5]] | ||||
|         xp = s.values[5] | ||||
|         assert result == xp | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ser, expected", | ||||
|         [ | ||||
|             [ | ||||
|                 Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]"), | ||||
|                 Timestamp("2014-02-02"), | ||||
|             ], | ||||
|             [ | ||||
|                 Series(["1 days", "2 days"], dtype="timedelta64[ns]"), | ||||
|                 Timedelta("2 days"), | ||||
|             ], | ||||
|         ], | ||||
|     ) | ||||
|     def test_iloc_iat_coercion_datelike(self, indexer_ial, ser, expected): | ||||
|         # GH 7729 | ||||
|         # make sure we are boxing the returns | ||||
|         result = indexer_ial(ser)[1] | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_imethods_with_dups(self): | ||||
|         # GH6493 | ||||
|         # iat/iloc with dups | ||||
|  | ||||
|         s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64") | ||||
|         result = s.iloc[2] | ||||
|         assert result == 2 | ||||
|         result = s.iat[2] | ||||
|         assert result == 2 | ||||
|  | ||||
|         msg = "index 10 is out of bounds for axis 0 with size 5" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             s.iat[10] | ||||
|         msg = "index -10 is out of bounds for axis 0 with size 5" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             s.iat[-10] | ||||
|  | ||||
|         result = s.iloc[[2, 3]] | ||||
|         expected = Series([2, 3], [2, 2], dtype="int64") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         df = s.to_frame() | ||||
|         result = df.iloc[2] | ||||
|         expected = Series(2, index=[0], name=2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = df.iat[2, 0] | ||||
|         assert result == 2 | ||||
|  | ||||
|     def test_frame_at_with_duplicate_axes(self): | ||||
|         # GH#33041 | ||||
|         arr = np.random.default_rng(2).standard_normal(6).reshape(3, 2) | ||||
|         df = DataFrame(arr, columns=["A", "A"]) | ||||
|  | ||||
|         result = df.at[0, "A"] | ||||
|         expected = df.iloc[0].copy() | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = df.T.at["A", 0] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # setter | ||||
|         df.at[1, "A"] = 2 | ||||
|         expected = Series([2.0, 2.0], index=["A", "A"], name=1) | ||||
|         tm.assert_series_equal(df.iloc[1], expected) | ||||
|  | ||||
|     def test_at_getitem_dt64tz_values(self): | ||||
|         # gh-15822 | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "name": ["John", "Anderson"], | ||||
|                 "date": [ | ||||
|                     Timestamp(2017, 3, 13, 13, 32, 56), | ||||
|                     Timestamp(2017, 2, 16, 12, 10, 3), | ||||
|                 ], | ||||
|             } | ||||
|         ) | ||||
|         df["date"] = df["date"].dt.tz_localize("Asia/Shanghai") | ||||
|  | ||||
|         expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai") | ||||
|  | ||||
|         result = df.loc[0, "date"] | ||||
|         assert result == expected | ||||
|  | ||||
|         result = df.at[0, "date"] | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_mixed_index_at_iat_loc_iloc_series(self): | ||||
|         # GH 19860 | ||||
|         s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) | ||||
|         for el, item in s.items(): | ||||
|             assert s.at[el] == s.loc[el] == item | ||||
|         for i in range(len(s)): | ||||
|             assert s.iat[i] == s.iloc[i] == i + 1 | ||||
|  | ||||
|         with pytest.raises(KeyError, match="^4$"): | ||||
|             s.at[4] | ||||
|         with pytest.raises(KeyError, match="^4$"): | ||||
|             s.loc[4] | ||||
|  | ||||
|     def test_mixed_index_at_iat_loc_iloc_dataframe(self): | ||||
|         # GH 19860 | ||||
|         df = DataFrame( | ||||
|             [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2] | ||||
|         ) | ||||
|         for rowIdx, row in df.iterrows(): | ||||
|             for el, item in row.items(): | ||||
|                 assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item | ||||
|  | ||||
|         for row in range(2): | ||||
|             for i in range(5): | ||||
|                 assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i | ||||
|  | ||||
|         with pytest.raises(KeyError, match="^3$"): | ||||
|             df.at[0, 3] | ||||
|         with pytest.raises(KeyError, match="^3$"): | ||||
|             df.loc[0, 3] | ||||
|  | ||||
|     def test_iat_setter_incompatible_assignment(self): | ||||
|         # GH 23236 | ||||
|         result = DataFrame({"a": [0.0, 1.0], "b": [4, 5]}) | ||||
|         result.iat[0, 0] = None | ||||
|         expected = DataFrame({"a": [None, 1], "b": [4, 5]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_iat_dont_wrap_object_datetimelike(): | ||||
|     # GH#32809 .iat calls go through DataFrame._get_value, should not | ||||
|     #  call maybe_box_datetimelike | ||||
|     dti = date_range("2016-01-01", periods=3) | ||||
|     tdi = dti - dti | ||||
|     ser = Series(dti.to_pydatetime(), dtype=object) | ||||
|     ser2 = Series(tdi.to_pytimedelta(), dtype=object) | ||||
|     df = DataFrame({"A": ser, "B": ser2}) | ||||
|     assert (df.dtypes == object).all() | ||||
|  | ||||
|     for result in [df.at[0, "A"], df.iat[0, 0], df.loc[0, "A"], df.iloc[0, 0]]: | ||||
|         assert result is ser[0] | ||||
|         assert isinstance(result, datetime) | ||||
|         assert not isinstance(result, Timestamp) | ||||
|  | ||||
|     for result in [df.at[1, "B"], df.iat[1, 1], df.loc[1, "B"], df.iloc[1, 1]]: | ||||
|         assert result is ser2[1] | ||||
|         assert isinstance(result, timedelta) | ||||
|         assert not isinstance(result, Timedelta) | ||||
|  | ||||
|  | ||||
| def test_at_with_tuple_index_get(): | ||||
|     # GH 26989 | ||||
|     # DataFrame.at getter works with Index of tuples | ||||
|     df = DataFrame({"a": [1, 2]}, index=[(1, 2), (3, 4)]) | ||||
|     assert df.index.nlevels == 1 | ||||
|     assert df.at[(1, 2), "a"] == 1 | ||||
|  | ||||
|     # Series.at getter works with Index of tuples | ||||
|     series = df["a"] | ||||
|     assert series.index.nlevels == 1 | ||||
|     assert series.at[(1, 2)] == 1 | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") | ||||
| def test_at_with_tuple_index_set(): | ||||
|     # GH 26989 | ||||
|     # DataFrame.at setter works with Index of tuples | ||||
|     df = DataFrame({"a": [1, 2]}, index=[(1, 2), (3, 4)]) | ||||
|     assert df.index.nlevels == 1 | ||||
|     df.at[(1, 2), "a"] = 2 | ||||
|     assert df.at[(1, 2), "a"] == 2 | ||||
|  | ||||
|     # Series.at setter works with Index of tuples | ||||
|     series = df["a"] | ||||
|     assert series.index.nlevels == 1 | ||||
|     series.at[1, 2] = 3 | ||||
|     assert series.at[1, 2] == 3 | ||||
|  | ||||
|  | ||||
| class TestMultiIndexScalar: | ||||
|     # Scalar get/set through .at and .loc when the index is a MultiIndex. | ||||
|  | ||||
|     def test_multiindex_at_get(self): | ||||
|         # GH 26989 | ||||
|         # DataFrame.at and DataFrame.loc getter works with MultiIndex | ||||
|         # (index given as two parallel lists -> two levels, checked below) | ||||
|         df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) | ||||
|         assert df.index.nlevels == 2 | ||||
|         assert df.at[(1, 3), "a"] == 1 | ||||
|         assert df.loc[(1, 3), "a"] == 1 | ||||
|  | ||||
|         # Series.at and Series.loc getter works with MultiIndex | ||||
|         series = df["a"] | ||||
|         assert series.index.nlevels == 2 | ||||
|         assert series.at[1, 3] == 1 | ||||
|         assert series.loc[1, 3] == 1 | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") | ||||
|     def test_multiindex_at_set(self): | ||||
|         # GH 26989 | ||||
|         # DataFrame.at and DataFrame.loc setter works with MultiIndex | ||||
|         df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) | ||||
|         assert df.index.nlevels == 2 | ||||
|         # each write is read back through the same accessor | ||||
|         df.at[(1, 3), "a"] = 3 | ||||
|         assert df.at[(1, 3), "a"] == 3 | ||||
|         df.loc[(1, 3), "a"] = 4 | ||||
|         assert df.loc[(1, 3), "a"] == 4 | ||||
|  | ||||
|         # Series.at and Series.loc setter works with MultiIndex | ||||
|         # (the series is the column taken from df above) | ||||
|         series = df["a"] | ||||
|         assert series.index.nlevels == 2 | ||||
|         series.at[1, 3] = 5 | ||||
|         assert series.at[1, 3] == 5 | ||||
|         series.loc[1, 3] = 6 | ||||
|         assert series.loc[1, 3] == 6 | ||||
|  | ||||
|     def test_multiindex_at_get_one_level(self): | ||||
|         # GH#38053 | ||||
|         # index given as a list holding one list of labels; .at is called | ||||
|         # with the bare scalar label False | ||||
|         s2 = Series((0, 1), index=[[False, True]]) | ||||
|         result = s2.at[False] | ||||
|         assert result == 0 | ||||
		Reference in New Issue
	
	Block a user