done
This commit is contained in:
		| @ -0,0 +1,62 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestAppend:
    # Tests for ``append`` when at least one side is a CategoricalIndex.

    @pytest.fixture
    def ci(self):
        # unordered index over "aabbca" with categories given as c, a, b
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_append(self, ci):
        # two slices sharing the same categories re-assemble into the original
        head, tail = ci[:3], ci[3:]
        tm.assert_index_equal(head.append(tail), ci, exact=True)

        # a list of slices can be appended in a single call
        first, *remaining = ci[:1], ci[1:3], ci[3:]
        tm.assert_index_equal(first.append(remaining), ci, exact=True)

    def test_append_empty(self, ci):
        # appending an empty list gives back an equal index
        appended = ci.append([])
        tm.assert_index_equal(appended, ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        # appending with different categories or reordered is not ok
        msg = "all inputs must be Index"
        with pytest.raises(TypeError, match=msg):
            widened = ci.values.set_categories(list("abcd"))
            ci.append(widened)
        with pytest.raises(TypeError, match=msg):
            reordered = ci.values.reorder_categories(list("abc"))
            ci.append(reordered)

    def test_append_category_objects(self, ci):
        # a plain Index whose values are all existing categories
        other = Index(["c", "a"])
        expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(ci.append(other), expected, exact=True)

    def test_append_non_categories(self, ci):
        # invalid objects -> cast to object via concat_compat
        other = Index(["a", "d"])
        expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(ci.append(other), expected, exact=True)

    def test_append_object(self, ci):
        # GH#14298 - if base object is not categorical -> coerce to object
        base = Index(["c", "a"])
        expected = Index(list("caaabbca"))
        tm.assert_index_equal(base.append(ci), expected, exact=True)

    def test_append_to_another(self):
        # hits Index._concat
        plain = Index(["a", "b"])
        categorical = CategoricalIndex(["d", "e"])
        expected = Index(["a", "b", "d", "e"])
        tm.assert_index_equal(plain.append(categorical), expected)
| @ -0,0 +1,90 @@ | ||||
| from datetime import date | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestAstype:
    # Tests for ``astype`` on CategoricalIndex (and the date round-trip).

    def test_astype(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        as_object = ci.astype(object)
        tm.assert_index_equal(as_object, Index(np.array(ci), dtype=object))

        # this IS equal, but not the same class
        assert as_object.equals(ci)
        assert isinstance(as_object, Index)
        assert not isinstance(as_object, CategoricalIndex)

        # interval categories, with one missing entry (code -1)
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")
        codes = [0, 1, -1]
        ci = CategoricalIndex(
            Categorical.from_codes(codes, categories=ii, ordered=True)
        )
        expected = ii.take(codes, allow_fill=True, fill_value=np.nan)

        as_interval = ci.astype("interval")
        tm.assert_index_equal(as_interval, expected)

        # rebuilding an IntervalIndex from the values gives the same thing
        rebuilt = IntervalIndex(as_interval.values)
        tm.assert_index_equal(rebuilt, expected)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        # GH#18630
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name:
            index = index.rename(name)

        # standard categories: only ``ordered`` comes from the target dtype
        target = CategoricalDtype(ordered=dtype_ordered)
        expected = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(index.astype(target), expected)

        # non-standard categories: the last unique value is left out
        partial_categories = index.unique().tolist()[:-1]
        target = CategoricalDtype(partial_categories, dtype_ordered)
        expected = CategoricalIndex(index.tolist(), name=name, dtype=target)
        tm.assert_index_equal(index.astype(target), expected)

        if not dtype_ordered:
            # dtype='category' can't specify ordered, so only test once
            tm.assert_index_equal(index.astype("category"), index)

    @pytest.mark.parametrize("box", [True, False])
    def test_categorical_date_roundtrip(self, box):
        # astype to categorical and back should preserve date objects
        today = date.today()

        obj = Index([today, today])
        assert obj.dtype == object
        if box:
            obj = obj.array

        as_category = obj.astype("category")
        round_tripped = as_category.astype(object)

        assert round_tripped.dtype == object
        assert type(round_tripped[0]) is date
| @ -0,0 +1,391 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import index as libindex | ||||
| from pandas._libs.arrays import NDArrayBacked | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.indexes.api import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestCategoricalIndex:
    # General behaviour tests for CategoricalIndex.

    @pytest.fixture
    def simple_index(self) -> CategoricalIndex:
        # unordered index over "aabbca" with categories given as c, a, b
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_can_hold_identifiers(self):
        idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
        first = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(first) is True

    def test_insert(self, simple_index):
        ci = simple_index
        categories = ci.categories

        # position 0, then position -1 (which follows Python list behavior)
        for loc, letters in ((0, "aaabbca"), (-1, "aabbcaa")):
            result = ci.insert(loc, "a")
            expected = CategoricalIndex(list(letters), categories=categories)
            tm.assert_index_equal(result, expected, exact=True)

        # inserting into an empty index
        empty = CategoricalIndex([], categories=categories)
        expected = CategoricalIndex(["a"], categories=categories)
        tm.assert_index_equal(empty.insert(0, "a"), expected, exact=True)

        # invalid -> cast to object
        expected = ci.astype(object).insert(0, "d")
        result = ci.insert(0, "d").astype(object)
        tm.assert_index_equal(result, expected, exact=True)

        # GH 18295 (test missing)
        expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
        for missing in (np.nan, pd.NaT, None):
            result = CategoricalIndex(list("aabcb")).insert(1, missing)
            tm.assert_index_equal(result, expected)

    def test_insert_na_mismatched_dtype(self):
        ci = CategoricalIndex([0, 1, 1])
        expected = Index([pd.NaT, 0, 1, 1], dtype=object)
        tm.assert_index_equal(ci.insert(0, pd.NaT), expected)

    def test_delete(self, simple_index):
        ci = simple_index
        categories = ci.categories

        # drop the first element, then the last
        for loc, letters in ((0, "abbca"), (-1, "aabbc")):
            result = ci.delete(loc)
            expected = CategoricalIndex(list(letters), categories=categories)
            tm.assert_index_equal(result, expected, exact=True)

        with tm.external_error_raised((IndexError, ValueError)):
            # Either depending on NumPy version
            ci.delete(10)

    @pytest.mark.parametrize(
        "data, non_lexsorted_data",
        [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
    )
    def test_is_monotonic(self, data, non_lexsorted_data):
        def check(index, *, increasing, decreasing):
            # both flags must be exactly the expected bool
            assert index.is_monotonic_increasing is increasing
            assert index.is_monotonic_decreasing is decreasing

        check(CategoricalIndex(data), increasing=True, decreasing=False)
        check(CategoricalIndex(data, ordered=True), increasing=True, decreasing=False)

        # reversing the categories flips the direction
        check(
            CategoricalIndex(data, categories=reversed(data)),
            increasing=False,
            decreasing=True,
        )
        check(
            CategoricalIndex(data, categories=reversed(data), ordered=True),
            increasing=False,
            decreasing=True,
        )

        # test when data is neither monotonic increasing nor decreasing
        shuffled = [data[0], data[2], data[1]]
        check(
            CategoricalIndex(shuffled, categories=reversed(data)),
            increasing=False,
            decreasing=False,
        )

        # non lexsorted categories
        categories = non_lexsorted_data
        check(
            CategoricalIndex(categories[:2], categories=categories),
            increasing=True,
            decreasing=False,
        )
        check(
            CategoricalIndex(categories[1:3], categories=categories),
            increasing=True,
            decreasing=False,
        )

    def test_has_duplicates(self):
        cases = [
            (CategoricalIndex([0, 0, 0], name="foo"), False),
            (CategoricalIndex([0, 1], categories=[2, 3], name="foo"), False),
            (CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo"), True),
        ]
        for idx, unique in cases:
            assert idx.is_unique is unique
            assert idx.has_duplicates is (not unique)

    @pytest.mark.parametrize(
        "data, categories, expected",
        [
            (
                [1, 1, 1],
                [1, 2, 3],
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [1, 1, 1],
                list("abc"),
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [2, "a", "b"],
                list("abc"),
                {
                    "first": np.zeros(shape=(3), dtype=np.bool_),
                    "last": np.zeros(shape=(3), dtype=np.bool_),
                    False: np.zeros(shape=(3), dtype=np.bool_),
                },
            ),
            (
                list("abb"),
                list("abc"),
                {
                    "first": np.array([False, False, True]),
                    "last": np.array([False, True, False]),
                    False: np.array([False, True, True]),
                },
            ),
        ],
    )
    def test_drop_duplicates(self, data, categories, expected):
        idx = CategoricalIndex(data, categories=categories, name="foo")
        for keep, duplicated_mask in expected.items():
            tm.assert_numpy_array_equal(idx.duplicated(keep=keep), duplicated_mask)
            # whatever is not flagged as duplicated must survive the drop
            survivors = idx[~duplicated_mask]
            tm.assert_index_equal(idx.drop_duplicates(keep=keep), survivors)

    @pytest.mark.parametrize(
        "data, categories, expected_data",
        [
            ([1, 1, 1], [1, 2, 3], [1]),
            ([1, 1, 1], list("abc"), [np.nan]),
            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
        ],
    )
    def test_unique(self, data, categories, expected_data, ordered):
        dtype = CategoricalDtype(categories, ordered=ordered)

        expected = CategoricalIndex(expected_data, dtype=dtype)
        uniques = CategoricalIndex(data, dtype=dtype).unique()
        tm.assert_index_equal(uniques, expected)

    def test_repr_roundtrip(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        str(ci)
        rebuilt = eval(repr(ci))
        tm.assert_index_equal(rebuilt, ci, exact=True)

        # formatting
        str(ci)

        # long format
        # this is not reprable
        ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
        str(ci)

    def test_isin(self):
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])

        list_cases = [
            (["c"], np.array([False, False, False, True, False, False])),
            (["c", "a", "b"], np.array([True] * 5 + [False])),
            (["c", "a", "b", np.nan], np.array([True] * 6)),
        ]
        for values, expected in list_cases:
            tm.assert_numpy_array_equal(ci.isin(values), expected)

        # mismatched categorical -> coerced to ndarray so doesn't matter
        superset = ci.set_categories(list("abcdefghi"))
        tm.assert_numpy_array_equal(ci.isin(superset), np.array([True] * 6))

        disjoint = ci.set_categories(list("defghi"))
        tm.assert_numpy_array_equal(
            ci.isin(disjoint), np.array([False] * 5 + [True])
        )

    def test_isin_overlapping_intervals(self):
        # GH 34974
        intervals = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
        membership = CategoricalIndex(intervals).isin(intervals)
        tm.assert_numpy_array_equal(membership, np.array([True, True]))

    def test_identical(self):
        narrow = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        wide = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
        assert narrow.identical(narrow)
        assert narrow.identical(narrow.copy())
        assert not narrow.identical(wide)

    def test_ensure_copied_data(self):
        # gh-12309: Check the "copy" argument of each
        # Index.__new__ is honored.
        #
        # Must be tested separately from other indexes because
        # self.values is not an ndarray.
        index = CategoricalIndex(list("ab") * 5)

        copied = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, copied)
        assert not np.shares_memory(copied._data._codes, index._data._codes)

        shared = CategoricalIndex(index.values, copy=False)
        assert shared._data._codes is index._data._codes
|  | ||||
|  | ||||
class TestCategoricalIndex2:
    # Further CategoricalIndex tests: views, engine selection, arithmetic,
    # and methods delegated to the categorical values.

    def test_view_i8(self):
        # GH#25464
        ci = CategoricalIndex(list("ab") * 50)
        msg = "When changing to a larger dtype, its size must be a divisor"
        for obj in (ci, ci._data):
            with pytest.raises(ValueError, match=msg):
                obj.view("i8")

        ci = ci[:-4]  # length divisible by 8

        expected = ci._data.codes.view("i8")
        tm.assert_numpy_array_equal(ci.view("i8"), expected)

        # the backing array gives the same view as the index
        cat = ci._data
        tm.assert_numpy_array_equal(cat.view("i8"), expected)

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        if dtype == np.int64:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            wide_codes = ci.values._ndarray.astype("int64")
            NDArrayBacked.__init__(ci._data, wide_codes, ci.dtype)
        else:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
            ci = CategoricalIndex(range(num_uniques))
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        # any one of these messages is accepted
        accepted_messages = [
            f"cannot perform {op_name} with this index type: CategoricalIndex",
            "can only concatenate list",
            rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
        ]
        msg = "|".join(accepted_messages)
        with pytest.raises(TypeError, match=msg):
            func(idx)

    def test_method_delegation(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        expected = CategoricalIndex(list("aabbca"), categories=list("cab"))
        tm.assert_index_equal(ci.set_categories(list("cab")), expected)

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(list("ffggef"), categories=list("efg"))
        tm.assert_index_equal(ci.rename_categories(list("efg")), expected)

        # GH18862 (let rename_categories take callables)
        expected = CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        tm.assert_index_equal(ci.rename_categories(lambda x: x.upper()), expected)

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(list("aabbca"), categories=list("cabd"))
        tm.assert_index_equal(ci.add_categories(["d"]), expected)

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(
            list("aabb") + [np.nan] + ["a"], categories=list("ab")
        )
        tm.assert_index_equal(ci.remove_categories(["c"]), expected)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        tm.assert_index_equal(ci.as_unordered(), ci)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        expected = CategoricalIndex(
            list("aabbca"), categories=list("cabdef"), ordered=True
        )
        tm.assert_index_equal(ci.as_ordered(), expected)

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)

    def test_remove_maintains_order(self):
        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))

        reordered = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
        expected = CategoricalIndex(
            list("abcdda"), categories=list("dcba"), ordered=True
        )
        tm.assert_index_equal(reordered, expected)

        # dropping "c" keeps the remaining categories in d, b, a order
        trimmed = reordered.remove_categories(["c"])
        expected = CategoricalIndex(
            ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
        )
        tm.assert_index_equal(trimmed, expected)
| @ -0,0 +1,142 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestCategoricalIndexConstructors:
    # Tests for the ways a CategoricalIndex can be constructed.

    def test_construction_disallows_scalar(self):
        msg = "must be called with a collection of some kind"
        # a scalar ``data`` argument
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        # no ``data`` argument at all
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        def check(index, expected_categories, expected_codes):
            # compare the categories and the int8 codes of ``index``
            tm.assert_index_equal(index.categories, expected_categories)
            tm.assert_numpy_array_equal(
                index.codes, np.array(expected_codes, dtype="int8")
            )

        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        categories = ci.categories

        # Index() of a CategoricalIndex, or of its values, stays categorical
        for source in (ci, ci.values):
            result = Index(source)
            tm.assert_index_equal(result, ci, exact=True)
            assert not result.ordered

        # empty
        result = CategoricalIndex([], categories=categories)
        check(result, Index(categories), [])
        assert not result.ordered

        # passing categories
        result = CategoricalIndex(list("aabbca"), categories=categories)
        check(result, Index(categories), [0, 0, 1, 1, 2, 0])

        # from a Categorical, without and with explicit categories
        c = Categorical(list("aabbca"))
        result = CategoricalIndex(c)
        check(result, Index(list("abc")), [0, 0, 1, 1, 2, 0])
        assert not result.ordered

        result = CategoricalIndex(c, categories=categories)
        check(result, Index(categories), [0, 0, 1, 1, 2, 0])
        assert not result.ordered

        # from another CategoricalIndex
        ci = CategoricalIndex(c, categories=list("abcd"))
        result = CategoricalIndex(ci)
        check(result, Index(categories), [0, 0, 1, 1, 2, 0])
        assert not result.ordered

        # narrowing the categories turns "c" into code -1
        result = CategoricalIndex(ci, categories=list("ab"))
        check(result, Index(list("ab")), [0, 0, 1, 1, -1, 0])
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        check(result, Index(list("ab")), [0, 0, 1, 1, -1, 0])
        assert result.ordered

        # adding dtype="category" on top makes no difference
        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        expected = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

    def test_construction_with_dtype(self):
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)
        values = np.array(ci)

        from_array = Index(values, dtype="category")
        tm.assert_index_equal(from_array, ci, exact=True)

        from_list = Index(values.tolist(), dtype="category")
        tm.assert_index_equal(from_list, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        inferred = Index(np.array(ci), dtype="category")
        tm.assert_index_equal(
            inferred.reorder_categories(ci.categories), ci, exact=True
        )

        # make sure indexes are handled
        idx = Index(range(3))
        expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        result = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(result, expected, exact=True)

    def test_construction_empty_with_bool_categories(self):
        # see GH#22702
        cat = CategoricalIndex([], categories=[True, False])
        sorted_categories = sorted(cat.categories.tolist())
        assert sorted_categories == [False, True]

    def test_construction_with_categorical_dtype(self):
        # construction with CategoricalDtype
        # GH#18109
        data = "a a b b".split()
        cats = "c b a".split()
        ordered = True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        expected = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(
            CategoricalIndex(data, dtype=dtype), expected, exact=True
        )

        # GH#19032
        tm.assert_index_equal(Index(data, dtype=dtype), expected, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)
| @ -0,0 +1,96 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestEquals: | ||||
|     # Tests for CategoricalIndex.equals and for the elementwise comparison | ||||
|     # operators applied to a CategoricalIndex. | ||||
|     def test_equals_categorical(self): | ||||
|         # ci1 and ci2 hold the same values, but ci2 declares an extra | ||||
|         # category ("c"); both are ordered. | ||||
|         ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) | ||||
|         ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True) | ||||
|  | ||||
|         # equals() is checked against itself, against ci2, and in both | ||||
|         # directions against the object-dtype version of the same index. | ||||
|         assert ci1.equals(ci1) | ||||
|         assert not ci1.equals(ci2) | ||||
|         assert ci1.equals(ci1.astype(object)) | ||||
|         assert ci1.astype(object).equals(ci1) | ||||
|  | ||||
|         # Elementwise operators comparing the index with itself. | ||||
|         assert (ci1 == ci1).all() | ||||
|         assert not (ci1 != ci1).all() | ||||
|         assert not (ci1 > ci1).all() | ||||
|         assert not (ci1 < ci1).all() | ||||
|         assert (ci1 <= ci1).all() | ||||
|         assert (ci1 >= ci1).all() | ||||
|  | ||||
|         # Elementwise equality against a scalar, a plain Index and the | ||||
|         # underlying values. | ||||
|         assert not (ci1 == 1).all() | ||||
|         assert (ci1 == Index(["a", "b"])).all() | ||||
|         assert (ci1 == ci1.values).all() | ||||
|  | ||||
|         # invalid comparisons | ||||
|         with pytest.raises(ValueError, match="Lengths must match"): | ||||
|             ci1 == Index(["a", "b", "c"]) | ||||
|  | ||||
|         # Each comparison below is expected to raise TypeError with this | ||||
|         # message: different categories, different orderedness, or both. | ||||
|         msg = "Categoricals can only be compared if 'categories' are the same" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ci1 == ci2 | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ci1 == Categorical(ci1.values, ordered=False) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ci1 == Categorical(ci1.values, categories=list("abc")) | ||||
|  | ||||
|         # tests | ||||
|         # make sure that we are testing for category inclusion properly | ||||
|         ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"]) | ||||
|         assert not ci.equals(list("aabca")) | ||||
|         # Same categories, but different order | ||||
|         # Unordered | ||||
|         assert ci.equals(CategoricalIndex(list("aabca"))) | ||||
|         # Ordered | ||||
|         assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True)) | ||||
|         assert ci.equals(ci.copy()) | ||||
|  | ||||
|         # Same data with a trailing NaN: no longer equal to the NaN-free index. | ||||
|         ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) | ||||
|         assert not ci.equals(list("aabca")) | ||||
|         assert not ci.equals(CategoricalIndex(list("aabca"))) | ||||
|         assert ci.equals(ci.copy()) | ||||
|  | ||||
|         # With NaN on both sides the unordered indexes compare equal again, | ||||
|         # while the ordered one still does not. | ||||
|         ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) | ||||
|         assert not ci.equals(list("aabca") + [np.nan]) | ||||
|         assert ci.equals(CategoricalIndex(list("aabca") + [np.nan])) | ||||
|         assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True)) | ||||
|         assert ci.equals(ci.copy()) | ||||
|  | ||||
|     def test_equals_categorical_unordered(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/16603 | ||||
|         # a and b hold the same value and list the same categories in a | ||||
|         # different order; c is built from a value ("C") that is not among | ||||
|         # its categories. | ||||
|         a = CategoricalIndex(["A"], categories=["A", "B"]) | ||||
|         b = CategoricalIndex(["A"], categories=["B", "A"]) | ||||
|         c = CategoricalIndex(["C"], categories=["B", "A"]) | ||||
|         assert a.equals(b) | ||||
|         assert not a.equals(c) | ||||
|         assert not b.equals(c) | ||||
|  | ||||
|     def test_equals_non_category(self): | ||||
|         # GH#37667 Case where other contains a value not among ci's | ||||
|         #  categories ("D") and also contains np.nan | ||||
|         ci = CategoricalIndex(["A", "B", np.nan, np.nan]) | ||||
|         other = Index(["A", "B", "D", np.nan]) | ||||
|  | ||||
|         assert not ci.equals(other) | ||||
|  | ||||
|     def test_equals_multiindex(self): | ||||
|         # don't raise NotImplementedError when calling is_dtype_compat | ||||
|  | ||||
|         # ci is the flattened MultiIndex cast to category; comparing it with | ||||
|         # the MultiIndex itself is expected to return False rather than raise. | ||||
|         mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)]) | ||||
|         ci = mi.to_flat_index().astype("category") | ||||
|  | ||||
|         assert not ci.equals(mi) | ||||
|  | ||||
|     def test_equals_string_dtype(self, any_string_dtype): | ||||
|         # GH#55364 | ||||
|         # NOTE(review): any_string_dtype is a fixture defined outside this | ||||
|         # file (presumably a shared conftest) - confirm which dtypes it covers. | ||||
|         # A CategoricalIndex is expected to equal an Index of that dtype with | ||||
|         # the same values and name. | ||||
|         idx = CategoricalIndex(list("abc"), name="B") | ||||
|         other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype) | ||||
|         assert idx.equals(other) | ||||
| @ -0,0 +1,54 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import CategoricalIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestFillNA: | ||||
|     # Tests for CategoricalIndex.fillna, contrasted with fillna on the | ||||
|     # underlying Categorical (reached through the private ``_data`` attribute). | ||||
|     def test_fillna_categorical(self): | ||||
|         # GH#11343 | ||||
|         idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x") | ||||
|         # fill by value in categories | ||||
|         exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x") | ||||
|         tm.assert_index_equal(idx.fillna(1.0), exp) | ||||
|  | ||||
|         cat = idx._data | ||||
|  | ||||
|         # fill by value not in categories raises TypeError on EA, casts on CI | ||||
|         msg = "Cannot setitem on a Categorical with a new category" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat.fillna(2.0) | ||||
|  | ||||
|         # On the index the same fill is expected to match filling the | ||||
|         # object-dtype version of the index. | ||||
|         result = idx.fillna(2.0) | ||||
|         expected = idx.astype(object).fillna(2.0) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_fillna_copies_with_no_nas(self): | ||||
|         # Nothing to fill, should still get a copy for the Categorical method, | ||||
|         #  but OK to get a view on CategoricalIndex method | ||||
|         ci = CategoricalIndex([0, 1, 1]) | ||||
|         result = ci.fillna(0) | ||||
|         # A new index object is returned, but it shares memory with ci. | ||||
|         assert result is not ci | ||||
|         assert tm.shares_memory(result, ci) | ||||
|  | ||||
|         # But at the EA level we always get a copy. | ||||
|         cat = ci._data | ||||
|         result = cat.fillna(0) | ||||
|         assert result._ndarray is not cat._ndarray | ||||
|         assert result._ndarray.base is None | ||||
|         assert not tm.shares_memory(result, cat) | ||||
|  | ||||
|     def test_fillna_validates_with_no_nas(self): | ||||
|         # We validate the fill value even if fillna is a no-op | ||||
|         ci = CategoricalIndex([2, 3, 3]) | ||||
|         cat = ci._data | ||||
|  | ||||
|         # msg is only used for the Categorical-level check at the bottom. | ||||
|         msg = "Cannot setitem on a Categorical with a new category" | ||||
|         res = ci.fillna(False) | ||||
|         # nothing to fill, so we don't cast | ||||
|         tm.assert_index_equal(res, ci) | ||||
|  | ||||
|         # Same check directly on the Categorical | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat.fillna(False) | ||||
| @ -0,0 +1,120 @@ | ||||
| """ | ||||
| Tests for CategoricalIndex.__repr__ and related methods. | ||||
| """ | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
| import pandas._config.config as cf | ||||
|  | ||||
| from pandas import CategoricalIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalIndexRepr: | ||||
|     # The expected values below are exact repr strings; whitespace and line | ||||
|     # breaks inside the triple-quoted literals are part of the assertion. | ||||
|     def test_format_different_scalar_lengths(self): | ||||
|         # GH#35439 | ||||
|         idx = CategoricalIndex(["aaaaaaaaa", "b"]) | ||||
|         expected = ["aaaaaaaaa", "b"] | ||||
|         # format() must emit a FutureWarning matching msg and still return | ||||
|         # the expected list of strings. | ||||
|         msg = r"CategoricalIndex\.format is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             assert idx.format() == expected | ||||
|  | ||||
|     # Marked as an expected failure whenever using_string_dtype() is true. | ||||
|     @pytest.mark.xfail(using_string_dtype(), reason="repr different") | ||||
|     def test_string_categorical_index_repr(self): | ||||
|         # Four cases (short, multi-line, truncated, many categories) are | ||||
|         # checked three times: ASCII labels, non-ASCII labels, and non-ASCII | ||||
|         # labels with the east_asian_width display option enabled. | ||||
|         # short | ||||
|         idx = CategoricalIndex(["a", "bb", "ccc"]) | ||||
|         expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(idx) == expected | ||||
|  | ||||
|         # multiple lines | ||||
|         idx = CategoricalIndex(["a", "bb", "ccc"] * 10) | ||||
|         expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', | ||||
|                   'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', | ||||
|                   'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], | ||||
|                  categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(idx) == expected | ||||
|  | ||||
|         # truncated | ||||
|         idx = CategoricalIndex(["a", "bb", "ccc"] * 100) | ||||
|         expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', | ||||
|                   ... | ||||
|                   'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], | ||||
|                  categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(idx) == expected | ||||
|  | ||||
|         # larger categories | ||||
|         idx = CategoricalIndex(list("abcdefghijklmmo")) | ||||
|         expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', | ||||
|                   'm', 'm', 'o'], | ||||
|                  categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(idx) == expected | ||||
|  | ||||
|         # Same four cases again with non-ASCII labels. | ||||
|         # short | ||||
|         idx = CategoricalIndex(["あ", "いい", "ううう"]) | ||||
|         expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(idx) == expected | ||||
|  | ||||
|         # multiple lines | ||||
|         idx = CategoricalIndex(["あ", "いい", "ううう"] * 10) | ||||
|         expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', | ||||
|                   'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', | ||||
|                   'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], | ||||
|                  categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(idx) == expected | ||||
|  | ||||
|         # truncated | ||||
|         idx = CategoricalIndex(["あ", "いい", "ううう"] * 100) | ||||
|         expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', | ||||
|                   ... | ||||
|                   'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], | ||||
|                  categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(idx) == expected | ||||
|  | ||||
|         # larger categories | ||||
|         idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ")) | ||||
|         expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', | ||||
|                   'す', 'せ', 'そ'], | ||||
|                  categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(idx) == expected | ||||
|  | ||||
|         # Enable Unicode option ----------------------------------------- | ||||
|         # With the option on, the expected strings wrap at different points | ||||
|         # than in the section above. | ||||
|         with cf.option_context("display.unicode.east_asian_width", True): | ||||
|             # short | ||||
|             idx = CategoricalIndex(["あ", "いい", "ううう"]) | ||||
|             expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|             assert repr(idx) == expected | ||||
|  | ||||
|             # multiple lines | ||||
|             idx = CategoricalIndex(["あ", "いい", "ううう"] * 10) | ||||
|             expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', | ||||
|                   'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', | ||||
|                   'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', | ||||
|                   'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], | ||||
|                  categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|             assert repr(idx) == expected | ||||
|  | ||||
|             # truncated | ||||
|             idx = CategoricalIndex(["あ", "いい", "ううう"] * 100) | ||||
|             expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', | ||||
|                   'ううう', 'あ', | ||||
|                   ... | ||||
|                   'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', | ||||
|                   'あ', 'いい', 'ううう'], | ||||
|                  categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501 | ||||
|  | ||||
|             assert repr(idx) == expected | ||||
|  | ||||
|             # larger categories | ||||
|             idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ")) | ||||
|             expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', | ||||
|                   'さ', 'し', 'す', 'せ', 'そ'], | ||||
|                  categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|             assert repr(idx) == expected | ||||
| @ -0,0 +1,420 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import InvalidIndexError | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestTake: | ||||
|     # Tests for CategoricalIndex.take, with and without fill_value/allow_fill. | ||||
|     def test_take_fill_value(self): | ||||
|         # GH 12631 | ||||
|  | ||||
|         # numeric category | ||||
|         # Without fill_value, -1 selects the last element. | ||||
|         idx = CategoricalIndex([1, 2, 3], name="xxx") | ||||
|         result = idx.take(np.array([1, 0, -1])) | ||||
|         expected = CategoricalIndex([2, 1, 3], name="xxx") | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         tm.assert_categorical_equal(result.values, expected.values) | ||||
|  | ||||
|         # fill_value | ||||
|         # With a fill_value, the -1 position is expected to come back as NaN | ||||
|         # and the categories stay [1, 2, 3]. | ||||
|         result = idx.take(np.array([1, 0, -1]), fill_value=True) | ||||
|         expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         tm.assert_categorical_equal(result.values, expected.values) | ||||
|  | ||||
|         # allow_fill=False | ||||
|         # fill_value is ignored here: -1 selects the last element again. | ||||
|         result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) | ||||
|         expected = CategoricalIndex([2, 1, 3], name="xxx") | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         tm.assert_categorical_equal(result.values, expected.values) | ||||
|  | ||||
|         # object category | ||||
|         idx = CategoricalIndex( | ||||
|             list("CBA"), categories=list("ABC"), ordered=True, name="xxx" | ||||
|         ) | ||||
|         result = idx.take(np.array([1, 0, -1])) | ||||
|         expected = CategoricalIndex( | ||||
|             list("BCA"), categories=list("ABC"), ordered=True, name="xxx" | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         tm.assert_categorical_equal(result.values, expected.values) | ||||
|  | ||||
|         # fill_value | ||||
|         result = idx.take(np.array([1, 0, -1]), fill_value=True) | ||||
|         expected = CategoricalIndex( | ||||
|             ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx" | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         tm.assert_categorical_equal(result.values, expected.values) | ||||
|  | ||||
|         # allow_fill=False | ||||
|         result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) | ||||
|         expected = CategoricalIndex( | ||||
|             list("BCA"), categories=list("ABC"), ordered=True, name="xxx" | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         tm.assert_categorical_equal(result.values, expected.values) | ||||
|  | ||||
|         # With a fill_value, any index below -1 must raise ValueError. | ||||
|         msg = ( | ||||
|             "When allow_fill=True and fill_value is not None, " | ||||
|             "all indices must be >= -1" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             idx.take(np.array([1, 0, -2]), fill_value=True) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             idx.take(np.array([1, 0, -5]), fill_value=True) | ||||
|  | ||||
|         # Without fill_value an out-of-bounds index raises IndexError; the | ||||
|         # pattern accepts the message with or without "axis 0 with ". | ||||
|         msg = "index -5 is out of bounds for (axis 0 with )?size 3" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             idx.take(np.array([1, -5])) | ||||
|  | ||||
|     def test_take_fill_value_datetime(self): | ||||
|         # datetime category | ||||
|         idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") | ||||
|         idx = CategoricalIndex(idx) | ||||
|         result = idx.take(np.array([1, 0, -1])) | ||||
|         expected = pd.DatetimeIndex( | ||||
|             ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" | ||||
|         ) | ||||
|         expected = CategoricalIndex(expected) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # fill_value | ||||
|         # The filled slot is expected to be NaT while the categories remain | ||||
|         # the three original dates. | ||||
|         result = idx.take(np.array([1, 0, -1]), fill_value=True) | ||||
|         expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx") | ||||
|         exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) | ||||
|         expected = CategoricalIndex(expected, categories=exp_cats) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # allow_fill=False | ||||
|         result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) | ||||
|         expected = pd.DatetimeIndex( | ||||
|             ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" | ||||
|         ) | ||||
|         expected = CategoricalIndex(expected) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # Same error checks as in test_take_fill_value. | ||||
|         msg = ( | ||||
|             "When allow_fill=True and fill_value is not None, " | ||||
|             "all indices must be >= -1" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             idx.take(np.array([1, 0, -2]), fill_value=True) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             idx.take(np.array([1, 0, -5]), fill_value=True) | ||||
|  | ||||
|         msg = "index -5 is out of bounds for (axis 0 with )?size 3" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             idx.take(np.array([1, -5])) | ||||
|  | ||||
|     def test_take_invalid_kwargs(self): | ||||
|         idx = CategoricalIndex([1, 2, 3], name="foo") | ||||
|         indices = [1, 0, -1] | ||||
|  | ||||
|         # An unknown keyword raises TypeError; the 'out' and 'mode' keywords | ||||
|         # raise ValueError with a "not supported" message. | ||||
|         msg = r"take\(\) got an unexpected keyword argument 'foo'" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             idx.take(indices, foo=2) | ||||
|  | ||||
|         msg = "the 'out' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             idx.take(indices, out=indices) | ||||
|  | ||||
|         msg = "the 'mode' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             idx.take(indices, mode="clip") | ||||
|  | ||||
|  | ||||
| class TestGetLoc: | ||||
|     # Tests for CategoricalIndex.get_loc; several cases compare the result | ||||
|     # with get_loc on a plain Index holding the same values. | ||||
|     def test_get_loc(self): | ||||
|         # GH 12531 | ||||
|         # Unique values: the categories are listed in a different order from | ||||
|         # the values, and the location must still match the plain Index. | ||||
|         cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) | ||||
|         idx1 = Index(list("abcde")) | ||||
|         assert cidx1.get_loc("a") == idx1.get_loc("a") | ||||
|         assert cidx1.get_loc("e") == idx1.get_loc("e") | ||||
|  | ||||
|         for i in [cidx1, idx1]: | ||||
|             with pytest.raises(KeyError, match="'NOT-EXIST'"): | ||||
|                 i.get_loc("NOT-EXIST") | ||||
|  | ||||
|         # non-unique | ||||
|         cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc")) | ||||
|         idx2 = Index(list("aacded")) | ||||
|  | ||||
|         # results in bool array | ||||
|         res = cidx2.get_loc("d") | ||||
|         tm.assert_numpy_array_equal(res, idx2.get_loc("d")) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             res, np.array([False, False, False, True, False, True]) | ||||
|         ) | ||||
|         # unique element results in scalar | ||||
|         res = cidx2.get_loc("e") | ||||
|         assert res == idx2.get_loc("e") | ||||
|         assert res == 4 | ||||
|  | ||||
|         for i in [cidx2, idx2]: | ||||
|             with pytest.raises(KeyError, match="'NOT-EXIST'"): | ||||
|                 i.get_loc("NOT-EXIST") | ||||
|  | ||||
|         # non-unique, sliceable | ||||
|         cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) | ||||
|         idx3 = Index(list("aabbb")) | ||||
|  | ||||
|         # results in slice | ||||
|         res = cidx3.get_loc("a") | ||||
|         assert res == idx3.get_loc("a") | ||||
|         assert res == slice(0, 2, None) | ||||
|  | ||||
|         res = cidx3.get_loc("b") | ||||
|         assert res == idx3.get_loc("b") | ||||
|         assert res == slice(2, 5, None) | ||||
|  | ||||
|         # "c" is a declared category of cidx3 but not one of its values, so | ||||
|         # looking it up must raise KeyError just like on the plain Index. | ||||
|         for i in [cidx3, idx3]: | ||||
|             with pytest.raises(KeyError, match="'c'"): | ||||
|                 i.get_loc("c") | ||||
|  | ||||
|     def test_get_loc_unique(self): | ||||
|         # Unique index: the location is a plain integer position. | ||||
|         cidx = CategoricalIndex(list("abc")) | ||||
|         result = cidx.get_loc("b") | ||||
|         assert result == 1 | ||||
|  | ||||
|     def test_get_loc_monotonic_nonunique(self): | ||||
|         # Adjacent duplicates: the location is a slice. | ||||
|         cidx = CategoricalIndex(list("abbc")) | ||||
|         result = cidx.get_loc("b") | ||||
|         expected = slice(1, 3, None) | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_get_loc_nonmonotonic_nonunique(self): | ||||
|         # Non-adjacent duplicates: the location is a boolean mask. | ||||
|         cidx = CategoricalIndex(list("abcb")) | ||||
|         result = cidx.get_loc("b") | ||||
|         expected = np.array([False, True, False, True], dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_loc_nan(self): | ||||
|         # GH#41933 | ||||
|         # Looking up NaN is expected to return the position of the NaN entry. | ||||
|         ci = CategoricalIndex(["A", "B", np.nan]) | ||||
|         res = ci.get_loc(np.nan) | ||||
|  | ||||
|         assert res == 2 | ||||
|  | ||||
|  | ||||
| class TestGetIndexer: | ||||
|     # Tests for CategoricalIndex.get_indexer and get_indexer_non_unique. | ||||
|     def test_get_indexer_base(self): | ||||
|         # Determined by cat ordering. | ||||
|         idx = CategoricalIndex(list("cab"), categories=list("cab")) | ||||
|         expected = np.arange(len(idx), dtype=np.intp) | ||||
|  | ||||
|         # Indexing the index with itself yields positions 0..len-1. | ||||
|         actual = idx.get_indexer(idx) | ||||
|         tm.assert_numpy_array_equal(expected, actual) | ||||
|  | ||||
|         with pytest.raises(ValueError, match="Invalid fill method"): | ||||
|             idx.get_indexer(idx, method="invalid") | ||||
|  | ||||
|     def test_get_indexer_requires_unique(self): | ||||
|         # ci contains duplicates; oidx is a plain Index built from its values. | ||||
|         ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) | ||||
|         oidx = Index(np.array(ci)) | ||||
|  | ||||
|         msg = "Reindexing only valid with uniquely valued Index objects" | ||||
|  | ||||
|         # Targets of several sizes are drawn from oidx at random positions; | ||||
|         # the generator is seeded (2) so the draws are reproducible. | ||||
|         for n in [1, 2, 5, len(ci)]: | ||||
|             finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)] | ||||
|  | ||||
|             with pytest.raises(InvalidIndexError, match=msg): | ||||
|                 ci.get_indexer(finder) | ||||
|  | ||||
|         # see gh-17323 | ||||
|         # | ||||
|         # Even when indexer is equal to the | ||||
|         # members in the index, we should | ||||
|         # respect duplicates instead of taking | ||||
|         # the fast-track path. | ||||
|         for finder in [list("aabbca"), list("aababca")]: | ||||
|             with pytest.raises(InvalidIndexError, match=msg): | ||||
|                 ci.get_indexer(finder) | ||||
|  | ||||
|     def test_get_indexer_non_unique(self): | ||||
|         # idx1 holds "a" twice; the target value "f" does not occur in idx1. | ||||
|         idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) | ||||
|         idx2 = CategoricalIndex(list("abf")) | ||||
|  | ||||
|         # The same target is passed as a CategoricalIndex, a list and an Index. | ||||
|         for indexer in [idx2, list("abf"), Index(list("abf"))]: | ||||
|             msg = "Reindexing only valid with uniquely valued Index objects" | ||||
|             with pytest.raises(InvalidIndexError, match=msg): | ||||
|                 idx1.get_indexer(indexer) | ||||
|  | ||||
|             # Expected: both positions of "a", the position of "b", and -1 | ||||
|             # for "f".  The second return value is not checked. | ||||
|             r1, _ = idx1.get_indexer_non_unique(indexer) | ||||
|             expected = np.array([0, 1, 2, -1], dtype=np.intp) | ||||
|             tm.assert_almost_equal(r1, expected) | ||||
|  | ||||
|     def test_get_indexer_method(self): | ||||
|         idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) | ||||
|         idx2 = CategoricalIndex(list("abf")) | ||||
|  | ||||
|         # Each of the pad/backfill/nearest methods must raise | ||||
|         # NotImplementedError naming the method. | ||||
|         msg = "method pad not yet implemented for CategoricalIndex" | ||||
|         with pytest.raises(NotImplementedError, match=msg): | ||||
|             idx2.get_indexer(idx1, method="pad") | ||||
|         msg = "method backfill not yet implemented for CategoricalIndex" | ||||
|         with pytest.raises(NotImplementedError, match=msg): | ||||
|             idx2.get_indexer(idx1, method="backfill") | ||||
|  | ||||
|         msg = "method nearest not yet implemented for CategoricalIndex" | ||||
|         with pytest.raises(NotImplementedError, match=msg): | ||||
|             idx2.get_indexer(idx1, method="nearest") | ||||
|  | ||||
|     def test_get_indexer_array(self): | ||||
|         # The target is an object-dtype ndarray holding the same Timestamps | ||||
|         # that make up the index. | ||||
|         arr = np.array( | ||||
|             [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")], | ||||
|             dtype=object, | ||||
|         ) | ||||
|         cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")] | ||||
|         ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category") | ||||
|         result = ci.get_indexer(arr) | ||||
|         expected = np.array([0, 1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_same_categories_same_order(self): | ||||
|         ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) | ||||
|  | ||||
|         # Both target entries are "b", which sits at position 1 of ci. | ||||
|         result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"])) | ||||
|         expected = np.array([1, 1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_same_categories_different_order(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/19551 | ||||
|         ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) | ||||
|  | ||||
|         # The target lists its categories in reverse order; the expected | ||||
|         # positions are the same as in the same-order test. | ||||
|         result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"])) | ||||
|         expected = np.array([1, 1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_nans_in_index_and_target(self): | ||||
|         # GH 45361 | ||||
|         ci = CategoricalIndex([1, 2, np.nan, 3]) | ||||
|         # NaN in the target is expected to map to the NaN position (2) of | ||||
|         # the index; 4 is absent and maps to -1. | ||||
|         other1 = [2, 3, 4, np.nan] | ||||
|         res1 = ci.get_indexer(other1) | ||||
|         expected1 = np.array([1, 3, -1, 2], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(res1, expected1) | ||||
|         # Target without NaN. | ||||
|         other2 = [1, 4, 2, 3] | ||||
|         res2 = ci.get_indexer(other2) | ||||
|         expected2 = np.array([0, -1, 1, 3], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(res2, expected2) | ||||
|  | ||||
|  | ||||
| class TestWhere: | ||||
|     # Tests for CategoricalIndex.where. | ||||
|     def test_where(self, listlike_box): | ||||
|         # NOTE(review): listlike_box is a fixture defined outside this file; | ||||
|         # it is used here as a callable that wraps the condition list - | ||||
|         # confirm which container types it supplies. | ||||
|         klass = listlike_box | ||||
|  | ||||
|         # An all-True condition returns an index equal to the original. | ||||
|         i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) | ||||
|         cond = [True] * len(i) | ||||
|         expected = i | ||||
|         result = i.where(klass(cond)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # A False in the first slot is expected to give NaN there while the | ||||
|         # categories stay unchanged. | ||||
|         cond = [False] + [True] * (len(i) - 1) | ||||
|         expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories) | ||||
|         result = i.where(klass(cond)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_where_non_categories(self): | ||||
|         ci = CategoricalIndex(["a", "b", "c", "d"]) | ||||
|         mask = np.array([True, False, True, False]) | ||||
|  | ||||
|         # Replacing with a value that is not a category (2) is expected to | ||||
|         # give an object-dtype Index at the index level. | ||||
|         result = ci.where(mask, 2) | ||||
|         expected = Index(["a", 2, "c", 2], dtype=object) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         msg = "Cannot setitem on a Categorical with a new category" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             # Test the Categorical method directly | ||||
|             ci._data._where(mask, 2) | ||||
|  | ||||
|  | ||||
| class TestContains: | ||||
|     # Tests for the ``in`` operator on CategoricalIndex. | ||||
|     def test_contains(self): | ||||
|         ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False) | ||||
|  | ||||
|         # "e" is a declared category but not one of the values, so it must | ||||
|         # not be reported as contained; the index has no missing values. | ||||
|         assert "a" in ci | ||||
|         assert "z" not in ci | ||||
|         assert "e" not in ci | ||||
|         assert np.nan not in ci | ||||
|  | ||||
|         # assert codes NOT in index | ||||
|         assert 0 not in ci | ||||
|         assert 1 not in ci | ||||
|  | ||||
|     def test_contains_nan(self): | ||||
|         # Once a NaN entry is present, NaN is reported as contained. | ||||
|         ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef")) | ||||
|         assert np.nan in ci | ||||
|  | ||||
|     @pytest.mark.parametrize("unwrap", [True, False]) | ||||
|     def test_contains_na_dtype(self, unwrap): | ||||
|         # dti has a NaT inserted at position 0; pi and tdi are the period | ||||
|         # and timedelta counterparts derived from it. | ||||
|         dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT) | ||||
|         pi = dti.to_period("D") | ||||
|         tdi = dti - dti[-1] | ||||
|         ci = CategoricalIndex(dti) | ||||
|  | ||||
|         # With unwrap=True the checks run on the underlying ``_data`` object | ||||
|         # instead of the index itself. | ||||
|         obj = ci | ||||
|         if unwrap: | ||||
|             obj = ci._data | ||||
|  | ||||
|         # datetime categories: every NA marker except timedelta64 NaT. | ||||
|         assert np.nan in obj | ||||
|         assert None in obj | ||||
|         assert pd.NaT in obj | ||||
|         assert np.datetime64("NaT") in obj | ||||
|         assert np.timedelta64("NaT") not in obj | ||||
|  | ||||
|         obj2 = CategoricalIndex(tdi) | ||||
|         if unwrap: | ||||
|             obj2 = obj2._data | ||||
|  | ||||
|         # timedelta categories: every NA marker except datetime64 NaT. | ||||
|         assert np.nan in obj2 | ||||
|         assert None in obj2 | ||||
|         assert pd.NaT in obj2 | ||||
|         assert np.datetime64("NaT") not in obj2 | ||||
|         assert np.timedelta64("NaT") in obj2 | ||||
|  | ||||
|         obj3 = CategoricalIndex(pi) | ||||
|         if unwrap: | ||||
|             obj3 = obj3._data | ||||
|  | ||||
|         # period categories: neither numpy NaT flavour is contained. | ||||
|         assert np.nan in obj3 | ||||
|         assert None in obj3 | ||||
|         assert pd.NaT in obj3 | ||||
|         assert np.datetime64("NaT") not in obj3 | ||||
|         assert np.timedelta64("NaT") not in obj3 | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "item, expected", | ||||
|         [ | ||||
|             (pd.Interval(0, 1), True), | ||||
|             (1.5, True), | ||||
|             (pd.Interval(0.5, 1.5), False), | ||||
|             ("a", False), | ||||
|             (Timestamp(1), False), | ||||
|             (pd.Timedelta(1), False), | ||||
|         ], | ||||
|         ids=str, | ||||
|     ) | ||||
|     def test_contains_interval(self, item, expected): | ||||
|         # GH 23705 | ||||
|         # The index holds the intervals built from the breaks 0, 1, 2.  The | ||||
|         # identity check (``is``) also requires the result to be a real bool. | ||||
|         ci = CategoricalIndex(IntervalIndex.from_breaks(range(3))) | ||||
|         result = item in ci | ||||
|         assert result is expected | ||||
|  | ||||
|     def test_contains_list(self): | ||||
|         # GH#21729 | ||||
|         idx = CategoricalIndex([1, 2, 3]) | ||||
|  | ||||
|         assert "a" not in idx | ||||
|  | ||||
|         # Lists are unhashable, so the membership test must raise TypeError | ||||
|         # rather than return False. | ||||
|         with pytest.raises(TypeError, match="unhashable type"): | ||||
|             ["a"] in idx | ||||
|  | ||||
|         with pytest.raises(TypeError, match="unhashable type"): | ||||
|             ["a", "b"] in idx | ||||
| @ -0,0 +1,144 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, categories", | ||||
|     [ | ||||
|         (list("abcbca"), list("cab")), | ||||
|         (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), | ||||
|     ], | ||||
|     ids=["string", "interval"], | ||||
| ) | ||||
| def test_map_str(data, categories, ordered): | ||||
|     # GH 31202 - override base class since we want to maintain categorical/ordered | ||||
|     # NOTE(review): ``ordered`` is a fixture defined outside this file - | ||||
|     # confirm the values it supplies. | ||||
|     # Mapping ``str`` over the index is expected to give a CategoricalIndex | ||||
|     # whose values and categories are the stringified originals, with the | ||||
|     # same ``ordered`` flag. | ||||
|     index = CategoricalIndex(data, categories=categories, ordered=ordered) | ||||
|     result = index.map(str) | ||||
|     expected = CategoricalIndex( | ||||
|         map(str, data), categories=map(str, categories), ordered=ordered | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_map():
    """Exercise CategoricalIndex.map with callables, a Series and a dict."""

    def to_lower(label):
        return label.lower()

    # ordered index: the mapped categories keep their order and ordered flag
    ordered_ci = CategoricalIndex(
        list("ABABC"), categories=list("CBA"), ordered=True
    )
    expected = CategoricalIndex(
        list("ababc"), categories=list("cba"), ordered=True
    )
    tm.assert_index_equal(ordered_ci.map(to_lower), expected)

    # unordered, named index: the name is carried over to the result
    named_ci = CategoricalIndex(
        list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
    )
    expected = CategoricalIndex(
        list("ababc"), categories=list("bac"), ordered=False, name="XXX"
    )
    tm.assert_index_equal(named_ci.map(to_lower), expected)

    # GH 12766: Return an index not an array
    constant = named_ci.map(lambda _: 1)
    tm.assert_index_equal(
        constant, Index(np.array([1] * 5, dtype=np.int64), name="XXX")
    )

    # change categories dtype
    letters = CategoricalIndex(
        list("ABABC"), categories=list("BAC"), ordered=False
    )
    expected = CategoricalIndex(
        [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False
    )

    def f(x):
        return {"A": 10, "B": 20, "C": 30}.get(x)

    # a function, a Series and a dict describing the same mapping must agree
    mappers = (
        f,
        Series([10, 20, 30], index=["A", "B", "C"]),
        {"A": 10, "B": 20, "C": 30},
    )
    for mapper in mappers:
        tm.assert_index_equal(letters.map(mapper), expected)
|  | ||||
|  | ||||
def test_map_with_categorical_series():
    """Check Index.map with a categorical and a non-categorical Series mapper.

    GH 12756. A categorical mapper is expected to give a CategoricalIndex, a
    plain Series mapper a plain Index; the key missing from the mapper (4)
    becomes NaN in both cases.
    """
    labels = ["even", "odd", "even", "odd"]
    keys = Index([1, 2, 3, 4])
    categorical_mapper = Series(labels, dtype="category")
    plain_mapper = Series(labels)

    mapped_values = ["odd", "even", "odd", np.nan]

    tm.assert_index_equal(
        keys.map(categorical_mapper), CategoricalIndex(mapped_values)
    )
    tm.assert_index_equal(keys.map(plain_mapper), Index(mapped_values))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    ("data", "f", "expected"),
    (
        # callable mapper
        ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
        # dict mapper
        ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
        # Series mapper
        (
            [1, 1, np.nan],
            Series([False, False]),
            CategoricalIndex([False, False, np.nan]),
        ),
        (
            [1, 2, np.nan],
            Series([False, False, False]),
            Index([False, False, np.nan]),
        ),
    ),
)
def test_map_with_nan_ignore(data, f, expected):
    """Check CategoricalIndex.map with ``na_action="ignore"`` (GH 24241).

    In every case the NaN entry of ``data`` is expected to stay NaN in the
    mapped result.
    """
    cat_index = CategoricalIndex(data)
    mapped = cat_index.map(f, na_action="ignore")
    tm.assert_index_equal(mapped, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    ("data", "f", "expected"),
    (
        # callable mapper: the NaN entry is passed to the function
        ([1, 1, np.nan], pd.isna, Index([False, False, True])),
        ([1, 2, np.nan], pd.isna, Index([False, False, True])),
        # dict mapper
        ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
        # Series mapper
        (
            [1, 1, np.nan],
            Series([False, False]),
            CategoricalIndex([False, False, np.nan]),
        ),
        (
            [1, 2, np.nan],
            Series([False, False, False]),
            Index([False, False, np.nan]),
        ),
    ),
)
def test_map_with_nan_none(data, f, expected):
    """Check CategoricalIndex.map with ``na_action=None`` (GH 24241).

    With ``pd.isna`` as the mapper the NaN entry is expected to map to
    ``True``; with dict and Series mappers it is expected to stay NaN.
    """
    cat_index = CategoricalIndex(data)
    mapped = cat_index.map(f, na_action=None)
    tm.assert_index_equal(mapped, expected)
|  | ||||
|  | ||||
def test_map_with_dict_or_series():
    """Check that a Series mapper and the equivalent dict mapper agree.

    The mappers are built from every value pair except the last one, which
    repeats the first pair.
    """
    orig_values = ["a", "B", 1, "a"]
    new_values = ["one", 2, 3.0, "one"]
    cur_index = CategoricalIndex(orig_values, name="XXX")
    expected = CategoricalIndex(
        new_values, name="XXX", categories=[3.0, 2, "one"]
    )

    # drop the trailing duplicate so the mapper keys are unique
    keys = orig_values[:-1]
    targets = new_values[:-1]

    for mapper in (Series(targets, index=keys), dict(zip(keys, targets))):
        mapped = cur_index.map(mapper)
        # Order of categories in result can be different
        tm.assert_index_equal(mapped, expected)
| @ -0,0 +1,78 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestReindex:
    """Tests for ``CategoricalIndex.reindex``."""

    def test_reindex_list_non_unique(self):
        """Reindexing a non-unique index with a list target raises."""
        # GH#11586
        dup_ci = CategoricalIndex(list("abca"))
        err = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=err):
            dup_ci.reindex(["a", "c"])

    def test_reindex_categorical_non_unique(self):
        """Reindexing a non-unique index with a Categorical target raises."""
        dup_ci = CategoricalIndex(list("abca"))
        err = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=err):
            dup_ci.reindex(Categorical(["a", "c"]))

    def test_reindex_list_non_unique_unused_category(self):
        """List target, non-unique index that also has an unused category."""
        dup_ci = CategoricalIndex(list("abca"), categories=list("abcd"))
        err = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=err):
            dup_ci.reindex(["a", "c"])

    def test_reindex_categorical_non_unique_unused_category(self):
        """Categorical target, non-unique index with an unused category."""
        dup_ci = CategoricalIndex(list("abca"), categories=list("abcd"))
        err = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=err):
            dup_ci.reindex(Categorical(["a", "c"]))

    def test_reindex_duplicate_target(self):
        """A unique index can be reindexed with a target holding duplicates."""
        # See GH25459
        all_cats = ["a", "b", "c", "d"]
        labels = ["a", "c", "c"]
        unique_ci = CategoricalIndex(["a", "b", "c"], categories=all_cats)
        positions = np.array([0, 2, 2], dtype=np.intp)

        # list target -> plain Index result
        new_index, indexer = unique_ci.reindex(labels)
        tm.assert_index_equal(new_index, Index(labels), exact=True)
        tm.assert_numpy_array_equal(indexer, positions)

        # CategoricalIndex target -> CategoricalIndex result
        new_index, indexer = unique_ci.reindex(
            CategoricalIndex(labels, categories=all_cats)
        )
        tm.assert_index_equal(
            new_index, CategoricalIndex(labels, categories=all_cats), exact=True
        )
        tm.assert_numpy_array_equal(indexer, positions)

    def test_reindex_empty_index(self):
        """Reindexing an empty index marks every target label as missing."""
        # See GH16770
        empty_ci = CategoricalIndex([])
        new_index, indexer = empty_ci.reindex(["a", "b"])
        tm.assert_index_equal(new_index, Index(["a", "b"]), exact=True)
        # -1 is the indexer value expected for each label
        tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))

    def test_reindex_categorical_added_category(self):
        """Reindexing against a target with extra interval categories."""
        # GH 42424
        intervals = [Interval(left, left + 1, closed="right") for left in range(4)]
        ci = CategoricalIndex(intervals[:2], ordered=True)
        ci_add = CategoricalIndex(intervals, ordered=True)

        result, _ = ci.reindex(ci_add)
        expected = ci_add
        tm.assert_index_equal(expected, result)
| @ -0,0 +1,18 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("na_value", [None, np.nan])
def test_difference_with_na(na_value):
    """Check CategoricalIndex.difference when both sides contain a missing value.

    GH 57318. The expected result drops both "c" and the missing entry while
    keeping "c" among the categories.
    """
    left = CategoricalIndex(["a", "b", "c", None])
    right = Index(["c", na_value])

    remaining = left.difference(right)

    expected = CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
    tm.assert_index_equal(remaining, expected)
		Reference in New Issue
	
	Block a user