done
This commit is contained in:
		| @ -0,0 +1,89 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("ordered", [True, False])
@pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]])
def test_factorize(categories, ordered):
    """Check codes and uniques from factorizing a Categorical with a missing value.

    The expected uniques are in first-appearance order and are built with the
    same ``categories`` / ``ordered`` as the input; the missing value is
    expected to be coded as -1.
    """
    dtype_kwargs = {"categories": categories, "ordered": ordered}
    values = pd.Categorical(["b", "b", "a", "c", None], **dtype_kwargs)

    result_codes, result_uniques = pd.factorize(values)

    tm.assert_numpy_array_equal(
        result_codes, np.array([0, 0, 1, 2, -1], dtype=np.intp)
    )
    tm.assert_categorical_equal(
        result_uniques, pd.Categorical(["b", "a", "c"], **dtype_kwargs)
    )
|  | ||||
|  | ||||
def test_factorized_sort():
    """Check that ``sort=True`` yields sorted uniques and matching codes."""
    values = pd.Categorical(["b", "b", None, "a"])

    result_codes, result_uniques = pd.factorize(values, sort=True)

    # "a" is expected first in the uniques, so "b" -> 1, "a" -> 0, missing -> -1.
    tm.assert_numpy_array_equal(
        result_codes, np.array([1, 1, -1, 0], dtype=np.intp)
    )
    tm.assert_categorical_equal(result_uniques, pd.Categorical(["a", "b"]))
|  | ||||
|  | ||||
def test_factorized_sort_ordered():
    """Check ``sort=True`` on an ordered Categorical with categories c < b < a.

    The expected uniques are ``["b", "a"]``, i.e. the order given by the
    declared categories rather than alphabetical order.
    """
    dtype_kwargs = {"categories": ["c", "b", "a"], "ordered": True}
    values = pd.Categorical(["b", "b", None, "a"], **dtype_kwargs)

    result_codes, result_uniques = pd.factorize(values, sort=True)

    tm.assert_numpy_array_equal(
        result_codes, np.array([0, 0, -1, 1], dtype=np.intp)
    )
    tm.assert_categorical_equal(
        result_uniques, pd.Categorical(["b", "a"], **dtype_kwargs)
    )
|  | ||||
|  | ||||
def test_isin_cats():
    """Check ``Categorical.isin`` against lists with and without a missing value."""
    # GH2003
    cat = pd.Categorical(["a", "b", np.nan])

    # NaN in the lookup values matches the missing entry.
    result = cat.isin(["a", np.nan])
    expected = np.array([True, False, True], dtype=bool)
    # Pass (result, expected) so a failure labels left/right correctly.
    tm.assert_numpy_array_equal(result, expected)

    # "c" is not a category; no entry is expected to match it.
    result = cat.isin(["a", "c"])
    expected = np.array([True, False, False], dtype=bool)
    tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("value", [[""], [None, ""], [pd.NaT, ""]])
def test_isin_cats_corner_cases(value):
    """Check that an empty-string category is found, with or without NA in ``value``."""
    # GH36550
    cat = pd.Categorical([""])
    result = cat.isin(value)
    expected = np.array([True], dtype=bool)
    # Pass (result, expected) so a failure labels left/right correctly.
    tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
    """Check that ``isin`` with an empty list, Series or ndarray is all False."""
    cat = pd.Categorical(["a", "b"])
    expected = np.array([False, False], dtype=bool)

    result = cat.isin(empty)
    # Pass (result, expected) so a failure labels left/right correctly.
    tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
def test_diff():
    """Check that ``diff`` raises TypeError on a categorical Series and DataFrame."""
    ser = pd.Series([1, 2, 3], dtype="category")
    frame = ser.to_frame(name="A")
    expected_msg = "Convert to a suitable dtype"

    # Same error for the Series and for the one-column frame built from it.
    for obj in (ser, frame):
        with pytest.raises(TypeError, match=expected_msg):
            obj.diff()
| @ -0,0 +1,355 @@ | ||||
| import re | ||||
| import sys | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import PYPY | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     NaT, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import is_scalar | ||||
|  | ||||
|  | ||||
| class TestCategoricalAnalytics: | ||||
|     @pytest.mark.parametrize("aggregation", ["min", "max"]) | ||||
|     def test_min_max_not_ordered_raises(self, aggregation): | ||||
|         # unordered cats have no min/max | ||||
|         cat = Categorical(["a", "b", "c", "d"], ordered=False) | ||||
|         msg = f"Categorical is not ordered for operation {aggregation}" | ||||
|         agg_func = getattr(cat, aggregation) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             agg_func() | ||||
|  | ||||
|         ufunc = np.minimum if aggregation == "min" else np.maximum | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ufunc.reduce(cat) | ||||
|  | ||||
|     def test_min_max_ordered(self, index_or_series_or_array): | ||||
|         cat = Categorical(["a", "b", "c", "d"], ordered=True) | ||||
|         obj = index_or_series_or_array(cat) | ||||
|         _min = obj.min() | ||||
|         _max = obj.max() | ||||
|         assert _min == "a" | ||||
|         assert _max == "d" | ||||
|  | ||||
|         assert np.minimum.reduce(obj) == "a" | ||||
|         assert np.maximum.reduce(obj) == "d" | ||||
|         # TODO: raises if we pass axis=0  (on Index and Categorical, not Series) | ||||
|  | ||||
|         cat = Categorical( | ||||
|             ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True | ||||
|         ) | ||||
|         obj = index_or_series_or_array(cat) | ||||
|         _min = obj.min() | ||||
|         _max = obj.max() | ||||
|         assert _min == "d" | ||||
|         assert _max == "a" | ||||
|         assert np.minimum.reduce(obj) == "d" | ||||
|         assert np.maximum.reduce(obj) == "a" | ||||
|  | ||||
|     def test_min_max_reduce(self): | ||||
|         # GH52788 | ||||
|         cat = Categorical(["a", "b", "c", "d"], ordered=True) | ||||
|         df = DataFrame(cat) | ||||
|  | ||||
|         result_max = df.agg("max") | ||||
|         expected_max = Series(Categorical(["d"], dtype=cat.dtype)) | ||||
|         tm.assert_series_equal(result_max, expected_max) | ||||
|  | ||||
|         result_min = df.agg("min") | ||||
|         expected_min = Series(Categorical(["a"], dtype=cat.dtype)) | ||||
|         tm.assert_series_equal(result_min, expected_min) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "categories,expected", | ||||
|         [ | ||||
|             (list("ABC"), np.nan), | ||||
|             ([1, 2, 3], np.nan), | ||||
|             pytest.param( | ||||
|                 Series(date_range("2020-01-01", periods=3), dtype="category"), | ||||
|                 NaT, | ||||
|                 marks=pytest.mark.xfail( | ||||
|                     reason="https://github.com/pandas-dev/pandas/issues/29962" | ||||
|                 ), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("aggregation", ["min", "max"]) | ||||
|     def test_min_max_ordered_empty(self, categories, expected, aggregation): | ||||
|         # GH 30227 | ||||
|         cat = Categorical([], categories=categories, ordered=True) | ||||
|  | ||||
|         agg_func = getattr(cat, aggregation) | ||||
|         result = agg_func() | ||||
|         assert result is expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, categories", | ||||
|         [(["a", "b", "c", np.nan], list("cba")), ([1, 2, 3, np.nan], [3, 2, 1])], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     @pytest.mark.parametrize("function", ["min", "max"]) | ||||
|     def test_min_max_with_nan(self, values, categories, function, skipna): | ||||
|         # GH 25303 | ||||
|         cat = Categorical(values, categories=categories, ordered=True) | ||||
|         result = getattr(cat, function)(skipna=skipna) | ||||
|  | ||||
|         if skipna is False: | ||||
|             assert result is np.nan | ||||
|         else: | ||||
|             expected = categories[0] if function == "min" else categories[2] | ||||
|             assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize("function", ["min", "max"]) | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     def test_min_max_only_nan(self, function, skipna): | ||||
|         # https://github.com/pandas-dev/pandas/issues/33450 | ||||
|         cat = Categorical([np.nan], categories=[1, 2], ordered=True) | ||||
|         result = getattr(cat, function)(skipna=skipna) | ||||
|         assert result is np.nan | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["min", "max"]) | ||||
|     def test_numeric_only_min_max_raises(self, method): | ||||
|         # GH 25303 | ||||
|         cat = Categorical( | ||||
|             [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=".* got an unexpected keyword"): | ||||
|             getattr(cat, method)(numeric_only=True) | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["min", "max"]) | ||||
|     def test_numpy_min_max_raises(self, method): | ||||
|         cat = Categorical(["a", "b", "c", "b"], ordered=False) | ||||
|         msg = ( | ||||
|             f"Categorical is not ordered for operation {method}\n" | ||||
|             "you can use .as_ordered() to change the Categorical to an ordered one" | ||||
|         ) | ||||
|         method = getattr(np, method) | ||||
|         with pytest.raises(TypeError, match=re.escape(msg)): | ||||
|             method(cat) | ||||
|  | ||||
|     @pytest.mark.parametrize("kwarg", ["axis", "out", "keepdims"]) | ||||
|     @pytest.mark.parametrize("method", ["min", "max"]) | ||||
|     def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg): | ||||
|         cat = Categorical(["a", "b", "c", "b"], ordered=True) | ||||
|         msg = ( | ||||
|             f"the '{kwarg}' parameter is not supported in the pandas implementation " | ||||
|             f"of {method}" | ||||
|         ) | ||||
|         if kwarg == "axis": | ||||
|             msg = r"`axis` must be fewer than the number of dimensions \(1\)" | ||||
|         kwargs = {kwarg: 42} | ||||
|         method = getattr(np, method) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             method(cat, **kwargs) | ||||
|  | ||||
|     @pytest.mark.parametrize("method, expected", [("min", "a"), ("max", "c")]) | ||||
|     def test_numpy_min_max_axis_equals_none(self, method, expected): | ||||
|         cat = Categorical(["a", "b", "c", "b"], ordered=True) | ||||
|         method = getattr(np, method) | ||||
|         result = method(cat, axis=None) | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values,categories,exp_mode", | ||||
|         [ | ||||
|             ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]), | ||||
|             ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]), | ||||
|             ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]), | ||||
|             ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]), | ||||
|             ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), | ||||
|             ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_mode(self, values, categories, exp_mode): | ||||
|         cat = Categorical(values, categories=categories, ordered=True) | ||||
|         res = Series(cat).mode()._values | ||||
|         exp = Categorical(exp_mode, categories=categories, ordered=True) | ||||
|         tm.assert_categorical_equal(res, exp) | ||||
|  | ||||
|     def test_searchsorted(self, ordered): | ||||
|         # https://github.com/pandas-dev/pandas/issues/8420 | ||||
|         # https://github.com/pandas-dev/pandas/issues/14522 | ||||
|  | ||||
|         cat = Categorical( | ||||
|             ["cheese", "milk", "apple", "bread", "bread"], | ||||
|             categories=["cheese", "milk", "apple", "bread"], | ||||
|             ordered=ordered, | ||||
|         ) | ||||
|         ser = Series(cat) | ||||
|  | ||||
|         # Searching for single item argument, side='left' (default) | ||||
|         res_cat = cat.searchsorted("apple") | ||||
|         assert res_cat == 2 | ||||
|         assert is_scalar(res_cat) | ||||
|  | ||||
|         res_ser = ser.searchsorted("apple") | ||||
|         assert res_ser == 2 | ||||
|         assert is_scalar(res_ser) | ||||
|  | ||||
|         # Searching for single item array, side='left' (default) | ||||
|         res_cat = cat.searchsorted(["bread"]) | ||||
|         res_ser = ser.searchsorted(["bread"]) | ||||
|         exp = np.array([3], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(res_cat, exp) | ||||
|         tm.assert_numpy_array_equal(res_ser, exp) | ||||
|  | ||||
|         # Searching for several items array, side='right' | ||||
|         res_cat = cat.searchsorted(["apple", "bread"], side="right") | ||||
|         res_ser = ser.searchsorted(["apple", "bread"], side="right") | ||||
|         exp = np.array([3, 5], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(res_cat, exp) | ||||
|         tm.assert_numpy_array_equal(res_ser, exp) | ||||
|  | ||||
|         # Searching for a single value that is not from the Categorical | ||||
|         with pytest.raises(TypeError, match="cucumber"): | ||||
|             cat.searchsorted("cucumber") | ||||
|         with pytest.raises(TypeError, match="cucumber"): | ||||
|             ser.searchsorted("cucumber") | ||||
|  | ||||
|         # Searching for multiple values one of each is not from the Categorical | ||||
|         msg = ( | ||||
|             "Cannot setitem on a Categorical with a new category, " | ||||
|             "set the categories first" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat.searchsorted(["bread", "cucumber"]) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser.searchsorted(["bread", "cucumber"]) | ||||
|  | ||||
|     def test_unique(self, ordered): | ||||
|         # GH38140 | ||||
|         dtype = CategoricalDtype(["a", "b", "c"], ordered=ordered) | ||||
|  | ||||
|         # categories are reordered based on value when ordered=False | ||||
|         cat = Categorical(["a", "b", "c"], dtype=dtype) | ||||
|         res = cat.unique() | ||||
|         tm.assert_categorical_equal(res, cat) | ||||
|  | ||||
|         cat = Categorical(["a", "b", "a", "a"], dtype=dtype) | ||||
|         res = cat.unique() | ||||
|         tm.assert_categorical_equal(res, Categorical(["a", "b"], dtype=dtype)) | ||||
|  | ||||
|         cat = Categorical(["c", "a", "b", "a", "a"], dtype=dtype) | ||||
|         res = cat.unique() | ||||
|         exp_cat = Categorical(["c", "a", "b"], dtype=dtype) | ||||
|         tm.assert_categorical_equal(res, exp_cat) | ||||
|  | ||||
|         # nan must be removed | ||||
|         cat = Categorical(["b", np.nan, "b", np.nan, "a"], dtype=dtype) | ||||
|         res = cat.unique() | ||||
|         exp_cat = Categorical(["b", np.nan, "a"], dtype=dtype) | ||||
|         tm.assert_categorical_equal(res, exp_cat) | ||||
|  | ||||
|     def test_unique_index_series(self, ordered): | ||||
|         # GH38140 | ||||
|         dtype = CategoricalDtype([3, 2, 1], ordered=ordered) | ||||
|  | ||||
|         c = Categorical([3, 1, 2, 2, 1], dtype=dtype) | ||||
|         # Categorical.unique sorts categories by appearance order | ||||
|         # if ordered=False | ||||
|         exp = Categorical([3, 1, 2], dtype=dtype) | ||||
|         tm.assert_categorical_equal(c.unique(), exp) | ||||
|  | ||||
|         tm.assert_index_equal(Index(c).unique(), Index(exp)) | ||||
|         tm.assert_categorical_equal(Series(c).unique(), exp) | ||||
|  | ||||
|         c = Categorical([1, 1, 2, 2], dtype=dtype) | ||||
|         exp = Categorical([1, 2], dtype=dtype) | ||||
|         tm.assert_categorical_equal(c.unique(), exp) | ||||
|         tm.assert_index_equal(Index(c).unique(), Index(exp)) | ||||
|         tm.assert_categorical_equal(Series(c).unique(), exp) | ||||
|  | ||||
|     def test_shift(self): | ||||
|         # GH 9416 | ||||
|         cat = Categorical(["a", "b", "c", "d", "a"]) | ||||
|  | ||||
|         # shift forward | ||||
|         sp1 = cat.shift(1) | ||||
|         xp1 = Categorical([np.nan, "a", "b", "c", "d"]) | ||||
|         tm.assert_categorical_equal(sp1, xp1) | ||||
|         tm.assert_categorical_equal(cat[:-1], sp1[1:]) | ||||
|  | ||||
|         # shift back | ||||
|         sn2 = cat.shift(-2) | ||||
|         xp2 = Categorical( | ||||
|             ["c", "d", "a", np.nan, np.nan], categories=["a", "b", "c", "d"] | ||||
|         ) | ||||
|         tm.assert_categorical_equal(sn2, xp2) | ||||
|         tm.assert_categorical_equal(cat[2:], sn2[:-2]) | ||||
|  | ||||
|         # shift by zero | ||||
|         tm.assert_categorical_equal(cat, cat.shift(0)) | ||||
|  | ||||
|     def test_nbytes(self): | ||||
|         cat = Categorical([1, 2, 3]) | ||||
|         exp = 3 + 3 * 8  # 3 int8s for values + 3 int64s for categories | ||||
|         assert cat.nbytes == exp | ||||
|  | ||||
|     def test_memory_usage(self, using_infer_string): | ||||
|         cat = Categorical([1, 2, 3]) | ||||
|  | ||||
|         # .categories is an index, so we include the hashtable | ||||
|         assert 0 < cat.nbytes <= cat.memory_usage() | ||||
|         assert 0 < cat.nbytes <= cat.memory_usage(deep=True) | ||||
|  | ||||
|         cat = Categorical(["foo", "foo", "bar"]) | ||||
|         if using_infer_string: | ||||
|             if cat.categories.dtype.storage == "python": | ||||
|                 assert cat.memory_usage(deep=True) > cat.nbytes | ||||
|             else: | ||||
|                 assert cat.memory_usage(deep=True) >= cat.nbytes | ||||
|         else: | ||||
|             assert cat.memory_usage(deep=True) > cat.nbytes | ||||
|  | ||||
|         if not PYPY: | ||||
|             # sys.getsizeof will call the .memory_usage with | ||||
|             # deep=True, and add on some GC overhead | ||||
|             diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) | ||||
|             assert abs(diff) < 100 | ||||
|  | ||||
|     def test_map(self): | ||||
|         c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) | ||||
|         result = c.map(lambda x: x.lower(), na_action=None) | ||||
|         exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) | ||||
|         tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|         c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False) | ||||
|         result = c.map(lambda x: x.lower(), na_action=None) | ||||
|         exp = Categorical(list("ababc"), categories=list("abc"), ordered=False) | ||||
|         tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|         result = c.map(lambda x: 1, na_action=None) | ||||
|         # GH 12766: Return an index not an array | ||||
|         tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) | ||||
|  | ||||
|     @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) | ||||
|     def test_validate_inplace_raises(self, value): | ||||
|         cat = Categorical(["A", "B", "B", "C", "A"]) | ||||
|         msg = ( | ||||
|             'For argument "inplace" expected type bool, ' | ||||
|             f"received type {type(value).__name__}" | ||||
|         ) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.sort_values(inplace=value) | ||||
|  | ||||
|     def test_quantile_empty(self): | ||||
|         # make sure we have correct itemsize on resulting codes | ||||
|         cat = Categorical(["A", "B"]) | ||||
|         idx = Index([0.0, 0.5]) | ||||
|         result = cat[:0]._quantile(idx, interpolation="linear") | ||||
|         assert result._codes.dtype == np.int8 | ||||
|  | ||||
|         expected = cat.take([-1, -1], allow_fill=True) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
| @ -0,0 +1,501 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import PY311 | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     StringDtype, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.categorical import recode_for_categories | ||||
|  | ||||
|  | ||||
| class TestCategoricalAPI: | ||||
|     def test_to_list_deprecated(self): | ||||
|         # GH#51254 | ||||
|         cat1 = Categorical(list("acb"), ordered=False) | ||||
|         msg = "Categorical.to_list is deprecated and will be removed" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             cat1.to_list() | ||||
|  | ||||
|     def test_ordered_api(self): | ||||
|         # GH 9347 | ||||
|         cat1 = Categorical(list("acb"), ordered=False) | ||||
|         tm.assert_index_equal(cat1.categories, Index(["a", "b", "c"])) | ||||
|         assert not cat1.ordered | ||||
|  | ||||
|         cat2 = Categorical(list("acb"), categories=list("bca"), ordered=False) | ||||
|         tm.assert_index_equal(cat2.categories, Index(["b", "c", "a"])) | ||||
|         assert not cat2.ordered | ||||
|  | ||||
|         cat3 = Categorical(list("acb"), ordered=True) | ||||
|         tm.assert_index_equal(cat3.categories, Index(["a", "b", "c"])) | ||||
|         assert cat3.ordered | ||||
|  | ||||
|         cat4 = Categorical(list("acb"), categories=list("bca"), ordered=True) | ||||
|         tm.assert_index_equal(cat4.categories, Index(["b", "c", "a"])) | ||||
|         assert cat4.ordered | ||||
|  | ||||
|     def test_set_ordered(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         cat2 = cat.as_unordered() | ||||
|         assert not cat2.ordered | ||||
|         cat2 = cat.as_ordered() | ||||
|         assert cat2.ordered | ||||
|  | ||||
|         assert cat2.set_ordered(True).ordered | ||||
|         assert not cat2.set_ordered(False).ordered | ||||
|  | ||||
|         # removed in 0.19.0 | ||||
|         msg = ( | ||||
|             "property 'ordered' of 'Categorical' object has no setter" | ||||
|             if PY311 | ||||
|             else "can't set attribute" | ||||
|         ) | ||||
|         with pytest.raises(AttributeError, match=msg): | ||||
|             cat.ordered = True | ||||
|         with pytest.raises(AttributeError, match=msg): | ||||
|             cat.ordered = False | ||||
|  | ||||
|     def test_rename_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"]) | ||||
|  | ||||
|         # inplace=False: the old one must not be changed | ||||
|         res = cat.rename_categories([1, 2, 3]) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) | ||||
|         ) | ||||
|         tm.assert_index_equal(res.categories, Index([1, 2, 3])) | ||||
|  | ||||
|         exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) | ||||
|         tm.assert_numpy_array_equal(cat.__array__(), exp_cat) | ||||
|  | ||||
|         exp_cat = Index(["a", "b", "c"]) | ||||
|         tm.assert_index_equal(cat.categories, exp_cat) | ||||
|  | ||||
|         # GH18862 (let rename_categories take callables) | ||||
|         result = cat.rename_categories(lambda x: x.upper()) | ||||
|         expected = Categorical(["A", "B", "C", "A"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) | ||||
|     def test_rename_categories_wrong_length_raises(self, new_categories): | ||||
|         cat = Categorical(["a", "b", "c", "a"]) | ||||
|         msg = ( | ||||
|             "new categories need to have the same number of items as the " | ||||
|             "old categories!" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.rename_categories(new_categories) | ||||
|  | ||||
|     def test_rename_categories_series(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/17981 | ||||
|         c = Categorical(["a", "b"]) | ||||
|         result = c.rename_categories(Series([0, 1], index=["a", "b"])) | ||||
|         expected = Categorical([0, 1]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_rename_categories_dict(self): | ||||
|         # GH 17336 | ||||
|         cat = Categorical(["a", "b", "c", "d"]) | ||||
|         res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}) | ||||
|         expected = Index([4, 3, 2, 1]) | ||||
|         tm.assert_index_equal(res.categories, expected) | ||||
|  | ||||
|         # Test for dicts of smaller length | ||||
|         cat = Categorical(["a", "b", "c", "d"]) | ||||
|         res = cat.rename_categories({"a": 1, "c": 3}) | ||||
|  | ||||
|         expected = Index([1, "b", 3, "d"]) | ||||
|         tm.assert_index_equal(res.categories, expected) | ||||
|  | ||||
|         # Test for dicts with bigger length | ||||
|         cat = Categorical(["a", "b", "c", "d"]) | ||||
|         res = cat.rename_categories({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}) | ||||
|         expected = Index([1, 2, 3, 4]) | ||||
|         tm.assert_index_equal(res.categories, expected) | ||||
|  | ||||
|         # Test for dicts with no items from old categories | ||||
|         cat = Categorical(["a", "b", "c", "d"]) | ||||
|         res = cat.rename_categories({"f": 1, "g": 3}) | ||||
|  | ||||
|         expected = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_index_equal(res.categories, expected) | ||||
|  | ||||
|     def test_reorder_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         old = cat.copy() | ||||
|         new = Categorical( | ||||
|             ["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True | ||||
|         ) | ||||
|  | ||||
|         res = cat.reorder_categories(["c", "b", "a"]) | ||||
|         # cat must be the same as before | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         # only res is changed | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "new_categories", | ||||
|         [ | ||||
|             ["a"],  # not all "old" included in "new" | ||||
|             ["a", "b", "d"],  # still not all "old" in "new" | ||||
|             ["a", "b", "c", "d"],  # all "old" included in "new", but too long | ||||
|         ], | ||||
|     ) | ||||
|     def test_reorder_categories_raises(self, new_categories): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         msg = "items in new_categories are not the same as in old categories" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.reorder_categories(new_categories) | ||||
|  | ||||
|     def test_add_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         old = cat.copy() | ||||
|         new = Categorical( | ||||
|             ["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True | ||||
|         ) | ||||
|  | ||||
|         res = cat.add_categories("d") | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|         res = cat.add_categories(["d"]) | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|         # GH 9927 | ||||
|         cat = Categorical(list("abc"), ordered=True) | ||||
|         expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) | ||||
|         # test with Series, np.array, index, list | ||||
|         res = cat.add_categories(Series(["d", "e"])) | ||||
|         tm.assert_categorical_equal(res, expected) | ||||
|         res = cat.add_categories(np.array(["d", "e"])) | ||||
|         tm.assert_categorical_equal(res, expected) | ||||
|         res = cat.add_categories(Index(["d", "e"])) | ||||
|         tm.assert_categorical_equal(res, expected) | ||||
|         res = cat.add_categories(["d", "e"]) | ||||
|         tm.assert_categorical_equal(res, expected) | ||||
|  | ||||
|     def test_add_categories_existing_raises(self): | ||||
|         # new is in old categories | ||||
|         cat = Categorical(["a", "b", "c", "d"], ordered=True) | ||||
|         msg = re.escape("new categories must not include old categories: {'d'}") | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.add_categories(["d"]) | ||||
|  | ||||
|     def test_add_categories_losing_dtype_information(self): | ||||
|         # GH#48812 | ||||
|         cat = Categorical(Series([1, 2], dtype="Int64")) | ||||
|         ser = Series([4], dtype="Int64") | ||||
|         result = cat.add_categories(ser) | ||||
|         expected = Categorical( | ||||
|             Series([1, 2], dtype="Int64"), categories=Series([1, 2, 4], dtype="Int64") | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         cat = Categorical(Series(["a", "b", "a"], dtype=StringDtype())) | ||||
|         ser = Series(["d"], dtype=StringDtype()) | ||||
|         result = cat.add_categories(ser) | ||||
|         expected = Categorical( | ||||
|             Series(["a", "b", "a"], dtype=StringDtype()), | ||||
|             categories=Series(["a", "b", "d"], dtype=StringDtype()), | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_set_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         exp_categories = Index(["c", "b", "a"]) | ||||
|         exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) | ||||
|  | ||||
|         cat = cat.set_categories(["c", "b", "a"]) | ||||
|         res = cat.set_categories(["a", "b", "c"]) | ||||
|         # cat must be the same as before | ||||
|         tm.assert_index_equal(cat.categories, exp_categories) | ||||
|         tm.assert_numpy_array_equal(cat.__array__(), exp_values) | ||||
|         # only res is changed | ||||
|         exp_categories_back = Index(["a", "b", "c"]) | ||||
|         tm.assert_index_equal(res.categories, exp_categories_back) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp_values) | ||||
|  | ||||
|         # not all "old" included in "new" -> all not included ones are now | ||||
|         # np.nan | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         res = cat.set_categories(["a"]) | ||||
|         tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8)) | ||||
|  | ||||
|         # still not all "old" in "new" | ||||
|         res = cat.set_categories(["a", "b", "d"]) | ||||
|         tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8)) | ||||
|         tm.assert_index_equal(res.categories, Index(["a", "b", "d"])) | ||||
|  | ||||
|         # all "old" included in "new" | ||||
|         cat = cat.set_categories(["a", "b", "c", "d"]) | ||||
|         exp_categories = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_index_equal(cat.categories, exp_categories) | ||||
|  | ||||
|         # internals... | ||||
|         c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8)) | ||||
|         tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) | ||||
|  | ||||
|         exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) | ||||
|         tm.assert_numpy_array_equal(np.asarray(c), exp) | ||||
|  | ||||
|         # all "pointers" to '4' must be changed from 3 to 0,... | ||||
|         c = c.set_categories([4, 3, 2, 1]) | ||||
|  | ||||
|         # positions are changed | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8)) | ||||
|  | ||||
|         # categories are now in new order | ||||
|         tm.assert_index_equal(c.categories, Index([4, 3, 2, 1])) | ||||
|  | ||||
|         # output is the same | ||||
|         exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) | ||||
|         tm.assert_numpy_array_equal(np.asarray(c), exp) | ||||
|         assert c.min() == 4 | ||||
|         assert c.max() == 1 | ||||
|  | ||||
|         # set_categories should set the ordering if specified | ||||
|         c2 = c.set_categories([4, 3, 2, 1], ordered=False) | ||||
|         assert not c2.ordered | ||||
|  | ||||
|         tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) | ||||
|  | ||||
|         # set_categories should pass thru the ordering | ||||
|         c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) | ||||
|         assert not c2.ordered | ||||
|  | ||||
|         tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, categories, new_categories", | ||||
|         [ | ||||
|             # No NaNs, same cats, same order | ||||
|             (["a", "b", "a"], ["a", "b"], ["a", "b"]), | ||||
|             # No NaNs, same cats, different order | ||||
|             (["a", "b", "a"], ["a", "b"], ["b", "a"]), | ||||
|             # Same, unsorted | ||||
|             (["b", "a", "a"], ["a", "b"], ["a", "b"]), | ||||
|             # No NaNs, same cats, different order | ||||
|             (["b", "a", "a"], ["a", "b"], ["b", "a"]), | ||||
|             # NaNs | ||||
|             (["a", "b", "c"], ["a", "b"], ["a", "b"]), | ||||
|             (["a", "b", "c"], ["a", "b"], ["b", "a"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a", "b"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a", "b"]), | ||||
|             # Introduce NaNs | ||||
|             (["a", "b", "c"], ["a", "b"], ["a"]), | ||||
|             (["a", "b", "c"], ["a", "b"], ["b"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a"]), | ||||
|             # No overlap | ||||
|             (["a", "b", "c"], ["a", "b"], ["d", "e"]), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_set_categories_many(self, values, categories, new_categories, ordered): | ||||
|         c = Categorical(values, categories) | ||||
|         expected = Categorical(values, new_categories, ordered) | ||||
|         result = c.set_categories(new_categories, ordered=ordered) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_set_categories_rename_less(self): | ||||
|         # GH 24675 | ||||
|         cat = Categorical(["A", "B"]) | ||||
|         result = cat.set_categories(["A"], rename=True) | ||||
|         expected = Categorical(["A", np.nan]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_set_categories_private(self): | ||||
|         cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) | ||||
|         cat._set_categories(["a", "c", "d", "e"]) | ||||
|         expected = Categorical(["a", "c", "d"], categories=list("acde")) | ||||
|         tm.assert_categorical_equal(cat, expected) | ||||
|  | ||||
|         # fastpath | ||||
|         cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) | ||||
|         cat._set_categories(["a", "c", "d", "e"], fastpath=True) | ||||
|         expected = Categorical(["a", "c", "d"], categories=list("acde")) | ||||
|         tm.assert_categorical_equal(cat, expected) | ||||
|  | ||||
|     def test_remove_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         old = cat.copy() | ||||
|         new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True) | ||||
|  | ||||
|         res = cat.remove_categories("c") | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|         res = cat.remove_categories(["c"]) | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|     @pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]]) | ||||
|     def test_remove_categories_raises(self, removals): | ||||
|         cat = Categorical(["a", "b", "a"]) | ||||
|         message = re.escape("removals must all be in old categories: {'c'}") | ||||
|  | ||||
|         with pytest.raises(ValueError, match=message): | ||||
|             cat.remove_categories(removals) | ||||
|  | ||||
|     def test_remove_unused_categories(self): | ||||
|         c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) | ||||
|         exp_categories_all = Index(["a", "b", "c", "d", "e"]) | ||||
|         exp_categories_dropped = Index(["a", "b", "c", "d"]) | ||||
|  | ||||
|         tm.assert_index_equal(c.categories, exp_categories_all) | ||||
|  | ||||
|         res = c.remove_unused_categories() | ||||
|         tm.assert_index_equal(res.categories, exp_categories_dropped) | ||||
|         tm.assert_index_equal(c.categories, exp_categories_all) | ||||
|  | ||||
|         # with NaN values (GH11599) | ||||
|         c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"]) | ||||
|         res = c.remove_unused_categories() | ||||
|         tm.assert_index_equal(res.categories, Index(np.array(["a", "b", "c"]))) | ||||
|         exp_codes = np.array([0, 1, 2, -1], dtype=np.int8) | ||||
|         tm.assert_numpy_array_equal(res.codes, exp_codes) | ||||
|         tm.assert_index_equal(c.categories, exp_categories_all) | ||||
|  | ||||
|         val = ["F", np.nan, "D", "B", "D", "F", np.nan] | ||||
|         cat = Categorical(values=val, categories=list("ABCDEFG")) | ||||
|         out = cat.remove_unused_categories() | ||||
|         tm.assert_index_equal(out.categories, Index(["B", "D", "F"])) | ||||
|         exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) | ||||
|         tm.assert_numpy_array_equal(out.codes, exp_codes) | ||||
|         assert out.tolist() == val | ||||
|  | ||||
|         alpha = list("abcdefghijklmnopqrstuvwxyz") | ||||
|         val = np.random.default_rng(2).choice(alpha[::2], 10000).astype("object") | ||||
|         val[np.random.default_rng(2).choice(len(val), 100)] = np.nan | ||||
|  | ||||
|         cat = Categorical(values=val, categories=alpha) | ||||
|         out = cat.remove_unused_categories() | ||||
|         assert out.tolist() == val.tolist() | ||||
|  | ||||
|  | ||||
class TestCategoricalAPIWithFactor:
    """``Categorical.describe`` checks on small hand-built categoricals."""

    def test_describe(self):
        """``describe`` reports counts and freqs per category, including NaN."""
        factor = Categorical(list("abbaaccc"), ordered=True)

        # string type
        desc = factor.describe()
        assert factor.ordered
        expected = DataFrame(
            {"counts": [3, 2, 3], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0]},
            index=CategoricalIndex(
                ["a", "b", "c"], name="categories", ordered=factor.ordered
            ),
        )
        tm.assert_frame_equal(desc, expected)

        # check unused categories
        with_unused = factor.copy().set_categories(["a", "b", "c", "d"])
        desc = with_unused.describe()
        expected = DataFrame(
            {"counts": [3, 2, 3, 0], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0, 0]},
            index=CategoricalIndex(
                list("abcd"), ordered=factor.ordered, name="categories"
            ),
        )
        tm.assert_frame_equal(desc, expected)

        # check an integer one
        int_cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        desc = int_cat.describe()
        expected = DataFrame(
            {"counts": [5, 3, 3], "freqs": [5 / 11.0, 3 / 11.0, 3 / 11.0]},
            index=CategoricalIndex(
                [1, 2, 3], ordered=int_cat.ordered, name="categories"
            ),
        )
        tm.assert_frame_equal(desc, expected)

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN
        nan_index = CategoricalIndex(
            [1, 2, np.nan], categories=[1, 2], name="categories"
        )
        expected = DataFrame(
            {"counts": [1, 2, 1], "freqs": [1 / 4.0, 2 / 4.0, 1 / 4.0]},
            index=nan_index,
        )
        desc = Categorical([np.nan, 1, 2, 2]).describe()
        tm.assert_frame_equal(desc, expected)
|  | ||||
|  | ||||
class TestPrivateCategoricalAPI:
    """Checks of the ``codes`` property and the ``recode_for_categories`` helper."""

    def test_codes_immutable(self):
        """``codes`` is read-only, but the categorical itself stays writeable."""
        c = Categorical(["a", "b", "c", "a", np.nan])
        tm.assert_numpy_array_equal(c.codes, np.array([0, 1, 2, 0, -1], dtype="int8"))

        # Assigning to the property should raise; the message depends on PY311
        if PY311:
            msg = "property 'codes' of 'Categorical' object has no setter"
        else:
            msg = "can't set attribute"
        with pytest.raises(AttributeError, match=msg):
            c.codes = np.array([0, 1, 2, 0, 1], dtype="int8")

        # writing into the returned codes array should raise as well
        codes_view = c.codes
        with pytest.raises(ValueError, match="assignment destination is read-only"):
            codes_view[4] = 1

        # But even after getting the codes, the original array should still be
        # writeable!
        c[4] = "a"
        tm.assert_numpy_array_equal(c.codes, np.array([0, 1, 2, 0, 0], dtype="int8"))
        c._codes[4] = 2
        tm.assert_numpy_array_equal(c.codes, np.array([0, 1, 2, 0, 2], dtype="int8"))

    @pytest.mark.parametrize(
        "codes, old, new, expected",
        [
            ([0, 1], ["a", "b"], ["a", "b"], [0, 1]),
            ([0, 1], ["b", "a"], ["b", "a"], [0, 1]),
            ([0, 1], ["a", "b"], ["b", "a"], [1, 0]),
            ([0, 1], ["b", "a"], ["a", "b"], [1, 0]),
            ([0, 1, 0, 1], ["a", "b"], ["a", "b", "c"], [0, 1, 0, 1]),
            ([0, 1, 2, 2], ["a", "b", "c"], ["a", "b"], [0, 1, -1, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["a", "b", "c"], [0, 1, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["b"], [-1, 0, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["d"], [-1, -1, -1]),
            ([0, 1, -1], ["a", "b", "c"], [], [-1, -1, -1]),
            ([-1, -1], [], ["a", "b"], [-1, -1]),
            ([1, 0], ["b", "a"], ["a", "b"], [0, 1]),
        ],
    )
    def test_recode_to_categories(self, codes, old, new, expected):
        """Recoding int8 codes from ``old`` to ``new`` categories gives ``expected``."""
        code_arr = np.asanyarray(codes, dtype=np.int8)
        expected_arr = np.asanyarray(expected, dtype=np.int8)
        result = recode_for_categories(code_arr, Index(old), Index(new))
        tm.assert_numpy_array_equal(result, expected_arr)

    def test_recode_to_categories_large(self):
        """Recoding 1000 codes onto reversed categories yields int16 codes."""
        N = 1000
        codes = np.arange(N)
        # descending N-1 .. 0: used both as new categories and as expected codes
        reversed_codes = np.arange(N - 1, -1, -1, dtype=np.int16)
        result = recode_for_categories(codes, Index(codes), Index(reversed_codes))
        tm.assert_numpy_array_equal(result, reversed_codes)
| @ -0,0 +1,155 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     DatetimeIndex, | ||||
|     Interval, | ||||
|     NaT, | ||||
|     Period, | ||||
|     Timestamp, | ||||
|     array, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestAstype: | ||||
|     @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) | ||||
|     @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), NaT]]) | ||||
|     def test_astype_nan_to_int(self, cls, values): | ||||
|         # GH#28406 | ||||
|         obj = cls(values) | ||||
|  | ||||
|         msg = "Cannot (cast|convert)" | ||||
|         with pytest.raises((ValueError, TypeError), match=msg): | ||||
|             obj.astype(int) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "expected", | ||||
|         [ | ||||
|             array(["2019", "2020"], dtype="datetime64[ns, UTC]"), | ||||
|             array([0, 0], dtype="timedelta64[ns]"), | ||||
|             array([Period("2019"), Period("2020")], dtype="period[Y-DEC]"), | ||||
|             array([Interval(0, 1), Interval(1, 2)], dtype="interval"), | ||||
|             array([1, np.nan], dtype="Int64"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_astype_category_to_extension_dtype(self, expected): | ||||
|         # GH#28668 | ||||
|         result = expected.astype("category").astype(expected.dtype) | ||||
|  | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype, expected", | ||||
|         [ | ||||
|             ( | ||||
|                 "datetime64[ns]", | ||||
|                 np.array(["2015-01-01T00:00:00.000000000"], dtype="datetime64[ns]"), | ||||
|             ), | ||||
|             ( | ||||
|                 "datetime64[ns, MET]", | ||||
|                 DatetimeIndex([Timestamp("2015-01-01 00:00:00+0100", tz="MET")]).array, | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_astype_to_datetime64(self, dtype, expected): | ||||
|         # GH#28448 | ||||
|         result = Categorical(["2015-01-01"]).astype(dtype) | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_astype_str_int_categories_to_nullable_int(self): | ||||
|         # GH#39616 | ||||
|         dtype = CategoricalDtype([str(i) for i in range(5)]) | ||||
|         codes = np.random.default_rng(2).integers(5, size=20) | ||||
|         arr = Categorical.from_codes(codes, dtype=dtype) | ||||
|  | ||||
|         res = arr.astype("Int64") | ||||
|         expected = array(codes, dtype="Int64") | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|     def test_astype_str_int_categories_to_nullable_float(self): | ||||
|         # GH#39616 | ||||
|         dtype = CategoricalDtype([str(i / 2) for i in range(5)]) | ||||
|         codes = np.random.default_rng(2).integers(5, size=20) | ||||
|         arr = Categorical.from_codes(codes, dtype=dtype) | ||||
|  | ||||
|         res = arr.astype("Float64") | ||||
|         expected = array(codes, dtype="Float64") / 2 | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_astype(self, ordered): | ||||
|         # string | ||||
|         cat = Categorical(list("abbaaccc"), ordered=ordered) | ||||
|         result = cat.astype(object) | ||||
|         expected = np.array(cat) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         msg = r"Cannot cast object|str dtype to float64" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.astype(float) | ||||
|  | ||||
|         # numeric | ||||
|         cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered) | ||||
|         result = cat.astype(object) | ||||
|         expected = np.array(cat, dtype=object) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = cat.astype(int) | ||||
|         expected = np.array(cat, dtype="int") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = cat.astype(float) | ||||
|         expected = np.array(cat, dtype=float) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype_ordered", [True, False]) | ||||
|     @pytest.mark.parametrize("cat_ordered", [True, False]) | ||||
|     def test_astype_category(self, dtype_ordered, cat_ordered): | ||||
|         # GH#10696/GH#18593 | ||||
|         data = list("abcaacbab") | ||||
|         cat = Categorical(data, categories=list("bac"), ordered=cat_ordered) | ||||
|  | ||||
|         # standard categories | ||||
|         dtype = CategoricalDtype(ordered=dtype_ordered) | ||||
|         result = cat.astype(dtype) | ||||
|         expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         # non-standard categories | ||||
|         dtype = CategoricalDtype(list("adc"), dtype_ordered) | ||||
|         result = cat.astype(dtype) | ||||
|         expected = Categorical(data, dtype=dtype) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         if dtype_ordered is False: | ||||
|             # dtype='category' can't specify ordered, so only test once | ||||
|             result = cat.astype("category") | ||||
|             expected = cat | ||||
|             tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_astype_object_datetime_categories(self): | ||||
|         # GH#40754 | ||||
|         cat = Categorical(to_datetime(["2021-03-27", NaT])) | ||||
|         result = cat.astype(object) | ||||
|         expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_astype_object_timestamp_categories(self): | ||||
|         # GH#18024 | ||||
|         cat = Categorical([Timestamp("2014-01-01")]) | ||||
|         result = cat.astype(object) | ||||
|         expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_astype_category_readonly_mask_values(self): | ||||
|         # GH#53658 | ||||
|         arr = array([0, 1, 2], dtype="Int64") | ||||
|         arr._mask.flags["WRITEABLE"] = False | ||||
|         result = arr.astype("category") | ||||
|         expected = array([0, 1, 2], dtype="Int64").astype("category") | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
| @ -0,0 +1,787 @@ | ||||
| from datetime import ( | ||||
|     date, | ||||
|     datetime, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| from pandas.compat import HAS_PYARROW | ||||
|  | ||||
| from pandas.core.dtypes.common import ( | ||||
|     is_float_dtype, | ||||
|     is_integer_dtype, | ||||
| ) | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     NaT, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalConstructors: | ||||
|     def test_fastpath_deprecated(self): | ||||
|         codes = np.array([1, 2, 3]) | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c", "d"], ordered=False) | ||||
|         msg = "The 'fastpath' keyword in Categorical is deprecated" | ||||
|         with tm.assert_produces_warning(DeprecationWarning, match=msg): | ||||
|             Categorical(codes, dtype=dtype, fastpath=True) | ||||
|  | ||||
|     def test_categorical_from_cat_and_dtype_str_preserve_ordered(self): | ||||
|         # GH#49309 we should preserve orderedness in `res` | ||||
|         cat = Categorical([3, 1], categories=[3, 2, 1], ordered=True) | ||||
|  | ||||
|         res = Categorical(cat, dtype="category") | ||||
|         assert res.dtype.ordered | ||||
|  | ||||
|     def test_categorical_disallows_scalar(self): | ||||
|         # GH#38433 | ||||
|         with pytest.raises(TypeError, match="Categorical input must be list-like"): | ||||
|             Categorical("A", categories=["A", "B"]) | ||||
|  | ||||
|     def test_categorical_1d_only(self): | ||||
|         # ndim > 1 | ||||
|         msg = "> 1 ndim Categorical are not supported at this time" | ||||
|         with pytest.raises(NotImplementedError, match=msg): | ||||
|             Categorical(np.array([list("abcd")])) | ||||
|  | ||||
|     def test_validate_ordered(self): | ||||
|         # see gh-14058 | ||||
|         exp_msg = "'ordered' must either be 'True' or 'False'" | ||||
|         exp_err = TypeError | ||||
|  | ||||
|         # This should be a boolean. | ||||
|         ordered = np.array([0, 1, 2]) | ||||
|  | ||||
|         with pytest.raises(exp_err, match=exp_msg): | ||||
|             Categorical([1, 2, 3], ordered=ordered) | ||||
|  | ||||
|         with pytest.raises(exp_err, match=exp_msg): | ||||
|             Categorical.from_codes( | ||||
|                 [0, 0, 1], categories=["a", "b", "c"], ordered=ordered | ||||
|             ) | ||||
|  | ||||
|     def test_constructor_empty(self): | ||||
|         # GH 17248 | ||||
|         c = Categorical([]) | ||||
|         expected = Index([]) | ||||
|         tm.assert_index_equal(c.categories, expected) | ||||
|  | ||||
|         c = Categorical([], categories=[1, 2, 3]) | ||||
|         expected = Index([1, 2, 3], dtype=np.int64) | ||||
|         tm.assert_index_equal(c.categories, expected) | ||||
|  | ||||
|     def test_constructor_empty_boolean(self): | ||||
|         # see gh-22702 | ||||
|         cat = Categorical([], categories=[True, False]) | ||||
|         categories = sorted(cat.categories.tolist()) | ||||
|         assert categories == [False, True] | ||||
|  | ||||
|     def test_constructor_tuples(self): | ||||
|         values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object) | ||||
|         result = Categorical(values) | ||||
|         expected = Index([(1,), (1, 2)], tupleize_cols=False) | ||||
|         tm.assert_index_equal(result.categories, expected) | ||||
|         assert result.ordered is False | ||||
|  | ||||
|     def test_constructor_tuples_datetimes(self): | ||||
|         # numpy will auto reshape when all of the tuples are the | ||||
|         # same len, so add an extra one with 2 items and slice it off | ||||
|         values = np.array( | ||||
|             [ | ||||
|                 (Timestamp("2010-01-01"),), | ||||
|                 (Timestamp("2010-01-02"),), | ||||
|                 (Timestamp("2010-01-01"),), | ||||
|                 (Timestamp("2010-01-02"),), | ||||
|                 ("a", "b"), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         )[:-1] | ||||
|         result = Categorical(values) | ||||
|         expected = Index( | ||||
|             [(Timestamp("2010-01-01"),), (Timestamp("2010-01-02"),)], | ||||
|             tupleize_cols=False, | ||||
|         ) | ||||
|         tm.assert_index_equal(result.categories, expected) | ||||
|  | ||||
|     def test_constructor_unsortable(self): | ||||
|         # it works! | ||||
|         arr = np.array([1, 2, 3, datetime.now()], dtype="O") | ||||
|         factor = Categorical(arr, ordered=False) | ||||
|         assert not factor.ordered | ||||
|  | ||||
|         # this however will raise as cannot be sorted | ||||
|         msg = ( | ||||
|             "'values' is not ordered, please explicitly specify the " | ||||
|             "categories order by passing in a categories argument." | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             Categorical(arr, ordered=True) | ||||
|  | ||||
|     def test_constructor_interval(self): | ||||
|         result = Categorical( | ||||
|             [Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True | ||||
|         ) | ||||
|         ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) | ||||
|         exp = Categorical(ii, ordered=True) | ||||
|         tm.assert_categorical_equal(result, exp) | ||||
|         tm.assert_index_equal(result.categories, ii) | ||||
|  | ||||
|     def test_constructor(self): | ||||
|         exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) | ||||
|         c1 = Categorical(exp_arr) | ||||
|         tm.assert_numpy_array_equal(c1.__array__(), exp_arr) | ||||
|         c2 = Categorical(exp_arr, categories=["a", "b", "c"]) | ||||
|         tm.assert_numpy_array_equal(c2.__array__(), exp_arr) | ||||
|         c2 = Categorical(exp_arr, categories=["c", "b", "a"]) | ||||
|         tm.assert_numpy_array_equal(c2.__array__(), exp_arr) | ||||
|  | ||||
|         # categories must be unique | ||||
|         msg = "Categorical categories must be unique" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical([1, 2], [1, 2, 2]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical(["a", "b"], ["a", "b", "b"]) | ||||
|  | ||||
|         # The default should be unordered | ||||
|         c1 = Categorical(["a", "b", "c", "a"]) | ||||
|         assert not c1.ordered | ||||
|  | ||||
|         # Categorical as input | ||||
|         c1 = Categorical(["a", "b", "c", "a"]) | ||||
|         c2 = Categorical(c1) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) | ||||
|         c2 = Categorical(c1) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) | ||||
|         c2 = Categorical(c1) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) | ||||
|         c2 = Categorical(c1, categories=["a", "b", "c"]) | ||||
|         tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) | ||||
|         tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) | ||||
|  | ||||
|         # Series of dtype category | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) | ||||
|         c2 = Categorical(Series(c1)) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) | ||||
|         c2 = Categorical(Series(c1)) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         # Series | ||||
|         c1 = Categorical(["a", "b", "c", "a"]) | ||||
|         c2 = Categorical(Series(["a", "b", "c", "a"])) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) | ||||
|         c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         # This should result in integer categories, not float! | ||||
|         cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) | ||||
|         assert is_integer_dtype(cat.categories) | ||||
|  | ||||
|         # https://github.com/pandas-dev/pandas/issues/3678 | ||||
|         cat = Categorical([np.nan, 1, 2, 3]) | ||||
|         assert is_integer_dtype(cat.categories) | ||||
|  | ||||
|         # this should result in floats | ||||
|         cat = Categorical([np.nan, 1, 2.0, 3]) | ||||
|         assert is_float_dtype(cat.categories) | ||||
|  | ||||
|         cat = Categorical([np.nan, 1.0, 2.0, 3.0]) | ||||
|         assert is_float_dtype(cat.categories) | ||||
|  | ||||
|         # This doesn't work -> this would probably need some kind of "remember | ||||
|         # the original type" feature to try to cast the array interface result | ||||
|         # to... | ||||
|  | ||||
|         # vals = np.asarray(cat[cat.notna()]) | ||||
|         # assert is_integer_dtype(vals) | ||||
|  | ||||
|         # corner cases | ||||
|         cat = Categorical([1]) | ||||
|         assert len(cat.categories) == 1 | ||||
|         assert cat.categories[0] == 1 | ||||
|         assert len(cat.codes) == 1 | ||||
|         assert cat.codes[0] == 0 | ||||
|  | ||||
|         cat = Categorical(["a"]) | ||||
|         assert len(cat.categories) == 1 | ||||
|         assert cat.categories[0] == "a" | ||||
|         assert len(cat.codes) == 1 | ||||
|         assert cat.codes[0] == 0 | ||||
|  | ||||
|         # two arrays | ||||
|         #  - when the first is an integer dtype and the second is not | ||||
|         #  - when the resulting codes are all -1/NaN | ||||
|         with tm.assert_produces_warning(None): | ||||
|             Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) | ||||
|  | ||||
|         with tm.assert_produces_warning(None): | ||||
|             Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5]) | ||||
|  | ||||
|         # the next one are from the old docs | ||||
|         with tm.assert_produces_warning(None): | ||||
|             Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) | ||||
|             cat = Categorical([1, 2], categories=[1, 2, 3]) | ||||
|  | ||||
|         # this is a legitimate constructor | ||||
|         with tm.assert_produces_warning(None): | ||||
|             Categorical(np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True) | ||||
|  | ||||
|     def test_constructor_with_existing_categories(self): | ||||
|         # GH25318: constructing with pd.Series used to bogusly skip recoding | ||||
|         # categories | ||||
|         c0 = Categorical(["a", "b", "c", "a"]) | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"]) | ||||
|  | ||||
|         c2 = Categorical(c0, categories=c1.categories) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c3 = Categorical(Series(c0), categories=c1.categories) | ||||
|         tm.assert_categorical_equal(c1, c3) | ||||
|  | ||||
|     def test_constructor_not_sequence(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/16022 | ||||
|         msg = r"^Parameter 'categories' must be list-like, was" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             Categorical(["a", "b"], categories="a") | ||||
|  | ||||
|     def test_constructor_with_null(self): | ||||
|         # Cannot have NaN in categories | ||||
|         msg = "Categorical categories cannot be null" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical( | ||||
|                 DatetimeIndex(["nat", "20160101"]), | ||||
|                 categories=[NaT, Timestamp("20160101")], | ||||
|             ) | ||||
|  | ||||
|     def test_constructor_with_index(self): | ||||
|         ci = CategoricalIndex(list("aabbca"), categories=list("cab")) | ||||
|         tm.assert_categorical_equal(ci.values, Categorical(ci)) | ||||
|  | ||||
|         ci = CategoricalIndex(list("aabbca"), categories=list("cab")) | ||||
|         tm.assert_categorical_equal( | ||||
|             ci.values, Categorical(ci.astype(object), categories=ci.categories) | ||||
|         ) | ||||
|  | ||||
|     def test_constructor_with_generator(self): | ||||
|         # This was raising an Error in isna(single_val).any() because isna | ||||
|         # returned a scalar for a generator | ||||
|  | ||||
|         exp = Categorical([0, 1, 2]) | ||||
|         cat = Categorical(x for x in [0, 1, 2]) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|         cat = Categorical(range(3)) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|  | ||||
|         MultiIndex.from_product([range(5), ["a", "b", "c"]]) | ||||
|  | ||||
|         # check that categories accept generators and sequences | ||||
|         cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|         cat = Categorical([0, 1, 2], categories=range(3)) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|  | ||||
|     def test_constructor_with_rangeindex(self): | ||||
|         # RangeIndex is preserved in Categories | ||||
|         rng = Index(range(3)) | ||||
|  | ||||
|         cat = Categorical(rng) | ||||
|         tm.assert_index_equal(cat.categories, rng, exact=True) | ||||
|  | ||||
|         cat = Categorical([1, 2, 0], categories=rng) | ||||
|         tm.assert_index_equal(cat.categories, rng, exact=True) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtl", | ||||
|         [ | ||||
|             date_range("1995-01-01 00:00:00", periods=5, freq="s"), | ||||
|             date_range("1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"), | ||||
|             timedelta_range("1 day", periods=5, freq="s"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_constructor_with_datetimelike(self, dtl): | ||||
|         # see gh-12077 | ||||
|         # constructor with a datetimelike and NaT | ||||
|  | ||||
|         s = Series(dtl) | ||||
|         c = Categorical(s) | ||||
|  | ||||
|         expected = type(dtl)(s) | ||||
|         expected._data.freq = None | ||||
|  | ||||
|         tm.assert_index_equal(c.categories, expected) | ||||
|         tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8")) | ||||
|  | ||||
|         # with NaT | ||||
|         s2 = s.copy() | ||||
|         s2.iloc[-1] = NaT | ||||
|         c = Categorical(s2) | ||||
|  | ||||
|         expected = type(dtl)(s2.dropna()) | ||||
|         expected._data.freq = None | ||||
|  | ||||
|         tm.assert_index_equal(c.categories, expected) | ||||
|  | ||||
|         exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) | ||||
|         tm.assert_numpy_array_equal(c.codes, exp) | ||||
|  | ||||
|         result = repr(c) | ||||
|         assert "NaT" in result | ||||
|  | ||||
|     def test_constructor_from_index_series_datetimetz(self): | ||||
|         idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") | ||||
|         idx = idx._with_freq(None)  # freq not preserved in result.categories | ||||
|         result = Categorical(idx) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|         result = Categorical(Series(idx)) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|     def test_constructor_date_objects(self): | ||||
|         # we dont cast date objects to timestamps, matching Index constructor | ||||
|         v = date.today() | ||||
|  | ||||
|         cat = Categorical([v, v]) | ||||
|         assert cat.categories.dtype == object | ||||
|         assert type(cat.categories[0]) is date | ||||
|  | ||||
|     def test_constructor_from_index_series_timedelta(self): | ||||
|         idx = timedelta_range("1 days", freq="D", periods=3) | ||||
|         idx = idx._with_freq(None)  # freq not preserved in result.categories | ||||
|         result = Categorical(idx) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|         result = Categorical(Series(idx)) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|     def test_constructor_from_index_series_period(self): | ||||
|         idx = period_range("2015-01-01", freq="D", periods=3) | ||||
|         result = Categorical(idx) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|         result = Categorical(Series(idx)) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values", | ||||
|         [ | ||||
|             np.array([1.0, 1.2, 1.8, np.nan]), | ||||
|             np.array([1, 2, 3], dtype="int64"), | ||||
|             ["a", "b", "c", np.nan], | ||||
|             [pd.Period("2014-01"), pd.Period("2014-02"), NaT], | ||||
|             [Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT], | ||||
|             [ | ||||
|                 Timestamp("2014-01-01", tz="US/Eastern"), | ||||
|                 Timestamp("2014-01-02", tz="US/Eastern"), | ||||
|                 NaT, | ||||
|             ], | ||||
|         ], | ||||
|     ) | ||||
|     def test_constructor_invariant(self, values): | ||||
|         # GH 14190 | ||||
|         c = Categorical(values) | ||||
|         c2 = Categorical(c) | ||||
|         tm.assert_categorical_equal(c, c2) | ||||
|  | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_constructor_with_dtype(self, ordered): | ||||
|         categories = ["b", "a", "c"] | ||||
|         dtype = CategoricalDtype(categories, ordered=ordered) | ||||
|         result = Categorical(["a", "b", "a", "c"], dtype=dtype) | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "a", "c"], categories=categories, ordered=ordered | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|         assert result.ordered is ordered | ||||
|  | ||||
|     def test_constructor_dtype_and_others_raises(self): | ||||
|         dtype = CategoricalDtype(["a", "b"], ordered=True) | ||||
|         msg = "Cannot specify `categories` or `ordered` together with `dtype`." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical(["a", "b"], categories=["a", "b"], dtype=dtype) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical(["a", "b"], ordered=True, dtype=dtype) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical(["a", "b"], ordered=False, dtype=dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]]) | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_constructor_str_category(self, categories, ordered): | ||||
|         result = Categorical( | ||||
|             ["a", "b"], categories=categories, ordered=ordered, dtype="category" | ||||
|         ) | ||||
|         expected = Categorical(["a", "b"], categories=categories, ordered=ordered) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_str_unknown(self): | ||||
|         with pytest.raises(ValueError, match="Unknown dtype"): | ||||
|             Categorical([1, 2], dtype="foo") | ||||
|  | ||||
|     @pytest.mark.xfail( | ||||
|         using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings" | ||||
|     ) | ||||
|     def test_constructor_np_strs(self): | ||||
|         # GH#31499 Hashtable.map_locations needs to work on np.str_ objects | ||||
|         cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) | ||||
|         assert all(isinstance(x, np.str_) for x in cat.categories) | ||||
|  | ||||
|     def test_constructor_from_categorical_with_dtype(self): | ||||
|         dtype = CategoricalDtype(["a", "b", "c"], ordered=True) | ||||
|         values = Categorical(["a", "b", "d"]) | ||||
|         result = Categorical(values, dtype=dtype) | ||||
|         # We use dtype.categories, not values.categories | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "d"], categories=["a", "b", "c"], ordered=True | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_from_categorical_with_unknown_dtype(self): | ||||
|         dtype = CategoricalDtype(None, ordered=True) | ||||
|         values = Categorical(["a", "b", "d"]) | ||||
|         result = Categorical(values, dtype=dtype) | ||||
|         # We use values.categories, not dtype.categories | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "d"], categories=["a", "b", "d"], ordered=True | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_from_categorical_string(self): | ||||
|         values = Categorical(["a", "b", "d"]) | ||||
|         # use categories, ordered | ||||
|         result = Categorical( | ||||
|             values, categories=["a", "b", "c"], ordered=True, dtype="category" | ||||
|         ) | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "d"], categories=["a", "b", "c"], ordered=True | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         # No string | ||||
|         result = Categorical(values, categories=["a", "b", "c"], ordered=True) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_with_categorical_categories(self): | ||||
|         # GH17884 | ||||
|         expected = Categorical(["a", "b"], categories=["a", "b", "c"]) | ||||
|  | ||||
|         result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"])) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"])) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list]) | ||||
|     def test_construction_with_null(self, klass, nulls_fixture): | ||||
|         # https://github.com/pandas-dev/pandas/issues/31927 | ||||
|         values = klass(["a", nulls_fixture, "b"]) | ||||
|         result = Categorical(values) | ||||
|  | ||||
|         dtype = CategoricalDtype(["a", "b"]) | ||||
|         codes = [0, -1, 1] | ||||
|         expected = Categorical.from_codes(codes=codes, dtype=dtype) | ||||
|  | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("validate", [True, False]) | ||||
|     def test_from_codes_nullable_int_categories(self, any_numeric_ea_dtype, validate): | ||||
|         # GH#39649 | ||||
|         cats = pd.array(range(5), dtype=any_numeric_ea_dtype) | ||||
|         codes = np.random.default_rng(2).integers(5, size=3) | ||||
|         dtype = CategoricalDtype(cats) | ||||
|         arr = Categorical.from_codes(codes, dtype=dtype, validate=validate) | ||||
|         assert arr.categories.dtype == cats.dtype | ||||
|         tm.assert_index_equal(arr.categories, Index(cats)) | ||||
|  | ||||
|     def test_from_codes_empty(self): | ||||
|         cat = ["a", "b", "c"] | ||||
|         result = Categorical.from_codes([], categories=cat) | ||||
|         expected = Categorical([], categories=cat) | ||||
|  | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("validate", [True, False]) | ||||
|     def test_from_codes_validate(self, validate): | ||||
|         # GH53122 | ||||
|         dtype = CategoricalDtype(["a", "b"]) | ||||
|         if validate: | ||||
|             with pytest.raises(ValueError, match="codes need to be between "): | ||||
|                 Categorical.from_codes([4, 5], dtype=dtype, validate=validate) | ||||
|         else: | ||||
|             # passes, though has incorrect codes, but that's the user responsibility | ||||
|             Categorical.from_codes([4, 5], dtype=dtype, validate=validate) | ||||
|  | ||||
|     def test_from_codes_too_few_categories(self): | ||||
|         dtype = CategoricalDtype(categories=[1, 2]) | ||||
|         msg = "codes need to be between " | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([1, 2], categories=dtype.categories) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([1, 2], dtype=dtype) | ||||
|  | ||||
|     def test_from_codes_non_int_codes(self): | ||||
|         dtype = CategoricalDtype(categories=[1, 2]) | ||||
|         msg = "codes need to be array-like integers" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(["a"], categories=dtype.categories) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(["a"], dtype=dtype) | ||||
|  | ||||
|     def test_from_codes_non_unique_categories(self): | ||||
|         with pytest.raises(ValueError, match="Categorical categories must be unique"): | ||||
|             Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) | ||||
|  | ||||
|     def test_from_codes_nan_cat_included(self): | ||||
|         with pytest.raises(ValueError, match="Categorical categories cannot be null"): | ||||
|             Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) | ||||
|  | ||||
|     def test_from_codes_too_negative(self): | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||||
|         msg = r"codes need to be between -1 and len\(categories\)-1" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([-2, 1, 2], categories=dtype.categories) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([-2, 1, 2], dtype=dtype) | ||||
|  | ||||
|     def test_from_codes(self): | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||||
|         exp = Categorical(["a", "b", "c"], ordered=False) | ||||
|         res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) | ||||
|         tm.assert_categorical_equal(exp, res) | ||||
|  | ||||
|         res = Categorical.from_codes([0, 1, 2], dtype=dtype) | ||||
|         tm.assert_categorical_equal(exp, res) | ||||
|  | ||||
|     @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) | ||||
|     def test_from_codes_with_categorical_categories(self, klass): | ||||
|         # GH17884 | ||||
|         expected = Categorical(["a", "b"], categories=["a", "b", "c"]) | ||||
|  | ||||
|         result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) | ||||
|     def test_from_codes_with_non_unique_categorical_categories(self, klass): | ||||
|         with pytest.raises(ValueError, match="Categorical categories must be unique"): | ||||
|             Categorical.from_codes([0, 1], klass(["a", "b", "a"])) | ||||
|  | ||||
|     def test_from_codes_with_nan_code(self): | ||||
|         # GH21767 | ||||
|         codes = [1, 2, np.nan] | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||||
|         with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||||
|             Categorical.from_codes(codes, categories=dtype.categories) | ||||
|         with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||||
|             Categorical.from_codes(codes, dtype=dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) | ||||
|     def test_from_codes_with_float(self, codes): | ||||
|         # GH21767 | ||||
|         # float codes should raise even if values are equal to integers | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||||
|  | ||||
|         msg = "codes need to be array-like integers" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(codes, dtype.categories) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(codes, dtype=dtype) | ||||
|  | ||||
|     def test_from_codes_with_dtype_raises(self): | ||||
|         msg = "Cannot specify" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes( | ||||
|                 [0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"]) | ||||
|             ) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes( | ||||
|                 [0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"]) | ||||
|             ) | ||||
|  | ||||
|     def test_from_codes_neither(self): | ||||
|         msg = "Both were None" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([0, 1]) | ||||
|  | ||||
|     def test_from_codes_with_nullable_int(self): | ||||
|         codes = pd.array([0, 1], dtype="Int64") | ||||
|         categories = ["a", "b"] | ||||
|  | ||||
|         result = Categorical.from_codes(codes, categories=categories) | ||||
|         expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) | ||||
|  | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_from_codes_with_nullable_int_na_raises(self): | ||||
|         codes = pd.array([0, None], dtype="Int64") | ||||
|         categories = ["a", "b"] | ||||
|  | ||||
|         msg = "codes cannot contain NA values" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(codes, categories=categories) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [None, "category"]) | ||||
|     def test_from_inferred_categories(self, dtype): | ||||
|         cats = ["a", "b"] | ||||
|         codes = np.array([0, 0, 1, 1], dtype="i8") | ||||
|         result = Categorical._from_inferred_categories(cats, codes, dtype) | ||||
|         expected = Categorical.from_codes(codes, cats) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [None, "category"]) | ||||
|     def test_from_inferred_categories_sorts(self, dtype): | ||||
|         cats = ["b", "a"] | ||||
|         codes = np.array([0, 1, 1, 1], dtype="i8") | ||||
|         result = Categorical._from_inferred_categories(cats, codes, dtype) | ||||
|         expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_from_inferred_categories_dtype(self): | ||||
|         cats = ["a", "b", "d"] | ||||
|         codes = np.array([0, 1, 0, 2], dtype="i8") | ||||
|         dtype = CategoricalDtype(["c", "b", "a"], ordered=True) | ||||
|         result = Categorical._from_inferred_categories(cats, codes, dtype) | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_from_inferred_categories_coerces(self): | ||||
|         cats = ["1", "2", "bad"] | ||||
|         codes = np.array([0, 0, 1, 2], dtype="i8") | ||||
|         dtype = CategoricalDtype([1, 2]) | ||||
|         result = Categorical._from_inferred_categories(cats, codes, dtype) | ||||
|         expected = Categorical([1, 1, 2, np.nan]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("ordered", [None, True, False]) | ||||
|     def test_construction_with_ordered(self, ordered): | ||||
|         # GH 9347, 9190 | ||||
|         cat = Categorical([0, 1, 2], ordered=ordered) | ||||
|         assert cat.ordered == bool(ordered) | ||||
|  | ||||
|     def test_constructor_imaginary(self): | ||||
|         values = [1, 2, 3 + 1j] | ||||
|         c1 = Categorical(values) | ||||
|         tm.assert_index_equal(c1.categories, Index(values)) | ||||
|         tm.assert_numpy_array_equal(np.array(c1), np.array(values)) | ||||
|  | ||||
|     def test_constructor_string_and_tuples(self): | ||||
|         # GH 21416 | ||||
|         c = Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object)) | ||||
|         expected_index = Index([("a", "b"), ("b", "a"), "c"]) | ||||
|         assert c.categories.equals(expected_index) | ||||
|  | ||||
|     def test_interval(self): | ||||
|         idx = pd.interval_range(0, 10, periods=10) | ||||
|         cat = Categorical(idx, categories=idx) | ||||
|         expected_codes = np.arange(10, dtype="int8") | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # infer categories | ||||
|         cat = Categorical(idx) | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # list values | ||||
|         cat = Categorical(list(idx)) | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # list values, categories | ||||
|         cat = Categorical(list(idx), categories=list(idx)) | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # shuffled | ||||
|         values = idx.take([1, 2, 0]) | ||||
|         cat = Categorical(values, categories=idx) | ||||
|         tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="int8")) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # extra | ||||
|         values = pd.interval_range(8, 11, periods=3) | ||||
|         cat = Categorical(values, categories=idx) | ||||
|         expected_codes = np.array([8, 9, -1], dtype="int8") | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # overlapping | ||||
|         idx = IntervalIndex([Interval(0, 2), Interval(0, 1)]) | ||||
|         cat = Categorical(idx, categories=idx) | ||||
|         expected_codes = np.array([0, 1], dtype="int8") | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|     def test_categorical_extension_array_nullable(self, nulls_fixture): | ||||
|         # GH: | ||||
|         arr = pd.arrays.StringArray._from_sequence( | ||||
|             [nulls_fixture] * 2, dtype=pd.StringDtype() | ||||
|         ) | ||||
|         result = Categorical(arr) | ||||
|         assert arr.dtype == result.categories.dtype | ||||
|         expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype)) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_from_sequence_copy(self): | ||||
|         cat = Categorical(np.arange(5).repeat(2)) | ||||
|         result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=False) | ||||
|  | ||||
|         # more generally, we'd be OK with a view | ||||
|         assert result._codes is cat._codes | ||||
|  | ||||
|         result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=True) | ||||
|  | ||||
|         assert not tm.shares_memory(result, cat) | ||||
|  | ||||
|     def test_constructor_datetime64_non_nano(self): | ||||
|         categories = np.arange(10).view("M8[D]") | ||||
|         values = categories[::2].copy() | ||||
|  | ||||
|         cat = Categorical(values, categories=categories) | ||||
|         assert (cat == values).all() | ||||
|  | ||||
|     def test_constructor_preserves_freq(self): | ||||
|         # GH33830 freq retention in categorical | ||||
|         dti = date_range("2016-01-01", periods=5) | ||||
|  | ||||
|         expected = dti.freq | ||||
|  | ||||
|         cat = Categorical(dti) | ||||
|         result = cat.categories.freq | ||||
|  | ||||
|         assert expected == result | ||||
| @ -0,0 +1,139 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestCategoricalDtypes:
    """Tests for dtype comparison, dtype replacement and code dtypes."""

    def test_categories_match_up_to_permutation(self):
        """Check ``_categories_match_up_to_permutation`` across container types."""
        # test dtype comparisons between cats

        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
        c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
        c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
        assert c1._categories_match_up_to_permutation(c1)
        assert c2._categories_match_up_to_permutation(c2)
        assert c3._categories_match_up_to_permutation(c3)
        assert c1._categories_match_up_to_permutation(c2)
        assert not c1._categories_match_up_to_permutation(c3)
        assert not c1._categories_match_up_to_permutation(Index(list("aabca")))
        assert not c1._categories_match_up_to_permutation(c1.astype(object))
        assert c1._categories_match_up_to_permutation(CategoricalIndex(c1))
        assert c1._categories_match_up_to_permutation(
            CategoricalIndex(c1, categories=list("cab"))
        )
        assert not c1._categories_match_up_to_permutation(
            CategoricalIndex(c1, ordered=True)
        )

        # GH 16659
        s1 = Series(c1)
        s2 = Series(c2)
        s3 = Series(c3)
        assert c1._categories_match_up_to_permutation(s1)
        assert c2._categories_match_up_to_permutation(s2)
        assert c3._categories_match_up_to_permutation(s3)
        assert c1._categories_match_up_to_permutation(s2)
        assert not c1._categories_match_up_to_permutation(s3)
        assert not c1._categories_match_up_to_permutation(s1.astype(object))

    def test_set_dtype_same(self):
        """Setting an identical dtype leaves the Categorical unchanged."""
        c = Categorical(["a", "b", "c"])
        result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
        tm.assert_categorical_equal(result, c)

    def test_set_dtype_new_categories(self):
        """Appending a category keeps the codes and extends the categories."""
        c = Categorical(["a", "b", "c"])
        result = c._set_dtype(CategoricalDtype(list("abcd")))
        tm.assert_numpy_array_equal(result.codes, c.codes)
        tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))

    @pytest.mark.parametrize(
        "values, categories, new_categories",
        [
            # No NaNs, same cats, same order
            (["a", "b", "a"], ["a", "b"], ["a", "b"]),
            # No NaNs, same cats, different order
            (["a", "b", "a"], ["a", "b"], ["b", "a"]),
            # Same, unsorted
            (["b", "a", "a"], ["a", "b"], ["a", "b"]),
            # Same, unsorted, different order
            (["b", "a", "a"], ["a", "b"], ["b", "a"]),
            # NaNs
            (["a", "b", "c"], ["a", "b"], ["a", "b"]),
            (["a", "b", "c"], ["a", "b"], ["b", "a"]),
            (["b", "a", "c"], ["a", "b"], ["a", "b"]),
            # previously an exact duplicate of the case above; now covers
            # unsorted values combined with reordered categories
            (["b", "a", "c"], ["a", "b"], ["b", "a"]),
            # Introduce NaNs
            (["a", "b", "c"], ["a", "b"], ["a"]),
            (["a", "b", "c"], ["a", "b"], ["b"]),
            (["b", "a", "c"], ["a", "b"], ["a"]),
            # previously an exact duplicate of the case above; now covers
            # unsorted values when only "b" is kept
            (["b", "a", "c"], ["a", "b"], ["b"]),
            # No overlap
            (["a", "b", "c"], ["a", "b"], ["d", "e"]),
        ],
    )
    @pytest.mark.parametrize("ordered", [True, False])
    def test_set_dtype_many(self, values, categories, new_categories, ordered):
        """``_set_dtype`` matches constructing directly with the new categories."""
        c = Categorical(values, categories)
        expected = Categorical(values, new_categories, ordered)
        result = c._set_dtype(expected.dtype)
        tm.assert_categorical_equal(result, expected)

    def test_set_dtype_no_overlap(self):
        """With no shared categories every value becomes missing."""
        c = Categorical(["a", "b", "c"], ["d", "e"])
        result = c._set_dtype(CategoricalDtype(["a", "b"]))
        expected = Categorical([None, None, None], categories=["a", "b"])
        tm.assert_categorical_equal(result, expected)

    def test_codes_dtypes(self):
        """The codes dtype tracks the number of categories."""
        # GH 8453
        result = Categorical(["foo", "bar", "baz"])
        assert result.codes.dtype == "int8"

        result = Categorical([f"foo{i:05d}" for i in range(400)])
        assert result.codes.dtype == "int16"

        result = Categorical([f"foo{i:05d}" for i in range(40000)])
        assert result.codes.dtype == "int32"

        # adding cats
        result = Categorical(["foo", "bar", "baz"])
        assert result.codes.dtype == "int8"
        result = result.add_categories([f"foo{i:05d}" for i in range(400)])
        assert result.codes.dtype == "int16"

        # removing cats
        result = result.remove_categories([f"foo{i:05d}" for i in range(300)])
        assert result.codes.dtype == "int8"

    def test_iter_python_types(self):
        """Iteration and ``tolist`` yield Python ``int`` objects."""
        # GH-19909
        cat = Categorical([1, 2])
        assert isinstance(next(iter(cat)), int)
        assert isinstance(cat.tolist()[0], int)

    def test_iter_python_types_datetime(self):
        """Iteration and ``tolist`` yield ``Timestamp`` objects."""
        cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")])
        assert isinstance(next(iter(cat)), Timestamp)
        assert isinstance(cat.tolist()[0], Timestamp)

    def test_interval_index_category(self):
        """A uint64 IntervalIndex keeps its subtype as CategoricalIndex categories."""
        # GH 38316
        index = IntervalIndex.from_breaks(np.arange(3, dtype="uint64"))

        result = CategoricalIndex(index).dtype.categories
        expected = IntervalIndex.from_arrays(
            [0, 1], [1, 2], dtype="interval[uint64, right]"
        )
        tm.assert_index_equal(result, expected)
| @ -0,0 +1,388 @@ | ||||
| import math | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| import pandas.core.common as com | ||||
|  | ||||
|  | ||||
| class TestCategoricalIndexingWithFactor: | ||||
|     def test_getitem(self): | ||||
|         factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|         assert factor[0] == "a" | ||||
|         assert factor[-1] == "c" | ||||
|  | ||||
|         subf = factor[[0, 1, 2]] | ||||
|         tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8)) | ||||
|  | ||||
|         subf = factor[np.asarray(factor) == "c"] | ||||
|         tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8)) | ||||
|  | ||||
|     def test_setitem(self): | ||||
|         factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|         # int/positional | ||||
|         c = factor.copy() | ||||
|         c[0] = "b" | ||||
|         assert c[0] == "b" | ||||
|         c[-1] = "a" | ||||
|         assert c[-1] == "a" | ||||
|  | ||||
|         # boolean | ||||
|         c = factor.copy() | ||||
|         indexer = np.zeros(len(c), dtype="bool") | ||||
|         indexer[0] = True | ||||
|         indexer[-1] = True | ||||
|         c[indexer] = "c" | ||||
|         expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|  | ||||
|         tm.assert_categorical_equal(c, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [Categorical(["b", "a"]), Categorical(["b", "a"], categories=["b", "a"])], | ||||
|     ) | ||||
|     def test_setitem_same_but_unordered(self, other): | ||||
|         # GH-24142 | ||||
|         target = Categorical(["a", "b"], categories=["a", "b"]) | ||||
|         mask = np.array([True, False]) | ||||
|         target[mask] = other[mask] | ||||
|         expected = Categorical(["b", "b"], categories=["a", "b"]) | ||||
|         tm.assert_categorical_equal(target, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [ | ||||
|             Categorical(["b", "a"], categories=["b", "a", "c"]), | ||||
|             Categorical(["b", "a"], categories=["a", "b", "c"]), | ||||
|             Categorical(["a", "a"], categories=["a"]), | ||||
|             Categorical(["b", "b"], categories=["b"]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_setitem_different_unordered_raises(self, other): | ||||
|         # GH-24142 | ||||
|         target = Categorical(["a", "b"], categories=["a", "b"]) | ||||
|         mask = np.array([True, False]) | ||||
|         msg = "Cannot set a Categorical with another, without identical categories" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             target[mask] = other[mask] | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [ | ||||
|             Categorical(["b", "a"]), | ||||
|             Categorical(["b", "a"], categories=["b", "a"], ordered=True), | ||||
|             Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True), | ||||
|         ], | ||||
|     ) | ||||
|     def test_setitem_same_ordered_raises(self, other): | ||||
|         # Gh-24142 | ||||
|         target = Categorical(["a", "b"], categories=["a", "b"], ordered=True) | ||||
|         mask = np.array([True, False]) | ||||
|         msg = "Cannot set a Categorical with another, without identical categories" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             target[mask] = other[mask] | ||||
|  | ||||
|     def test_setitem_tuple(self): | ||||
|         # GH#20439 | ||||
|         cat = Categorical([(0, 1), (0, 2), (0, 1)]) | ||||
|  | ||||
|         # This should not raise | ||||
|         cat[1] = cat[0] | ||||
|         assert cat[1] == (0, 1) | ||||
|  | ||||
|     def test_setitem_listlike(self): | ||||
|         # GH#9469 | ||||
|         # properly coerce the input indexers | ||||
|  | ||||
|         cat = Categorical( | ||||
|             np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8) | ||||
|         ).add_categories([-1000]) | ||||
|         indexer = np.array([100000]).astype(np.int64) | ||||
|         cat[indexer] = -1000 | ||||
|  | ||||
|         # we are asserting the code result here | ||||
|         # which maps to the -1000 category | ||||
|         result = cat.codes[np.array([100000]).astype(np.int64)] | ||||
|         tm.assert_numpy_array_equal(result, np.array([5], dtype="int8")) | ||||
|  | ||||
|  | ||||
| class TestCategoricalIndexing: | ||||
|     def test_getitem_slice(self): | ||||
|         cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) | ||||
|         sliced = cat[3] | ||||
|         assert sliced == "d" | ||||
|  | ||||
|         sliced = cat[3:5] | ||||
|         expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"]) | ||||
|         tm.assert_categorical_equal(sliced, expected) | ||||
|  | ||||
|     def test_getitem_listlike(self): | ||||
|         # GH 9469 | ||||
|         # properly coerce the input indexers | ||||
|  | ||||
|         c = Categorical( | ||||
|             np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8) | ||||
|         ) | ||||
|         result = c.codes[np.array([100000]).astype(np.int64)] | ||||
|         expected = c[np.array([100000]).astype(np.int64)].codes | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_periodindex(self): | ||||
|         """Build Categoricals from monthly PeriodIndex data and check codes/categories.""" | ||||
|         # case 1: repeated periods already in ascending order | ||||
|         idx1 = PeriodIndex( | ||||
|             ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], | ||||
|             freq="M", | ||||
|         ) | ||||
|  | ||||
|         cat1 = Categorical(idx1) | ||||
|         # the string conversion result is discarded; only the call itself is exercised | ||||
|         str(cat1) | ||||
|         exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8) | ||||
|         exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") | ||||
|         tm.assert_numpy_array_equal(cat1._codes, exp_arr) | ||||
|         tm.assert_index_equal(cat1.categories, exp_idx) | ||||
|  | ||||
|         # case 2: unsorted input, ordered=True; categories are expected ascending | ||||
|         idx2 = PeriodIndex( | ||||
|             ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], | ||||
|             freq="M", | ||||
|         ) | ||||
|         cat2 = Categorical(idx2, ordered=True) | ||||
|         str(cat2) | ||||
|         exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8) | ||||
|         exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") | ||||
|         tm.assert_numpy_array_equal(cat2._codes, exp_arr) | ||||
|         tm.assert_index_equal(cat2.categories, exp_idx2) | ||||
|  | ||||
|         # case 3: descending unique input; expected codes are the reversed positions | ||||
|         idx3 = PeriodIndex( | ||||
|             [ | ||||
|                 "2013-12", | ||||
|                 "2013-11", | ||||
|                 "2013-10", | ||||
|                 "2013-09", | ||||
|                 "2013-08", | ||||
|                 "2013-07", | ||||
|                 "2013-05", | ||||
|             ], | ||||
|             freq="M", | ||||
|         ) | ||||
|         cat3 = Categorical(idx3, ordered=True) | ||||
|         exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8) | ||||
|         exp_idx = PeriodIndex( | ||||
|             [ | ||||
|                 "2013-05", | ||||
|                 "2013-07", | ||||
|                 "2013-08", | ||||
|                 "2013-09", | ||||
|                 "2013-10", | ||||
|                 "2013-11", | ||||
|                 "2013-12", | ||||
|             ], | ||||
|             freq="M", | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal(cat3._codes, exp_arr) | ||||
|         tm.assert_index_equal(cat3.categories, exp_idx) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "null_val", | ||||
|         [None, np.nan, NaT, NA, math.nan, "NaT", "nat", "NAT", "nan", "NaN", "NAN"], | ||||
|     ) | ||||
|     def test_periodindex_on_null_types(self, null_val): | ||||
|         # GH 46673 | ||||
|         result = PeriodIndex(["2022-04-06", "2022-04-07", null_val], freq="D") | ||||
|         expected = PeriodIndex(["2022-04-06", "2022-04-07", "NaT"], dtype="period[D]") | ||||
|         assert result[2] is NaT | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) | ||||
|     def test_categories_assignments_wrong_length_raises(self, new_categories): | ||||
|         cat = Categorical(["a", "b", "c", "a"]) | ||||
|         msg = ( | ||||
|             "new categories need to have the same number of items " | ||||
|             "as the old categories!" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.rename_categories(new_categories) | ||||
|  | ||||
|     # Combinations of sorted/unique: | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]] | ||||
|     ) | ||||
|     # Combinations of missing/unique | ||||
|     @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]]) | ||||
|     @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex]) | ||||
|     @pytest.mark.parametrize("dtype", [None, "category", "key"]) | ||||
|     def test_get_indexer_non_unique(self, idx_values, key_values, key_class, dtype): | ||||
|         """Indexer results for a categorical key must match those for its raw values.""" | ||||
|         # GH 21448 | ||||
|         # keys outside 1..4 (the 5s above) are not among the key's categories | ||||
|         key = key_class(key_values, categories=range(1, 5)) | ||||
|  | ||||
|         # "key" is a placeholder meaning: give the index the same dtype as the key | ||||
|         if dtype == "key": | ||||
|             dtype = key.dtype | ||||
|  | ||||
|         # Test for flat index and CategoricalIndex with same/different cats: | ||||
|         idx = Index(idx_values, dtype=dtype) | ||||
|         # reference result: look up the plain list of values | ||||
|         expected, exp_miss = idx.get_indexer_non_unique(key_values) | ||||
|         result, res_miss = idx.get_indexer_non_unique(key) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(expected, result) | ||||
|         tm.assert_numpy_array_equal(exp_miss, res_miss) | ||||
|  | ||||
|         # same comparison through get_indexer on the de-duplicated index | ||||
|         exp_unique = idx.unique().get_indexer(key_values) | ||||
|         res_unique = idx.unique().get_indexer(key) | ||||
|         tm.assert_numpy_array_equal(res_unique, exp_unique) | ||||
|  | ||||
|     def test_where_unobserved_nan(self): | ||||
|         ser = Series(Categorical(["a", "b"])) | ||||
|         result = ser.where([True, False]) | ||||
|         expected = Series(Categorical(["a", None], categories=["a", "b"])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # all NA | ||||
|         ser = Series(Categorical(["a", "b"])) | ||||
|         result = ser.where([False, False]) | ||||
|         expected = Series(Categorical([None, None], categories=["a", "b"])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_where_unobserved_categories(self): | ||||
|         ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])) | ||||
|         result = ser.where([True, True, False], other="b") | ||||
|         expected = Series(Categorical(["a", "b", "b"], categories=ser.cat.categories)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_where_other_categorical(self): | ||||
|         ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])) | ||||
|         other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"]) | ||||
|         result = ser.where([True, False, True], other) | ||||
|         expected = Series(Categorical(["a", "c", "c"], dtype=ser.dtype)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_where_new_category_raises(self): | ||||
|         ser = Series(Categorical(["a", "b", "c"])) | ||||
|         msg = "Cannot setitem on a Categorical with a new category" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser.where([True, False, True], "d") | ||||
|  | ||||
|     def test_where_ordered_differs_rasies(self): | ||||
|         ser = Series( | ||||
|             Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True) | ||||
|         ) | ||||
|         other = Categorical( | ||||
|             ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match="without identical categories"): | ||||
|             ser.where([True, False, True], other) | ||||
|  | ||||
|  | ||||
| class TestContains: | ||||
|     def test_contains(self): | ||||
|         # GH#21508 | ||||
|         cat = Categorical(list("aabbca"), categories=list("cab")) | ||||
|  | ||||
|         assert "b" in cat | ||||
|         assert "z" not in cat | ||||
|         assert np.nan not in cat | ||||
|         with pytest.raises(TypeError, match="unhashable type: 'list'"): | ||||
|             assert [1] in cat | ||||
|  | ||||
|         # assert codes NOT in index | ||||
|         assert 0 not in cat | ||||
|         assert 1 not in cat | ||||
|  | ||||
|         cat = Categorical(list("aabbca") + [np.nan], categories=list("cab")) | ||||
|         assert np.nan in cat | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "item, expected", | ||||
|         [ | ||||
|             (Interval(0, 1), True), | ||||
|             (1.5, True), | ||||
|             (Interval(0.5, 1.5), False), | ||||
|             ("a", False), | ||||
|             (Timestamp(1), False), | ||||
|             (Timedelta(1), False), | ||||
|         ], | ||||
|         ids=str, | ||||
|     ) | ||||
|     def test_contains_interval(self, item, expected): | ||||
|         # GH#23705 | ||||
|         cat = Categorical(IntervalIndex.from_breaks(range(3))) | ||||
|         result = item in cat | ||||
|         assert result is expected | ||||
|  | ||||
|     def test_contains_list(self): | ||||
|         # GH#21729 | ||||
|         cat = Categorical([1, 2, 3]) | ||||
|  | ||||
|         assert "a" not in cat | ||||
|  | ||||
|         with pytest.raises(TypeError, match="unhashable type"): | ||||
|             ["a"] in cat | ||||
|  | ||||
|         with pytest.raises(TypeError, match="unhashable type"): | ||||
|             ["a", "b"] in cat | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("index", [True, False]) | ||||
| def test_mask_with_boolean(index): | ||||
|     ser = Series(range(3)) | ||||
|     idx = Categorical([True, False, True]) | ||||
|     if index: | ||||
|         idx = CategoricalIndex(idx) | ||||
|  | ||||
|     assert com.is_bool_indexer(idx) | ||||
|     result = ser[idx] | ||||
|     expected = ser[idx.astype("object")] | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("index", [True, False]) | ||||
| def test_mask_with_boolean_na_treated_as_false(index): | ||||
|     # https://github.com/pandas-dev/pandas/issues/31503 | ||||
|     ser = Series(range(3)) | ||||
|     idx = Categorical([True, False, None]) | ||||
|     if index: | ||||
|         idx = CategoricalIndex(idx) | ||||
|  | ||||
|     result = ser[idx] | ||||
|     expected = ser[idx.fillna(False)] | ||||
|  | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def non_coercible_categorical(monkeypatch): | ||||
|     """ | ||||
|     Monkeypatch Categorical.__array__ to ensure no implicit conversion. | ||||
|  | ||||
|     Raises | ||||
|     ------ | ||||
|     ValueError | ||||
|         When Categorical.__array__ is called. | ||||
|     """ | ||||
|  | ||||
|     # TODO(Categorical): identify other places where this may be | ||||
|     # useful and move to a conftest.py | ||||
|     def array(self, dtype=None): | ||||
|         raise ValueError("I cannot be converted.") | ||||
|  | ||||
|     with monkeypatch.context() as m: | ||||
|         m.setattr(Categorical, "__array__", array) | ||||
|         yield | ||||
|  | ||||
|  | ||||
| def test_series_at(): | ||||
|     arr = Categorical(["a", "b", "c"]) | ||||
|     ser = Series(arr) | ||||
|     result = ser.at[0] | ||||
|     assert result == "a" | ||||
| @ -0,0 +1,154 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=[None, "ignore"]) | ||||
| def na_action(request): | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, categories", | ||||
|     [ | ||||
|         (list("abcbca"), list("cab")), | ||||
|         (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), | ||||
|     ], | ||||
|     ids=["string", "interval"], | ||||
| ) | ||||
| def test_map_str(data, categories, ordered, na_action): | ||||
|     # GH 31202 - override base class since we want to maintain categorical/ordered | ||||
|     cat = Categorical(data, categories=categories, ordered=ordered) | ||||
|     result = cat.map(str, na_action=na_action) | ||||
|     expected = Categorical( | ||||
|         map(str, data), categories=map(str, categories), ordered=ordered | ||||
|     ) | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map(na_action): | ||||
|     cat = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) | ||||
|     result = cat.map(lambda x: x.lower(), na_action=na_action) | ||||
|     exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|     cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) | ||||
|     result = cat.map(lambda x: x.lower(), na_action=na_action) | ||||
|     exp = Categorical(list("ababc"), categories=list("bac"), ordered=False) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|     # GH 12766: Return an index not an array | ||||
|     result = cat.map(lambda x: 1, na_action=na_action) | ||||
|     exp = Index(np.array([1] * 5, dtype=np.int64)) | ||||
|     tm.assert_index_equal(result, exp) | ||||
|  | ||||
|     # change categories dtype | ||||
|     cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) | ||||
|  | ||||
|     def f(x): | ||||
|         return {"A": 10, "B": 20, "C": 30}.get(x) | ||||
|  | ||||
|     result = cat.map(f, na_action=na_action) | ||||
|     exp = Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|     mapper = Series([10, 20, 30], index=["A", "B", "C"]) | ||||
|     result = cat.map(mapper, na_action=na_action) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|     result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("data", "f", "expected"), | ||||
|     ( | ||||
|         ([1, 1, np.nan], pd.isna, Index([False, False, True])), | ||||
|         ([1, 2, np.nan], pd.isna, Index([False, False, True])), | ||||
|         ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), | ||||
|         ( | ||||
|             [1, 1, np.nan], | ||||
|             Series([False, False]), | ||||
|             Categorical([False, False, np.nan]), | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2, np.nan], | ||||
|             Series([False] * 3), | ||||
|             Index([False, False, np.nan]), | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_map_with_nan_none(data, f, expected):  # GH 24241 | ||||
|     values = Categorical(data) | ||||
|     result = values.map(f, na_action=None) | ||||
|     if isinstance(expected, Categorical): | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|     else: | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("data", "f", "expected"), | ||||
|     ( | ||||
|         ([1, 1, np.nan], pd.isna, Categorical([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), | ||||
|         ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), | ||||
|         ( | ||||
|             [1, 1, np.nan], | ||||
|             Series([False, False]), | ||||
|             Categorical([False, False, np.nan]), | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2, np.nan], | ||||
|             Series([False, False, False]), | ||||
|             Index([False, False, np.nan]), | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_map_with_nan_ignore(data, f, expected):  # GH 24241 | ||||
|     values = Categorical(data) | ||||
|     result = values.map(f, na_action="ignore") | ||||
|     if data[1] == 1: | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|     else: | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_with_dict_or_series(na_action): | ||||
|     orig_values = ["a", "B", 1, "a"] | ||||
|     new_values = ["one", 2, 3.0, "one"] | ||||
|     cat = Categorical(orig_values) | ||||
|  | ||||
|     mapper = Series(new_values[:-1], index=orig_values[:-1]) | ||||
|     result = cat.map(mapper, na_action=na_action) | ||||
|  | ||||
|     # Order of categories in result can be different | ||||
|     expected = Categorical(new_values, categories=[3.0, 2, "one"]) | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     mapper = dict(zip(orig_values[:-1], new_values[:-1])) | ||||
|     result = cat.map(mapper, na_action=na_action) | ||||
|     # Order of categories in result can be different | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_na_action_no_default_deprecated(): | ||||
|     # GH51645 | ||||
|     cat = Categorical(["a", "b", "c"]) | ||||
|     msg = ( | ||||
|         "The default value of 'ignore' for the `na_action` parameter in " | ||||
|         "pandas.Categorical.map is deprecated and will be " | ||||
|         "changed to 'None' in a future version. Please set na_action to the " | ||||
|         "desired value to avoid seeing this warning" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         cat.map(lambda x: x) | ||||
| @ -0,0 +1,216 @@ | ||||
| import collections | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalMissing: | ||||
|     def test_isna(self): | ||||
|         exp = np.array([False, False, True]) | ||||
|         cat = Categorical(["a", "b", np.nan]) | ||||
|         res = cat.isna() | ||||
|  | ||||
|         tm.assert_numpy_array_equal(res, exp) | ||||
|  | ||||
|     def test_na_flags_int_categories(self): | ||||
|         # #1457 | ||||
|  | ||||
|         categories = list(range(10)) | ||||
|         labels = np.random.default_rng(2).integers(0, 10, 20) | ||||
|         labels[::5] = -1 | ||||
|  | ||||
|         cat = Categorical(labels, categories) | ||||
|         repr(cat) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(isna(cat), labels == -1) | ||||
|  | ||||
|     def test_nan_handling(self): | ||||
|         """NaN values get code -1 and do not show up among the categories.""" | ||||
|         # Nans are represented as -1 in codes | ||||
|         c = Categorical(["a", "b", np.nan, "a"]) | ||||
|         tm.assert_index_equal(c.categories, Index(["a", "b"])) | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) | ||||
|         # assigning NaN to an element changes its code to -1, categories unchanged | ||||
|         c[1] = np.nan | ||||
|         tm.assert_index_equal(c.categories, Index(["a", "b"])) | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8)) | ||||
|  | ||||
|         # NOTE(review): the comment here used to say that adding nan to the | ||||
|         # categories should make assigned nan point to the category, but this | ||||
|         # stanza only rebuilds the Categorical and repeats the checks above. | ||||
|         c = Categorical(["a", "b", np.nan, "a"]) | ||||
|         tm.assert_index_equal(c.categories, Index(["a", "b"])) | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) | ||||
|  | ||||
|     def test_set_dtype_nans(self): | ||||
|         c = Categorical(["a", "b", np.nan]) | ||||
|         result = c._set_dtype(CategoricalDtype(["a", "c"])) | ||||
|         tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8")) | ||||
|  | ||||
|     def test_set_item_nan(self): | ||||
|         cat = Categorical([1, 2, 3]) | ||||
|         cat[1] = np.nan | ||||
|  | ||||
|         exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fillna_kwargs, msg", | ||||
|         [ | ||||
|             ( | ||||
|                 {"value": 1, "method": "ffill"}, | ||||
|                 "Cannot specify both 'value' and 'method'.", | ||||
|             ), | ||||
|             ({}, "Must specify a fill 'value' or 'method'."), | ||||
|             ({"method": "bad"}, "Invalid fill method. Expecting .* bad"), | ||||
|             ( | ||||
|                 {"value": Series([1, 2, 3, 4, "a"])}, | ||||
|                 "Cannot setitem on a Categorical with a new category", | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_fillna_raises(self, fillna_kwargs, msg): | ||||
|         # https://github.com/pandas-dev/pandas/issues/19682 | ||||
|         # https://github.com/pandas-dev/pandas/issues/13628 | ||||
|         cat = Categorical([1, 2, 3, None, None]) | ||||
|  | ||||
|         if len(fillna_kwargs) == 1 and "value" in fillna_kwargs: | ||||
|             err = TypeError | ||||
|         else: | ||||
|             err = ValueError | ||||
|  | ||||
|         with pytest.raises(err, match=msg): | ||||
|             cat.fillna(**fillna_kwargs) | ||||
|  | ||||
|     @pytest.mark.parametrize("named", [True, False]) | ||||
|     def test_fillna_iterable_category(self, named): | ||||
|         # https://github.com/pandas-dev/pandas/issues/21097 | ||||
|         if named: | ||||
|             Point = collections.namedtuple("Point", "x y") | ||||
|         else: | ||||
|             Point = lambda *args: args  # tuple | ||||
|         cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object)) | ||||
|         result = cat.fillna(Point(0, 0)) | ||||
|         expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)]) | ||||
|  | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         # Case where the Point is not among our categories; we want ValueError, | ||||
|         #  not NotImplementedError GH#41914 | ||||
|         cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object)) | ||||
|         msg = "Cannot setitem on a Categorical with a new category" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat.fillna(Point(0, 0)) | ||||
|  | ||||
|     def test_fillna_array(self): | ||||
|         # accept Categorical or ndarray value if it holds appropriate values | ||||
|         cat = Categorical(["A", "B", "C", None, None]) | ||||
|  | ||||
|         other = cat.fillna("C") | ||||
|         result = cat.fillna(other) | ||||
|         tm.assert_categorical_equal(result, other) | ||||
|         assert isna(cat[-1])  # didn't modify original inplace | ||||
|  | ||||
|         other = np.array(["A", "B", "C", "B", "A"]) | ||||
|         result = cat.fillna(other) | ||||
|         expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|         assert isna(cat[-1])  # didn't modify original inplace | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, expected", | ||||
|         [ | ||||
|             ([1, 2, 3], np.array([False, False, False])), | ||||
|             ([1, 2, np.nan], np.array([False, False, True])), | ||||
|             ([1, 2, np.inf], np.array([False, False, True])), | ||||
|             ([1, 2, pd.NA], np.array([False, False, True])), | ||||
|         ], | ||||
|     ) | ||||
|     def test_use_inf_as_na(self, values, expected): | ||||
|         """Check the ``isna`` methods with ``mode.use_inf_as_na`` switched on.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/33594 | ||||
|         msg = "use_inf_as_na option is deprecated" | ||||
|         # the FutureWarning check wraps the whole option_context block | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             with pd.option_context("mode.use_inf_as_na", True): | ||||
|                 # here the Categorical is built while the option is active | ||||
|                 cat = Categorical(values) | ||||
|                 result = cat.isna() | ||||
|                 tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|                 # ``expected`` is rebound: first wrapped in a Series ... | ||||
|                 result = Series(cat).isna() | ||||
|                 expected = Series(expected) | ||||
|                 tm.assert_series_equal(result, expected) | ||||
|  | ||||
|                 # ... then that Series is wrapped in a DataFrame | ||||
|                 result = DataFrame(cat).isna() | ||||
|                 expected = DataFrame(expected) | ||||
|                 tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, expected", | ||||
|         [ | ||||
|             ([1, 2, 3], np.array([False, False, False])), | ||||
|             ([1, 2, np.nan], np.array([False, False, True])), | ||||
|             ([1, 2, np.inf], np.array([False, False, True])), | ||||
|             ([1, 2, pd.NA], np.array([False, False, True])), | ||||
|         ], | ||||
|     ) | ||||
|     def test_use_inf_as_na_outside_context(self, values, expected): | ||||
|         """Check top-level ``isna`` on a Categorical built before the option is set.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/33594 | ||||
|         # Using isna directly for Categorical will fail in general here | ||||
|         # the Categorical is created outside the option_context block | ||||
|         cat = Categorical(values) | ||||
|  | ||||
|         msg = "use_inf_as_na option is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             with pd.option_context("mode.use_inf_as_na", True): | ||||
|                 result = isna(cat) | ||||
|                 tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|                 # ``expected`` is rebound: first wrapped in a Series ... | ||||
|                 result = isna(Series(cat)) | ||||
|                 expected = Series(expected) | ||||
|                 tm.assert_series_equal(result, expected) | ||||
|  | ||||
|                 # ... then that Series is wrapped in a DataFrame | ||||
|                 result = isna(DataFrame(cat)) | ||||
|                 expected = DataFrame(expected) | ||||
|                 tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "a1, a2, categories", | ||||
|         [ | ||||
|             (["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]), | ||||
|             ([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_compare_categorical_with_missing(self, a1, a2, categories): | ||||
|         # GH 28384 | ||||
|         cat_type = CategoricalDtype(categories) | ||||
|  | ||||
|         # != | ||||
|         result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type) | ||||
|         expected = Series(a1) != Series(a2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # == | ||||
|         result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type) | ||||
|         expected = Series(a1) == Series(a2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "na_value, dtype", | ||||
|         [ | ||||
|             (pd.NaT, "datetime64[ns]"), | ||||
|             (None, "float64"), | ||||
|             (np.nan, "float64"), | ||||
|             (pd.NA, "float64"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_categorical_only_missing_values_no_cast(self, na_value, dtype): | ||||
|         # GH#44900 | ||||
|         result = Categorical([na_value, na_value]) | ||||
|         tm.assert_index_equal(result.categories, Index([], dtype=dtype)) | ||||
| @ -0,0 +1,414 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestCategoricalOpsWithFactor:
    """Comparison tests built around a small ordered, string-valued Categorical."""

    def test_categories_none_comparisons(self):
        """An ordered Categorical passes ``assert_categorical_equal`` with itself."""
        factor = Categorical(list("abbaaccc"), ordered=True)
        tm.assert_categorical_equal(factor, factor)

    def test_comparisons(self):
        """Exercise scalar, Categorical, Series and ndarray comparisons.

        Covers: boolean masks from scalar comparisons, elementwise equality,
        respect for the category order, and the TypeErrors raised for
        mismatched categories/orderedness or non-categorical array operands.
        """
        factor = Categorical(list("abbaaccc"), ordered=True)
        values = np.asarray(factor)

        # A mask computed on the Categorical must select the same elements as
        # the mask computed on the plain ndarray of its values.
        scalar_cases = [
            (factor == "a", values == "a"),
            (factor != "a", values != "a"),
            (factor < "c", values < "c"),
            (factor > "a", values > "a"),
            (factor >= "b", values >= "b"),
            (factor <= "b", values <= "b"),
        ]
        for cat_mask, array_mask in scalar_cases:
            tm.assert_categorical_equal(factor[cat_mask], factor[array_mask])

        # elementwise equality against a shuffled copy of the same data
        shuffled = factor[np.random.default_rng(2).permutation(len(factor))]
        tm.assert_numpy_array_equal(
            factor == shuffled, values == np.asarray(shuffled)
        )

        # a scalar that is not a category compares unequal everywhere
        tm.assert_numpy_array_equal(factor == "d", np.zeros(len(factor), dtype=bool))

        # comparisons with categoricals
        reversed_categories = ["c", "b", "a"]
        cat_rev = Categorical(
            ["a", "b", "c"], categories=reversed_categories, ordered=True
        )
        cat_rev_base = Categorical(
            ["b", "b", "b"], categories=reversed_categories, ordered=True
        )
        cat = Categorical(["a", "b", "c"], ordered=True)
        cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True)

        # comparisons need to take categories ordering into account
        tm.assert_numpy_array_equal(
            cat_rev > cat_rev_base, np.array([True, False, False])
        )
        tm.assert_numpy_array_equal(
            cat_rev < cat_rev_base, np.array([False, False, True])
        )
        tm.assert_numpy_array_equal(cat > cat_base, np.array([False, False, True]))

        # Only categories with same categories can be compared
        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            cat > cat_rev

        cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"])
        with pytest.raises(TypeError, match=msg):
            cat_rev > cat_rev_base2

        # Only categories with same ordering information can be compared
        cat_unordered = cat.set_ordered(False)
        assert not (cat > cat).any()
        with pytest.raises(TypeError, match=msg):
            cat > cat_unordered

        # comparison (in both directions) with Series will raise
        s = Series(["b", "b", "b"], dtype=object)
        msg = (
            "Cannot compare a Categorical for op __gt__ with type "
            r"<class 'numpy\.ndarray'>"
        )
        series_comparisons = (
            lambda: cat > s,
            lambda: cat_rev > s,
            lambda: s < cat,
            lambda: s < cat_rev,
        )
        for compare in series_comparisons:
            with pytest.raises(TypeError, match=msg):
                compare()

        # comparison with numpy.array will raise in both direction, but only on
        # newer numpy versions
        a = np.array(["b", "b", "b"], dtype=object)
        for compare in (lambda: cat > a, lambda: cat_rev > a):
            with pytest.raises(TypeError, match=msg):
                compare()

        # Make sure that unequal comparison take the categories order in
        # account
        cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True)
        above_b = np.array([True, False, False])
        tm.assert_numpy_array_equal(cat_rev > "b", above_b)

        # check that zero-dim array gets unboxed
        tm.assert_numpy_array_equal(cat_rev > np.array("b"), above_b)
|  | ||||
|  | ||||
| class TestCategoricalOps: | ||||
|     @pytest.mark.parametrize( | ||||
|         "categories", | ||||
|         [["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]], | ||||
|     ) | ||||
|     def test_not_equal_with_na(self, categories): | ||||
|         # https://github.com/pandas-dev/pandas/issues/32276 | ||||
|         c1 = Categorical.from_codes([-1, 0], categories=categories) | ||||
|         c2 = Categorical.from_codes([0, 1], categories=categories) | ||||
|  | ||||
|         result = c1 != c2 | ||||
|  | ||||
|         assert result.all() | ||||
|  | ||||
|     def test_compare_frame(self): | ||||
|         # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame | ||||
|         data = ["a", "b", 2, "a"] | ||||
|         cat = Categorical(data) | ||||
|  | ||||
|         df = DataFrame(cat) | ||||
|  | ||||
|         result = cat == df.T | ||||
|         expected = DataFrame([[True, True, True, True]]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = cat[::-1] != df.T | ||||
|         expected = DataFrame([[False, True, True, False]]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_compare_frame_raises(self, comparison_op): | ||||
|         # alignment raises unless we transpose | ||||
|         op = comparison_op | ||||
|         cat = Categorical(["a", "b", 2, "a"]) | ||||
|         df = DataFrame(cat) | ||||
|         msg = "Unable to coerce to Series, length must be 1: given 4" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             op(cat, df) | ||||
|  | ||||
|     def test_datetime_categorical_comparison(self): | ||||
|         dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True) | ||||
|         tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True])) | ||||
|         tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True])) | ||||
|  | ||||
|     def test_reflected_comparison_with_scalars(self): | ||||
|         # GH8658 | ||||
|         cat = Categorical([1, 2, 3], ordered=True) | ||||
|         tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True])) | ||||
|         tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True])) | ||||
|  | ||||
|     def test_comparison_with_unknown_scalars(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 | ||||
|         # and following comparisons with scalars not in categories should raise | ||||
|         # for unequal comps, but not for equal/not equal | ||||
|         cat = Categorical([1, 2, 3], ordered=True) | ||||
|  | ||||
|         msg = "Invalid comparison between dtype=category and int" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat < 4 | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat > 4 | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             4 < cat | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             4 > cat | ||||
|  | ||||
|         tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False])) | ||||
|         tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) | ||||
|  | ||||
|     def test_comparison_with_tuple(self): | ||||
|         cat = Categorical(np.array(["foo", (0, 1), 3, (0, 1)], dtype=object)) | ||||
|  | ||||
|         result = cat == "foo" | ||||
|         expected = np.array([True, False, False, False], dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = cat == (0, 1) | ||||
|         expected = np.array([False, True, False, True], dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = cat != (0, 1) | ||||
|         tm.assert_numpy_array_equal(result, ~expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore::RuntimeWarning") | ||||
|     def test_comparison_of_ordered_categorical_with_nan_to_scalar( | ||||
|         self, compare_operators_no_eq_ne | ||||
|     ): | ||||
|         # https://github.com/pandas-dev/pandas/issues/26504 | ||||
|         # BUG: fix ordered categorical comparison with missing values (#26504 ) | ||||
|         # and following comparisons with scalars in categories with missing | ||||
|         # values should be evaluated as False | ||||
|  | ||||
|         cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) | ||||
|         scalar = 2 | ||||
|         expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) | ||||
|         actual = getattr(cat, compare_operators_no_eq_ne)(scalar) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore::RuntimeWarning") | ||||
|     def test_comparison_of_ordered_categorical_with_nan_to_listlike( | ||||
|         self, compare_operators_no_eq_ne | ||||
|     ): | ||||
|         # https://github.com/pandas-dev/pandas/issues/26504 | ||||
|         # and following comparisons of missing values in ordered Categorical | ||||
|         # with listlike should be evaluated as False | ||||
|  | ||||
|         cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) | ||||
|         other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) | ||||
|         expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) | ||||
|         actual = getattr(cat, compare_operators_no_eq_ne)(other) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data,reverse,base", | ||||
|         [(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])], | ||||
|     ) | ||||
|     def test_comparisons(self, data, reverse, base): | ||||
|         cat_rev = Series(Categorical(data, categories=reverse, ordered=True)) | ||||
|         cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True)) | ||||
|         cat = Series(Categorical(data, ordered=True)) | ||||
|         cat_base = Series( | ||||
|             Categorical(base, categories=cat.cat.categories, ordered=True) | ||||
|         ) | ||||
|         s = Series(base, dtype=object if base == list("bbb") else None) | ||||
|         a = np.array(base) | ||||
|  | ||||
|         # comparisons need to take categories ordering into account | ||||
|         res_rev = cat_rev > cat_rev_base | ||||
|         exp_rev = Series([True, False, False]) | ||||
|         tm.assert_series_equal(res_rev, exp_rev) | ||||
|  | ||||
|         res_rev = cat_rev < cat_rev_base | ||||
|         exp_rev = Series([False, False, True]) | ||||
|         tm.assert_series_equal(res_rev, exp_rev) | ||||
|  | ||||
|         res = cat > cat_base | ||||
|         exp = Series([False, False, True]) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         scalar = base[1] | ||||
|         res = cat > scalar | ||||
|         exp = Series([False, False, True]) | ||||
|         exp2 = cat.values > scalar | ||||
|         tm.assert_series_equal(res, exp) | ||||
|         tm.assert_numpy_array_equal(res.values, exp2) | ||||
|         res_rev = cat_rev > scalar | ||||
|         exp_rev = Series([True, False, False]) | ||||
|         exp_rev2 = cat_rev.values > scalar | ||||
|         tm.assert_series_equal(res_rev, exp_rev) | ||||
|         tm.assert_numpy_array_equal(res_rev.values, exp_rev2) | ||||
|  | ||||
|         # Only categories with same categories can be compared | ||||
|         msg = "Categoricals can only be compared if 'categories' are the same" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat > cat_rev | ||||
|  | ||||
|         # categorical cannot be compared to Series or numpy array, and also | ||||
|         # not the other way around | ||||
|         msg = ( | ||||
|             "Cannot compare a Categorical for op __gt__ with type " | ||||
|             r"<class 'numpy\.ndarray'>" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat > s | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat_rev > s | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat > a | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat_rev > a | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s < cat | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s < cat_rev | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             a < cat | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             a < cat_rev | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ctor", | ||||
|         [ | ||||
|             lambda *args, **kwargs: Categorical(*args, **kwargs), | ||||
|             lambda *args, **kwargs: Series(Categorical(*args, **kwargs)), | ||||
|         ], | ||||
|     ) | ||||
|     def test_unordered_different_order_equal(self, ctor): | ||||
|         # https://github.com/pandas-dev/pandas/issues/16014 | ||||
|         c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) | ||||
|         c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) | ||||
|         assert (c1 == c2).all() | ||||
|  | ||||
|         c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) | ||||
|         c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False) | ||||
|         assert (c1 != c2).all() | ||||
|  | ||||
|         c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) | ||||
|         c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False) | ||||
|         assert (c1 != c2).all() | ||||
|  | ||||
|         c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) | ||||
|         c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) | ||||
|         result = c1 == c2 | ||||
|         tm.assert_numpy_array_equal(np.array(result), np.array([True, False])) | ||||
|  | ||||
|     def test_unordered_different_categories_raises(self): | ||||
|         c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False) | ||||
|         c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=("Categoricals can only be compared")): | ||||
|             c1 == c2 | ||||
|  | ||||
|     def test_compare_different_lengths(self): | ||||
|         c1 = Categorical([], categories=["a", "b"]) | ||||
|         c2 = Categorical([], categories=["a"]) | ||||
|  | ||||
|         msg = "Categoricals can only be compared if 'categories' are the same." | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             c1 == c2 | ||||
|  | ||||
|     def test_compare_unordered_different_order(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- | ||||
|         # 349290078 | ||||
|         a = Categorical(["a"], categories=["a", "b"]) | ||||
|         b = Categorical(["b"], categories=["b", "a"]) | ||||
|         assert not a.equals(b) | ||||
|  | ||||
|     def test_numeric_like_ops(self): | ||||
|         df = DataFrame({"value": np.random.default_rng(2).integers(0, 10000, 100)}) | ||||
|         labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] | ||||
|         cat_labels = Categorical(labels, labels) | ||||
|  | ||||
|         df = df.sort_values(by=["value"], ascending=True) | ||||
|         df["value_group"] = pd.cut( | ||||
|             df.value, range(0, 10500, 500), right=False, labels=cat_labels | ||||
|         ) | ||||
|  | ||||
|         # numeric ops should not succeed | ||||
|         for op, str_rep in [ | ||||
|             ("__add__", r"\+"), | ||||
|             ("__sub__", "-"), | ||||
|             ("__mul__", r"\*"), | ||||
|             ("__truediv__", "/"), | ||||
|         ]: | ||||
|             msg = f"Series cannot perform the operation {str_rep}|unsupported operand" | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 getattr(df, op)(df) | ||||
|  | ||||
|         # reduction ops should not succeed (unless specifically defined, e.g. | ||||
|         # min/max) | ||||
|         s = df["value_group"] | ||||
|         for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]: | ||||
|             msg = f"does not support reduction '{op}'" | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 getattr(s, op)(numeric_only=False) | ||||
|  | ||||
|     def test_numeric_like_ops_series(self): | ||||
|         # numpy ops | ||||
|         s = Series(Categorical([1, 2, 3, 4])) | ||||
|         with pytest.raises(TypeError, match="does not support reduction 'sum'"): | ||||
|             np.sum(s) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "op, str_rep", | ||||
|         [ | ||||
|             ("__add__", r"\+"), | ||||
|             ("__sub__", "-"), | ||||
|             ("__mul__", r"\*"), | ||||
|             ("__truediv__", "/"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_numeric_like_ops_series_arith(self, op, str_rep): | ||||
|         # numeric ops on a Series | ||||
|         s = Series(Categorical([1, 2, 3, 4])) | ||||
|         msg = f"Series cannot perform the operation {str_rep}|unsupported operand" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             getattr(s, op)(2) | ||||
|  | ||||
|     def test_numeric_like_ops_series_invalid(self): | ||||
|         # invalid ufunc | ||||
|         s = Series(Categorical([1, 2, 3, 4])) | ||||
|         msg = "Object with dtype category cannot perform the numpy op log" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             np.log(s) | ||||
| @ -0,0 +1,111 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import Categorical | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "to_replace,value,expected,flip_categories",
    [
        # one-to-one
        (1, 2, [2, 2, 3], False),
        (1, 4, [4, 2, 3], False),
        (4, 1, [1, 2, 3], False),
        (5, 6, [1, 2, 3], False),
        # many-to-one
        ([1], 2, [2, 2, 3], False),
        ([1, 2], 3, [3, 3, 3], False),
        ([1, 2], 4, [4, 4, 3], False),
        ((1, 2, 4), 5, [5, 5, 3], False),
        ((5, 6), 2, [1, 2, 3], False),
        ([1], [2], [2, 2, 3], False),
        ([1, 4], [5, 2], [5, 2, 3], False),
        # GH49404: overlap between to_replace and value
        ([1, 2, 3], [2, 3, 4], [2, 3, 4], False),
        # GH50872, GH46884: replace with null
        (1, None, [None, 2, 3], False),
        (1, pd.NA, [None, 2, 3], False),
        # check_categorical sorts categories, which crashes on mixed dtypes
        (3, "4", [1, 2, "4"], False),
        ([1, 2, "3"], "5", ["5", "5", 3], True),
    ],
)
@pytest.mark.filterwarnings(
    "ignore:.*with CategoricalDtype is deprecated:FutureWarning"
)
def test_replace_categorical_series(to_replace, value, expected, flip_categories):
    """``Series.replace`` on a categorical Series, both copying and in place.

    The same expected Series is compared (ignoring category order) against
    the returned copy and against the Series mutated with ``inplace=True``.
    When ``flip_categories`` is set, the expected categories are reversed
    before comparing.
    """
    # GH 31720
    original = pd.Series([1, 2, 3], dtype="category")
    replaced_copy = original.replace(to_replace, value)
    expected_ser = pd.Series(expected, dtype="category")
    original.replace(to_replace, value, inplace=True)

    if flip_categories:
        reversed_categories = expected_ser.cat.categories[::-1]
        expected_ser = expected_ser.cat.set_categories(reversed_categories)

    # first the non-inplace result, then the Series that was changed in place
    for actual in (replaced_copy, original):
        tm.assert_series_equal(expected_ser, actual, check_category_order=False)
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "to_replace, value, result, expected_error_msg", | ||||
|     [ | ||||
|         ("b", "c", ["a", "c"], "Categorical.categories are different"), | ||||
|         ("c", "d", ["a", "b"], None), | ||||
|         # https://github.com/pandas-dev/pandas/issues/33288 | ||||
|         ("a", "a", ["a", "b"], None), | ||||
|         ("b", None, ["a", None], "Categorical.categories length are different"), | ||||
|     ], | ||||
| ) | ||||
| def test_replace_categorical(to_replace, value, result, expected_error_msg): | ||||
|     # GH#26988 | ||||
|     cat = Categorical(["a", "b"]) | ||||
|     expected = Categorical(result) | ||||
|     msg = ( | ||||
|         r"The behavior of Series\.replace \(and DataFrame.replace\) " | ||||
|         "with CategoricalDtype" | ||||
|     ) | ||||
|     warn = FutureWarning if expected_error_msg is not None else None | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         result = pd.Series(cat, copy=False).replace(to_replace, value)._values | ||||
|  | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|     if to_replace == "b":  # the "c" test is supposed to be unchanged | ||||
|         with pytest.raises(AssertionError, match=expected_error_msg): | ||||
|             # ensure non-inplace call does not affect original | ||||
|             tm.assert_categorical_equal(cat, expected) | ||||
|  | ||||
|     ser = pd.Series(cat, copy=False) | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         ser.replace(to_replace, value, inplace=True) | ||||
|     tm.assert_categorical_equal(cat, expected) | ||||
|  | ||||
|  | ||||
| def test_replace_categorical_ea_dtype(): | ||||
|     # GH49404 | ||||
|     cat = Categorical(pd.array(["a", "b"], dtype="string")) | ||||
|     msg = ( | ||||
|         r"The behavior of Series\.replace \(and DataFrame.replace\) " | ||||
|         "with CategoricalDtype" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values | ||||
|     expected = Categorical(pd.array(["c", pd.NA], dtype="string")) | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_replace_maintain_ordering(): | ||||
|     # GH51016 | ||||
|     dtype = pd.CategoricalDtype([0, 1, 2], ordered=True) | ||||
|     ser = pd.Series([0, 1, 2], dtype=dtype) | ||||
|     msg = ( | ||||
|         r"The behavior of Series\.replace \(and DataFrame.replace\) " | ||||
|         "with CategoricalDtype" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = ser.replace(0, 2) | ||||
|     expected_dtype = pd.CategoricalDtype([1, 2], ordered=True) | ||||
|     expected = pd.Series([2, 1, 2], dtype=expected_dtype) | ||||
|     tm.assert_series_equal(expected, result, check_category_order=True) | ||||
| @ -0,0 +1,545 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Series, | ||||
|     date_range, | ||||
|     option_context, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestCategoricalReprWithFactor: | ||||
|     def test_print(self, using_infer_string): | ||||
|         factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|         dtype = "str" if using_infer_string else "object" | ||||
|         expected = [ | ||||
|             "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", | ||||
|             f"Categories (3, {dtype}): ['a' < 'b' < 'c']", | ||||
|         ] | ||||
|         expected = "\n".join(expected) | ||||
|         actual = repr(factor) | ||||
|         assert actual == expected | ||||
|  | ||||
|  | ||||
| class TestCategoricalRepr: | ||||
|     def test_big_print(self): | ||||
|         codes = np.array([0, 1, 2, 0, 1, 2] * 100) | ||||
|         dtype = CategoricalDtype(categories=Index(["a", "b", "c"], dtype=object)) | ||||
|         factor = Categorical.from_codes(codes, dtype=dtype) | ||||
|         expected = [ | ||||
|             "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']", | ||||
|             "Length: 600", | ||||
|             "Categories (3, object): ['a', 'b', 'c']", | ||||
|         ] | ||||
|         expected = "\n".join(expected) | ||||
|  | ||||
|         actual = repr(factor) | ||||
|  | ||||
|         assert actual == expected | ||||
|  | ||||
|     def test_empty_print(self): | ||||
|         factor = Categorical([], Index(["a", "b", "c"], dtype=object)) | ||||
|         expected = "[], Categories (3, object): ['a', 'b', 'c']" | ||||
|         actual = repr(factor) | ||||
|         assert actual == expected | ||||
|  | ||||
|         assert expected == actual | ||||
|         factor = Categorical([], Index(["a", "b", "c"], dtype=object), ordered=True) | ||||
|         expected = "[], Categories (3, object): ['a' < 'b' < 'c']" | ||||
|         actual = repr(factor) | ||||
|         assert expected == actual | ||||
|  | ||||
|         factor = Categorical([], []) | ||||
|         expected = "[], Categories (0, object): []" | ||||
|         assert expected == repr(factor) | ||||
|  | ||||
|     def test_print_none_width(self): | ||||
|         # GH10087 | ||||
|         a = Series(Categorical([1, 2, 3, 4])) | ||||
|         exp = ( | ||||
|             "0    1\n1    2\n2    3\n3    4\n" | ||||
|             "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" | ||||
|         ) | ||||
|  | ||||
|         with option_context("display.width", None): | ||||
|             assert exp == repr(a) | ||||
|  | ||||
|     @pytest.mark.skipif( | ||||
|         using_string_dtype(), | ||||
|         reason="Change once infer_string is set to True by default", | ||||
|     ) | ||||
|     def test_unicode_print(self): | ||||
|         c = Categorical(["aaaaa", "bb", "cccc"] * 20) | ||||
|         expected = """\ | ||||
| ['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc'] | ||||
| Length: 60 | ||||
| Categories (3, object): ['aaaaa', 'bb', 'cccc']""" | ||||
|  | ||||
|         assert repr(c) == expected | ||||
|  | ||||
|         c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) | ||||
|         expected = """\ | ||||
| ['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] | ||||
| Length: 60 | ||||
| Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == expected | ||||
|  | ||||
|         # unicode option should not affect to Categorical, as it doesn't care | ||||
|         # the repr width | ||||
|         with option_context("display.unicode.east_asian_width", True): | ||||
|             c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) | ||||
|             expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] | ||||
| Length: 60 | ||||
| Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']"""  # noqa: E501 | ||||
|  | ||||
|             assert repr(c) == expected | ||||
|  | ||||
|     def test_categorical_repr(self): | ||||
|         c = Categorical([1, 2, 3]) | ||||
|         exp = """[1, 2, 3] | ||||
| Categories (3, int64): [1, 2, 3]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) | ||||
|         exp = """[1, 2, 3, 1, 2, 3] | ||||
| Categories (3, int64): [1, 2, 3]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical([1, 2, 3, 4, 5] * 10) | ||||
|         exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] | ||||
| Length: 50 | ||||
| Categories (5, int64): [1, 2, 3, 4, 5]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(np.arange(20, dtype=np.int64)) | ||||
|         exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] | ||||
| Length: 20 | ||||
| Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_ordered(self): | ||||
|         # Ordered counterpart of test_categorical_repr: the expected categories | ||||
|         # are joined with " < " instead of ", ". | ||||
|         c = Categorical([1, 2, 3], ordered=True) | ||||
|         exp = """[1, 2, 3] | ||||
| Categories (3, int64): [1 < 2 < 3]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # repeated values with explicit categories | ||||
|         c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) | ||||
|         exp = """[1, 2, 3, 1, 2, 3] | ||||
| Categories (3, int64): [1 < 2 < 3]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # 50 values: values are elided and a "Length: 50" line is added | ||||
|         c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True) | ||||
|         exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] | ||||
| Length: 50 | ||||
| Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # 20 categories: the elision marker is a bare " ... " with no "<" around it | ||||
|         c = Categorical(np.arange(20, dtype=np.int64), ordered=True) | ||||
|         exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] | ||||
| Length: 20 | ||||
| Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_datetime(self): | ||||
|         # repr of unordered Categoricals built from hourly datetimes, first | ||||
|         # tz-naive and then tz-aware (US/Eastern). | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         c = Categorical(idx) | ||||
|  | ||||
|         # values stay on one line; the categories wrap onto a continuation line | ||||
|         # that is indented with spaces | ||||
|         exp = ( | ||||
|             "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " | ||||
|             "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" | ||||
|             "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " | ||||
|             "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" | ||||
|             "                                 2011-01-01 12:00:00, " | ||||
|             "2011-01-01 13:00:00]" | ||||
|             ""  # empty literal: contributes nothing to the expected text | ||||
|         ) | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # each value twice, categories given explicitly: same categories line | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = ( | ||||
|             "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " | ||||
|             "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " | ||||
|             "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " | ||||
|             "2011-01-01 13:00:00]\n" | ||||
|             "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " | ||||
|             "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" | ||||
|             "                                 2011-01-01 12:00:00, " | ||||
|             "2011-01-01 13:00:00]" | ||||
|         ) | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # tz-aware: values carry the -05:00 offset and the dtype names the zone | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern") | ||||
|         c = Categorical(idx) | ||||
|         exp = ( | ||||
|             "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " | ||||
|             "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " | ||||
|             "2011-01-01 13:00:00-05:00]\n" | ||||
|             "Categories (5, datetime64[ns, US/Eastern]): " | ||||
|             "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" | ||||
|             "                                             " | ||||
|             "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" | ||||
|             "                                             " | ||||
|             "2011-01-01 13:00:00-05:00]" | ||||
|         ) | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # tz-aware, each value twice with explicit categories | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = ( | ||||
|             "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " | ||||
|             "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " | ||||
|             "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, " | ||||
|             "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " | ||||
|             "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" | ||||
|             "Categories (5, datetime64[ns, US/Eastern]): " | ||||
|             "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" | ||||
|             "                                             " | ||||
|             "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" | ||||
|             "                                             " | ||||
|             "2011-01-01 13:00:00-05:00]" | ||||
|         ) | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_datetime_ordered(self): | ||||
|         # Ordered counterpart of the datetime repr test: categories are joined | ||||
|         # with " < " and wrap onto indented continuation lines. | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] | ||||
| Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < | ||||
|                                  2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # each value twice, categories given explicitly | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] | ||||
| Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < | ||||
|                                  2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # tz-aware (US/Eastern): the categories need two continuation lines | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern") | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] | ||||
| Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < | ||||
|                                              2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < | ||||
|                                              2011-01-01 13:00:00-05:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # tz-aware, each value twice with explicit categories | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] | ||||
| Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < | ||||
|                                              2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < | ||||
|                                              2011-01-01 13:00:00-05:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_int_with_nan(self): | ||||
|         # NaN is printed among the values but is not counted as a category: | ||||
|         # the expected categories stay (2, int64). | ||||
|         c = Categorical([1, 2, np.nan]) | ||||
|         c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]""" | ||||
|         assert repr(c) == c_exp | ||||
|  | ||||
|         # same data as an object-dtype Series cast to category: Series layout | ||||
|         # with an index column, followed by the same categories line | ||||
|         s = Series([1, 2, np.nan], dtype="object").astype("category") | ||||
|         s_exp = """0      1\n1      2\n2    NaN | ||||
| dtype: category | ||||
| Categories (2, int64): [1, 2]""" | ||||
|         assert repr(s) == s_exp | ||||
|  | ||||
|     def test_categorical_repr_period(self): | ||||
|         # repr of unordered Categoricals built from period ranges, hourly | ||||
|         # (period[h]) first and then monthly (period[M]). | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         c = Categorical(idx) | ||||
|         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] | ||||
| Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, | ||||
|                             2011-01-01 13:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # each hourly period twice, categories given explicitly | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] | ||||
| Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, | ||||
|                             2011-01-01 13:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # monthly periods are short enough that the categories fit on one line | ||||
|         idx = period_range("2011-01", freq="M", periods=5) | ||||
|         c = Categorical(idx) | ||||
|         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] | ||||
| Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # each monthly period twice, categories given explicitly | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] | ||||
| Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_period_ordered(self): | ||||
|         # Ordered counterpart of the period repr test: categories are joined | ||||
|         # with " < ". | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] | ||||
| Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < | ||||
|                             2011-01-01 13:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # each hourly period twice, categories given explicitly | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] | ||||
| Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < | ||||
|                             2011-01-01 13:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # monthly periods: the ordered categories fit on one line | ||||
|         idx = period_range("2011-01", freq="M", periods=5) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] | ||||
| Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # each monthly period twice, categories given explicitly | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] | ||||
| Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_timedelta(self): | ||||
|         # repr of unordered Categoricals built from timedelta ranges: a short | ||||
|         # five-element range, then a 20-element range that gets elided. | ||||
|         idx = timedelta_range("1 days", periods=5) | ||||
|         c = Categorical(idx) | ||||
|         exp = """[1 days, 2 days, 3 days, 4 days, 5 days] | ||||
| Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # each value twice, categories given explicitly | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] | ||||
| Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # 20 values starting at one hour: values and categories are both elided | ||||
|         # with "...", and the categories wrap across three lines | ||||
|         idx = timedelta_range("1 hours", periods=20) | ||||
|         c = Categorical(idx) | ||||
|         exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] | ||||
| Length: 20 | ||||
| Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, | ||||
|                                    3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, | ||||
|                                    18 days 01:00:00, 19 days 01:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # 40 values over the same 20 categories: only the Length line changes | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] | ||||
| Length: 40 | ||||
| Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, | ||||
|                                    3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, | ||||
|                                    18 days 01:00:00, 19 days 01:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_timedelta_ordered(self): | ||||
|         # Ordered counterpart of the timedelta repr test: categories are | ||||
|         # joined with " < ". | ||||
|         idx = timedelta_range("1 days", periods=5) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[1 days, 2 days, 3 days, 4 days, 5 days] | ||||
| Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # each value twice, categories given explicitly | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] | ||||
| Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # 20 categories: elided with a bare " ... " and wrapped over three lines | ||||
|         idx = timedelta_range("1 hours", periods=20) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] | ||||
| Length: 20 | ||||
| Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < | ||||
|                                    3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < | ||||
|                                    18 days 01:00:00 < 19 days 01:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # 40 values over the same 20 categories: only the Length line changes | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] | ||||
| Length: 40 | ||||
| Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < | ||||
|                                    3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < | ||||
|                                    18 days 01:00:00 < 19 days 01:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_index_repr(self): | ||||
|         idx = CategoricalIndex(Categorical([1, 2, 3])) | ||||
|         exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(idx) == exp | ||||
|  | ||||
|         i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64))) | ||||
|         exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_ordered(self): | ||||
|         i = CategoricalIndex(Categorical([1, 2, 3], ordered=True)) | ||||
|         exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64), ordered=True)) | ||||
|         exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_datetime(self): | ||||
|         # CategoricalIndex repr for hourly datetimes: the values are quoted and | ||||
|         # wrapped two per line, the categories are unquoted on a single line. | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', | ||||
|                   '2011-01-01 11:00:00', '2011-01-01 12:00:00', | ||||
|                   '2011-01-01 13:00:00'], | ||||
|                  categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # tz-aware (US/Eastern): same layout, values carry the -05:00 offset | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern") | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', | ||||
|                   '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', | ||||
|                   '2011-01-01 13:00:00-05:00'], | ||||
|                  categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_datetime_ordered(self): | ||||
|         # Ordered counterpart of the datetime CategoricalIndex repr test: the | ||||
|         # expected text differs only in ordered=True. | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', | ||||
|                   '2011-01-01 11:00:00', '2011-01-01 12:00:00', | ||||
|                   '2011-01-01 13:00:00'], | ||||
|                  categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # tz-aware (US/Eastern) | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern") | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', | ||||
|                   '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', | ||||
|                   '2011-01-01 13:00:00-05:00'], | ||||
|                  categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # tz-aware with every value repeated: ten values over five categories | ||||
|         i = CategoricalIndex(Categorical(idx.append(idx), ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', | ||||
|                   '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', | ||||
|                   '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', | ||||
|                   '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', | ||||
|                   '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], | ||||
|                  categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_period(self): | ||||
|         # Exercise hourly-period indexes of length 1, 2, 3, 5 and 10 so that | ||||
|         # both the single-line and the wrapped layouts are covered, then a | ||||
|         # monthly index. | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=1) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=2) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=3) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # five values: the values now wrap and categories move to their own line | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', | ||||
|                   '2011-01-01 12:00', '2011-01-01 13:00'], | ||||
|                  categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # ten values (each period twice) over the same five categories | ||||
|         i = CategoricalIndex(Categorical(idx.append(idx))) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', | ||||
|                   '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', | ||||
|                   '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', | ||||
|                   '2011-01-01 13:00'], | ||||
|                  categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # monthly periods: five values still fit on a single line | ||||
|         idx = period_range("2011-01", freq="M", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_period_ordered(self): | ||||
|         # Ordered CategoricalIndex over periods: hourly (wrapped layout) and | ||||
|         # monthly (single line); both expected strings report ordered=True. | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', | ||||
|                   '2011-01-01 12:00', '2011-01-01 13:00'], | ||||
|                  categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         idx = period_range("2011-01", freq="M", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_timedelta(self): | ||||
|         # CategoricalIndex repr for timedeltas: five whole-day values on one | ||||
|         # line, then ten values that wrap and whose categories are elided. | ||||
|         idx = timedelta_range("1 days", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # ten values starting at one hour: categories are elided with "..." | ||||
|         idx = timedelta_range("1 hours", periods=10) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', | ||||
|                   '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', | ||||
|                   '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', | ||||
|                   '9 days 01:00:00'], | ||||
|                  categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_timedelta_ordered(self): | ||||
|         # Ordered counterpart of the timedelta CategoricalIndex repr test: the | ||||
|         # expected text differs only in ordered=True. | ||||
|         idx = timedelta_range("1 days", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # ten values starting at one hour: categories are elided with "..." | ||||
|         idx = timedelta_range("1 hours", periods=10) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', | ||||
|                   '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', | ||||
|                   '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', | ||||
|                   '9 days 01:00:00'], | ||||
|                  categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_str_repr(self): | ||||
|         # GH 33676 | ||||
|         result = repr(Categorical([1, "2", 3, 4])) | ||||
|         expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" | ||||
|         assert result == expected | ||||
| @ -0,0 +1,128 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalSort: | ||||
|     def test_argsort(self): | ||||
|         c = Categorical([5, 3, 1, 4, 2], ordered=True) | ||||
|  | ||||
|         expected = np.array([2, 4, 1, 3, 0]) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             c.argsort(ascending=True), expected, check_dtype=False | ||||
|         ) | ||||
|  | ||||
|         expected = expected[::-1] | ||||
|         tm.assert_numpy_array_equal( | ||||
|             c.argsort(ascending=False), expected, check_dtype=False | ||||
|         ) | ||||
|  | ||||
|     def test_numpy_argsort(self): | ||||
|         c = Categorical([5, 3, 1, 4, 2], ordered=True) | ||||
|  | ||||
|         expected = np.array([2, 4, 1, 3, 0]) | ||||
|         tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) | ||||
|  | ||||
|         tm.assert_numpy_array_equal( | ||||
|             np.argsort(c, kind="mergesort"), expected, check_dtype=False | ||||
|         ) | ||||
|  | ||||
|         msg = "the 'axis' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.argsort(c, axis=0) | ||||
|  | ||||
|         msg = "the 'order' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.argsort(c, order="C") | ||||
|  | ||||
|     def test_sort_values(self): | ||||
|         # unordered cats are sortable | ||||
|         cat = Categorical(["a", "b", "b", "a"], ordered=False) | ||||
|         cat.sort_values() | ||||
|  | ||||
|         cat = Categorical(["a", "c", "b", "d"], ordered=True) | ||||
|  | ||||
|         # sort_values | ||||
|         res = cat.sort_values() | ||||
|         exp = np.array(["a", "b", "c", "d"], dtype=object) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, cat.categories) | ||||
|  | ||||
|         cat = Categorical( | ||||
|             ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True | ||||
|         ) | ||||
|         res = cat.sort_values() | ||||
|         exp = np.array(["a", "b", "c", "d"], dtype=object) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, cat.categories) | ||||
|  | ||||
|         res = cat.sort_values(ascending=False) | ||||
|         exp = np.array(["d", "c", "b", "a"], dtype=object) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, cat.categories) | ||||
|  | ||||
|         # sort (inplace order) | ||||
|         cat1 = cat.copy() | ||||
|         orig_codes = cat1._codes | ||||
|         cat1.sort_values(inplace=True) | ||||
|         assert cat1._codes is orig_codes | ||||
|         exp = np.array(["a", "b", "c", "d"], dtype=object) | ||||
|         tm.assert_numpy_array_equal(cat1.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, cat.categories) | ||||
|  | ||||
|         # reverse | ||||
|         cat = Categorical(["a", "c", "c", "b", "d"], ordered=True) | ||||
|         res = cat.sort_values(ascending=False) | ||||
|         exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object) | ||||
|         exp_categories = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp_val) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|     def test_sort_values_na_position(self): | ||||
|         # see gh-12882 | ||||
|         cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True) | ||||
|         exp_categories = Index([2, 5]) | ||||
|  | ||||
|         exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) | ||||
|         res = cat.sort_values()  # default arguments | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0]) | ||||
|         res = cat.sort_values(ascending=True, na_position="first") | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0]) | ||||
|         res = cat.sort_values(ascending=False, na_position="first") | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) | ||||
|         res = cat.sort_values(ascending=True, na_position="last") | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan]) | ||||
|         res = cat.sort_values(ascending=False, na_position="last") | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) | ||||
|         res = cat.sort_values(ascending=False, na_position="last") | ||||
|         exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) | ||||
|         exp_categories = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp_val) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) | ||||
|         res = cat.sort_values(ascending=False, na_position="first") | ||||
|         exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) | ||||
|         exp_categories = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp_val) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
| @ -0,0 +1,26 @@ | ||||
| from pandas import Categorical | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class SubclassedCategorical(Categorical): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class TestCategoricalSubclassing: | ||||
|     # Each test creates a SubclassedCategorical, asserts the object it gets | ||||
|     # back is still an instance of the subclass, and then compares it with a | ||||
|     # plain Categorical built from the same data. | ||||
|  | ||||
|     def test_constructor(self): | ||||
|         # Direct construction must yield the subclass, not the base class. | ||||
|         sc = SubclassedCategorical(["a", "b", "c"]) | ||||
|         assert isinstance(sc, SubclassedCategorical) | ||||
|         tm.assert_categorical_equal(sc, Categorical(["a", "b", "c"])) | ||||
|  | ||||
|     def test_from_codes(self): | ||||
|         # The from_codes alternate constructor, called on the subclass, must | ||||
|         # also yield the subclass. | ||||
|         sc = SubclassedCategorical.from_codes([1, 0, 2], ["a", "b", "c"]) | ||||
|         assert isinstance(sc, SubclassedCategorical) | ||||
|         exp = Categorical.from_codes([1, 0, 2], ["a", "b", "c"]) | ||||
|         tm.assert_categorical_equal(sc, exp) | ||||
|  | ||||
|     def test_map(self): | ||||
|         # map() on a subclass instance must return the subclass; the values | ||||
|         # are upper-cased and na_action is passed explicitly as None. | ||||
|         sc = SubclassedCategorical(["a", "b", "c"]) | ||||
|         res = sc.map(lambda x: x.upper(), na_action=None) | ||||
|         assert isinstance(res, SubclassedCategorical) | ||||
|         exp = Categorical(["A", "B", "C"]) | ||||
|         tm.assert_categorical_equal(res, exp) | ||||
| @ -0,0 +1,89 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import Categorical | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| # Parametrized over True and False, so every test that requests this | ||||
| # fixture runs once with each value. | ||||
| @pytest.fixture(params=[True, False]) | ||||
| def allow_fill(request): | ||||
|     """Boolean 'allow_fill' parameter for Categorical.take""" | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| class TestTake: | ||||
|     # Tests for Categorical.take: default behavior, out-of-bounds errors, | ||||
|     # plain positional takes, and the allow_fill / fill_value options. | ||||
|     # https://github.com/pandas-dev/pandas/issues/20664 | ||||
|  | ||||
|     def test_take_default_allow_fill(self): | ||||
|         # With allow_fill left at its default, take([0, -1]) must emit no | ||||
|         # warning, and the result must equal the original two-element | ||||
|         # categorical (so -1 picks the last element here). | ||||
|         cat = Categorical(["a", "b"]) | ||||
|         with tm.assert_produces_warning(None): | ||||
|             result = cat.take([0, -1]) | ||||
|  | ||||
|         assert result.equals(cat) | ||||
|  | ||||
|     def test_take_positive_no_warning(self): | ||||
|         # Only checks that no warning is emitted for non-negative indices; | ||||
|         # the returned value is not inspected. | ||||
|         cat = Categorical(["a", "b"]) | ||||
|         with tm.assert_produces_warning(None): | ||||
|             cat.take([0, 0]) | ||||
|  | ||||
|     def test_take_bounds(self, allow_fill): | ||||
|         # Indices past the end of a length-3 categorical must raise | ||||
|         # IndexError; the expected message depends on allow_fill. | ||||
|         # https://github.com/pandas-dev/pandas/issues/20664 | ||||
|         cat = Categorical(["a", "b", "a"]) | ||||
|         if allow_fill: | ||||
|             msg = "indices are out-of-bounds" | ||||
|         else: | ||||
|             # The optional "( axis 0 with)?" group lets the pattern match | ||||
|             # the message with or without that phrase. | ||||
|             msg = "index 4 is out of bounds for( axis 0 with)? size 3" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             cat.take([4, 5], allow_fill=allow_fill) | ||||
|  | ||||
|     def test_take_empty(self, allow_fill): | ||||
|         # Taking position 0 from an empty categorical (categories are set, | ||||
|         # but there are no values) must raise IndexError; the expected | ||||
|         # message depends on allow_fill. | ||||
|         # https://github.com/pandas-dev/pandas/issues/20664 | ||||
|         cat = Categorical([], categories=["a", "b"]) | ||||
|         if allow_fill: | ||||
|             msg = "indices are out-of-bounds" | ||||
|         else: | ||||
|             msg = "cannot do a non-empty take from an empty axes" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             cat.take([0], allow_fill=allow_fill) | ||||
|  | ||||
|     def test_positional_take(self, ordered): | ||||
|         # NOTE(review): the `ordered` fixture is not defined in this file; | ||||
|         # presumably it comes from a shared conftest - confirm. | ||||
|         # A positional take must keep the categories (listed as ["b", "a"], | ||||
|         # not sorted) and the ordered flag of the source. | ||||
|         cat = Categorical(["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered) | ||||
|         result = cat.take([0, 1, 2], allow_fill=False) | ||||
|         expected = Categorical( | ||||
|             ["a", "a", "b"], categories=cat.categories, ordered=ordered | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_positional_take_unobserved(self, ordered): | ||||
|         # Category "c" never occurs in the values; the expected result | ||||
|         # still carries all three categories of the source. | ||||
|         cat = Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered) | ||||
|         result = cat.take([1, 0], allow_fill=False) | ||||
|         expected = Categorical(["b", "a"], categories=cat.categories, ordered=ordered) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_take_allow_fill(self): | ||||
|         # With allow_fill=True and no fill_value, each -1 index yields a | ||||
|         # missing value; the categories stay ["a", "b"]. | ||||
|         # https://github.com/pandas-dev/pandas/issues/23296 | ||||
|         cat = Categorical(["a", "a", "b"]) | ||||
|         result = cat.take([0, -1, -1], allow_fill=True) | ||||
|         expected = Categorical(["a", np.nan, np.nan], categories=["a", "b"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_take_fill_with_negative_one(self): | ||||
|         # The value -1 is itself a category here, while the index -1 marks | ||||
|         # the position to fill: positions [0, -1, 1] give [-1, -1, 0]. | ||||
|         # -1 was a category | ||||
|         cat = Categorical([-1, 0, 1]) | ||||
|         result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) | ||||
|         expected = Categorical([-1, -1, 0], categories=[-1, 0, 1]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_take_fill_value(self): | ||||
|         # fill_value "a" is an existing category, so the -1 position is | ||||
|         # filled with "a" and the categories are unchanged. | ||||
|         # https://github.com/pandas-dev/pandas/issues/23296 | ||||
|         cat = Categorical(["a", "b", "c"]) | ||||
|         result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) | ||||
|         expected = Categorical(["a", "b", "a"], categories=["a", "b", "c"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_take_fill_value_new_raises(self): | ||||
|         # fill_value "d" is not one of the categories, so take must raise | ||||
|         # TypeError with a message naming the new category. | ||||
|         # https://github.com/pandas-dev/pandas/issues/23296 | ||||
|         cat = Categorical(["a", "b", "c"]) | ||||
|         xpr = r"Cannot setitem on a Categorical with a new category \(d\)" | ||||
|         with pytest.raises(TypeError, match=xpr): | ||||
|             cat.take([0, 1, -1], fill_value="d", allow_fill=True) | ||||
| @ -0,0 +1,19 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalWarnings: | ||||
|     def test_tab_complete_warning(self, ip): | ||||
|         # Creates an empty Categorical named `c` inside an IPython session | ||||
|         # and requests tab completions for "c.". | ||||
|         # NOTE(review): the `ip` fixture is not defined in this file; | ||||
|         # presumably a shared conftest supplies the IPython shell - confirm. | ||||
|         # https://github.com/pandas-dev/pandas/issues/16409 | ||||
|         # Skip the test when IPython >= 6.0.0 is not installed. | ||||
|         pytest.importorskip("IPython", minversion="6.0.0") | ||||
|         from IPython.core.completer import provisionalcompleter | ||||
|  | ||||
|         code = "import pandas as pd; c = pd.Categorical([])" | ||||
|         ip.run_cell(code) | ||||
|  | ||||
|         # raise_on_extra_warnings=False relaxes the check below so that the | ||||
|         # jedi deprecation warning described next does not fail the test. | ||||
|         # GH 31324 newer jedi version raises Deprecation warning; | ||||
|         #  appears resolved 2021-02-02 | ||||
|         with tm.assert_produces_warning(None, raise_on_extra_warnings=False): | ||||
|             with provisionalcompleter("ignore"): | ||||
|                 # list() forces the completions iterable to be consumed. | ||||
|                 list(ip.Completer.completions("c.", 1)) | ||||
		Reference in New Issue
	
	Block a user