done
This commit is contained in:
		| @ -0,0 +1,27 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
|  | ||||
|  | ||||
# Note: identical to the "multi" entry in the top-level "index" fixture
@pytest.fixture
def idx():
    """Two-level MultiIndex for exercising general MultiIndex behaviour."""
    levels = [
        Index(["foo", "bar", "baz", "qux"]),
        Index(["one", "two"]),
    ]
    codes = [
        np.array([0, 0, 1, 2, 3, 3]),
        np.array([0, 1, 0, 1, 0, 1]),
    ]
    # built with verify_integrity=False, i.e. without the integrity checks
    return MultiIndex(
        levels=levels,
        codes=codes,
        names=["first", "second"],
        verify_integrity=False,
    )
| @ -0,0 +1,263 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     date_range, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_infer_objects(idx):
    """infer_objects raises NotImplementedError with a message naming to_frame."""
    expected_error = pytest.raises(NotImplementedError, match="to_frame")
    with expected_error:
        idx.infer_objects()
|  | ||||
|  | ||||
def test_shift(idx):
    """shift raises NotImplementedError for a MultiIndex (GH8083)."""
    msg = (
        "This method is only implemented for DatetimeIndex, PeriodIndex and "
        "TimedeltaIndex; Got type MultiIndex"
    )
    # the same error is expected with one and with two positional arguments
    for shift_args in [(1,), (1, 2)]:
        with pytest.raises(NotImplementedError, match=msg):
            idx.shift(*shift_args)
|  | ||||
|  | ||||
def test_groupby(idx):
    """Group the index by an array of keys and then by itself."""
    keys = np.array([1, 1, 1, 2, 2, 2])
    result = idx.groupby(keys)
    tuples = idx.tolist()
    expected = {1: tuples[:3], 2: tuples[3:]}
    tm.assert_dict_equal(result, expected)

    # GH5620: grouping by the index itself maps each key to a one-item list
    result = idx.groupby(idx)
    expected = {}
    for key in idx:
        expected[key] = [key]
    tm.assert_dict_equal(result, expected)
|  | ||||
|  | ||||
def test_truncate_multiindex():
    """Check that MultiIndex.truncate trims level 0 and keeps the level names."""
    # GH 34564 for MultiIndex level names check
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=["L1", "L2"],
    )

    result = index.truncate(before=1)
    # Level 0 holds the integers 0..3, so assert that the label below
    # ``before`` is gone; a string label could never have been present.
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    # before=3, after=1 is an inverted range and must be rejected
    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
|  | ||||
|  | ||||
| # TODO: reshape | ||||
|  | ||||
|  | ||||
def test_reorder_levels(idx):
    """reorder_levels raises IndexError when given the order [2, 1, 0]."""
    requested_order = [2, 1, 0]
    with pytest.raises(IndexError, match="^Too many levels"):
        idx.reorder_levels(requested_order)
|  | ||||
|  | ||||
def test_numpy_repeat():
    """np.repeat on a MultiIndex matches from_product; ``axis`` is rejected."""
    repeats = 2
    first_level = [1, 2, 3]
    # the same array serves as the second level's values and as the names
    level_names = np.array(["foo", "bar"])

    mi = MultiIndex.from_product([first_level, level_names], names=level_names)
    expected = MultiIndex.from_product(
        [first_level, level_names.repeat(repeats)], names=level_names
    )
    result = np.repeat(mi, repeats)
    tm.assert_index_equal(result, expected)

    with pytest.raises(ValueError, match="the 'axis' parameter is not supported"):
        np.repeat(mi, repeats, axis=1)
|  | ||||
|  | ||||
| def test_append_mixed_dtypes(): | ||||
|     # GH 13660 | ||||
|     dti = date_range("2011-01-01", freq="ME", periods=3) | ||||
|     dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern") | ||||
|     pi = period_range("2011-01", freq="M", periods=3) | ||||
|  | ||||
|     mi = MultiIndex.from_arrays( | ||||
|         [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi] | ||||
|     ) | ||||
|     assert mi.nlevels == 6 | ||||
|  | ||||
|     res = mi.append(mi) | ||||
|     exp = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             [1, 2, 3, 1, 2, 3], | ||||
|             [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], | ||||
|             ["a", "b", "c", "a", "b", "c"], | ||||
|             dti.append(dti), | ||||
|             dti_tz.append(dti_tz), | ||||
|             pi.append(pi), | ||||
|         ] | ||||
|     ) | ||||
|     tm.assert_index_equal(res, exp) | ||||
|  | ||||
|     other = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             ["x", "y", "z"], | ||||
|             ["x", "y", "z"], | ||||
|             ["x", "y", "z"], | ||||
|             ["x", "y", "z"], | ||||
|             ["x", "y", "z"], | ||||
|             ["x", "y", "z"], | ||||
|         ] | ||||
|     ) | ||||
|  | ||||
|     res = mi.append(other) | ||||
|     exp = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             [1, 2, 3, "x", "y", "z"], | ||||
|             [1.1, np.nan, 3.3, "x", "y", "z"], | ||||
|             ["a", "b", "c", "x", "y", "z"], | ||||
|             dti.append(Index(["x", "y", "z"])), | ||||
|             dti_tz.append(Index(["x", "y", "z"])), | ||||
|             pi.append(Index(["x", "y", "z"])), | ||||
|         ] | ||||
|     ) | ||||
|     tm.assert_index_equal(res, exp) | ||||
|  | ||||
|  | ||||
def test_iter(idx):
    """Iterating the index yields its label tuples in order."""
    firsts = ["foo", "foo", "bar", "baz", "qux", "qux"]
    seconds = ["one", "two", "one", "two", "one", "two"]
    expected = list(zip(firsts, seconds))
    assert list(idx) == expected
|  | ||||
|  | ||||
def test_sub(idx):
    """Subtraction involving a MultiIndex raises TypeError."""
    # - now raises (previously was set op difference)
    tail = idx[-3:]
    as_list = idx.tolist()

    sub_msg = "cannot perform __sub__ with this index type: MultiIndex"
    for left, right in [(idx, tail), (tail, idx), (tail, as_list)]:
        with pytest.raises(TypeError, match=sub_msg):
            left - right

    # with a plain list on the left, the reflected operator is reported
    rsub_msg = "cannot perform __rsub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=rsub_msg):
        as_list - tail
|  | ||||
|  | ||||
def test_map(idx):
    """Mapping with an identity callable gives back an equal index."""

    def identity(value):
        return value

    tm.assert_index_equal(idx.map(identity), idx)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, idx: dict(zip(idx, values)),
        lambda values, idx: pd.Series(values, index=idx),
    ],
)
def test_map_dictlike(idx, mapper):
    """Map the index through a dict or Series built by ``mapper``."""
    identity = mapper(idx.values, idx)

    # we don't infer to uint64 dtype for a dict
    expected = idx
    if isinstance(identity, dict) and idx.dtype == np.uint64:
        expected = idx.astype("int64")

    tm.assert_index_equal(idx.map(identity), expected)

    # empty mappable
    all_nan = Index([np.nan] * len(idx))
    tm.assert_index_equal(idx.map(mapper(all_nan, idx)), all_nan)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "func",
    [
        # exponentials and logarithms
        np.exp,
        np.exp2,
        np.expm1,
        np.log,
        np.log2,
        np.log10,
        np.log1p,
        np.sqrt,
        # trigonometric
        np.sin,
        np.cos,
        np.tan,
        np.arcsin,
        np.arccos,
        np.arctan,
        # hyperbolic
        np.sinh,
        np.cosh,
        np.tanh,
        np.arcsinh,
        np.arccosh,
        np.arctanh,
        # angle conversion
        np.deg2rad,
        np.rad2deg,
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
    """Applying a numpy ufunc to the index raises TypeError."""
    # test ufuncs of numpy. see:
    # https://numpy.org/doc/stable/reference/ufuncs.html
    ufunc_name = func.__name__
    msg = (
        "loop of ufunc does not support argument 0 of type tuple which "
        f"has no callable {ufunc_name} method"
    )
    with pytest.raises(TypeError, match=msg):
        func(idx)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "func",
    [
        np.isfinite,
        np.isinf,
        np.isnan,
        np.signbit,
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
    """numpy type-checking ufuncs raise TypeError when applied to the index."""
    ufunc_name = func.__name__
    msg = (
        f"ufunc '{ufunc_name}' not supported for the input types, and the inputs "
        "could not be safely coerced to any supported types according to "
        "the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=msg):
        func(idx)
| @ -0,0 +1,30 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_astype(idx):
    """Compare astype("O") with a copy of the index; an int dtype raises."""
    reference = idx.copy()
    converted = idx.astype("O")
    tm.assert_copy(converted.levels, reference.levels)
    tm.assert_copy(converted.codes, reference.codes)
    assert converted.names == list(reference.names)

    int_dtype = np.dtype(int)
    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(int_dtype)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
    """Casting the index to a categorical dtype raises NotImplementedError."""
    # GH 18630
    msg = "> 1 ndim Categorical are not supported at this time"
    targets = [CategoricalDtype(ordered=ordered)]
    if ordered is False:
        # dtype='category' defaults to ordered=False, so only test once
        targets.append("category")

    for dtype in targets:
        with pytest.raises(NotImplementedError, match=msg):
            idx.astype(dtype)
| @ -0,0 +1,122 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import MultiIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_numeric_compat(idx):
    """Arithmetic with a scalar raises TypeError naming the dunder method."""
    # each entry pairs the expected dunder name with a thunk for the operation
    cases = [
        ("__mul__", lambda: idx * 1),
        ("__rmul__", lambda: 1 * idx),
        ("__truediv__", lambda: idx / 1),
        ("__rtruediv__", lambda: 1 / idx),
        ("__floordiv__", lambda: idx // 1),
        ("__rfloordiv__", lambda: 1 // idx),
    ]
    for dunder, operation in cases:
        with pytest.raises(TypeError, match=f"cannot perform {dunder}"):
            operation()
|  | ||||
|  | ||||
@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
def test_logical_compat(idx, method):
    """Calling all / any / __invert__ on the index raises TypeError."""
    bound_method = getattr(idx, method)
    with pytest.raises(TypeError, match=f"cannot perform {method}"):
        bound_method()
|  | ||||
|  | ||||
def test_inplace_mutation_resets_values():
    """Check when ``_values`` enters ``_cache`` around set_levels / set_codes."""
    str_levels = [["a", "b", "c"], [4]]
    int_levels = [[1, 2, 3], ["a"]]
    codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=str_levels, codes=codes)
    mi2 = MultiIndex(levels=int_levels, codes=codes)

    # instantiating MultiIndex should not access/cache _.values
    for fresh in (mi1, mi2):
        assert "_values" not in fresh._cache

    vals = mi1.values.copy()
    vals2 = mi2.values.copy()

    # accessing .values should cache ._values
    cached = mi1._cache["_values"]
    assert mi1._values is cached
    assert mi1.values is cached
    assert isinstance(cached, np.ndarray)

    # Make sure level setting works
    relabelled = mi1.set_levels(int_levels)
    tm.assert_almost_equal(vals2, relabelled.values)

    #  Doesn't drop _values from _cache [implementation detail]
    tm.assert_almost_equal(mi1._cache["_values"], vals)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, vals)

    # Make sure label setting works too
    zero_codes = [[0] * 6, [0] * 6]
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert exp_values.shape == (6,)

    recoded = mi2.set_codes(zero_codes)
    assert "_values" not in recoded._cache
    recoded_values = recoded.values
    assert "_values" in recoded._cache

    # Shouldn't change cache
    tm.assert_almost_equal(mi2._cache["_values"], vals2)

    # Should have correct values
    tm.assert_almost_equal(exp_values, recoded_values)
|  | ||||
|  | ||||
def test_boxable_categorical_values():
    """``.values`` of objects built from a datetime Categorical hold Timestamps."""
    cat = pd.Categorical(pd.date_range("2012-01-01", periods=3, freq="h"))
    letters = ["a", "b", "c"]
    stamps = [
        pd.Timestamp("2012-01-01 00:00:00"),
        pd.Timestamp("2012-01-01 01:00:00"),
        pd.Timestamp("2012-01-01 02:00:00"),
    ]

    # MultiIndex: every (letter, timestamp) pair, letters varying slowest
    result = MultiIndex.from_product([letters, cat]).values
    pairs = [(letter, stamp) for letter in letters for stamp in stamps]
    expected = pd.Series(pairs).values
    tm.assert_numpy_array_equal(result, expected)

    # DataFrame: the categorical column and its ndarray form
    result = pd.DataFrame({"a": letters, "b": cat, "c": np.array(cat)}).values
    expected = pd.DataFrame({"a": letters, "b": stamps, "c": stamps}).values
    tm.assert_numpy_array_equal(result, expected)
| @ -0,0 +1,860 @@ | ||||
| from datetime import ( | ||||
|     date, | ||||
|     datetime, | ||||
| ) | ||||
| import itertools | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_constructor_single_level():
    """A single level/codes pair still constructs a MultiIndex."""
    labels = ["foo", "bar", "baz", "qux"]
    result = MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])
    assert isinstance(result, MultiIndex)

    # the level carries the given name
    tm.assert_index_equal(result.levels[0], Index(labels, name="first"))
    assert result.names == ["first"]
|  | ||||
|  | ||||
def test_constructor_no_levels():
    """Empty or missing levels/codes are rejected by the constructor."""
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    # passing only one of levels / codes is a TypeError
    msg = "Must pass both levels and codes"
    for kwargs in ({"levels": []}, {"codes": []}):
        with pytest.raises(TypeError, match=msg):
            MultiIndex(**kwargs)
|  | ||||
|  | ||||
def test_constructor_nonhashable_names():
    """Unhashable names raise TypeError in the constructor and the renamers."""
    # GH 20527
    msg = r"MultiIndex\.name must be a hashable type"
    levels = [[1, 2], ["one", "two"]]
    codes = [[0, 0, 1, 1], [0, 1, 0, 1]]

    with pytest.raises(TypeError, match=msg):
        MultiIndex(levels=levels, codes=codes, names=(["foo"], ["bar"]))

    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    unhashable = [["fooo"], ["barr"]]

    # With .rename() and then with .set_names()
    for renamer in (mi.rename, mi.set_names):
        with pytest.raises(TypeError, match=msg):
            renamer(names=unhashable)
|  | ||||
|  | ||||
def test_constructor_mismatched_codes_levels(idx):
    """Inconsistent levels/codes raise ValueError in constructor and setters."""
    three_codes = [np.array([value]) for value in (1, 2, 3)]
    msg = "Length of levels and codes must be the same"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=["a"], codes=three_codes)

    length_error = (
        r"On level 0, code max \(3\) >= length of level \(1\)\. "
        "NOTE: this index is in an inconsistent state"
    )
    label_error = r"Unequal code lengths: \[4, 2\]"
    code_value_error = r"On level 0, code value \(-2\) < -1"

    one_item_levels = [["a"], ["b"]]
    ragged_codes = [[0, 0, 0, 0], [0, 0]]

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=length_error):
        MultiIndex(levels=one_item_levels, codes=[[0, 1, 2, 3], [0, 3, 4, 1]])

    with pytest.raises(ValueError, match=label_error):
        MultiIndex(levels=one_item_levels, codes=ragged_codes)

    # external API
    with pytest.raises(ValueError, match=length_error):
        idx.copy().set_levels(one_item_levels)

    with pytest.raises(ValueError, match=label_error):
        idx.copy().set_codes(ragged_codes)

    # test set_codes with verify_integrity=False
    # the setting should not raise any value error
    idx.copy().set_codes(codes=ragged_codes, verify_integrity=False)

    # code value smaller than -1
    with pytest.raises(ValueError, match=code_value_error):
        MultiIndex(levels=one_item_levels, codes=[[0, -2], [0, 0]])
|  | ||||
|  | ||||
def test_na_levels():
    """Codes that point at missing level values are re-assigned to -1."""
    # GH26408
    # test if codes are re-assigned value -1 for levels
    # with missing values (NaN, NaT, None)
    codes = [[0, -1, 1, 2, 3, 4]]

    na_leading = [[np.nan, None, pd.NaT, 128, 2]]
    result = MultiIndex(levels=na_leading, codes=codes)
    expected = MultiIndex(levels=na_leading, codes=[[-1, -1, -1, -1, 3, 4]])
    tm.assert_index_equal(result, expected)

    na_scattered = [[np.nan, "s", pd.NaT, 128, None]]
    result = MultiIndex(levels=na_scattered, codes=codes)
    expected = MultiIndex(levels=na_scattered, codes=[[-1, -1, 1, -1, 3, -1]])
    tm.assert_index_equal(result, expected)

    # verify set_levels and set_codes
    start = MultiIndex(levels=[[1, 2, 3, 4, 5]], codes=codes)
    tm.assert_index_equal(start.set_levels(na_scattered), expected)

    start = MultiIndex(levels=na_scattered, codes=[[1, 2, 2, 2, 2, 2]])
    tm.assert_index_equal(start.set_codes(codes), expected)
|  | ||||
|  | ||||
def test_copy_in_constructor():
    """With copy=True, later edits to the input arrays do not reach the index."""
    levels = np.array(["a", "b", "c"])
    codes = np.array([1, 1, 2, 0, 0, 1, 1])
    first_code = codes[0]

    mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
    assert mi.codes[0][0] == first_code

    # overwrite the source codes, then the source levels
    codes[0] = 15
    assert mi.codes[0][0] == first_code

    first_label = levels[0]
    levels[0] = "PANDA"
    assert mi.levels[0][0] == first_label
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # from_arrays | ||||
| # ---------------------------------------------------------------------------- | ||||
def test_from_arrays(idx):
    """from_arrays rebuilds the index from its per-level label arrays."""
    arrays = []
    for level, level_codes in zip(idx.levels, idx.codes):
        arrays.append(np.asarray(level).take(level_codes))

    # list of arrays as input
    rebuilt = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # infer correctly
    inferred = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
    first_level, second_level = inferred.levels
    assert first_level.equals(Index([Timestamp("20130101")]))
    assert second_level.equals(Index(["a", "b"]))
|  | ||||
|  | ||||
def test_from_arrays_iterator(idx):
    """from_arrays accepts an iterator of arrays but rejects a scalar."""
    # GH 18434
    arrays = []
    for level, level_codes in zip(idx.levels, idx.codes):
        arrays.append(np.asarray(level).take(level_codes))

    # iterator as input
    rebuilt = MultiIndex.from_arrays(iter(arrays), names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # invalid iterator input
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of array-likes."
    ):
        MultiIndex.from_arrays(0)
|  | ||||
|  | ||||
def test_from_arrays_tuples(idx):
    """from_arrays accepts a tuple of tuples."""
    columns = []
    for level, level_codes in zip(idx.levels, idx.codes):
        columns.append(tuple(np.asarray(level).take(level_codes)))
    arrays = tuple(columns)

    # tuple of tuples as input
    rebuilt = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(rebuilt, idx)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="h", periods=3),
        ),
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
        ),
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="h", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    """from_arrays gives equal results for Index inputs and Series inputs."""
    from_indexes = MultiIndex.from_arrays([idx1, idx2])
    from_series = MultiIndex.from_arrays([Series(idx1), Series(idx2)])

    # both constructions must give back the original level values
    for built in (from_indexes, from_series):
        tm.assert_index_equal(built.get_level_values(0), idx1)
        tm.assert_index_equal(built.get_level_values(1), idx2)

    tm.assert_index_equal(from_indexes, from_series)
|  | ||||
|  | ||||
def test_from_arrays_index_datetimelike_mixed():
    """from_arrays with tz-aware, naive, timedelta and period levels together."""
    indexes = [
        date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
        date_range("2015-01-01 10:00", freq="h", periods=3),
        pd.timedelta_range("1 days", freq="D", periods=3),
        pd.period_range("2011-01-01", freq="D", periods=3),
    ]

    # built straight from the indexes
    result = MultiIndex.from_arrays(indexes)
    for level, expected in enumerate(indexes):
        tm.assert_index_equal(result.get_level_values(level), expected)

    # built from the same data wrapped in Series
    result2 = MultiIndex.from_arrays([Series(index) for index in indexes])
    for level, expected in enumerate(indexes):
        tm.assert_index_equal(result2.get_level_values(level), expected)

    tm.assert_index_equal(result, result2)
|  | ||||
|  | ||||
def test_from_arrays_index_series_categorical():
    """from_arrays with categorical inputs given as index, Series and values."""
    # GH13743
    letters = list("abcaab")
    categories = list("bac")
    unordered = pd.CategoricalIndex(letters, categories=categories, ordered=False)
    ordered = pd.CategoricalIndex(letters, categories=categories, ordered=True)

    input_forms = [
        [unordered, ordered],
        [Series(unordered), Series(ordered)],
        [unordered.values, ordered.values],
    ]
    for arrays in input_forms:
        result = MultiIndex.from_arrays(arrays)
        tm.assert_index_equal(result.get_level_values(0), unordered)
        tm.assert_index_equal(result.get_level_values(1), ordered)
|  | ||||
|  | ||||
def test_from_arrays_empty():
    """from_arrays with zero, one and several empty arrays."""
    # 0 levels
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    single = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(single.levels[0], Index([], name="A"))
    assert single.names == ["A"]

    # N levels
    all_names = list("ABC")
    for n_levels in (2, 3):
        names = all_names[:n_levels]
        result = MultiIndex.from_arrays(arrays=[[]] * n_levels, names=names)
        expected = MultiIndex(
            levels=[[]] * n_levels, codes=[[]] * n_levels, names=names
        )
        tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        # scalars and lists containing at least one scalar
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        # tuples containing at least one scalar
        # (the scalar "a" is listed once above; a second entry was redundant)
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        # mixed list / tuple nesting
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """from_arrays raises TypeError for each listed invalid input."""
    msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "idx1, idx2",
    [
        ([1, 2, 3], ["a", "b"]),
        ([], ["a", "b"]),
        ([1, 2, 3], []),
    ],
)
def test_from_arrays_different_lengths(idx1, idx2):
    """from_arrays raises ValueError when the arrays differ in length."""
    # see gh-13599
    mismatched = [idx1, idx2]
    with pytest.raises(ValueError, match="^all arrays must be same length$"):
        MultiIndex.from_arrays(mismatched)
|  | ||||
|  | ||||
def test_from_arrays_respects_none_names():
    """names=None yields unnamed levels even when the input Series are named."""
    # GH27292
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b", "c"], name="bar")

    result = MultiIndex.from_arrays([numbers, letters], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]],
        codes=[[0, 1, 2], [0, 1, 2]],
        names=None,
    )
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # from_tuples | ||||
| # ---------------------------------------------------------------------------- | ||||
def test_from_tuples():
    """from_tuples rejects an unnamed empty list and builds from tuples."""
    with pytest.raises(
        TypeError, match="Cannot infer number of levels from empty list"
    ):
        MultiIndex.from_tuples([])

    names = ["a", "b"]
    expected = MultiIndex(levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=names)

    # input tuples
    result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=names)
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_from_tuples_iterator():
    """from_tuples accepts an iterator of tuples but rejects a scalar."""
    # GH 18434
    # input iterator for tuples
    names = ["a", "b"]
    expected = MultiIndex(levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=names)

    pairs = zip([1, 3], [2, 4])
    result = MultiIndex.from_tuples(pairs, names=names)
    tm.assert_index_equal(result, expected)

    # input non-iterables
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of tuple-likes."
    ):
        MultiIndex.from_tuples(0)
|  | ||||
|  | ||||
def test_from_tuples_empty():
    """An empty tuple list with names equals from_arrays with empty arrays."""
    # GH 16777
    names = ["a", "b"]
    from_tuples = MultiIndex.from_tuples([], names=names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=names)
    tm.assert_index_equal(from_tuples, from_arrays)
|  | ||||
|  | ||||
| def test_from_tuples_index_values(idx): | ||||
|     # Passes the `idx` fixture (a MultiIndex) straight to from_tuples and | ||||
|     # checks that every element of .values compares equal. | ||||
|     result = MultiIndex.from_tuples(idx) | ||||
|     assert (result.values == idx.values).all() | ||||
|  | ||||
|  | ||||
| def test_tuples_with_name_string(): | ||||
|     # GH 15110 and GH 14848 | ||||
|     # Index(...) on a list of tuples with a plain string `name` is expected | ||||
|     # to raise ValueError, for both a multi-character and a one-character name. | ||||
|  | ||||
|     li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] | ||||
|     msg = "Names should be list-like for a MultiIndex" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         Index(li, name="abc") | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         Index(li, name="a") | ||||
|  | ||||
|  | ||||
| def test_from_tuples_with_tuple_label(): | ||||
|     # GH 15457 | ||||
|     # One label in level "b" is itself a tuple, (1, 2). A frame indexed via | ||||
|     # set_index is compared with a frame indexed by the from_tuples result. | ||||
|     expected = pd.DataFrame( | ||||
|         [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"] | ||||
|     ).set_index(["a", "b"]) | ||||
|     idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b")) | ||||
|     result = pd.DataFrame([2, 3], columns=["c"], index=idx) | ||||
|     tm.assert_frame_equal(expected, result) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # from_product | ||||
| # ---------------------------------------------------------------------------- | ||||
| def test_from_product_empty_zero_levels(): | ||||
|     # 0 levels: an empty list of iterables is expected to raise ValueError | ||||
|     msg = "Must pass non-zero number of levels/codes" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         MultiIndex.from_product([]) | ||||
|  | ||||
|  | ||||
| def test_from_product_empty_one_level(): | ||||
|     # 1 level: a single empty iterable with one name. The only level is | ||||
|     # compared to an empty Index named "A", and the names are checked too. | ||||
|     result = MultiIndex.from_product([[]], names=["A"]) | ||||
|     expected = Index([], name="A") | ||||
|     tm.assert_index_equal(result.levels[0], expected) | ||||
|     assert result.names == ["A"] | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])] | ||||
| ) | ||||
| def test_from_product_empty_two_levels(first, second): | ||||
|     # At least one input is empty in every case, so the expected index has | ||||
|     # empty codes while its levels are the inputs as given. | ||||
|     names = ["A", "B"] | ||||
|     result = MultiIndex.from_product([first, second], names=names) | ||||
|     expected = MultiIndex(levels=[first, second], codes=[[], []], names=names) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("N", list(range(4))) | ||||
| def test_from_product_empty_three_levels(N): | ||||
|     # GH12258 | ||||
|     # N is the length of the middle level (0 through 3); the outer levels | ||||
|     # are empty, so the expected codes are empty for every N. | ||||
|     names = ["A", "B", "C"] | ||||
|     lvl2 = list(range(N)) | ||||
|     result = MultiIndex.from_product([[], lvl2, []], names=names) | ||||
|     expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]] | ||||
| ) | ||||
| def test_from_product_invalid_input(invalid_input): | ||||
|     # Every parametrized value is expected to be rejected with a TypeError | ||||
|     # whose message matches either alternative of the regex in `msg`. | ||||
|     msg = r"Input must be a list / sequence of iterables|Input must be list-like" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         MultiIndex.from_product(iterables=invalid_input) | ||||
|  | ||||
|  | ||||
| def test_from_product_datetimeindex(): | ||||
|     # Product of two ints with a two-day DatetimeIndex; mi.values is compared | ||||
|     # to the four (int, Timestamp) tuples listed below. | ||||
|     # NOTE(review): construct_1d_object_array_from_listlike and Timestamp are | ||||
|     # imported outside this chunk; presumably the helper builds a 1-D object | ||||
|     # array holding the tuples -- confirm against its definition. | ||||
|     dt_index = date_range("2000-01-01", periods=2) | ||||
|     mi = MultiIndex.from_product([[1, 2], dt_index]) | ||||
|     etalon = construct_1d_object_array_from_listlike( | ||||
|         [ | ||||
|             (1, Timestamp("2000-01-01")), | ||||
|             (1, Timestamp("2000-01-02")), | ||||
|             (2, Timestamp("2000-01-01")), | ||||
|             (2, Timestamp("2000-01-02")), | ||||
|         ] | ||||
|     ) | ||||
|     tm.assert_numpy_array_equal(mi.values, etalon) | ||||
|  | ||||
|  | ||||
| def test_from_product_rangeindex(): | ||||
|     # RangeIndex is preserved by factorize, so preserved in levels | ||||
|     # The first stored level (mi._levels[0]) is compared to `rng` with | ||||
|     # exact=True. | ||||
|     rng = Index(range(5)) | ||||
|     other = ["a", "b"] | ||||
|     mi = MultiIndex.from_product([rng, other]) | ||||
|     tm.assert_index_equal(mi._levels[0], rng, exact=True) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ordered", [False, True]) | ||||
| @pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values]) | ||||
| def test_from_product_index_series_categorical(ordered, f): | ||||
|     # GH13743 | ||||
|     # `f` passes the CategoricalIndex as-is, wrapped in a Series, or as its | ||||
|     # .values. The level-1 values of the product are expected to be the | ||||
|     # categorical data repeated once per element of `first`, keeping the | ||||
|     # categories and the `ordered` flag. | ||||
|     first = ["foo", "bar"] | ||||
|  | ||||
|     idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered) | ||||
|     expected = pd.CategoricalIndex( | ||||
|         list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered | ||||
|     ) | ||||
|  | ||||
|     result = MultiIndex.from_product([first, f(idx)]) | ||||
|     tm.assert_index_equal(result.get_level_values(1), expected) | ||||
|  | ||||
|  | ||||
| def test_from_product(): | ||||
|     # Basic case: the product of two three-element lists should equal the | ||||
|     # nine tuples below, with the first list varying slowest. | ||||
|     first = ["foo", "bar", "buz"] | ||||
|     second = ["a", "b", "c"] | ||||
|     names = ["first", "second"] | ||||
|     result = MultiIndex.from_product([first, second], names=names) | ||||
|  | ||||
|     tuples = [ | ||||
|         ("foo", "a"), | ||||
|         ("foo", "b"), | ||||
|         ("foo", "c"), | ||||
|         ("bar", "a"), | ||||
|         ("bar", "b"), | ||||
|         ("bar", "c"), | ||||
|         ("buz", "a"), | ||||
|         ("buz", "b"), | ||||
|         ("buz", "c"), | ||||
|     ] | ||||
|     expected = MultiIndex.from_tuples(tuples, names=names) | ||||
|  | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_from_product_iterator(): | ||||
|     # GH 18434 | ||||
|     # Same data as the list-based product test, but the outer container is | ||||
|     # passed as an iterator; a non-iterable input must raise TypeError. | ||||
|     first = ["foo", "bar", "buz"] | ||||
|     second = ["a", "b", "c"] | ||||
|     names = ["first", "second"] | ||||
|     tuples = [ | ||||
|         ("foo", "a"), | ||||
|         ("foo", "b"), | ||||
|         ("foo", "c"), | ||||
|         ("bar", "a"), | ||||
|         ("bar", "b"), | ||||
|         ("bar", "c"), | ||||
|         ("buz", "a"), | ||||
|         ("buz", "b"), | ||||
|         ("buz", "c"), | ||||
|     ] | ||||
|     expected = MultiIndex.from_tuples(tuples, names=names) | ||||
|  | ||||
|     # iterator as input | ||||
|     result = MultiIndex.from_product(iter([first, second]), names=names) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # Invalid non-iterable input | ||||
|     msg = "Input must be a list / sequence of iterables." | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         MultiIndex.from_product(0) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "a, b, expected_names", | ||||
|     [ | ||||
|         ( | ||||
|             Series([1, 2, 3], name="foo"), | ||||
|             Series(["a", "b"], name="bar"), | ||||
|             ["foo", "bar"], | ||||
|         ), | ||||
|         (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]), | ||||
|         ([1, 2, 3], ["a", "b"], None), | ||||
|     ], | ||||
| ) | ||||
| def test_from_product_infer_names(a, b, expected_names): | ||||
|     # GH27292 | ||||
|     # No `names` argument is given: the expected names come from the inputs' | ||||
|     # Series names, with None for a plain list (see the parametrization). | ||||
|     result = MultiIndex.from_product([a, b]) | ||||
|     expected = MultiIndex( | ||||
|         levels=[[1, 2, 3], ["a", "b"]], | ||||
|         codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], | ||||
|         names=expected_names, | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_from_product_respects_none_names(): | ||||
|     # GH27292 | ||||
|     # Both inputs are named Series; with names=None passed explicitly the | ||||
|     # result is expected to equal a MultiIndex constructed with names=None. | ||||
|     a = Series([1, 2, 3], name="foo") | ||||
|     b = Series(["a", "b"], name="bar") | ||||
|  | ||||
|     result = MultiIndex.from_product([a, b], names=None) | ||||
|     expected = MultiIndex( | ||||
|         levels=[[1, 2, 3], ["a", "b"]], | ||||
|         codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], | ||||
|         names=None, | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_from_product_readonly(): | ||||
|     # GH#15286 passing read-only array to from_product | ||||
|     # `expected` is built while `a` is still writeable; `a` is then flagged | ||||
|     # read-only and the product is built again and compared. | ||||
|     a = np.array(range(3)) | ||||
|     b = ["a", "b"] | ||||
|     expected = MultiIndex.from_product([a, b]) | ||||
|  | ||||
|     a.setflags(write=False) | ||||
|     result = MultiIndex.from_product([a, b]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_create_index_existing_name(idx): | ||||
|     # GH11193, when an existing index is passed, and a new name is not | ||||
|     # specified, the new index should inherit the previous object name | ||||
|     # NOTE: `index` is the same object as the `idx` fixture, so assigning | ||||
|     # .names below changes the fixture instance itself. | ||||
|     index = idx | ||||
|     index.names = ["foo", "bar"] | ||||
|     # Case 1: Index(index) is compared to an object-dtype Index of the tuples. | ||||
|     result = Index(index) | ||||
|     expected = Index( | ||||
|         Index( | ||||
|             [ | ||||
|                 ("foo", "one"), | ||||
|                 ("foo", "two"), | ||||
|                 ("bar", "one"), | ||||
|                 ("baz", "two"), | ||||
|                 ("qux", "one"), | ||||
|                 ("qux", "two"), | ||||
|             ], | ||||
|             dtype="object", | ||||
|         ) | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # Case 2: same comparison with an explicit name="A" on both sides. | ||||
|     result = Index(index, name="A") | ||||
|     expected = Index( | ||||
|         Index( | ||||
|             [ | ||||
|                 ("foo", "one"), | ||||
|                 ("foo", "two"), | ||||
|                 ("bar", "one"), | ||||
|                 ("baz", "two"), | ||||
|                 ("qux", "one"), | ||||
|                 ("qux", "two"), | ||||
|             ], | ||||
|             dtype="object", | ||||
|         ), | ||||
|         name="A", | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # from_frame | ||||
| # ---------------------------------------------------------------------------- | ||||
| def test_from_frame(): | ||||
|     # GH 22420 | ||||
|     # Each row of the frame becomes one tuple of the index and the column | ||||
|     # labels become the level names. | ||||
|     df = pd.DataFrame( | ||||
|         [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"] | ||||
|     ) | ||||
|     expected = MultiIndex.from_tuples( | ||||
|         [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"] | ||||
|     ) | ||||
|     result = MultiIndex.from_frame(df) | ||||
|     tm.assert_index_equal(expected, result) | ||||
|  | ||||
|  | ||||
| def test_from_frame_missing_values_multiIndex(): | ||||
|     # GH 39984 | ||||
|     # Skipped when pyarrow is not importable. The frame has an Int64 column | ||||
|     # and a Float64 column built from a pyarrow array, both with missing | ||||
|     # entries; from_frame should equal from_arrays on the same data. | ||||
|     pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
|     df = pd.DataFrame( | ||||
|         { | ||||
|             "a": Series([1, 2, None], dtype="Int64"), | ||||
|             "b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])), | ||||
|         } | ||||
|     ) | ||||
|     multi_indexed = MultiIndex.from_frame(df) | ||||
|     expected = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             Series([1, 2, None]).astype("Int64"), | ||||
|             pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])), | ||||
|         ], | ||||
|         names=["a", "b"], | ||||
|     ) | ||||
|     tm.assert_index_equal(multi_indexed, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "non_frame", | ||||
|     [ | ||||
|         Series([1, 2, 3, 4]), | ||||
|         [1, 2, 3, 4], | ||||
|         [[1, 2], [3, 4], [5, 6]], | ||||
|         Index([1, 2, 3, 4]), | ||||
|         np.array([[1, 2], [3, 4], [5, 6]]), | ||||
|         27, | ||||
|     ], | ||||
| ) | ||||
| def test_from_frame_error(non_frame): | ||||
|     # GH 22420 | ||||
|     # None of the parametrized inputs is a DataFrame, so from_frame is | ||||
|     # expected to raise TypeError for each of them. | ||||
|     with pytest.raises(TypeError, match="Input must be a DataFrame"): | ||||
|         MultiIndex.from_frame(non_frame) | ||||
|  | ||||
|  | ||||
| def test_from_frame_dtype_fidelity(): | ||||
|     # GH 22420 | ||||
|     # Columns cover tz-aware datetimes, ints, an ordered Categorical and | ||||
|     # strings. Besides index equality, each level's dtype must equal the | ||||
|     # dtype of the column it came from. | ||||
|     df = pd.DataFrame( | ||||
|         { | ||||
|             "dates": date_range("19910905", periods=6, tz="US/Eastern"), | ||||
|             "a": [1, 1, 1, 2, 2, 2], | ||||
|             "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), | ||||
|             "c": ["x", "x", "y", "z", "x", "y"], | ||||
|         } | ||||
|     ) | ||||
|     original_dtypes = df.dtypes.to_dict() | ||||
|  | ||||
|     expected_mi = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             date_range("19910905", periods=6, tz="US/Eastern"), | ||||
|             [1, 1, 1, 2, 2, 2], | ||||
|             pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), | ||||
|             ["x", "x", "y", "z", "x", "y"], | ||||
|         ], | ||||
|         names=["dates", "a", "b", "c"], | ||||
|     ) | ||||
|     mi = MultiIndex.from_frame(df) | ||||
|     # map level name -> dtype of that level, mirroring df.dtypes.to_dict() | ||||
|     mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} | ||||
|  | ||||
|     tm.assert_index_equal(expected_mi, mi) | ||||
|     assert original_dtypes == mi_dtypes | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])] | ||||
| ) | ||||
| def test_from_frame_valid_names(names_in, names_out): | ||||
|     # GH 22420 | ||||
|     # The frame has MultiIndex columns: with names=None the level names are | ||||
|     # expected to be the column tuples, otherwise the names passed in. | ||||
|     df = pd.DataFrame( | ||||
|         [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], | ||||
|         columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]), | ||||
|     ) | ||||
|     mi = MultiIndex.from_frame(df, names=names_in) | ||||
|     assert mi.names == names_out | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "names,expected_error_msg", | ||||
|     [ | ||||
|         ("bad_input", "Names should be list-like for a MultiIndex"), | ||||
|         (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"), | ||||
|     ], | ||||
| ) | ||||
| def test_from_frame_invalid_names(names, expected_error_msg): | ||||
|     # GH 22420 | ||||
|     # Two invalid `names` values for a two-column frame: a plain string and | ||||
|     # a list of three names. Each should raise ValueError with its message. | ||||
|     df = pd.DataFrame( | ||||
|         [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], | ||||
|         columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]), | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=expected_error_msg): | ||||
|         MultiIndex.from_frame(df, names=names) | ||||
|  | ||||
|  | ||||
| def test_index_equal_empty_iterable(): | ||||
|     # #16844 | ||||
|     # An empty MultiIndex built from levels/codes should compare equal to | ||||
|     # one built by from_arrays from two empty arrays. | ||||
|     a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"]) | ||||
|     b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) | ||||
|     tm.assert_index_equal(a, b) | ||||
|  | ||||
|  | ||||
| def test_raise_invalid_sortorder(): | ||||
|     # Test that the MultiIndex constructor raises when an incorrect sortorder | ||||
|     # is given | ||||
|     # GH#28518 | ||||
|  | ||||
|     levels = [[0, 1], [0, 1, 2]] | ||||
|  | ||||
|     # Correct sortorder | ||||
|     MultiIndex( | ||||
|         levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 | ||||
|     ) | ||||
|  | ||||
|     # The error messages matched below report the requested sortorder next to | ||||
|     # the lexsort_depth found for the given codes. | ||||
|     with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): | ||||
|         MultiIndex( | ||||
|             levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2 | ||||
|         ) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): | ||||
|         MultiIndex( | ||||
|             levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1 | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def test_datetimeindex(): | ||||
|     # Part 1: from_arrays with two tz-aware DatetimeIndex inputs. idx1 holds | ||||
|     # three timestamps repeated twice, so level 0 is expected to contain the | ||||
|     # three distinct values; level 1 is expected to equal idx2. | ||||
|     idx1 = pd.DatetimeIndex( | ||||
|         ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo" | ||||
|     ) | ||||
|     idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern") | ||||
|     idx = MultiIndex.from_arrays([idx1, idx2]) | ||||
|  | ||||
|     expected1 = pd.DatetimeIndex( | ||||
|         ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" | ||||
|     ) | ||||
|  | ||||
|     tm.assert_index_equal(idx.levels[0], expected1) | ||||
|     tm.assert_index_equal(idx.levels[1], idx2) | ||||
|  | ||||
|     # from datetime combos | ||||
|     # GH 7888 | ||||
|     # np.datetime64, datetime.datetime and Timestamp scalars, in every pairing, | ||||
|     # should all produce DatetimeIndex levels. | ||||
|     date1 = np.datetime64("today") | ||||
|     date2 = datetime.today() | ||||
|     date3 = Timestamp.today() | ||||
|  | ||||
|     for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): | ||||
|         index = MultiIndex.from_product([[d1], [d2]]) | ||||
|         assert isinstance(index.levels[0], pd.DatetimeIndex) | ||||
|         assert isinstance(index.levels[1], pd.DatetimeIndex) | ||||
|  | ||||
|     # but NOT date objects, matching Index behavior | ||||
|     date4 = date.today() | ||||
|     index = MultiIndex.from_product([[date4], [date2]]) | ||||
|     assert not isinstance(index.levels[0], pd.DatetimeIndex) | ||||
|     assert isinstance(index.levels[1], pd.DatetimeIndex) | ||||
|  | ||||
|  | ||||
| def test_constructor_with_tz(): | ||||
|     # Two named DatetimeIndex inputs in different time zones. Names and | ||||
|     # levels should be preserved both when the indexes are passed directly | ||||
|     # and when each is wrapped in a Series first. | ||||
|     index = pd.DatetimeIndex( | ||||
|         ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" | ||||
|     ) | ||||
|     columns = pd.DatetimeIndex( | ||||
|         ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" | ||||
|     ) | ||||
|  | ||||
|     result = MultiIndex.from_arrays([index, columns]) | ||||
|  | ||||
|     assert result.names == ["dt1", "dt2"] | ||||
|     tm.assert_index_equal(result.levels[0], index) | ||||
|     tm.assert_index_equal(result.levels[1], columns) | ||||
|  | ||||
|     result = MultiIndex.from_arrays([Series(index), Series(columns)]) | ||||
|  | ||||
|     assert result.names == ["dt1", "dt2"] | ||||
|     tm.assert_index_equal(result.levels[0], index) | ||||
|     tm.assert_index_equal(result.levels[1], columns) | ||||
|  | ||||
|  | ||||
| def test_multiindex_inference_consistency(): | ||||
|     # check that inference behavior matches the base class | ||||
|     # A list of datetime.date objects gives an object-dtype Index; the single | ||||
|     # level produced by from_arrays, from_product and from_tuples is checked | ||||
|     # to be object dtype as well. | ||||
|  | ||||
|     v = date.today() | ||||
|  | ||||
|     arr = [v, v] | ||||
|  | ||||
|     idx = Index(arr) | ||||
|     assert idx.dtype == object | ||||
|  | ||||
|     mi = MultiIndex.from_arrays([arr]) | ||||
|     lev = mi.levels[0] | ||||
|     assert lev.dtype == object | ||||
|  | ||||
|     mi = MultiIndex.from_product([arr]) | ||||
|     lev = mi.levels[0] | ||||
|     assert lev.dtype == object | ||||
|  | ||||
|     mi = MultiIndex.from_tuples([(x,) for x in arr]) | ||||
|     lev = mi.levels[0] | ||||
|     assert lev.dtype == object | ||||
|  | ||||
|  | ||||
| def test_dtype_representation(using_infer_string): | ||||
|     # GH#46900 | ||||
|     # The level names are tuples, so .dtypes is expected to be a Series | ||||
|     # indexed by a MultiIndex built from those tuples. The expected dtype of | ||||
|     # the string level depends on the `using_infer_string` fixture, which is | ||||
|     # defined outside this chunk. | ||||
|     pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")]) | ||||
|     result = pmidx.dtypes | ||||
|     exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan) | ||||
|     expected = Series( | ||||
|         ["int64", exp], | ||||
|         index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]), | ||||
|         dtype=object, | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,201 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat.numpy import np_version_gt2 | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_to_numpy(idx): | ||||
|     # to_numpy() called without arguments should equal the .values array | ||||
|     result = idx.to_numpy() | ||||
|     exp = idx.values | ||||
|     tm.assert_numpy_array_equal(result, exp) | ||||
|  | ||||
|  | ||||
| def test_array_interface(idx): | ||||
|     # https://github.com/pandas-dev/pandas/pull/60046 | ||||
|     # np.asarray on the fixture should give a 1-D object array holding the | ||||
|     # six tuples; the remaining checks cover the `copy` keyword of np.array. | ||||
|     result = np.asarray(idx) | ||||
|     # filled by slice assignment into a pre-allocated object array | ||||
|     expected = np.empty((6,), dtype=object) | ||||
|     expected[:] = [ | ||||
|         ("foo", "one"), | ||||
|         ("foo", "two"), | ||||
|         ("bar", "one"), | ||||
|         ("baz", "two"), | ||||
|         ("qux", "one"), | ||||
|         ("qux", "two"), | ||||
|     ] | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # it always gives a copy by default, but the values are cached, so results | ||||
|     # are still sharing memory | ||||
|     result_copy1 = np.asarray(idx) | ||||
|     result_copy2 = np.asarray(idx) | ||||
|     assert np.may_share_memory(result_copy1, result_copy2) | ||||
|  | ||||
|     # with explicit copy=True, then it is an actual copy | ||||
|     result_copy1 = np.array(idx, copy=True) | ||||
|     result_copy2 = np.array(idx, copy=True) | ||||
|     assert not np.may_share_memory(result_copy1, result_copy2) | ||||
|  | ||||
|     if not np_version_gt2: | ||||
|         # copy=False semantics are only supported in NumPy>=2. | ||||
|         return | ||||
|  | ||||
|     # for MultiIndex, copy=False is never allowed | ||||
|     # (this test expects a FutureWarning from the call, not an exception) | ||||
|     msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         np.array(idx, copy=False) | ||||
|  | ||||
|  | ||||
| def test_to_frame(): | ||||
|     # Walks through to_frame() with and without index=False, with unnamed and | ||||
|     # named levels, with the `name` override, its error cases, and finally | ||||
|     # with a datetime level. | ||||
|     tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")] | ||||
|  | ||||
|     # unnamed levels: compared to DataFrame(tuples) with its default columns | ||||
|     index = MultiIndex.from_tuples(tuples) | ||||
|     result = index.to_frame(index=False) | ||||
|     expected = DataFrame(tuples) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     result = index.to_frame() | ||||
|     expected.index = index | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # named levels: the level names are expected as the column labels | ||||
|     tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")] | ||||
|     index = MultiIndex.from_tuples(tuples, names=["first", "second"]) | ||||
|     result = index.to_frame(index=False) | ||||
|     expected = DataFrame(tuples) | ||||
|     expected.columns = ["first", "second"] | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     result = index.to_frame() | ||||
|     expected.index = index | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # See GH-22580 | ||||
|     # unnamed levels with column labels supplied through `name` | ||||
|     index = MultiIndex.from_tuples(tuples) | ||||
|     result = index.to_frame(index=False, name=["first", "second"]) | ||||
|     expected = DataFrame(tuples) | ||||
|     expected.columns = ["first", "second"] | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     result = index.to_frame(name=["first", "second"]) | ||||
|     expected.index = index | ||||
|     expected.columns = ["first", "second"] | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # a scalar `name` is expected to raise TypeError | ||||
|     msg = "'name' must be a list / sequence of column names." | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         index.to_frame(name="first") | ||||
|  | ||||
|     # a `name` list shorter than the number of levels should raise ValueError | ||||
|     msg = "'name' should have same length as number of levels on index." | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         index.to_frame(name=["first"]) | ||||
|  | ||||
|     # Tests for datetime index | ||||
|     index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)]) | ||||
|     result = index.to_frame(index=False) | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             0: np.repeat(np.arange(5, dtype="int64"), 3), | ||||
|             1: np.tile(pd.date_range("20130101", periods=3), 5), | ||||
|         } | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     result = index.to_frame() | ||||
|     expected.index = index | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # See GH-22580 | ||||
|     result = index.to_frame(index=False, name=["first", "second"]) | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             "first": np.repeat(np.arange(5, dtype="int64"), 3), | ||||
|             "second": np.tile(pd.date_range("20130101", periods=3), 5), | ||||
|         } | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     result = index.to_frame(name=["first", "second"]) | ||||
|     expected.index = index | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_frame_dtype_fidelity(): | ||||
|     # GH 22420 | ||||
|     # Levels cover tz-aware datetimes, ints, an ordered Categorical and | ||||
|     # strings. Besides frame equality, each resulting column's dtype must | ||||
|     # equal the dtype of the level it came from. | ||||
|     mi = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             pd.date_range("19910905", periods=6, tz="US/Eastern"), | ||||
|             [1, 1, 1, 2, 2, 2], | ||||
|             pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), | ||||
|             ["x", "x", "y", "z", "x", "y"], | ||||
|         ], | ||||
|         names=["dates", "a", "b", "c"], | ||||
|     ) | ||||
|     # map level name -> dtype of that level, mirroring df.dtypes.to_dict() | ||||
|     original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} | ||||
|  | ||||
|     expected_df = DataFrame( | ||||
|         { | ||||
|             "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), | ||||
|             "a": [1, 1, 1, 2, 2, 2], | ||||
|             "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), | ||||
|             "c": ["x", "x", "y", "z", "x", "y"], | ||||
|         } | ||||
|     ) | ||||
|     df = mi.to_frame(index=False) | ||||
|     df_dtypes = df.dtypes.to_dict() | ||||
|  | ||||
|     tm.assert_frame_equal(df, expected_df) | ||||
|     assert original_dtypes == df_dtypes | ||||
|  | ||||
|  | ||||
| def test_to_frame_resulting_column_order(): | ||||
|     # GH 22420 | ||||
|     # The level names mix strings and an int and are not in sorted order; | ||||
|     # the frame's columns are expected to come out in that same order. | ||||
|     expected = ["z", 0, "a"] | ||||
|     mi = MultiIndex.from_arrays( | ||||
|         [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected | ||||
|     ) | ||||
|     result = mi.to_frame().columns.tolist() | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_to_frame_duplicate_labels(): | ||||
|     # GH 45245 | ||||
|     # to_frame() should refuse duplicate column labels by default and allow | ||||
|     # them when allow_duplicates=True is passed. | ||||
|     data = [(1, 2), (3, 4)] | ||||
|     names = ["a", "a"] | ||||
|     index = MultiIndex.from_tuples(data, names=names) | ||||
|     with pytest.raises(ValueError, match="Cannot create duplicate column labels"): | ||||
|         index.to_frame() | ||||
|  | ||||
|     result = index.to_frame(allow_duplicates=True) | ||||
|     expected = DataFrame(data, index=index, columns=names) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # an unnamed level next to a level named 0: the expected columns below | ||||
|     # are [0, 0], so this also counts as a duplicate | ||||
|     names = [None, 0] | ||||
|     index = MultiIndex.from_tuples(data, names=names) | ||||
|     with pytest.raises(ValueError, match="Cannot create duplicate column labels"): | ||||
|         index.to_frame() | ||||
|  | ||||
|     result = index.to_frame(allow_duplicates=True) | ||||
|     expected = DataFrame(data, index=index, columns=[0, 0]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_flat_index(idx): | ||||
|     # to_flat_index() on the fixture is compared to an Index of the same six | ||||
|     # tuples, constructed with tupleize_cols=False. | ||||
|     expected = pd.Index( | ||||
|         ( | ||||
|             ("foo", "one"), | ||||
|             ("foo", "two"), | ||||
|             ("bar", "one"), | ||||
|             ("baz", "two"), | ||||
|             ("qux", "one"), | ||||
|             ("qux", "two"), | ||||
|         ), | ||||
|         tupleize_cols=False, | ||||
|     ) | ||||
|     result = idx.to_flat_index() | ||||
|     tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,96 @@ | ||||
| from copy import ( | ||||
|     copy, | ||||
|     deepcopy, | ||||
| ) | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from pandas import MultiIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def assert_multiindex_copied(copy, original): | ||||
|     # Shared helper: asserts that `copy` is a proper copy of `original` by | ||||
|     # checking levels, codes, names and sortorder in turn. | ||||
|     # NOTE: inside this helper the parameter `copy` shadows the `copy` | ||||
|     # function imported at the top of this file. | ||||
|  | ||||
|     # Levels should be (at least) shallow copied | ||||
|     tm.assert_copy(copy.levels, original.levels) | ||||
|     # NOTE(review): this codes comparison is repeated verbatim just below. | ||||
|     tm.assert_almost_equal(copy.codes, original.codes) | ||||
|  | ||||
|     # Codes: it doesn't matter which way they were copied, but the container | ||||
|     # must be a distinct object | ||||
|     tm.assert_almost_equal(copy.codes, original.codes) | ||||
|     assert copy.codes is not original.codes | ||||
|  | ||||
|     # Names: it doesn't matter which way they were copied, but the container | ||||
|     # must be a distinct object | ||||
|     assert copy.names == original.names | ||||
|     assert copy.names is not original.names | ||||
|  | ||||
|     # Sort order should be copied | ||||
|     assert copy.sortorder == original.sortorder | ||||
|  | ||||
|  | ||||
| def test_copy(idx): | ||||
|     # .copy() with default arguments must satisfy the shared copy checks | ||||
|     i_copy = idx.copy() | ||||
|  | ||||
|     assert_multiindex_copied(i_copy, idx) | ||||
|  | ||||
|  | ||||
| def test_shallow_copy(idx): | ||||
|     # the private ._view() result must satisfy the shared copy checks | ||||
|     i_copy = idx._view() | ||||
|  | ||||
|     assert_multiindex_copied(i_copy, idx) | ||||
|  | ||||
|  | ||||
| def test_view(idx): | ||||
|     # the public .view() result must satisfy the shared copy checks | ||||
|     i_view = idx.view() | ||||
|     assert_multiindex_copied(i_view, idx) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", [copy, deepcopy]) | ||||
| def test_copy_and_deepcopy(func): | ||||
|     # Both copy.copy and copy.deepcopy should return a new object that | ||||
|     # still .equals() the original. | ||||
|     idx = MultiIndex( | ||||
|         levels=[["foo", "bar"], ["fizz", "buzz"]], | ||||
|         codes=[[0, 0, 0, 1], [0, 0, 1, 1]], | ||||
|         names=["first", "second"], | ||||
|     ) | ||||
|     idx_copy = func(idx) | ||||
|     assert idx_copy is not idx | ||||
|     assert idx_copy.equals(idx) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("deep", [True, False]) | ||||
| def test_copy_method(deep): | ||||
|     # .copy(deep=...) should .equals() the original for both settings | ||||
|     idx = MultiIndex( | ||||
|         levels=[["foo", "bar"], ["fizz", "buzz"]], | ||||
|         codes=[[0, 0, 0, 1], [0, 0, 1, 1]], | ||||
|         names=["first", "second"], | ||||
|     ) | ||||
|     idx_copy = idx.copy(deep=deep) | ||||
|     assert idx_copy.equals(idx) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("deep", [True, False]) | ||||
| @pytest.mark.parametrize( | ||||
|     "kwarg, value", | ||||
|     [ | ||||
|         ("names", ["third", "fourth"]), | ||||
|     ], | ||||
| ) | ||||
| def test_copy_method_kwargs(deep, kwarg, value): | ||||
|     # gh-12309: Check that the "name" argument as well as other kwargs are | ||||
|     # honored | ||||
|     # (only the "names" keyword is parametrized at present) | ||||
|     idx = MultiIndex( | ||||
|         levels=[["foo", "bar"], ["fizz", "buzz"]], | ||||
|         codes=[[0, 0, 0, 1], [0, 0, 1, 1]], | ||||
|         names=["first", "second"], | ||||
|     ) | ||||
|     idx_copy = idx.copy(**{kwarg: value, "deep": deep}) | ||||
|     assert getattr(idx_copy, kwarg) == value | ||||
|  | ||||
|  | ||||
| def test_copy_deep_false_retains_id(): | ||||
|     # GH#47878 | ||||
|     # A shallow copy (deep=False) is expected to share the very same private | ||||
|     # `_id` object with the original (identity, not just equality). | ||||
|     idx = MultiIndex( | ||||
|         levels=[["foo", "bar"], ["fizz", "buzz"]], | ||||
|         codes=[[0, 0, 0, 1], [0, 0, 1, 1]], | ||||
|         names=["first", "second"], | ||||
|     ) | ||||
|  | ||||
|     res = idx.copy(deep=False) | ||||
|     assert res._id is idx._id | ||||
| @ -0,0 +1,190 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import PerformanceWarning | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_drop(idx): | ||||
|     # Exercises MultiIndex.drop with full tuples, a MultiIndex of labels, | ||||
|     # first-level labels, missing labels (KeyError) and errors="ignore". | ||||
|     # Expected results are expressed as positional selections of `idx`. | ||||
|     dropped = idx.drop([("foo", "two"), ("qux", "one")]) | ||||
|  | ||||
|     index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")]) | ||||
|     dropped2 = idx.drop(index) | ||||
|  | ||||
|     expected = idx[[0, 2, 3, 5]] | ||||
|     tm.assert_index_equal(dropped, expected) | ||||
|     tm.assert_index_equal(dropped2, expected) | ||||
|  | ||||
|     # dropping by first-level label, as a list and as a scalar | ||||
|     dropped = idx.drop(["bar"]) | ||||
|     expected = idx[[0, 1, 3, 4, 5]] | ||||
|     tm.assert_index_equal(dropped, expected) | ||||
|  | ||||
|     dropped = idx.drop("foo") | ||||
|     expected = idx[[2, 3, 4, 5]] | ||||
|     tm.assert_index_equal(dropped, expected) | ||||
|  | ||||
|     # labels that are not present are expected to raise KeyError | ||||
|     index = MultiIndex.from_tuples([("bar", "two")]) | ||||
|     with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"): | ||||
|         idx.drop([("bar", "two")]) | ||||
|     with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"): | ||||
|         idx.drop(index) | ||||
|     with pytest.raises(KeyError, match=r"^'two'$"): | ||||
|         idx.drop(["foo", "two"]) | ||||
|  | ||||
|     # partially correct argument | ||||
|     mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")]) | ||||
|     with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"): | ||||
|         idx.drop(mixed_index) | ||||
|  | ||||
|     # errors='ignore' | ||||
|     dropped = idx.drop(index, errors="ignore") | ||||
|     expected = idx[[0, 1, 2, 3, 4, 5]] | ||||
|     tm.assert_index_equal(dropped, expected) | ||||
|  | ||||
|     dropped = idx.drop(mixed_index, errors="ignore") | ||||
|     expected = idx[[0, 1, 2, 3, 5]] | ||||
|     tm.assert_index_equal(dropped, expected) | ||||
|  | ||||
|     dropped = idx.drop(["foo", "two"], errors="ignore") | ||||
|     expected = idx[[2, 3, 4, 5]] | ||||
|     tm.assert_index_equal(dropped, expected) | ||||
|  | ||||
|     # mixed partial / full drop | ||||
|     dropped = idx.drop(["foo", ("qux", "one")]) | ||||
|     expected = idx[[2, 3, 5]] | ||||
|     tm.assert_index_equal(dropped, expected) | ||||
|  | ||||
|     # mixed partial / full drop / errors='ignore' | ||||
|     mixed_index = ["foo", ("qux", "one"), "two"] | ||||
|     with pytest.raises(KeyError, match=r"^'two'$"): | ||||
|         idx.drop(mixed_index) | ||||
|     dropped = idx.drop(mixed_index, errors="ignore") | ||||
|     expected = idx[[2, 3, 5]] | ||||
|     tm.assert_index_equal(dropped, expected) | ||||
|  | ||||
|  | ||||
| def test_droplevel_with_names(idx): | ||||
|     index = idx[idx.get_loc("foo")] | ||||
|     dropped = index.droplevel(0) | ||||
|     assert dropped.name == "second" | ||||
|  | ||||
|     index = MultiIndex( | ||||
|         levels=[Index(range(4)), Index(range(4)), Index(range(4))], | ||||
|         codes=[ | ||||
|             np.array([0, 0, 1, 2, 2, 2, 3, 3]), | ||||
|             np.array([0, 1, 0, 0, 0, 1, 0, 1]), | ||||
|             np.array([1, 0, 1, 1, 0, 0, 1, 0]), | ||||
|         ], | ||||
|         names=["one", "two", "three"], | ||||
|     ) | ||||
|     dropped = index.droplevel(0) | ||||
|     assert dropped.names == ("two", "three") | ||||
|  | ||||
|     dropped = index.droplevel("two") | ||||
|     expected = index.droplevel(1) | ||||
|     assert dropped.equals(expected) | ||||
|  | ||||
|  | ||||
| def test_droplevel_list(): | ||||
|     index = MultiIndex( | ||||
|         levels=[Index(range(4)), Index(range(4)), Index(range(4))], | ||||
|         codes=[ | ||||
|             np.array([0, 0, 1, 2, 2, 2, 3, 3]), | ||||
|             np.array([0, 1, 0, 0, 0, 1, 0, 1]), | ||||
|             np.array([1, 0, 1, 1, 0, 0, 1, 0]), | ||||
|         ], | ||||
|         names=["one", "two", "three"], | ||||
|     ) | ||||
|  | ||||
|     dropped = index[:2].droplevel(["three", "one"]) | ||||
|     expected = index[:2].droplevel(2).droplevel(0) | ||||
|     assert dropped.equals(expected) | ||||
|  | ||||
|     dropped = index[:2].droplevel([]) | ||||
|     expected = index[:2] | ||||
|     assert dropped.equals(expected) | ||||
|  | ||||
|     msg = ( | ||||
|         "Cannot remove 3 levels from an index with 3 levels: " | ||||
|         "at least one level must be left" | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         index[:2].droplevel(["one", "two", "three"]) | ||||
|  | ||||
|     with pytest.raises(KeyError, match="'Level four not found'"): | ||||
|         index[:2].droplevel(["one", "four"]) | ||||
|  | ||||
|  | ||||
| def test_drop_not_lexsorted(): | ||||
|     # GH 12078 | ||||
|  | ||||
|     # define the lexsorted version of the multi-index | ||||
|     tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")] | ||||
|     lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"]) | ||||
|     assert lexsorted_mi._is_lexsorted() | ||||
|  | ||||
|     # and the not-lexsorted version | ||||
|     df = pd.DataFrame( | ||||
|         columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] | ||||
|     ) | ||||
|     df = df.pivot_table(index="a", columns=["b", "c"], values="d") | ||||
|     df = df.reset_index() | ||||
|     not_lexsorted_mi = df.columns | ||||
|     assert not not_lexsorted_mi._is_lexsorted() | ||||
|  | ||||
|     # compare the results | ||||
|     tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) | ||||
|     with tm.assert_produces_warning(PerformanceWarning): | ||||
|         tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) | ||||
|  | ||||
|  | ||||
| def test_drop_with_nan_in_index(nulls_fixture): | ||||
|     # GH#18853 | ||||
|     mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"]) | ||||
|     msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level" | ||||
|     with pytest.raises(KeyError, match=msg): | ||||
|         mi.drop(pd.Timestamp("2001"), level="date") | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") | ||||
| def test_drop_with_non_monotonic_duplicates(): | ||||
|     # GH#33494 | ||||
|     mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)]) | ||||
|     result = mi.drop((1, 2)) | ||||
|     expected = MultiIndex.from_tuples([(2, 3)]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_single_level_drop_partially_missing_elements(): | ||||
|     # GH 37820 | ||||
|  | ||||
|     mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)]) | ||||
|     msg = r"labels \[4\] not found in level" | ||||
|     with pytest.raises(KeyError, match=msg): | ||||
|         mi.drop(4, level=0) | ||||
|     with pytest.raises(KeyError, match=msg): | ||||
|         mi.drop([1, 4], level=0) | ||||
|     msg = r"labels \[nan\] not found in level" | ||||
|     with pytest.raises(KeyError, match=msg): | ||||
|         mi.drop([np.nan], level=0) | ||||
|     with pytest.raises(KeyError, match=msg): | ||||
|         mi.drop([np.nan, 1, 2, 3], level=0) | ||||
|  | ||||
|     mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)]) | ||||
|     msg = r"labels \['a'\] not found in level" | ||||
|     with pytest.raises(KeyError, match=msg): | ||||
|         mi.drop([np.nan, 1, "a"], level=0) | ||||
|  | ||||
|  | ||||
| def test_droplevel_multiindex_one_level(): | ||||
|     # GH#37208 | ||||
|     index = MultiIndex.from_tuples([(2,)], names=("b",)) | ||||
|     result = index.droplevel([]) | ||||
|     expected = Index([2], name="b") | ||||
|     tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,363 @@ | ||||
| from itertools import product | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import ( | ||||
|     hashtable, | ||||
|     index as libindex, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def idx_dup(): | ||||
|     # compare tests/indexes/multi/conftest.py | ||||
|     major_axis = Index(["foo", "bar", "baz", "qux"]) | ||||
|     minor_axis = Index(["one", "two"]) | ||||
|  | ||||
|     major_codes = np.array([0, 0, 1, 0, 1, 1]) | ||||
|     minor_codes = np.array([0, 1, 0, 1, 0, 1]) | ||||
|     index_names = ["first", "second"] | ||||
|     mi = MultiIndex( | ||||
|         levels=[major_axis, minor_axis], | ||||
|         codes=[major_codes, minor_codes], | ||||
|         names=index_names, | ||||
|         verify_integrity=False, | ||||
|     ) | ||||
|     return mi | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("names", [None, ["first", "second"]]) | ||||
| def test_unique(names): | ||||
|     mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names) | ||||
|  | ||||
|     res = mi.unique() | ||||
|     exp = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) | ||||
|     tm.assert_index_equal(res, exp) | ||||
|  | ||||
|     mi = MultiIndex.from_arrays([list("aaaa"), list("abab")], names=names) | ||||
|     res = mi.unique() | ||||
|     exp = MultiIndex.from_arrays([list("aa"), list("ab")], names=mi.names) | ||||
|     tm.assert_index_equal(res, exp) | ||||
|  | ||||
|     mi = MultiIndex.from_arrays([list("aaaa"), list("aaaa")], names=names) | ||||
|     res = mi.unique() | ||||
|     exp = MultiIndex.from_arrays([["a"], ["a"]], names=mi.names) | ||||
|     tm.assert_index_equal(res, exp) | ||||
|  | ||||
|     # GH #20568 - empty MI | ||||
|     mi = MultiIndex.from_arrays([[], []], names=names) | ||||
|     res = mi.unique() | ||||
|     tm.assert_index_equal(mi, res) | ||||
|  | ||||
|  | ||||
| def test_unique_datetimelike(): | ||||
|     idx1 = DatetimeIndex( | ||||
|         ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"] | ||||
|     ) | ||||
|     idx2 = DatetimeIndex( | ||||
|         ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"], | ||||
|         tz="Asia/Tokyo", | ||||
|     ) | ||||
|     result = MultiIndex.from_arrays([idx1, idx2]).unique() | ||||
|  | ||||
|     eidx1 = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"]) | ||||
|     eidx2 = DatetimeIndex( | ||||
|         ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo" | ||||
|     ) | ||||
|     exp = MultiIndex.from_arrays([eidx1, eidx2]) | ||||
|     tm.assert_index_equal(result, exp) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("level", [0, "first", 1, "second"]) | ||||
| def test_unique_level(idx, level): | ||||
|     # GH #17896 - with level= argument | ||||
|     result = idx.unique(level=level) | ||||
|     expected = idx.get_level_values(level).unique() | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # With already unique level | ||||
|     mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"]) | ||||
|     result = mi.unique(level=level) | ||||
|     expected = mi.get_level_values(level) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # With empty MI | ||||
|     mi = MultiIndex.from_arrays([[], []], names=["first", "second"]) | ||||
|     result = mi.unique(level=level) | ||||
|     expected = mi.get_level_values(level) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_duplicate_multiindex_codes(): | ||||
|     # GH 17464 | ||||
|     # Make sure that a MultiIndex with duplicate levels throws a ValueError | ||||
|     msg = r"Level values must be unique: \[[A', ]+\] on level 0" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         mi = MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)]) | ||||
|  | ||||
|     # And that using set_levels with duplicate levels fails | ||||
|     mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) | ||||
|     msg = r"Level values must be unique: \[[AB', ]+\] on level 0" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]]) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]]) | ||||
| def test_duplicate_level_names(names): | ||||
|     # GH18872, GH19029 | ||||
|     mi = MultiIndex.from_product([[0, 1]] * 3, names=names) | ||||
|     assert mi.names == names | ||||
|  | ||||
|     # With .rename() | ||||
|     mi = MultiIndex.from_product([[0, 1]] * 3) | ||||
|     mi = mi.rename(names) | ||||
|     assert mi.names == names | ||||
|  | ||||
|     # With .rename(., level=) | ||||
|     mi.rename(names[1], level=1, inplace=True) | ||||
|     mi = mi.rename([names[0], names[2]], level=[0, 2]) | ||||
|     assert mi.names == names | ||||
|  | ||||
|  | ||||
| def test_duplicate_meta_data(): | ||||
|     # GH 10115 | ||||
|     mi = MultiIndex( | ||||
|         levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] | ||||
|     ) | ||||
|  | ||||
|     for idx in [ | ||||
|         mi, | ||||
|         mi.set_names([None, None]), | ||||
|         mi.set_names([None, "Num"]), | ||||
|         mi.set_names(["Upper", "Num"]), | ||||
|     ]: | ||||
|         assert idx.has_duplicates | ||||
|         assert idx.drop_duplicates().names == idx.names | ||||
|  | ||||
|  | ||||
| def test_has_duplicates(idx, idx_dup): | ||||
|     # see fixtures | ||||
|     assert idx.is_unique is True | ||||
|     assert idx.has_duplicates is False | ||||
|     assert idx_dup.is_unique is False | ||||
|     assert idx_dup.has_duplicates is True | ||||
|  | ||||
|     mi = MultiIndex( | ||||
|         levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] | ||||
|     ) | ||||
|     assert mi.is_unique is False | ||||
|     assert mi.has_duplicates is True | ||||
|  | ||||
|     # single instance of NaN | ||||
|     mi_nan = MultiIndex( | ||||
|         levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]] | ||||
|     ) | ||||
|     assert mi_nan.is_unique is True | ||||
|     assert mi_nan.has_duplicates is False | ||||
|  | ||||
|     # multiple instances of NaN | ||||
|     mi_nan_dup = MultiIndex( | ||||
|         levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]] | ||||
|     ) | ||||
|     assert mi_nan_dup.is_unique is False | ||||
|     assert mi_nan_dup.has_duplicates is True | ||||
|  | ||||
|  | ||||
| def test_has_duplicates_from_tuples(): | ||||
|     # GH 9075 | ||||
|     t = [ | ||||
|         ("x", "out", "z", 5, "y", "in", "z", 169), | ||||
|         ("x", "out", "z", 7, "y", "in", "z", 119), | ||||
|         ("x", "out", "z", 9, "y", "in", "z", 135), | ||||
|         ("x", "out", "z", 13, "y", "in", "z", 145), | ||||
|         ("x", "out", "z", 14, "y", "in", "z", 158), | ||||
|         ("x", "out", "z", 16, "y", "in", "z", 122), | ||||
|         ("x", "out", "z", 17, "y", "in", "z", 160), | ||||
|         ("x", "out", "z", 18, "y", "in", "z", 180), | ||||
|         ("x", "out", "z", 20, "y", "in", "z", 143), | ||||
|         ("x", "out", "z", 21, "y", "in", "z", 128), | ||||
|         ("x", "out", "z", 22, "y", "in", "z", 129), | ||||
|         ("x", "out", "z", 25, "y", "in", "z", 111), | ||||
|         ("x", "out", "z", 28, "y", "in", "z", 114), | ||||
|         ("x", "out", "z", 29, "y", "in", "z", 121), | ||||
|         ("x", "out", "z", 31, "y", "in", "z", 126), | ||||
|         ("x", "out", "z", 32, "y", "in", "z", 155), | ||||
|         ("x", "out", "z", 33, "y", "in", "z", 123), | ||||
|         ("x", "out", "z", 12, "y", "in", "z", 144), | ||||
|     ] | ||||
|  | ||||
|     mi = MultiIndex.from_tuples(t) | ||||
|     assert not mi.has_duplicates | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("nlevels", [4, 8]) | ||||
| @pytest.mark.parametrize("with_nulls", [True, False]) | ||||
| def test_has_duplicates_overflow(nlevels, with_nulls): | ||||
|     # handle int64 overflow if possible | ||||
|     # no overflow with 4 | ||||
|     # overflow possible with 8 | ||||
|     # 1000 rows: the codes 0..499 repeated twice over a 500-value level | ||||
|     codes = np.tile(np.arange(500), 2) | ||||
|     level = np.arange(500) | ||||
|  | ||||
|     if with_nulls:  # inject some null values | ||||
|         codes[500] = -1  # common nan value | ||||
|         # one independent copy per level, each given one more -1 at a | ||||
|         # position that depends on the level number | ||||
|         codes = [codes.copy() for i in range(nlevels)] | ||||
|         for i in range(nlevels): | ||||
|             codes[i][500 + i - nlevels // 2] = -1 | ||||
|  | ||||
|         # extra last level: -1 for the first 500 rows, 1 for the last 500 | ||||
|         codes += [np.array([-1, 1]).repeat(500)] | ||||
|     else: | ||||
|         # the same codes array for every level, plus a last level that is | ||||
|         # 0 for the first 500 rows and 1 for the last 500 | ||||
|         codes = [codes] * nlevels + [np.arange(2).repeat(500)] | ||||
|  | ||||
|     levels = [level] * nlevels + [[0, 1]] | ||||
|  | ||||
|     # no dups | ||||
|     mi = MultiIndex(levels=levels, codes=codes) | ||||
|     assert not mi.has_duplicates | ||||
|  | ||||
|     # with a dup | ||||
|     if with_nulls: | ||||
|  | ||||
|         def f(a): | ||||
|             # append a copy of the first code at the end (position 1000) | ||||
|             return np.insert(a, 1000, a[0]) | ||||
|  | ||||
|         codes = list(map(f, codes)) | ||||
|         mi = MultiIndex(levels=levels, codes=codes) | ||||
|     else: | ||||
|         # rebuild from tuples with the first row repeated at the end | ||||
|         values = mi.values.tolist() | ||||
|         mi = MultiIndex.from_tuples(values + [values[0]]) | ||||
|  | ||||
|     assert mi.has_duplicates | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "keep, expected", | ||||
|     [ | ||||
|         ("first", np.array([False, False, False, True, True, False])), | ||||
|         ("last", np.array([False, True, True, False, False, False])), | ||||
|         (False, np.array([False, True, True, True, True, False])), | ||||
|     ], | ||||
| ) | ||||
| def test_duplicated(idx_dup, keep, expected): | ||||
|     result = idx_dup.duplicated(keep=keep) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.arm_slow | ||||
| def test_duplicated_hashtable_impl(keep, monkeypatch): | ||||
|     # GH 9125 | ||||
|     # MultiIndex.duplicated must agree with hashtable.duplicated applied | ||||
|     # to the materialised tuples. | ||||
|     # NOTE(review): ``keep`` is presumably a shared conftest fixture | ||||
|     # covering the accepted ``keep`` values - confirm. | ||||
|     n, k = 6, 10 | ||||
|     levels = [np.arange(n), [str(i) for i in range(n)], 1000 + np.arange(n)] | ||||
|     # k * n = 60 random codes per level, drawn with a fixed seed | ||||
|     codes = [np.random.default_rng(2).choice(n, k * n) for _ in levels] | ||||
|     with monkeypatch.context() as m: | ||||
|         # cutoff lowered to 50, below the 60 rows, only inside this context | ||||
|         # NOTE(review): presumably this selects the hashtable-based code | ||||
|         # path - confirm against libindex | ||||
|         m.setattr(libindex, "_SIZE_CUTOFF", 50) | ||||
|         mi = MultiIndex(levels=levels, codes=codes) | ||||
|  | ||||
|         result = mi.duplicated(keep=keep) | ||||
|         expected = hashtable.duplicated(mi.values, keep=keep) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("val", [101, 102]) | ||||
| def test_duplicated_with_nan(val): | ||||
|     # GH5873 | ||||
|     mi = MultiIndex.from_arrays([[101, val], [3.5, np.nan]]) | ||||
|     assert not mi.has_duplicates | ||||
|  | ||||
|     tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool")) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("n", range(1, 6)) | ||||
| @pytest.mark.parametrize("m", range(1, 5)) | ||||
| def test_duplicated_with_nan_multi_shape(n, m): | ||||
|     # GH5873 | ||||
|     # all possible unique combinations, including nan | ||||
|     codes = product(range(-1, n), range(-1, m)) | ||||
|     mi = MultiIndex( | ||||
|         levels=[list("abcde")[:n], list("WXYZ")[:m]], | ||||
|         codes=np.random.default_rng(2).permutation(list(codes)).T, | ||||
|     ) | ||||
|     assert len(mi) == (n + 1) * (m + 1) | ||||
|     assert not mi.has_duplicates | ||||
|  | ||||
|     tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool")) | ||||
|  | ||||
|  | ||||
| def test_duplicated_drop_duplicates(): | ||||
|     # GH#4060 | ||||
|     idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) | ||||
|  | ||||
|     expected = np.array([False, False, False, True, False, False], dtype=bool) | ||||
|     duplicated = idx.duplicated() | ||||
|     tm.assert_numpy_array_equal(duplicated, expected) | ||||
|     assert duplicated.dtype == bool | ||||
|     expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) | ||||
|     tm.assert_index_equal(idx.drop_duplicates(), expected) | ||||
|  | ||||
|     expected = np.array([True, False, False, False, False, False]) | ||||
|     duplicated = idx.duplicated(keep="last") | ||||
|     tm.assert_numpy_array_equal(duplicated, expected) | ||||
|     assert duplicated.dtype == bool | ||||
|     expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) | ||||
|     tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected) | ||||
|  | ||||
|     expected = np.array([True, False, False, True, False, False]) | ||||
|     duplicated = idx.duplicated(keep=False) | ||||
|     tm.assert_numpy_array_equal(duplicated, expected) | ||||
|     assert duplicated.dtype == bool | ||||
|     expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) | ||||
|     tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype", | ||||
|     [ | ||||
|         np.complex64, | ||||
|         np.complex128, | ||||
|     ], | ||||
| ) | ||||
| def test_duplicated_series_complex_numbers(dtype): | ||||
|     # GH 17927 | ||||
|     expected = Series( | ||||
|         [False, False, False, True, False, False, False, True, False, True], | ||||
|         dtype=bool, | ||||
|     ) | ||||
|     result = Series( | ||||
|         [ | ||||
|             np.nan + np.nan * 1j, | ||||
|             0, | ||||
|             1j, | ||||
|             1j, | ||||
|             1, | ||||
|             1 + 1j, | ||||
|             1 + 2j, | ||||
|             1 + 1j, | ||||
|             np.nan, | ||||
|             np.nan + np.nan * 1j, | ||||
|         ], | ||||
|         dtype=dtype, | ||||
|     ).duplicated() | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_midx_unique_ea_dtype(): | ||||
|     # GH#48335 | ||||
|     vals_a = Series([1, 2, NA, NA], dtype="Int64") | ||||
|     vals_b = np.array([1, 2, 3, 3]) | ||||
|     midx = MultiIndex.from_arrays([vals_a, vals_b], names=["a", "b"]) | ||||
|     result = midx.unique() | ||||
|  | ||||
|     exp_vals_a = Series([1, 2, NA], dtype="Int64") | ||||
|     exp_vals_b = np.array([1, 2, 3]) | ||||
|     expected = MultiIndex.from_arrays([exp_vals_a, exp_vals_b], names=["a", "b"]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,284 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_any_real_numeric_dtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_equals(idx): | ||||
|     assert idx.equals(idx) | ||||
|     assert idx.equals(idx.copy()) | ||||
|     assert idx.equals(idx.astype(object)) | ||||
|     assert idx.equals(idx.to_flat_index()) | ||||
|     assert idx.equals(idx.to_flat_index().astype("category")) | ||||
|  | ||||
|     assert not idx.equals(list(idx)) | ||||
|     assert not idx.equals(np.array(idx)) | ||||
|  | ||||
|     same_values = Index(idx, dtype=object) | ||||
|     assert idx.equals(same_values) | ||||
|     assert same_values.equals(idx) | ||||
|  | ||||
|     if idx.nlevels == 1: | ||||
|         # do not test MultiIndex | ||||
|         assert not idx.equals(Series(idx)) | ||||
|  | ||||
|  | ||||
| def test_equals_op(idx): | ||||
|     # GH9947, GH10637 | ||||
|     # Element-wise ``==`` between the index and another index, a numpy | ||||
|     # array or a Series: equal lengths give a boolean array, unequal | ||||
|     # lengths raise ValueError. | ||||
|     index_a = idx | ||||
|  | ||||
|     n = len(index_a) | ||||
|     # index_b: one entry shorter; index_c: same length as index_a with the | ||||
|     # last entry replaced by a repeat of the second to last; index_d: length 1 | ||||
|     index_b = index_a[0:-1] | ||||
|     index_c = index_a[0:-1].append(index_a[-2:-1]) | ||||
|     index_d = index_a[0:1] | ||||
|     with pytest.raises(ValueError, match="Lengths must match"): | ||||
|         index_a == index_b | ||||
|     expected1 = np.array([True] * n) | ||||
|     expected2 = np.array([True] * (n - 1) + [False]) | ||||
|     tm.assert_numpy_array_equal(index_a == index_a, expected1) | ||||
|     tm.assert_numpy_array_equal(index_a == index_c, expected2) | ||||
|  | ||||
|     # test comparisons with numpy arrays | ||||
|     array_a = np.array(index_a) | ||||
|     array_b = np.array(index_a[0:-1]) | ||||
|     array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) | ||||
|     array_d = np.array(index_a[0:1]) | ||||
|     with pytest.raises(ValueError, match="Lengths must match"): | ||||
|         index_a == array_b | ||||
|     tm.assert_numpy_array_equal(index_a == array_a, expected1) | ||||
|     tm.assert_numpy_array_equal(index_a == array_c, expected2) | ||||
|  | ||||
|     # test comparisons with Series | ||||
|     series_a = Series(array_a) | ||||
|     series_b = Series(array_b) | ||||
|     series_c = Series(array_c) | ||||
|     series_d = Series(array_d) | ||||
|     with pytest.raises(ValueError, match="Lengths must match"): | ||||
|         index_a == series_b | ||||
|  | ||||
|     tm.assert_numpy_array_equal(index_a == series_a, expected1) | ||||
|     tm.assert_numpy_array_equal(index_a == series_c, expected2) | ||||
|  | ||||
|     # cases where length is 1 for one of them | ||||
|     with pytest.raises(ValueError, match="Lengths must match"): | ||||
|         index_a == index_d | ||||
|     with pytest.raises(ValueError, match="Lengths must match"): | ||||
|         index_a == series_d | ||||
|     with pytest.raises(ValueError, match="Lengths must match"): | ||||
|         index_a == array_d | ||||
|     # Series against Series fails with a different message | ||||
|     msg = "Can only compare identically-labeled Series objects" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         series_a == series_d | ||||
|     with pytest.raises(ValueError, match="Lengths must match"): | ||||
|         series_a == array_d | ||||
|  | ||||
|     # comparing with a scalar should broadcast; note that we are excluding | ||||
|     # MultiIndex because in this case each item in the index is a tuple of | ||||
|     # length 2, and therefore is considered an array of length 2 in the | ||||
|     # comparison instead of a scalar | ||||
|     if not isinstance(index_a, MultiIndex): | ||||
|         expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) | ||||
|         # assuming the 2nd to last item is unique in the data | ||||
|         item = index_a[-2] | ||||
|         tm.assert_numpy_array_equal(index_a == item, expected3) | ||||
|         tm.assert_series_equal(series_a == item, Series(expected3)) | ||||
|  | ||||
|  | ||||
| def test_compare_tuple(): | ||||
|     # GH#21517 | ||||
|     mi = MultiIndex.from_product([[1, 2]] * 2) | ||||
|  | ||||
|     all_false = np.array([False, False, False, False]) | ||||
|  | ||||
|     result = mi == mi[0] | ||||
|     expected = np.array([True, False, False, False]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = mi != mi[0] | ||||
|     tm.assert_numpy_array_equal(result, ~expected) | ||||
|  | ||||
|     result = mi < mi[0] | ||||
|     tm.assert_numpy_array_equal(result, all_false) | ||||
|  | ||||
|     result = mi <= mi[0] | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = mi > mi[0] | ||||
|     tm.assert_numpy_array_equal(result, ~expected) | ||||
|  | ||||
|     result = mi >= mi[0] | ||||
|     tm.assert_numpy_array_equal(result, ~all_false) | ||||
|  | ||||
|  | ||||
| def test_compare_tuple_strs(): | ||||
|     # GH#34180 | ||||
|  | ||||
|     mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")]) | ||||
|  | ||||
|     result = mi == ("c", "a") | ||||
|     expected = np.array([False, False, True]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = mi == ("c",) | ||||
|     expected = np.array([False, False, False]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_equals_multi(idx): | ||||
|     # equals / equal_levels against the index itself, its raw values, | ||||
|     # slices, and indexes that differ in level count, levels or codes | ||||
|     assert idx.equals(idx) | ||||
|     # the bare values array is not equal, an Index built from it is | ||||
|     assert not idx.equals(idx.values) | ||||
|     assert idx.equals(Index(idx.values)) | ||||
|  | ||||
|     assert idx.equal_levels(idx) | ||||
|     # a shorter slice and a single element are not equal | ||||
|     assert not idx.equals(idx[:-1]) | ||||
|     assert not idx.equals(idx[-1]) | ||||
|  | ||||
|     # different number of levels | ||||
|     index = MultiIndex( | ||||
|         levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))], | ||||
|         codes=[ | ||||
|             np.array([0, 0, 1, 2, 2, 2, 3, 3]), | ||||
|             np.array([0, 1, 0, 0, 0, 1, 0, 1]), | ||||
|             np.array([1, 0, 1, 1, 0, 0, 1, 0]), | ||||
|         ], | ||||
|     ) | ||||
|  | ||||
|     # index2 is index without its last level | ||||
|     index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1]) | ||||
|     assert not index.equals(index2) | ||||
|     assert not index.equal_levels(index2) | ||||
|  | ||||
|     # levels are different | ||||
|     major_axis = Index(list(range(4))) | ||||
|     minor_axis = Index(list(range(2))) | ||||
|  | ||||
|     major_codes = np.array([0, 0, 1, 2, 2, 3]) | ||||
|     minor_codes = np.array([0, 1, 0, 0, 1, 0]) | ||||
|  | ||||
|     index = MultiIndex( | ||||
|         levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] | ||||
|     ) | ||||
|     assert not idx.equals(index) | ||||
|     assert not idx.equal_levels(index) | ||||
|  | ||||
|     # some of the labels are different | ||||
|     major_axis = Index(["foo", "bar", "baz", "qux"]) | ||||
|     minor_axis = Index(["one", "two"]) | ||||
|  | ||||
|     major_codes = np.array([0, 0, 2, 2, 3, 3]) | ||||
|     minor_codes = np.array([0, 1, 0, 1, 0, 1]) | ||||
|  | ||||
|     index = MultiIndex( | ||||
|         levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] | ||||
|     ) | ||||
|     assert not idx.equals(index) | ||||
|  | ||||
|  | ||||
| def test_identical(idx): | ||||
|     mi = idx.copy() | ||||
|     mi2 = idx.copy() | ||||
|     assert mi.identical(mi2) | ||||
|  | ||||
|     mi = mi.set_names(["new1", "new2"]) | ||||
|     assert mi.equals(mi2) | ||||
|     assert not mi.identical(mi2) | ||||
|  | ||||
|     mi2 = mi2.set_names(["new1", "new2"]) | ||||
|     assert mi.identical(mi2) | ||||
|  | ||||
|     mi4 = Index(mi.tolist(), tupleize_cols=False) | ||||
|     assert not mi.identical(mi4) | ||||
|     assert mi.equals(mi4) | ||||
|  | ||||
|  | ||||
| def test_equals_operator(idx): | ||||
|     # GH9785 | ||||
|     assert (idx == idx).all() | ||||
|  | ||||
|  | ||||
| def test_equals_missing_values(): | ||||
|     # make sure take is not using -1 | ||||
|     i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))]) | ||||
|     result = i[0:1].equals(i[0]) | ||||
|     assert not result | ||||
|     result = i[1:2].equals(i[1]) | ||||
|     assert not result | ||||
|  | ||||
|  | ||||
| def test_equals_missing_values_differently_sorted(): | ||||
|     # GH#38439 | ||||
|     mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) | ||||
|     mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)]) | ||||
|     assert not mi1.equals(mi2) | ||||
|  | ||||
|     mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]) | ||||
|     assert mi1.equals(mi2) | ||||
|  | ||||
|  | ||||
| def test_is_(): | ||||
|     # is_() is kept across views and name assignment, and lost through | ||||
|     # set_names / set_levels | ||||
|     mi = MultiIndex.from_tuples(zip(range(10), range(10))) | ||||
|     assert mi.is_(mi) | ||||
|     # views, however deeply chained, still count as the same index | ||||
|     assert mi.is_(mi.view()) | ||||
|     assert mi.is_(mi.view().view().view().view()) | ||||
|     mi2 = mi.view() | ||||
|     # names are metadata, they don't change id | ||||
|     mi2.names = ["A", "B"] | ||||
|     assert mi2.is_(mi) | ||||
|     assert mi.is_(mi2) | ||||
|  | ||||
|     # the object returned by set_names is not the same index | ||||
|     assert not mi.is_(mi.set_names(["C", "D"])) | ||||
|     # levels are inherent properties, they change identity | ||||
|     mi3 = mi2.set_levels([list(range(10)), list(range(10))]) | ||||
|     assert not mi3.is_(mi2) | ||||
|     # shouldn't change | ||||
|     assert mi2.is_(mi) | ||||
|     mi4 = mi3.view() | ||||
|  | ||||
|     # GH 17464 - Remove duplicate MultiIndex levels | ||||
|     mi4 = mi4.set_levels([list(range(10)), list(range(10))]) | ||||
|     assert not mi4.is_(mi3) | ||||
|     mi5 = mi.view() | ||||
|     # even re-setting the very same levels breaks identity | ||||
|     mi5 = mi5.set_levels(mi5.levels) | ||||
|     assert not mi5.is_(mi) | ||||
|  | ||||
|  | ||||
| def test_is_all_dates(idx): | ||||
|     assert not idx._is_all_dates | ||||
|  | ||||
|  | ||||
| def test_is_numeric(idx): | ||||
|     # MultiIndex is never numeric | ||||
|     assert not is_any_real_numeric_dtype(idx) | ||||
|  | ||||
|  | ||||
| def test_multiindex_compare(): | ||||
|     # GH 21149 | ||||
|     # Ensure comparison operations for MultiIndex with nlevels == 1 | ||||
|     # behave consistently with those for MultiIndex with nlevels > 1 | ||||
|  | ||||
|     midx = MultiIndex.from_product([[0, 1]]) | ||||
|  | ||||
|     # Equality self-test: MultiIndex object vs self | ||||
|     expected = Series([True, True]) | ||||
|     result = Series(midx == midx) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # Greater than comparison: MultiIndex object vs self | ||||
|     expected = Series([False, False]) | ||||
|     result = Series(midx > midx) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_equals_ea_int_regular_int(): | ||||
|     # GH#46026 | ||||
|     mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]]) | ||||
|     mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]]) | ||||
|     assert not mi1.equals(mi2) | ||||
|     assert not mi2.equals(mi1) | ||||
| @ -0,0 +1,249 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_format(idx): | ||||
|     msg = "MultiIndex.format is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         idx.format() | ||||
|         idx[:0].format() | ||||
|  | ||||
|  | ||||
| def test_format_integer_names(): | ||||
|     index = MultiIndex( | ||||
|         levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1] | ||||
|     ) | ||||
|     msg = "MultiIndex.format is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         index.format(names=True) | ||||
|  | ||||
|  | ||||
| def test_format_sparse_config(idx): | ||||
|     # GH1538 | ||||
|     msg = "MultiIndex.format is deprecated" | ||||
|     with pd.option_context("display.multi_sparse", False): | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = idx.format() | ||||
|     assert result[1] == "foo  two" | ||||
|  | ||||
|  | ||||
| def test_format_sparse_display(): | ||||
|     index = MultiIndex( | ||||
|         levels=[[0, 1], [0, 1], [0, 1], [0]], | ||||
|         codes=[ | ||||
|             [0, 0, 0, 1, 1, 1], | ||||
|             [0, 0, 1, 0, 0, 1], | ||||
|             [0, 1, 0, 0, 1, 0], | ||||
|             [0, 0, 0, 0, 0, 0], | ||||
|         ], | ||||
|     ) | ||||
|     msg = "MultiIndex.format is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = index.format() | ||||
|     assert result[3] == "1  0  0  0" | ||||
|  | ||||
|  | ||||
def test_repr_with_unicode_data():
    # The repr of an index holding a non-ASCII label must contain no
    # backslash, i.e. the label must not come out unicode-escaped.
    with pd.option_context("display.encoding", "UTF-8"):
        data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        frame = pd.DataFrame(data)
        mi = frame.set_index(["a", "b"]).index
        rendered = repr(mi)
        assert "\\" not in rendered
|  | ||||
|  | ||||
def test_repr_roundtrip_raises():
    # Evaluating the repr of a MultiIndex is expected to fail with TypeError
    # rather than rebuild the index.
    level_values = [list("ab"), range(3)]
    mi = MultiIndex.from_product(level_values, names=["first", "second"])

    with pytest.raises(TypeError, match="Must pass both levels and codes"):
        # eval only ever sees the repr produced on this very line
        eval(repr(mi))
|  | ||||
|  | ||||
def test_unicode_string_with_unicode():
    # Smoke test: str() of an index holding a non-ASCII label must not raise.
    data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    frame = pd.DataFrame(data).set_index(["a", "b"])
    str(frame.index)
|  | ||||
|  | ||||
| def test_repr_max_seq_item_setting(idx): | ||||
|     # GH10182 | ||||
|     idx = idx.repeat(50) | ||||
|     with pd.option_context("display.max_seq_items", None): | ||||
|         repr(idx) | ||||
|         assert "..." not in str(idx) | ||||
|  | ||||
|  | ||||
| class TestRepr: | ||||
|     def test_unicode_repr_issues(self): | ||||
|         levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] | ||||
|         codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] | ||||
|         index = MultiIndex(levels=levels, codes=codes) | ||||
|  | ||||
|         repr(index.levels) | ||||
|         repr(index.get_level_values(1)) | ||||
|  | ||||
|     def test_repr_max_seq_items_equal_to_n(self, idx): | ||||
|         # display.max_seq_items == n | ||||
|         with pd.option_context("display.max_seq_items", 6): | ||||
|             result = idx.__repr__() | ||||
|             expected = """\ | ||||
| MultiIndex([('foo', 'one'), | ||||
|             ('foo', 'two'), | ||||
|             ('bar', 'one'), | ||||
|             ('baz', 'two'), | ||||
|             ('qux', 'one'), | ||||
|             ('qux', 'two')], | ||||
|            names=['first', 'second'])""" | ||||
|             assert result == expected | ||||
|  | ||||
|     def test_repr(self, idx): | ||||
|         result = idx[:1].__repr__() | ||||
|         expected = """\ | ||||
| MultiIndex([('foo', 'one')], | ||||
|            names=['first', 'second'])""" | ||||
|         assert result == expected | ||||
|  | ||||
|         result = idx.__repr__() | ||||
|         expected = """\ | ||||
| MultiIndex([('foo', 'one'), | ||||
|             ('foo', 'two'), | ||||
|             ('bar', 'one'), | ||||
|             ('baz', 'two'), | ||||
|             ('qux', 'one'), | ||||
|             ('qux', 'two')], | ||||
|            names=['first', 'second'])""" | ||||
|         assert result == expected | ||||
|  | ||||
|         with pd.option_context("display.max_seq_items", 5): | ||||
|             result = idx.__repr__() | ||||
|             expected = """\ | ||||
| MultiIndex([('foo', 'one'), | ||||
|             ('foo', 'two'), | ||||
|             ... | ||||
|             ('qux', 'one'), | ||||
|             ('qux', 'two')], | ||||
|            names=['first', 'second'], length=6)""" | ||||
|             assert result == expected | ||||
|  | ||||
|         # display.max_seq_items == 1 | ||||
|         with pd.option_context("display.max_seq_items", 1): | ||||
|             result = idx.__repr__() | ||||
|             expected = """\ | ||||
| MultiIndex([... | ||||
|             ('qux', 'two')], | ||||
|            names=['first', ...], length=6)""" | ||||
|             assert result == expected | ||||
|  | ||||
|     def test_rjust(self): | ||||
|         n = 1000 | ||||
|         ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) | ||||
|         dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) | ||||
|         mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"]) | ||||
|         result = mi[:1].__repr__() | ||||
|         expected = """\ | ||||
| MultiIndex([('a', 9, '2000-01-01 00:00:00')], | ||||
|            names=['a', 'b', 'dti'])""" | ||||
|         assert result == expected | ||||
|  | ||||
|         result = mi[::500].__repr__() | ||||
|         expected = """\ | ||||
| MultiIndex([(  'a',  9, '2000-01-01 00:00:00'), | ||||
|             (  'a',  9, '2000-01-01 00:08:20'), | ||||
|             ('abc', 10, '2000-01-01 00:16:40'), | ||||
|             ('abc', 10, '2000-01-01 00:25:00')], | ||||
|            names=['a', 'b', 'dti'])""" | ||||
|         assert result == expected | ||||
|  | ||||
|         result = mi.__repr__() | ||||
|         expected = """\ | ||||
| MultiIndex([(  'a',  9, '2000-01-01 00:00:00'), | ||||
|             (  'a',  9, '2000-01-01 00:00:01'), | ||||
|             (  'a',  9, '2000-01-01 00:00:02'), | ||||
|             (  'a',  9, '2000-01-01 00:00:03'), | ||||
|             (  'a',  9, '2000-01-01 00:00:04'), | ||||
|             (  'a',  9, '2000-01-01 00:00:05'), | ||||
|             (  'a',  9, '2000-01-01 00:00:06'), | ||||
|             (  'a',  9, '2000-01-01 00:00:07'), | ||||
|             (  'a',  9, '2000-01-01 00:00:08'), | ||||
|             (  'a',  9, '2000-01-01 00:00:09'), | ||||
|             ... | ||||
|             ('abc', 10, '2000-01-01 00:33:10'), | ||||
|             ('abc', 10, '2000-01-01 00:33:11'), | ||||
|             ('abc', 10, '2000-01-01 00:33:12'), | ||||
|             ('abc', 10, '2000-01-01 00:33:13'), | ||||
|             ('abc', 10, '2000-01-01 00:33:14'), | ||||
|             ('abc', 10, '2000-01-01 00:33:15'), | ||||
|             ('abc', 10, '2000-01-01 00:33:16'), | ||||
|             ('abc', 10, '2000-01-01 00:33:17'), | ||||
|             ('abc', 10, '2000-01-01 00:33:18'), | ||||
|             ('abc', 10, '2000-01-01 00:33:19')], | ||||
|            names=['a', 'b', 'dti'], length=2000)""" | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_tuple_width(self): | ||||
|         n = 1000 | ||||
|         ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) | ||||
|         dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) | ||||
|         levels = [ci, ci.codes + 9, dti, dti, dti] | ||||
|         names = ["a", "b", "dti_1", "dti_2", "dti_3"] | ||||
|         mi = MultiIndex.from_arrays(levels, names=names) | ||||
|         result = mi[:1].__repr__() | ||||
|         expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], | ||||
|            names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""  # noqa: E501 | ||||
|         assert result == expected | ||||
|  | ||||
|         result = mi[:10].__repr__() | ||||
|         expected = """\ | ||||
| MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), | ||||
|             ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], | ||||
|            names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" | ||||
|         assert result == expected | ||||
|  | ||||
|         result = mi.__repr__() | ||||
|         expected = """\ | ||||
| MultiIndex([(  'a',  9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), | ||||
|             (  'a',  9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...), | ||||
|             ... | ||||
|             ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), | ||||
|             ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], | ||||
|            names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_multiindex_long_element(self): | ||||
|         # Non-regression test towards GH#52960 | ||||
|         data = MultiIndex.from_tuples([("c" * 62,)]) | ||||
|  | ||||
|         expected = ( | ||||
|             "MultiIndex([('cccccccccccccccccccccccccccccccccccccccc" | ||||
|             "cccccccccccccccccccccc',)],\n           )" | ||||
|         ) | ||||
|         assert str(data) == expected | ||||
| @ -0,0 +1,124 @@ | ||||
| import numpy as np | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestGetLevelValues:
    # Checks on the scalar type handed back by MultiIndex.get_level_values.

    def test_get_level_values_box_datetime64(self):
        # the first element of a date_range-backed level must be a Timestamp
        mi = MultiIndex(
            levels=[date_range("1/1/2000", periods=4), [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
        )

        first_value = mi.get_level_values(0)[0]
        assert isinstance(first_value, Timestamp)
|  | ||||
|  | ||||
| def test_get_level_values(idx): | ||||
|     result = idx.get_level_values(0) | ||||
|     expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first") | ||||
|     tm.assert_index_equal(result, expected) | ||||
|     assert result.name == "first" | ||||
|  | ||||
|     result = idx.get_level_values("first") | ||||
|     expected = idx.get_level_values(0) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # GH 10460 | ||||
|     index = MultiIndex( | ||||
|         levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])], | ||||
|         codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])], | ||||
|     ) | ||||
|  | ||||
|     exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"]) | ||||
|     tm.assert_index_equal(index.get_level_values(0), exp) | ||||
|     exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) | ||||
|     tm.assert_index_equal(index.get_level_values(1), exp) | ||||
|  | ||||
|  | ||||
def test_get_level_values_all_na():
    # GH#17924 when level entirely consists of nan
    mi = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])

    # (level position, expected values) in the order they are checked
    checks = (
        (0, Index([np.nan, np.nan, np.nan], dtype=np.float64)),
        (1, Index(["a", np.nan, 1], dtype=object)),
    )
    for position, wanted in checks:
        tm.assert_index_equal(mi.get_level_values(position), wanted)
|  | ||||
|  | ||||
def test_get_level_values_int_with_na():
    # GH#17924
    # Second-level inputs mixing integers and NaN; the values read back from
    # the level must equal an Index built from the same list.
    second_level_inputs = (
        [1, np.nan, 2],
        [np.nan, np.nan, 2],
    )
    for values in second_level_inputs:
        mi = MultiIndex.from_arrays([["a", "b", "b"], values])
        tm.assert_index_equal(mi.get_level_values(1), Index(values))
|  | ||||
|  | ||||
def test_get_level_values_na():
    # an all-NaN level next to a mixed object level
    mixed = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])
    tm.assert_index_equal(
        mixed.get_level_values(0), Index([np.nan, np.nan, np.nan])
    )
    tm.assert_index_equal(mixed.get_level_values(1), Index(["a", np.nan, 1]))

    # a datetime level that contains NaT
    with_nat = MultiIndex.from_arrays(
        [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    )
    tm.assert_index_equal(
        with_nat.get_level_values(1), pd.DatetimeIndex([0, 1, pd.NaT])
    )

    # two empty levels
    empty = MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(empty.get_level_values(0), Index([], dtype=object))
|  | ||||
|  | ||||
def test_get_level_values_when_periods():
    # GH33131. See also discussion in GH32669.
    # This test can probably be removed when PeriodIndex._engine is removed.
    quarters = pd.PeriodIndex([pd.Period("2019Q1"), pd.Period("2019Q2")], name="b")
    source = MultiIndex.from_arrays([quarters])

    # rebuild a MultiIndex from the values of every level of the first one
    per_level_values = [
        source._get_level_values(position) for position in range(source.nlevels)
    ]
    rebuilt = MultiIndex.from_arrays(per_level_values)

    for level in rebuilt.levels:
        assert level.is_monotonic_increasing
|  | ||||
|  | ||||
| def test_values_loses_freq_of_underlying_index(): | ||||
|     # GH#49054 | ||||
|     idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BME")) | ||||
|     expected = idx.copy(deep=True) | ||||
|     idx2 = Index([1, 2, 3]) | ||||
|     midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]]) | ||||
|     midx.values | ||||
|     assert idx.freq is not None | ||||
|     tm.assert_index_equal(idx, expected) | ||||
| @ -0,0 +1,384 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import PY311 | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def assert_matching(actual, expected, check_dtype=False):
    # Compare two sequences of array-likes element by element after turning
    # each entry into a NumPy array, so the test does not depend on the
    # internal representation of either side.
    assert len(actual) == len(expected)

    converted_pairs = zip(map(np.asarray, actual), map(np.asarray, expected))
    for left, right in converted_pairs:
        tm.assert_numpy_array_equal(left, right, check_dtype=check_dtype)
|  | ||||
|  | ||||
def test_get_level_number_integer(idx):
    # With the integer names [1, 0], an integer argument is resolved as a
    # level name first: name 1 is position 0 and name 0 is position 1.
    idx.names = [1, 0]
    for name, position in ((1, 0), (0, 1)):
        assert idx._get_level_number(name) == position

    # an integer that is neither a name nor a valid position
    with pytest.raises(
        IndexError, match="Too many levels: Index has only 2 levels, not 3"
    ):
        idx._get_level_number(2)

    # a string that is not a level name
    with pytest.raises(KeyError, match="Level fourth not found"):
        idx._get_level_number("fourth")
|  | ||||
|  | ||||
| def test_get_dtypes(using_infer_string): | ||||
|     # Test MultiIndex.dtypes (# Gh37062) | ||||
|     idx_multitype = MultiIndex.from_product( | ||||
|         [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")], | ||||
|         names=["int", "string", "dt"], | ||||
|     ) | ||||
|  | ||||
|     exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan) | ||||
|     expected = pd.Series( | ||||
|         { | ||||
|             "int": np.dtype("int64"), | ||||
|             "string": exp, | ||||
|             "dt": DatetimeTZDtype(tz="utc"), | ||||
|         } | ||||
|     ) | ||||
|     tm.assert_series_equal(expected, idx_multitype.dtypes) | ||||
|  | ||||
|  | ||||
| def test_get_dtypes_no_level_name(using_infer_string): | ||||
|     # Test MultiIndex.dtypes (# GH38580 ) | ||||
|     idx_multitype = MultiIndex.from_product( | ||||
|         [ | ||||
|             [1, 2, 3], | ||||
|             ["a", "b", "c"], | ||||
|             pd.date_range("20200101", periods=2, tz="UTC"), | ||||
|         ], | ||||
|     ) | ||||
|     exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan) | ||||
|     expected = pd.Series( | ||||
|         { | ||||
|             "level_0": np.dtype("int64"), | ||||
|             "level_1": exp, | ||||
|             "level_2": DatetimeTZDtype(tz="utc"), | ||||
|         } | ||||
|     ) | ||||
|     tm.assert_series_equal(expected, idx_multitype.dtypes) | ||||
|  | ||||
|  | ||||
| def test_get_dtypes_duplicate_level_names(using_infer_string): | ||||
|     # Test MultiIndex.dtypes with non-unique level names (# GH45174) | ||||
|     result = MultiIndex.from_product( | ||||
|         [ | ||||
|             [1, 2, 3], | ||||
|             ["a", "b", "c"], | ||||
|             pd.date_range("20200101", periods=2, tz="UTC"), | ||||
|         ], | ||||
|         names=["A", "A", "A"], | ||||
|     ).dtypes | ||||
|     exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan) | ||||
|     expected = pd.Series( | ||||
|         [np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")], | ||||
|         index=["A", "A", "A"], | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
    # Level numbers outside the valid range raise IndexError, with a different
    # message for a too-large and for a too-negative number.
    frame = multiindex_dataframe_random_data

    out_of_range = ((2, "Too many levels"), (-3, "not a valid level number"))
    for level, pattern in out_of_range:
        with pytest.raises(IndexError, match=pattern):
            frame.index._get_level_number(level)
|  | ||||
|  | ||||
def test_set_name_methods(idx):
    # so long as these are synonyms, we don't need to test set_names
    original_names = ["first", "second"]
    assert idx.rename == idx.set_names

    suffixed = [f"{name}SUFFIX" for name in original_names]
    renamed = idx.set_names(suffixed)
    # the source index keeps its names; only the returned index is renamed
    assert idx.names == original_names
    assert renamed.names == suffixed

    # passing more names than there are levels is rejected
    with pytest.raises(
        ValueError, match="Length of names must match number of levels in MultiIndex"
    ):
        renamed.set_names(suffixed * 2)

    # the in-place variant returns None and renames the index it is called on
    resuffixed = [f"{name}SUFFIX2" for name in suffixed]
    returned = renamed.set_names(resuffixed, inplace=True)
    assert returned is None
    assert renamed.names == resuffixed

    # set names for specific level (GH7792)
    renamed = idx.set_names(suffixed[0], level=0)
    assert idx.names == original_names
    assert renamed.names == [suffixed[0], original_names[1]]

    returned = renamed.set_names(resuffixed[0], level=0, inplace=True)
    assert returned is None
    assert renamed.names == [resuffixed[0], original_names[1]]

    # set names for multiple levels
    renamed = idx.set_names(suffixed, level=[0, 1])
    assert idx.names == original_names
    assert renamed.names == suffixed

    returned = renamed.set_names(resuffixed, level=[0, 1], inplace=True)
    assert returned is None
    assert renamed.names == resuffixed
|  | ||||
|  | ||||
def test_set_levels_codes_directly(idx):
    # setting levels/codes directly raises AttributeError
    replacement_levels = [[label + "a" for label in level] for level in idx.levels]

    first_codes, second_codes = idx.codes
    replacement_codes = [
        [(code + 1) % 3 for code in first_codes],
        [(code + 1) % 1 for code in second_codes],
    ]

    with pytest.raises(AttributeError, match="Can't set attribute"):
        idx.levels = replacement_levels

    # the wording expected for ``codes`` depends on the PY311 compat flag
    if PY311:
        codes_msg = "property 'codes' of 'MultiIndex' object has no setter"
    else:
        codes_msg = "can't set attribute"
    with pytest.raises(AttributeError, match=codes_msg):
        idx.codes = replacement_codes
|  | ||||
|  | ||||
def test_set_levels(idx):
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    new_levels = [[label + "a" for label in level] for level in levels]

    # (values passed, ``level`` argument, levels expected on the result);
    # ``level=None`` is the keyword's default, i.e. "replace every level"
    accepted = [
        (new_levels, None, new_levels),
        (new_levels[0], 0, [new_levels[0], levels[1]]),
        (new_levels[1], 1, [levels[0], new_levels[1]]),
        (new_levels, [0, 1], new_levels),
    ]
    for values, level, wanted in accepted:
        changed = idx.set_levels(values, level=level)
        assert_matching(changed.levels, wanted)
        # [w/o mutation] the source index keeps its own levels
        assert_matching(idx.levels, levels)

    # illegal level changing should not change levels
    # GH 13754
    snapshot = idx.copy()
    rejected = [
        (ValueError, "^On", idx.set_levels, ["c"], "levels"),
        (ValueError, "^On", idx.set_codes, [0, 1, 2, 3, 4, 5], "codes"),
        (TypeError, "^Levels", idx.set_levels, "c", "levels"),
        (TypeError, "^Codes", idx.set_codes, 1, "codes"),
    ]
    for exc_type, pattern, setter, bad_value, attribute in rejected:
        with pytest.raises(exc_type, match=pattern):
            setter(bad_value, level=0)
        assert_matching(
            getattr(idx, attribute), getattr(snapshot, attribute), check_dtype=True
        )
|  | ||||
|  | ||||
def test_set_codes(idx):
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    first_codes, second_codes = codes
    new_codes = [
        [(code + 1) % 3 for code in first_codes],
        [(code + 1) % 1 for code in second_codes],
    ]

    # (values passed, ``level`` argument, codes expected on the result);
    # ``level=None`` is the keyword's default, i.e. "replace every level"
    accepted = [
        (new_codes, None, new_codes),
        (new_codes[0], 0, [new_codes[0], codes[1]]),
        (new_codes[1], 1, [codes[0], new_codes[1]]),
        (new_codes, [0, 1], new_codes),
    ]
    for values, level, wanted in accepted:
        changed = idx.set_codes(values, level=level)
        assert_matching(changed.codes, wanted)
        # w/o mutation: the source index keeps its own codes
        assert_matching(idx.codes, codes)

    # label changing for levels of different magnitude of categories
    wide = MultiIndex.from_tuples([(0, i) for i in range(130)])
    reversed_codes = range(129, -1, -1)
    wanted = MultiIndex.from_tuples([(0, i) for i in reversed_codes])

    # [w/o mutation]
    outcome = wide.set_codes(codes=reversed_codes, level=1)
    assert outcome.equals(wanted)
|  | ||||
|  | ||||
def test_set_levels_codes_names_bad_input(idx):
    # Every malformed call below has to raise the listed exception with a
    # message matching the listed pattern.
    levels, codes = idx.levels, idx.codes
    names = idx.names

    # (exception type, message pattern, call that must fail)
    bad_calls = [
        # too few entries for the number of levels
        (ValueError, "Length of levels", lambda: idx.set_levels([levels[0]])),
        (ValueError, "Length of codes", lambda: idx.set_codes([codes[0]])),
        (ValueError, "Length of names", lambda: idx.set_names([names[0]])),
        # shouldn't scalar data error, instead should demand list-like
        (TypeError, "list of lists-like", lambda: idx.set_levels(levels[0])),
        (TypeError, "list of lists-like", lambda: idx.set_codes(codes[0])),
        (TypeError, "list-like", lambda: idx.set_names(names[0])),
        # should have equal lengths
        (
            TypeError,
            "list of lists-like",
            lambda: idx.set_levels(levels[0], level=[0, 1]),
        ),
        (TypeError, "list-like", lambda: idx.set_levels(levels, level=0)),
        # should have equal lengths
        (
            TypeError,
            "list of lists-like",
            lambda: idx.set_codes(codes[0], level=[0, 1]),
        ),
        (TypeError, "list-like", lambda: idx.set_codes(codes, level=0)),
        # should have equal lengths
        (
            ValueError,
            "Length of names",
            lambda: idx.set_names(names[0], level=[0, 1]),
        ),
        (TypeError, "Names must be a", lambda: idx.set_names(names, level=0)),
    ]
    for exc_type, pattern, call in bad_calls:
        with pytest.raises(exc_type, match=pattern):
            call()
|  | ||||
|  | ||||
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    # GH 21149
    # Ensure that .set_names for MultiIndex with
    # nlevels == 1 does not raise any errors
    mi = MultiIndex.from_product([[0, 1]])
    returned = mi.set_names("first", level=0, inplace=inplace)

    # for an in-place call the renamed object is ``mi`` itself, so that is
    # what gets compared instead of the call's return value
    renamed = mi if inplace else returned

    tm.assert_index_equal(
        renamed,
        MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"]),
    )
|  | ||||
|  | ||||
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    # GH13854
    source = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    cat_level = CategoricalIndex(list("bac"), ordered=ordered)

    # swapping in the categorical level keeps the codes of the source index
    swapped = source.set_levels(cat_level, level=0)
    tm.assert_index_equal(
        swapped,
        MultiIndex(levels=[cat_level, [0, 1, 2, 3]], codes=source.codes),
    )

    # values read back from the level keep the categories and ordered flag
    tm.assert_index_equal(
        swapped.get_level_values(0),
        CategoricalIndex(
            list("bacb"), categories=cat_level.categories, ordered=cat_level.ordered
        ),
    )
|  | ||||
|  | ||||
def test_set_value_keeps_names():
    # motivating example from #3742
    people = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
    numbers = ["1", "2", "3"] * 2
    row_index = MultiIndex.from_arrays([people, numbers], names=["Name", "Number"])
    values = np.random.default_rng(2).standard_normal((6, 4))
    frame = pd.DataFrame(
        values,
        columns=["one", "two", "three", "four"],
        index=row_index,
    )
    frame = frame.sort_index()

    wanted_names = ("Name", "Number")
    assert frame._is_copy is None
    assert frame.index.names == wanted_names

    # assign through .at with a key whose second part ("4") is not among the
    # labels used to build the index
    frame.at[("grethe", "4"), "one"] = 99.34

    assert frame._is_copy is None
    assert frame.index.names == wanted_names
|  | ||||
|  | ||||
def test_set_levels_with_iterable():
    # GH23273
    # set_levels is handed a ``map`` object rather than a list
    level_names = ["size", "color"]
    colors = ["black"] * 3
    source = MultiIndex.from_arrays([[1, 2, 3], colors], names=level_names)

    changed = source.set_levels(map(int, ["3", "2", "1"]), level="size")

    wanted = MultiIndex.from_arrays([[3, 2, 1], colors], names=["size", "color"])
    tm.assert_index_equal(changed, wanted)
|  | ||||
|  | ||||
def test_set_empty_level():
    # GH#48636
    # Replacing the only (empty) level with an empty DatetimeIndex must match
    # an index built from that empty DatetimeIndex directly.
    empty_mi = MultiIndex.from_arrays([[]], names=["A"])
    swapped = empty_mi.set_levels(pd.DatetimeIndex([]), level=0)
    tm.assert_index_equal(
        swapped,
        MultiIndex.from_arrays([pd.DatetimeIndex([])], names=["A"]),
    )
|  | ||||
|  | ||||
def test_set_levels_pos_args_removal():
    # https://github.com/pandas-dev/pandas/issues/41485
    # Passing ``level`` positionally to set_levels / set_codes must raise.
    mi = MultiIndex.from_tuples([(1, "one"), (3, "one")], names=["foo", "bar"])
    positional_msg = "positional arguments"

    with pytest.raises(TypeError, match=positional_msg):
        mi.set_levels(["a", "b", "c"], 0)

    with pytest.raises(TypeError, match=positional_msg):
        mi.set_codes([[0, 1], [1, 0]], 0)
|  | ||||
|  | ||||
| def test_set_levels_categorical_keep_dtype(): | ||||
|     # GH#52125 | ||||
|     midx = MultiIndex.from_arrays([[5, 6]]) | ||||
|     result = midx.set_levels(levels=pd.Categorical([1, 2]), level=0) | ||||
|     expected = MultiIndex.from_arrays([pd.Categorical([1, 2])]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,289 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import index as libindex | ||||
|  | ||||
| from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     RangeIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_labels_dtypes():
    # GH 8456: the integer dtype of the codes depends on the level size
    small = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    for level_codes in small.codes:
        assert level_codes.dtype == "int8"

    # (number of distinct second-level values, expected dtype of its codes)
    size_to_dtype = [(40, "int8"), (400, "int16"), (40000, "int32")]
    for n_values, expected_dtype in size_to_dtype:
        mi = MultiIndex.from_product([["a"], range(n_values)])
        assert mi.codes[1].dtype == expected_dtype

    # no code may be negative for a product index
    mi = MultiIndex.from_product([["a"], range(1000)])
    for level_codes in mi.codes:
        assert (level_codes >= 0).all()
|  | ||||
|  | ||||
def test_values_boxed():
    # ``.values`` must hand back the original tuples (including the NaT one)
    # as a 1-D object array
    records = [
        (1, pd.Timestamp("2000-01-01")),
        (2, pd.NaT),
        (3, pd.Timestamp("2000-01-03")),
        (1, pd.Timestamp("2000-01-04")),
        (2, pd.Timestamp("2000-01-02")),
        (3, pd.Timestamp("2000-01-03")),
    ]
    mi = MultiIndex.from_tuples(records)
    as_object_array = construct_1d_object_array_from_listlike(records)
    tm.assert_numpy_array_equal(mi.values, as_object_array)

    # Check that code branches for boxed values produce identical results:
    # slicing the values and taking the values of a slice must agree
    sliced_after = mi.values[:4]
    sliced_before = mi[:4].values
    tm.assert_numpy_array_equal(sliced_after, sliced_before)
|  | ||||
|  | ||||
def test_values_multiindex_datetimeindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    raw = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(raw)
    aware = pd.DatetimeIndex(raw, tz="US/Central")
    mi = MultiIndex.from_arrays([naive, aware])

    # second case is the sliced index (n_lev > n_lab)
    cases = [
        (mi, naive, aware),
        (mi[:2], naive[:2], aware[:2]),
    ]
    for subset, expected_outer, expected_inner in cases:
        tuples = subset.values

        outer = pd.DatetimeIndex([pair[0] for pair in tuples])
        tm.assert_index_equal(outer, expected_outer)

        inner = pd.DatetimeIndex([pair[1] for pair in tuples])
        tm.assert_index_equal(inner, expected_inner)
|  | ||||
|  | ||||
def test_values_multiindex_periodindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    raw = np.arange(2007, 2012)
    periods = pd.PeriodIndex(raw, freq="D")
    mi = MultiIndex.from_arrays([raw, periods])

    # second case is the sliced index (n_lev > n_lab)
    cases = [
        (mi, raw, periods),
        (mi[:2], raw[:2], periods[:2]),
    ]
    for subset, expected_ints, expected_periods in cases:
        tuples = subset.values

        outer = Index([pair[0] for pair in tuples])
        tm.assert_index_equal(outer, Index(expected_ints, dtype=np.int64))

        inner = pd.PeriodIndex([pair[1] for pair in tuples])
        tm.assert_index_equal(inner, expected_periods)
|  | ||||
|  | ||||
def test_consistency():
    # need to construct an overflow
    n_major = 70000
    major_levels = list(range(n_major))
    minor_levels = list(range(10))

    # the fact that this constructs without raising means it's consistent;
    # the resulting index itself is not inspected
    MultiIndex(
        levels=[major_levels, minor_levels],
        codes=[np.arange(n_major), np.repeat(range(10), 7000)],
    )

    # inconsistent: the (1, 1) code pair occurs twice
    repeated_major = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    repeated_minor = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    with_duplicate = MultiIndex(
        levels=[major_levels, minor_levels],
        codes=[repeated_major, repeated_minor],
    )

    assert with_duplicate.is_unique is False
|  | ||||
|  | ||||
@pytest.mark.slow
def test_hash_collisions(monkeypatch):
    # non-smoke test that we don't get hash collisions
    with monkeypatch.context() as patcher:
        # lowered cutoff (50) is below the 64 entries built here
        patcher.setattr(libindex, "_SIZE_CUTOFF", 50)
        mi = MultiIndex.from_product(
            [np.arange(8), np.arange(8)], names=["one", "two"]
        )
        n_entries = len(mi)

        # every entry must be found at its own position
        positions = mi.get_indexer(mi.values)
        tm.assert_numpy_array_equal(positions, np.arange(n_entries, dtype="intp"))

        # spot-check scalar lookups at both ends
        for pos in (0, 1, n_entries - 2, n_entries - 1):
            located = mi.get_loc(mi[pos])
            assert located == pos
|  | ||||
|  | ||||
def test_dims():
    """Placeholder: this test currently performs no checks."""
|  | ||||
|  | ||||
def test_take_invalid_kwargs():
    # ``take`` rejects unknown keywords and the numpy-only ``out`` / ``mode``
    level_values = [
        ["A", "B"],
        [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")],
    ]
    mi = MultiIndex.from_product(level_values, names=["str", "dt"])
    positions = [1, 2]

    # (expected exception, expected message, offending keyword arguments)
    cases = [
        (TypeError, r"take\(\) got an unexpected keyword argument 'foo'", {"foo": 2}),
        (ValueError, "the 'out' parameter is not supported", {"out": positions}),
        (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
    ]
    for exc_type, msg, bad_kwargs in cases:
        with pytest.raises(exc_type, match=msg):
            mi.take(positions, **bad_kwargs)
|  | ||||
|  | ||||
def test_isna_behavior(idx):
    # should not segfault GH5123
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    expected_msg = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_msg):
        pd.isna(idx)
|  | ||||
|  | ||||
def test_large_multiindex_error(monkeypatch):
    # GH12527: a missing key raises KeyError for frames built with an inner
    # level one shorter and one longer than the patched cutoff
    cutoff = 50
    with monkeypatch.context() as patcher:
        patcher.setattr(libindex, "_SIZE_CUTOFF", cutoff)
        for inner_len in (cutoff - 1, cutoff + 1):
            frame = pd.DataFrame(
                1,
                index=MultiIndex.from_product([[1, 2], range(inner_len)]),
                columns=["dest"],
            )
            # first-level values -1 and 3 are both absent from [1, 2]
            with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
                frame.loc[(-1, 0), "dest"]
            with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
                frame.loc[(3, 0), "dest"]
|  | ||||
|  | ||||
def test_mi_hashtable_populated_attribute_error(monkeypatch):
    # GH 18165: accessing a missing attribute on a column of a MultiIndexed
    # frame must surface as a plain AttributeError
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
    fifty = range(50)
    frame = pd.DataFrame(
        {"a": fifty, "b": fifty}, index=MultiIndex.from_arrays([fifty, fifty])
    )

    with pytest.raises(
        AttributeError, match="'Series' object has no attribute 'foo'"
    ):
        frame["a"].foo()
|  | ||||
|  | ||||
| def test_can_hold_identifiers(idx): | ||||
|     key = idx[0] | ||||
|     assert idx._can_hold_identifiers_and_holds_name(key) is True | ||||
|  | ||||
|  | ||||
def test_metadata_immutable(idx):
    # item assignment must fail on levels, codes and names alike
    frozen_msg = re.compile("does not support mutable operations")
    levels = idx.levels
    codes = idx.codes

    # shouldn't be able to set at either the top level or base level
    with pytest.raises(TypeError, match=frozen_msg):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=frozen_msg):
        levels[0][0] = levels[0][0]

    # ditto for codes; writing into an individual code array is rejected
    # with the "read-only" ValueError instead
    with pytest.raises(TypeError, match=frozen_msg):
        codes[0] = codes[0]
    with pytest.raises(ValueError, match="assignment destination is read-only"):
        codes[0][0] = codes[0][0]

    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=frozen_msg):
        names[0] = names[0]
|  | ||||
|  | ||||
def test_level_setting_resets_attributes():
    # monotonicity must be re-evaluated for the index returned by set_levels
    before = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert before.is_monotonic_increasing

    # second level values swapped from [1, 2, 3] to [1, 3, 2]
    after = before.set_levels([["A", "B"], [1, 3, 2]])
    # if this fails, probably didn't reset the cache correctly.
    assert not after.is_monotonic_increasing
|  | ||||
|  | ||||
| def test_rangeindex_fallback_coercion_bug(): | ||||
|     # GH 12893 | ||||
|     df1 = pd.DataFrame(np.arange(100).reshape((10, 10))) | ||||
|     df2 = pd.DataFrame(np.arange(100).reshape((10, 10))) | ||||
|     df = pd.concat( | ||||
|         {"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)}, | ||||
|         axis=1, | ||||
|     ) | ||||
|     df.index.names = ["fizz", "buzz"] | ||||
|  | ||||
|     expected = pd.DataFrame( | ||||
|         {"df2": np.arange(100), "df1": np.arange(100)}, | ||||
|         index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(df, expected, check_like=True) | ||||
|  | ||||
|     result = df.index.get_level_values("fizz") | ||||
|     expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     result = df.index.get_level_values("buzz") | ||||
|     expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz") | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_memory_usage(idx): | ||||
|     result = idx.memory_usage() | ||||
|     if len(idx): | ||||
|         idx.get_loc(idx[0]) | ||||
|         result2 = idx.memory_usage() | ||||
|         result3 = idx.memory_usage(deep=True) | ||||
|  | ||||
|         # RangeIndex, IntervalIndex | ||||
|         # don't have engines | ||||
|         if not isinstance(idx, (RangeIndex, IntervalIndex)): | ||||
|             assert result2 > result | ||||
|  | ||||
|         if idx.inferred_type == "object": | ||||
|             assert result3 > result2 | ||||
|  | ||||
|     else: | ||||
|         # we report 0 for no-length | ||||
|         assert result == 0 | ||||
|  | ||||
|  | ||||
| def test_nlevels(idx): | ||||
|     assert idx.nlevels == 2 | ||||
| @ -0,0 +1,103 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import MultiIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_isin_nan():
    # a NaN in the lookup tuple must match the NaN entry, whether it is
    # spelled ``np.nan`` or ``float("nan")``
    mi = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    for nan_value in (np.nan, float("nan")):
        mask = mi.isin([("bar", nan_value)])
        tm.assert_numpy_array_equal(mask, np.array([False, True]))
|  | ||||
|  | ||||
| def test_isin_missing(nulls_fixture): | ||||
|     # GH48905 | ||||
|     mi1 = MultiIndex.from_tuples([(1, nulls_fixture)]) | ||||
|     mi2 = MultiIndex.from_tuples([(1, 1), (1, 2)]) | ||||
|     result = mi2.isin(mi1) | ||||
|     expected = np.array([False, False]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_isin():
    candidates = [("foo", 2), ("bar", 3), ("quux", 4)]

    # only ("foo", 2) and ("bar", 3) are present in the index
    populated = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    mask = populated.isin(candidates)
    tm.assert_numpy_array_equal(mask, np.array([False, False, True, True]))

    # empty, return dtype bool
    empty_mask = MultiIndex.from_arrays([[], []]).isin(candidates)
    assert len(empty_mask) == 0
    assert empty_mask.dtype == np.bool_
|  | ||||
|  | ||||
def test_isin_level_kwarg():
    mi = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])

    first_level_vals = ["foo", "bar", "quux"]
    second_level_vals = [2, 3, 10]
    expected = np.array([False, False, True, True])

    # a level may be addressed by positive or negative position
    for level in (0, -2):
        tm.assert_numpy_array_equal(expected, mi.isin(first_level_vals, level=level))
    for level in (1, -1):
        tm.assert_numpy_array_equal(expected, mi.isin(second_level_vals, level=level))

    # integer positions outside the two available levels
    out_of_range = [
        (5, "Too many levels: Index has only 2 levels, not 6"),
        (
            -5,
            "Too many levels: Index has only 2 levels, -5 is not a valid level number",
        ),
    ]
    for level, msg in out_of_range:
        with pytest.raises(IndexError, match=msg):
            mi.isin(first_level_vals, level=level)

    # floats and unknown names are looked up as (missing) level names
    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        mi.isin(first_level_vals, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        mi.isin(second_level_vals, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        mi.isin(second_level_vals, level="A")

    # once the levels are named, those names become valid
    mi.names = ["A", "B"]
    tm.assert_numpy_array_equal(expected, mi.isin(first_level_vals, level="A"))
    tm.assert_numpy_array_equal(expected, mi.isin(second_level_vals, level="B"))

    with pytest.raises(KeyError, match="'Level C not found'"):
        mi.isin(second_level_vals, level="C")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "labels,expected,level",
    [
        ([("b", np.nan)], np.array([False, False, True]), None),
        ([np.nan, "a"], np.array([True, True, False]), 0),
        (["d", np.nan], np.array([False, True, True]), 1),
    ],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
    # GH 19132: NaN entries take part in ``isin`` both for whole tuples
    # (level=None) and for a single level
    mi = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]])
    mask = mi.isin(labels, level=level)
    tm.assert_numpy_array_equal(mask, expected)
|  | ||||
|  | ||||
def test_isin_empty():
    # GH#51599: an empty list of candidates matches nothing
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]])
    mask = mi.isin([])
    tm.assert_numpy_array_equal(mask, np.array([False, False]))
|  | ||||
|  | ||||
def test_isin_generator():
    # GH#52568: candidates may be supplied as a generator
    mi = MultiIndex.from_tuples([(1, 2)])
    candidates = (pair for pair in [(1, 2)])
    mask = mi.isin(candidates)
    tm.assert_numpy_array_equal(mask, np.array([True]))
| @ -0,0 +1,268 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Interval, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     StringDtype, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
    joined, other_indexer, idx_indexer = other.join(
        idx, how=join_type, level="second", return_indexers=True
    )

    # the joined level is what a plain join against that level alone gives
    expected_level = other.join(idx.levels[1], how=join_type)
    assert joined.levels[0].equals(idx.levels[0])
    assert joined.levels[1].equals(expected_level)

    # pare down levels
    keep = np.array([entry[1] in expected_level for entry in idx], dtype=bool)
    expected_values = idx.values[keep]
    tm.assert_numpy_array_equal(joined.values, expected_values)

    if join_type not in ("outer", "inner"):
        return

    # with the operands swapped the two indexers come back in swapped order
    joined_flipped, idx_indexer_flipped, other_indexer_flipped = idx.join(
        other, how=join_type, level="second", return_indexers=True
    )

    assert joined.equals(joined_flipped)
    tm.assert_numpy_array_equal(other_indexer, other_indexer_flipped)
    tm.assert_numpy_array_equal(idx_indexer, idx_indexer_flipped)
    tm.assert_numpy_array_equal(joined_flipped.values, expected_values)
|  | ||||
|  | ||||
def test_join_level_corner_case(idx):
    # some corner cases
    flat = Index(["three", "one", "two"])
    joined = flat.join(idx, level="second")
    assert isinstance(joined, MultiIndex)

    # a level join between two MultiIndexes is rejected
    with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
        idx.join(idx, level=1)
|  | ||||
|  | ||||
| def test_join_self(idx, join_type): | ||||
|     result = idx.join(idx, how=join_type) | ||||
|     expected = idx | ||||
|     if join_type == "outer": | ||||
|         expected = expected.sort_values() | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_join_multi():
    # GH 10665
    multi = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
    flat = Index([1, 2, 5], name="b")

    # inner
    inner_expected = MultiIndex.from_product(
        [np.arange(4), [1, 2]], names=["a", "b"]
    )
    inner_multi_indexer = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    inner_flat_indexer = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)

    joined, multi_indexer, flat_indexer = multi.join(
        flat, how="inner", return_indexers=True
    )
    tm.assert_index_equal(joined, inner_expected)
    tm.assert_numpy_array_equal(multi_indexer, inner_multi_indexer)
    tm.assert_numpy_array_equal(flat_indexer, inner_flat_indexer)

    # flip: the indexers are returned in the opposite order
    joined, flat_indexer, multi_indexer = flat.join(
        multi, how="inner", return_indexers=True
    )
    tm.assert_index_equal(joined, inner_expected)
    tm.assert_numpy_array_equal(multi_indexer, inner_multi_indexer)
    tm.assert_numpy_array_equal(flat_indexer, inner_flat_indexer)

    # keep MultiIndex
    keep_flat_indexer = np.array([-1, 0, 1, -1] * 4, dtype=np.intp)

    joined, multi_indexer, flat_indexer = multi.join(
        flat, how="left", return_indexers=True
    )
    tm.assert_index_equal(joined, multi)
    assert multi_indexer is None
    tm.assert_numpy_array_equal(flat_indexer, keep_flat_indexer)

    # flip
    joined, flat_indexer, multi_indexer = flat.join(
        multi, how="right", return_indexers=True
    )
    tm.assert_index_equal(joined, multi)
    assert multi_indexer is None
    tm.assert_numpy_array_equal(flat_indexer, keep_flat_indexer)
|  | ||||
|  | ||||
def test_join_multi_wrong_order():
    # GH 25760
    # GH 28956
    # same level values on both sides, but the level names are swapped
    left = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"])

    joined, left_indexer, right_indexer = left.join(right, return_indexers=True)

    tm.assert_index_equal(left, joined)
    assert left_indexer is None
    # nothing on the right is matched
    no_matches = np.array([-1, -1, -1, -1], dtype=np.intp)
    tm.assert_numpy_array_equal(right_indexer, no_matches)
|  | ||||
|  | ||||
def test_join_multi_return_indexers():
    # GH 34074: with return_indexers=False only the joined index comes back
    three_levels = MultiIndex.from_product(
        [[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"]
    )
    two_levels = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])

    joined = three_levels.join(two_levels, return_indexers=False)
    tm.assert_index_equal(joined, three_levels)
|  | ||||
|  | ||||
def test_join_overlapping_interval_level():
    # GH 44096
    names = ["num", "interval"]
    ordered_pairs = [
        (1, Interval(0.0, 1.0)),
        (1, Interval(1.0, 2.0)),
        (1, Interval(2.0, 5.0)),
        (2, Interval(0.0, 1.0)),
        (2, Interval(1.0, 3.0)),  # interval limit is here at 3.0, not at 2.0
        (2, Interval(3.0, 5.0)),
    ]
    # same pairs, with the last entry of each "num" group moved to the front
    reordered_pairs = [ordered_pairs[pos] for pos in (2, 0, 1, 5, 3, 4)]

    left = MultiIndex.from_tuples(ordered_pairs, names=names)
    right = MultiIndex.from_tuples(reordered_pairs, names=names)
    expected = MultiIndex.from_tuples(ordered_pairs, names=names)

    joined = left.join(right, how="outer")

    tm.assert_index_equal(joined, expected)
|  | ||||
|  | ||||
def test_join_midx_ea():
    # GH#49277
    def int64(values):
        # Series with the nullable "Int64" dtype
        return Series(values, dtype="Int64")

    left = MultiIndex.from_arrays(
        [int64([1, 1, 3]), int64([1, 2, 3])], names=["a", "b"]
    )
    right = MultiIndex.from_arrays([int64([1]), int64([3])], names=["a", "c"])

    # the two indexes share only level "a"
    joined = left.join(right, how="inner")

    expected = MultiIndex.from_arrays(
        [int64([1, 1]), int64([1, 2]), int64([3, 3])], names=["a", "b", "c"]
    )
    tm.assert_index_equal(joined, expected)
|  | ||||
|  | ||||
def test_join_midx_string():
    # GH#49277
    def strings(values):
        # Series with the default StringDtype
        return Series(values, dtype=StringDtype())

    left = MultiIndex.from_arrays(
        [strings(["a", "a", "c"]), strings(["a", "b", "c"])], names=["a", "b"]
    )
    right = MultiIndex.from_arrays(
        [strings(["a"]), strings(["c"])], names=["a", "c"]
    )

    # the two indexes share only level "a"
    joined = left.join(right, how="inner")

    expected = MultiIndex.from_arrays(
        [strings(["a", "a"]), strings(["a", "b"]), strings(["c", "c"])],
        names=["a", "b", "c"],
    )
    tm.assert_index_equal(joined, expected)
|  | ||||
|  | ||||
def test_join_multi_with_nan():
    # GH29252
    names = ["id1", "id2"]
    left = DataFrame(
        data={"col1": [1.1, 1.2]},
        index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=names),
    )
    # the right frame has NaN where the left frame has 1.0
    right = DataFrame(
        data={"col2": [2.1, 2.2]},
        index=MultiIndex.from_product([["A"], [np.nan, 2.0]], names=names),
    )

    joined = left.join(right)

    # only the ("A", 2.0) row picks up a value from the right frame
    expected = DataFrame(
        data={"col1": [1.1, 1.2], "col2": [np.nan, 2.2]},
        index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=names),
    )
    tm.assert_frame_equal(joined, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("val", [0, 5])
def test_join_dtypes(any_numeric_ea_dtype, val):
    # GH#49830
    def typed(values):
        # first-level values carried in the parametrized dtype
        return Series(values, dtype=any_numeric_ea_dtype)

    left = MultiIndex.from_arrays([typed([1, 2]), [3, 4]])
    right = MultiIndex.from_arrays([typed([1, val, val]), [3, 4, 4]])

    joined = left.join(right, how="outer")

    unsorted_expected = MultiIndex.from_arrays(
        [typed([val, val, 1, 2]), [4, 4, 3, 4]]
    )
    tm.assert_index_equal(joined, unsorted_expected.sort_values())
|  | ||||
|  | ||||
| def test_join_dtypes_all_nan(any_numeric_ea_dtype): | ||||
|     # GH#49830 | ||||
|     midx = MultiIndex.from_arrays( | ||||
|         [Series([1, 2], dtype=any_numeric_ea_dtype), [np.nan, np.nan]] | ||||
|     ) | ||||
|     midx2 = MultiIndex.from_arrays( | ||||
|         [Series([1, 0, 0], dtype=any_numeric_ea_dtype), [np.nan, np.nan, np.nan]] | ||||
|     ) | ||||
|     result = midx.join(midx2, how="outer") | ||||
|     expected = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             Series([0, 0, 1, 2], dtype=any_numeric_ea_dtype), | ||||
|             [np.nan, np.nan, np.nan, np.nan], | ||||
|         ] | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_join_index_levels(): | ||||
|     # GH#53093 | ||||
|     midx = midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")]) | ||||
|     midx2 = MultiIndex.from_tuples([("a", "2019-01-31")]) | ||||
|     result = midx.join(midx2, how="outer") | ||||
|     expected = MultiIndex.from_tuples( | ||||
|         [("a", "2019-01-31"), ("a", "2019-02-01"), ("a", "2019-02-01")] | ||||
|     ) | ||||
|     tm.assert_index_equal(result.levels[1], expected.levels[1]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,46 @@ | ||||
| from pandas import MultiIndex | ||||
|  | ||||
|  | ||||
class TestIsLexsorted:
    def test_is_lexsorted(self):
        levels = [[0, 1], [0, 1, 2]]

        # codes sorted on both levels
        fully_sorted = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
        )
        assert fully_sorted._is_lexsorted()

        # first level sorted, second level out of order within the last group
        inner_unsorted = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
        )
        assert not inner_unsorted._is_lexsorted()

        # first level itself out of order
        outer_unsorted = MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
        )
        assert not outer_unsorted._is_lexsorted()
        assert outer_unsorted._lexsort_depth == 0
|  | ||||
|  | ||||
class TestLexsortDepth:
    def test_lexsort_depth(self):
        # Test that lexsort_depth returns the sortorder that was given to
        # the MultiIndex constructor.
        # GH#28518

        levels = [[0, 1], [0, 1, 2]]

        # (codes, sortorder passed in == depth expected back)
        cases = [
            ([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], 2),
            ([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], 1),
            ([[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], 0),
        ]
        for codes, depth in cases:
            index = MultiIndex(levels=levels, codes=codes, sortorder=depth)
            assert index._lexsort_depth == depth
| @ -0,0 +1,111 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import MultiIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_fillna(idx):
    # GH 11343: fillna is expected to fail with the "isna" error message
    fill_value = idx[0]
    with pytest.raises(
        NotImplementedError, match="isna is not defined for MultiIndex"
    ):
        idx.fillna(fill_value)
|  | ||||
|  | ||||
def test_dropna():
    # GH 6194
    with_gaps = MultiIndex.from_arrays(
        [
            [1, np.nan, 3, np.nan, 5],
            [1, 2, np.nan, np.nan, 5],
            ["a", "b", "c", np.nan, "e"],
        ]
    )

    # default and how="any": keep only entries without any missing value
    complete_only = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
    tm.assert_index_equal(with_gaps.dropna(), complete_only)
    tm.assert_index_equal(with_gaps.dropna(how="any"), complete_only)

    # how="all": only the entry missing in every level is removed
    partly_missing_kept = MultiIndex.from_arrays(
        [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
    )
    tm.assert_index_equal(with_gaps.dropna(how="all"), partly_missing_kept)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        with_gaps.dropna(how="xxx")

    # GH26408
    # test if missing values are dropped for multiindex constructed
    # from codes and values
    from_codes = MultiIndex(
        levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]],
        codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
    )
    complete_only = MultiIndex.from_arrays([["128", 2], ["128", 2]])
    tm.assert_index_equal(from_codes.dropna(), complete_only)
    tm.assert_index_equal(from_codes.dropna(how="any"), complete_only)

    partly_missing_kept = MultiIndex.from_arrays(
        [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
    )
    tm.assert_index_equal(from_codes.dropna(how="all"), partly_missing_kept)
|  | ||||
|  | ||||
def test_nulls(idx):
    """Smoke test: ``isna`` raises NotImplementedError on a MultiIndex."""
    # only a smoke test for the method; the functionality itself is
    # tested elsewhere
    expected_error = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_error):
        idx.isna()
|  | ||||
|  | ||||
@pytest.mark.xfail(reason="isna is not defined for MultiIndex")
def test_hasnans_isnans(idx):
    """Check ``_isnan`` / ``hasnans`` without and with a NaN entry (GH 11343).

    Marked ``xfail`` because isna is not defined for MultiIndex.
    """
    # index without any NaN: the mask is all False
    clean = idx.copy()
    no_nan_mask = np.zeros(len(clean), dtype=bool)
    tm.assert_numpy_array_equal(clean._isnan, no_nan_mask)
    assert clean.hasnans is False

    # rebuild the index from values whose entry at position 1 is NaN
    raw_values = idx.copy().values
    raw_values[1] = np.nan
    with_nan = type(idx)(raw_values)

    one_nan_mask = np.zeros(len(with_nan), dtype=bool)
    one_nan_mask[1] = True
    tm.assert_numpy_array_equal(with_nan._isnan, one_nan_mask)
    assert with_nan.hasnans is True
|  | ||||
|  | ||||
def test_nan_stays_float():
    """An all-missing level stays NA after a join and after arithmetic (GH 7031)."""
    left = MultiIndex(levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1])
    right = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])

    joined = left.join(right, how="outer")
    assert pd.isna(left.get_level_values(1)).all()
    # the following failed in 0.14.1
    joined_second_level = joined.get_level_values(1)
    assert pd.isna(joined_second_level[:-1]).all()

    left_frame = pd.DataFrame([[1, 2]], index=left)
    right_frame = pd.DataFrame([[3, 4]], index=right)
    difference = left_frame - right_frame
    assert pd.isna(left_frame.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    difference_second_level = difference.index.get_level_values(1)
    assert pd.isna(difference_second_level[:-1]).all()
|  | ||||
|  | ||||
def test_tuples_have_na():
    """A ``-1`` code surfaces as NA in the tuple for that position."""
    first_codes = [1, 1, 1, 1, -1, 0, 0, 0]
    second_codes = [0, 1, 2, 3, 0, 1, 2, 3]
    mi = MultiIndex(
        levels=[[1, 0], [0, 1, 2, 3]],
        codes=[first_codes, second_codes],
    )

    # position 4 carries the -1 code in the first level
    via_getitem = mi[4]
    assert pd.isna(via_getitem[0])
    via_values = mi.values[4]
    assert pd.isna(via_values[0])
| @ -0,0 +1,188 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex):
    """The string-ordered fixture index is asserted to be non-monotonic."""
    # string ordering
    mi = lexsorted_two_level_string_multiindex
    as_flat_index = Index(mi.values)

    assert mi.is_monotonic_increasing is False
    assert as_flat_index.is_monotonic_increasing is False
    assert mi._is_strictly_monotonic_increasing is False
    assert Index(mi.values)._is_strictly_monotonic_increasing is False
|  | ||||
|  | ||||
def test_is_monotonic_increasing():
    """Check ``is_monotonic_increasing`` and its strict variant on MultiIndex.

    Each stanza asserts the properties on the MultiIndex itself and, where
    applicable, on ``Index(i.values)`` built from the same tuples.
    """
    # both levels ascending
    i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"])
    assert i.is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    # previously this line re-checked ``i`` instead of ``Index(i.values)``
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # first level descending
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # second level descending
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # NaN in the first level
    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # string levels listed in ascending order with ascending codes
    i = MultiIndex(
        levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [1, 2, 3, 4],
            [
                "gb00b03mlx29",
                "lu0197800237",
                "nl0000289783",
                "nl0000289965",
                "nl0000301109",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True
|  | ||||
|  | ||||
def test_is_monotonic_decreasing():
    """Check ``is_monotonic_decreasing`` and its strict variant on MultiIndex.

    Each stanza asserts the properties on the MultiIndex itself and, where
    applicable, on ``Index(i.values)`` built from the same tuples.
    """
    # both levels descending
    i = MultiIndex.from_product(
        [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    # previously this line re-checked ``i`` instead of ``Index(i.values)``
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # first level ascending
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # second level ascending
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # NaN in the first level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string ordering
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string levels listed in descending order with ascending codes
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True
|  | ||||
|  | ||||
def test_is_strictly_monotonic_increasing():
    """A repeated entry keeps the index monotonic but not strictly so."""
    # positions 0 and 1 both encode ("bar", "mom")
    first_codes = [0, 0, 1, 1]
    second_codes = [0, 0, 0, 1]
    mi = MultiIndex(
        levels=[["bar", "baz"], ["mom", "next"]],
        codes=[first_codes, second_codes],
    )
    assert mi.is_monotonic_increasing is True
    assert mi._is_strictly_monotonic_increasing is False
|  | ||||
|  | ||||
def test_is_strictly_monotonic_decreasing():
    """A repeated entry keeps the index monotonic but not strictly so."""
    # positions 0 and 1 both encode ("baz", "next")
    first_codes = [0, 0, 1, 1]
    second_codes = [0, 0, 0, 1]
    mi = MultiIndex(
        levels=[["baz", "bar"], ["next", "mom"]],
        codes=[first_codes, second_codes],
    )
    assert mi.is_monotonic_decreasing is True
    assert mi._is_strictly_monotonic_decreasing is False
|  | ||||
|  | ||||
@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"])
@pytest.mark.parametrize(
    "values",
    [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]],
)
def test_is_monotonic_with_nans(values, attr):
    """With a NaN at any position, neither monotonic property is True (GH 37220)."""
    mi = MultiIndex.from_tuples(values, names=["test"])
    monotonic_flag = getattr(mi, attr)
    assert monotonic_flag is False
| @ -0,0 +1,201 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import MultiIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def check_level_names(index, names):
    """Assert that the ``name`` of each level of ``index`` matches ``names``.

    ``names`` may be any iterable; it is converted to a list for the comparison.
    """
    level_names = [lvl.name for lvl in index.levels]
    assert level_names == list(names)
|  | ||||
|  | ||||
def test_slice_keep_name():
    """Slicing a MultiIndex keeps its names."""
    tuples = [("a", "b"), (1, 2), ("c", "d")]
    mi = MultiIndex.from_tuples(tuples, names=["x", "y"])
    sliced = mi[1:]
    assert sliced.names == mi.names
|  | ||||
|  | ||||
def test_index_name_retained():
    """Adding a row through ``.loc`` gives the same frame as building it directly."""
    # GH9857
    frame = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]})
    frame = frame.set_index("z")
    # enlarge the frame with a new index label
    frame.loc[10] = [9, 10]

    expected_data = {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]}
    expected = pd.DataFrame(expected_data).set_index("z")
    tm.assert_frame_equal(frame, expected)
|  | ||||
|  | ||||
def test_changing_names(idx):
    """Renaming an index must not leak into its views/copies, and vice versa."""
    check_level_names(idx, ["first", "second"])

    # derive three related objects before any renaming happens
    as_view = idx.view()
    as_copy = idx.copy()
    as_shallow = idx._view()

    # renaming the original is reflected in the original's level names
    idx.names = [name + "a" for name in idx.names]
    check_level_names(idx, ["firsta", "seconda"])

    # ... but not in the objects derived from it earlier
    for derived in (as_view, as_copy, as_shallow):
        check_level_names(derived, ["first", "second"])

    # and renaming a derived object must not change the original
    as_shallow.names = [name + "c" for name in as_shallow.names]
    check_level_names(idx, ["firsta", "seconda"])
|  | ||||
|  | ||||
def test_take_preserve_name(idx):
    """``take`` keeps the names of the source index."""
    positions = [3, 0, 1]
    result = idx.take(positions)
    assert result.names == idx.names
|  | ||||
|  | ||||
def test_copy_names():
    """``copy`` honours a ``names`` / ``name`` argument without touching the source."""
    # GH14302
    source = MultiIndex.from_tuples([(1, 2), (3, 4)], names=["MyName1", "MyName2"])

    # plain copy: names carried over
    plain = source.copy()
    assert source.equals(plain)
    assert source.names == ["MyName1", "MyName2"]
    assert plain.names == ["MyName1", "MyName2"]

    # copy with ``names=``: only the copy is renamed
    renamed = source.copy(names=["NewName1", "NewName2"])
    assert source.equals(renamed)
    assert source.names == ["MyName1", "MyName2"]
    assert renamed.names == ["NewName1", "NewName2"]

    # copy with ``name=``: asserted to behave the same way
    renamed_via_name = source.copy(name=["NewName1", "NewName2"])
    assert source.equals(renamed_via_name)
    assert source.names == ["MyName1", "MyName2"]
    assert renamed_via_name.names == ["NewName1", "NewName2"]

    # gh-35592
    wrong_length = "Length of new names must be 2, got 1"
    with pytest.raises(ValueError, match=wrong_length):
        source.copy(names=["mario"])

    unhashable = "MultiIndex.name must be a hashable type"
    with pytest.raises(TypeError, match=unhashable):
        source.copy(names=[["mario"], ["luigi"]])
|  | ||||
|  | ||||
def test_names(idx):
    """Validate name assignment and name-length checks on a MultiIndex."""
    # names are assigned in setup
    assert idx.names == ["first", "second"]
    names_on_levels = [lvl.name for lvl in idx.levels]
    assert names_on_levels == idx.names

    # setting bad names on existing
    too_many = list(idx.names) + ["third"]
    with pytest.raises(ValueError, match="^Length of names"):
        idx.names = too_many
    with pytest.raises(ValueError, match="^Length of names"):
        idx.names = []

    # initializing with bad names (should always be equivalent)
    first_level, second_level = idx.levels
    first_codes, second_codes = idx.codes
    for bad_names in (["first"], ["first", "second", "third"]):
        with pytest.raises(ValueError, match="^Length of names"):
            MultiIndex(
                levels=[first_level, second_level],
                codes=[first_codes, second_codes],
                names=bad_names,
            )

    # names assigned on the index are also reported by its levels
    idx.names = ["a", "b"]
    names_on_levels = [lvl.name for lvl in idx.levels]
    assert names_on_levels == ["a", "b"]
|  | ||||
|  | ||||
def test_duplicate_level_names_access_raises(idx):
    """Looking up a level by a duplicated name must raise (GH19029)."""
    duplicated = ["foo", "foo"]
    idx.names = duplicated
    expected_error = "name foo occurs multiple times"
    with pytest.raises(ValueError, match=expected_error):
        idx._get_level_number("foo")
|  | ||||
|  | ||||
def test_get_names_from_levels():
    """Each level exposes the name given to the MultiIndex for that level."""
    mi = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])
    first_level, second_level = mi.levels[0], mi.levels[1]

    assert first_level.name == "a"
    assert second_level.name == "b"
|  | ||||
|  | ||||
def test_setting_names_from_levels_raises():
    """Assigning ``name`` on a level object is rejected with a RuntimeError."""
    mi = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])

    # both levels refuse direct renaming
    for position in (0, 1):
        with pytest.raises(RuntimeError, match="set_names"):
            mi.levels[position].name = "foo"

    # the same holds when the level is used as the index of a Series
    ser = pd.Series(1, index=mi.levels[0])
    with pytest.raises(RuntimeError, match="set_names"):
        ser.index.name = "bar"

    # the class-level flags must remain False
    assert pd.Index._no_setting_name is False
    assert pd.RangeIndex._no_setting_name is False
|  | ||||
|  | ||||
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y", "z"]),
        ({"x": "z", "y": "x"}, ["z", "x", "z"]),
        ({"y": "z"}, ["x", "z", "x"]),
        ({}, ["x", "y", "x"]),
        ({"z": "a"}, ["x", "y", "x"]),
        ({"y": "z", "a": "b"}, ["x", "z", "x"]),
    ],
)
def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names):
    """Dict-like renaming on a MultiIndex with a duplicated name (GH#20421)."""
    arrays = [[1, 2], [3, 4], [5, 6]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y", "x"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=exp_names)
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y"]),
        ({"x": "z", "y": "x"}, ["z", "x"]),
        ({"a": "z"}, ["x", "y"]),
        ({}, ["x", "y"]),
    ],
)
def test_name_mi_with_dict_like(func, rename_dict, exp_names):
    """Dict-like renaming on a MultiIndex with unique names (GH#20421)."""
    arrays = [[1, 2], [3, 4]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=exp_names)
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_index_name_with_dict_like_raising():
    """A flat Index rejects dict-like ``names`` in ``set_names`` (GH#20421)."""
    flat = pd.Index([1, 2])
    expected_error = "Can only pass dict-like as `names` for MultiIndex."
    with pytest.raises(TypeError, match=expected_error):
        flat.set_names({"x": "z"})
|  | ||||
|  | ||||
def test_multiindex_name_and_level_raising():
    """Passing ``level`` together with dict-like ``names`` raises (GH#20421)."""
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"])
    expected_error = "Can not pass level for dictlike `names`."
    with pytest.raises(TypeError, match=expected_error):
        mi.set_names(names={"x": "z"}, level={"x": "z"})
| @ -0,0 +1,148 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     IndexSlice, | ||||
|     MultiIndex, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture
def df():
    """Frame with a (timestamp, letter) MultiIndex and a single column ``c1``.

    The index is the product of a 12-hourly date range from 2016-01-01 to
    2016-01-03 with the letters a, b, c; ``c1`` holds 0..14.
    """
    # Layout of the returned frame:
    #
    #                        c1
    # 2016-01-01 00:00:00 a   0
    #                     b   1
    #                     c   2
    # 2016-01-01 12:00:00 a   3
    #                     b   4
    #                     c   5
    # 2016-01-02 00:00:00 a   6
    #                     b   7
    #                     c   8
    # 2016-01-02 12:00:00 a   9
    #                     b  10
    #                     c  11
    # 2016-01-03 00:00:00 a  12
    #                     b  13
    #                     c  14
    timestamps = date_range("2016-01-01", "2016-01-03", freq="12h")
    letters = ["a", "b", "c"]
    index = MultiIndex.from_product([timestamps, letters])
    return DataFrame({"c1": range(15)}, index=index)
|  | ||||
|  | ||||
def test_partial_string_matching_single_index(df):
    """Partial string matching on a single index obtained from a swapped frame."""
    # the three swaplevel spellings are all run through the same check
    swapped_frames = [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]
    for swapped in swapped_frames:
        only_a = swapped.sort_index().loc["a"]
        result = only_a.loc["2016-01-01"]

        expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
        expected.index = expected.index.droplevel(1)
        tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_get_loc_partial_timestamp_multiindex(df):
    """``get_loc`` with a partial date string is compared against boolean masks."""
    mi = df.index

    def mask_with(selector, length):
        # boolean array of ``length`` that is True where ``selector`` points
        mask = np.zeros(length, dtype=bool)
        mask[selector] = True
        return mask

    # day resolution: each day contributes two "a" rows
    day_one = ("2016-01-01", "a")
    tm.assert_numpy_array_equal(mi.get_loc(day_one), mask_with([0, 3], len(mi)))

    day_two = ("2016-01-02", "a")
    tm.assert_numpy_array_equal(mi.get_loc(day_two), mask_with([6, 9], len(mi)))

    # month resolution: expected to match every "a" entry of the second level
    month_loc = mi.get_loc(("2016-01", "a"))
    every_a = mask_with(mi.get_level_values(1).get_loc("a"), len(mi))
    tm.assert_numpy_array_equal(month_loc, every_a)

    # year resolution: same expectation as month resolution
    year_loc = mi.get_loc(("2016", "a"))
    tm.assert_numpy_array_equal(year_loc, every_a)

    # non-monotonic
    taker = np.arange(len(mi), dtype=np.intp)
    taker[::2] = taker[::-2]
    shuffled = mi.take(taker)
    shuffled_loc = shuffled.get_loc(day_one)
    tm.assert_numpy_array_equal(shuffled_loc, mask_with([3, 14], len(shuffled)))
|  | ||||
|  | ||||
def test_partial_string_timestamp_multiindex(df):
    """Partial date-string indexing via ``.loc`` on a MultiIndexed frame (GH10331).

    Covers ``IndexSlice`` selections, bare string keys of different
    resolutions, a tuple key, and the KeyError raised when the date string
    is applied to a frame whose first level is not the datetime level.
    """
    df_swap = df.swaplevel(0, 1).sort_index()
    SLC = IndexSlice

    # indexing with IndexSlice
    result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
    expected = df
    tm.assert_frame_equal(result, expected)

    # match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
    expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
    tm.assert_frame_equal(result, expected)

    # partial string match on year only
    result = df.loc["2016"]
    expected = df
    tm.assert_frame_equal(result, expected)

    # partial string match on date
    result = df.loc["2016-01-01"]
    expected = df.iloc[0:6]
    tm.assert_frame_equal(result, expected)

    # partial string match on date and hour, from middle
    result = df.loc["2016-01-02 12"]
    # hourly resolution, same as index.levels[0], so we are _not_ slicing on
    #  that level, so that level gets dropped
    expected = df.iloc[9:12].droplevel(0)
    tm.assert_frame_equal(result, expected)

    # partial string match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-02"], :]
    expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
    tm.assert_frame_equal(result, expected)

    # tuple selector with partial string match on date
    # "2016-01-01" has daily resolution, so _is_ a slice on the first level.
    result = df.loc[("2016-01-01", "a"), :]
    # (a redundant ``expected = df.iloc[[0, 3]]`` that was immediately
    # overwritten has been removed)
    expected = df.iloc[[0, 3]].droplevel(1)
    tm.assert_frame_equal(result, expected)

    # Slicing date on first level should break (of course) bc the DTI is the
    #  second level on df_swap
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df_swap.loc["2016-01-01"]
|  | ||||
|  | ||||
def test_partial_string_timestamp_multiindex_str_key_raises(df):
    """``df[date_string]`` must raise KeyError on a MultiIndexed frame."""
    # This syntax works on a single index, but it is somewhat ambiguous and
    # is deliberately not extended to multi-indexes: it would amount to
    # selecting a scalar from a column.
    date_key = "2016-01-01"
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df[date_key]
|  | ||||
|  | ||||
def test_partial_string_timestamp_multiindex_daily_resolution(df):
    """Slice the first level with month-resolution strings (GH12685)."""
    # GH12685 (partial string with daily resolution or below)
    # NOTE(review): the ``df`` fixture holds only 15 rows, so
    # ``iloc[118:180]`` is an empty frame -- presumably the 2013-03 slice is
    # expected to be empty too; confirm this is the intended coverage.
    month_slice = IndexSlice["2013-03":"2013-03", :]
    result = df.loc[month_slice, :]
    expected = df.iloc[118:180]
    tm.assert_frame_equal(result, expected)
| @ -0,0 +1,10 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import MultiIndex | ||||
|  | ||||
|  | ||||
def test_pickle_compat_construction():
    """Constructing a MultiIndex with no arguments raises TypeError."""
    # this is testing for pickle compat: an object is needed to create with
    expected_error = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=expected_error):
        MultiIndex()
| @ -0,0 +1,174 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_reindex(idx):
    """``reindex`` against a list of tuples returns a named MultiIndex."""
    expected_names = ["first", "second"]

    # reindex to the first four entries
    partial, _ = idx.reindex(list(idx[:4]))
    assert isinstance(partial, MultiIndex)
    assert partial.names == expected_names
    assert [lvl.name for lvl in partial.levels] == expected_names

    # reindex to all of its own entries: no indexer is returned
    full, full_indexer = idx.reindex(list(idx))
    assert isinstance(full, MultiIndex)
    assert full_indexer is None
    assert full.names == expected_names
    assert [lvl.name for lvl in full.levels] == expected_names
|  | ||||
|  | ||||
def test_reindex_level(idx):
    """Level-wise ``reindex`` is compared with a level-wise ``join``."""
    one = Index(["one"])

    mi_target, mi_indexer = idx.reindex(one, level="second")
    flat_target, flat_indexer = one.reindex(idx, level="second")

    right_joined = idx.join(one, level="second", how="right")
    left_joined = idx.join(one, level="second", how="left")

    # MultiIndex reindexed to the flat index
    assert mi_target.equals(right_joined)
    tm.assert_numpy_array_equal(mi_indexer, np.array([0, 2, 4]), check_dtype=False)

    # flat index reindexed to the MultiIndex
    assert flat_target.equals(left_joined)
    tm.assert_numpy_array_equal(
        flat_indexer, np.array([0, -1, 0, -1, 0, -1]), check_dtype=False
    )

    # a fill method cannot be combined with ``level``
    with pytest.raises(TypeError, match="Fill method not supported"):
        idx.reindex(idx, method="pad", level="second")
|  | ||||
|  | ||||
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
    """Reindexing to a list/ndarray target keeps the caller's names (GH6552)."""
    idx = idx.copy()
    target = idx.copy()
    idx.names = target.names = [None, None]

    other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])

    # list & ndarray cases
    targets = [
        [],
        np.array([]),
        target.tolist(),
        target.values,
        other_dtype.tolist(),
        other_dtype.values,
    ]

    # unnamed index: the result stays unnamed
    for new_labels in targets:
        assert idx.reindex(new_labels)[0].names == [None, None]

    # named index: the result carries the same names
    idx.names = ["foo", "bar"]
    for new_labels in targets:
        assert idx.reindex(new_labels)[0].names == ["foo", "bar"]
|  | ||||
|  | ||||
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """Level-wise reindex to an empty list keeps the names (GH7774)."""
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
    for level in (0, 1):
        reindexed, _ = mi.reindex([], level=level)
        assert reindexed.names == ["foo", "bar"]
|  | ||||
|  | ||||
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(
    using_infer_string,
):
    """Level-wise reindex to an empty list keeps each level's dtype (GH7774)."""
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]])

    int_level = mi.reindex([], level=0)[0].levels[0]
    assert int_level.dtype.type == np.int64

    # the expected type of the string level depends on the fixture flag
    if using_infer_string:
        string_type = str
    else:
        string_type = np.object_
    str_level = mi.reindex([], level=1)[0].levels[1]
    assert str_level.dtype.type == string_type

    # case with EA levels
    cat = pd.Categorical(["foo", "bar"])
    dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
    ea_mi = MultiIndex.from_product([cat, dti])
    assert ea_mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype
    assert ea_mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype
|  | ||||
|  | ||||
def test_reindex_base(idx):
    """``get_indexer`` on itself gives 0..n-1 and rejects an unknown method."""
    identity = np.arange(idx.size, dtype=np.intp)
    tm.assert_numpy_array_equal(identity, idx.get_indexer(idx))

    expected_error = "Invalid fill method"
    with pytest.raises(ValueError, match=expected_error):
        idx.get_indexer(idx, method="invalid")
|  | ||||
|  | ||||
def test_reindex_non_unique():
    """Reindexing a Series whose MultiIndex has duplicates raises ValueError."""
    # (1, 1) appears twice in the source index
    duplicated = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    ser = pd.Series(np.arange(4), index=duplicated)
    unique_target = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    expected_error = "cannot handle a non-unique multi-index!"
    with pytest.raises(ValueError, match=expected_error):
        ser.reindex(unique_target)
|  | ||||
|  | ||||
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """Level-wise reindex to an absent label gives an empty result (GH41170)."""
    mi = MultiIndex.from_arrays(values)
    new_labels = np.array(["b"])
    result, result_indexer = mi.reindex(new_labels, level=0)

    # no entries, but level 0 now holds the requested label
    expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    tm.assert_index_equal(result, expected)

    empty_indexer = np.array([], dtype=result_indexer.dtype)
    tm.assert_numpy_array_equal(result_indexer, empty_indexer)
|  | ||||
|  | ||||
def test_reindex_not_all_tuples():
    """Reindex a MultiIndex to a target that mixes tuples with a bare string."""
    tuple_keys = [("i", "i"), ("i", "j"), ("j", "i")]
    mixed_keys = tuple_keys + ["j"]

    mi = MultiIndex.from_tuples(tuple_keys)
    target = Index(mixed_keys)
    result, indexer = mi.reindex(target)

    tm.assert_index_equal(result, target)
    # the trailing non-tuple key has no match
    tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2, -1], dtype=np.intp))
|  | ||||
|  | ||||
| def test_reindex_limit_arg_with_multiindex(): | ||||
|     # GH21247 | ||||
|  | ||||
|     idx = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")]) | ||||
|  | ||||
|     df = pd.Series([0.02, 0.01, 0.012], index=idx) | ||||
|  | ||||
|     new_idx = MultiIndex.from_tuples( | ||||
|         [ | ||||
|             (3, "A"), | ||||
|             (3, "B"), | ||||
|             (4, "A"), | ||||
|             (4, "B"), | ||||
|             (4, "C"), | ||||
|             (5, "B"), | ||||
|             (5, "C"), | ||||
|             (6, "B"), | ||||
|             (6, "C"), | ||||
|         ] | ||||
|     ) | ||||
|  | ||||
|     with pytest.raises( | ||||
|         ValueError, | ||||
|         match="limit argument only valid if doing pad, backfill or nearest reindexing", | ||||
|     ): | ||||
|         df.reindex(new_idx, fill_value=0, limit=1) | ||||
|  | ||||
|  | ||||
| def test_reindex_with_none_in_nested_multiindex(): | ||||
|     # GH42883 | ||||
|     index = MultiIndex.from_tuples([(("a", None), 1), (("b", None), 2)]) | ||||
|     index2 = MultiIndex.from_tuples([(("b", None), 2), (("a", None), 1)]) | ||||
|     df1_dtype = pd.DataFrame([1, 2], index=index) | ||||
|     df2_dtype = pd.DataFrame([2, 1], index=index2) | ||||
|  | ||||
|     result = df1_dtype.reindex_like(df2_dtype) | ||||
|     expected = df2_dtype | ||||
|     tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,224 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_insert(idx): | ||||
|     # Exercises MultiIndex.insert directly, then setting-with-enlargement via | ||||
|     # .loc on a DataFrame and a Series that are indexed by a MultiIndex. | ||||
|  | ||||
|     # key contained in all levels | ||||
|     new_index = idx.insert(0, ("bar", "two")) | ||||
|     assert new_index.equal_levels(idx) | ||||
|     assert new_index[0] == ("bar", "two") | ||||
|  | ||||
|     # key not contained in all levels | ||||
|     new_index = idx.insert(0, ("abc", "three")) | ||||
|  | ||||
|     # the unseen labels are expected at the end of each level, names retained | ||||
|     exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") | ||||
|     tm.assert_index_equal(new_index.levels[0], exp0) | ||||
|     assert new_index.names == ["first", "second"] | ||||
|  | ||||
|     exp1 = Index(list(idx.levels[1]) + ["three"], name="second") | ||||
|     tm.assert_index_equal(new_index.levels[1], exp1) | ||||
|     assert new_index[0] == ("abc", "three") | ||||
|  | ||||
|     # key wrong length | ||||
|     msg = "Item must have length equal to number of levels" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         idx.insert(0, ("foo2",)) | ||||
|  | ||||
|     # two-row frame indexed by ("1st", "2nd"); `ts` is an independent copy of | ||||
|     # its "3rd" column taken before any rows are added | ||||
|     left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"]) | ||||
|     left.set_index(["1st", "2nd"], inplace=True) | ||||
|     ts = left["3rd"].copy(deep=True) | ||||
|  | ||||
|     # assign to six keys that are not yet in the frame's index | ||||
|     left.loc[("b", "x"), "3rd"] = 2 | ||||
|     left.loc[("b", "a"), "3rd"] = -1 | ||||
|     left.loc[("b", "b"), "3rd"] = 3 | ||||
|     left.loc[("a", "x"), "3rd"] = 4 | ||||
|     left.loc[("a", "w"), "3rd"] = 5 | ||||
|     left.loc[("a", "a"), "3rd"] = 6 | ||||
|  | ||||
|     # same keys and values on the Series, alternating tuple and bare-comma keys | ||||
|     ts.loc[("b", "x")] = 2 | ||||
|     ts.loc["b", "a"] = -1 | ||||
|     ts.loc[("b", "b")] = 3 | ||||
|     ts.loc["a", "x"] = 4 | ||||
|     ts.loc[("a", "w")] = 5 | ||||
|     ts.loc["a", "a"] = 6 | ||||
|  | ||||
|     # expected: the two original rows followed by the six new keys, in the | ||||
|     # order in which they were assigned above | ||||
|     right = pd.DataFrame( | ||||
|         [ | ||||
|             ["a", "b", 0], | ||||
|             ["b", "d", 1], | ||||
|             ["b", "x", 2], | ||||
|             ["b", "a", -1], | ||||
|             ["b", "b", 3], | ||||
|             ["a", "x", 4], | ||||
|             ["a", "w", 5], | ||||
|             ["a", "a", 6], | ||||
|         ], | ||||
|         columns=["1st", "2nd", "3rd"], | ||||
|     ) | ||||
|     right.set_index(["1st", "2nd"], inplace=True) | ||||
|     # FIXME data types changes to float because | ||||
|     # of intermediate nan insertion; | ||||
|     tm.assert_frame_equal(left, right, check_dtype=False) | ||||
|     tm.assert_series_equal(ts, right["3rd"]) | ||||
|  | ||||
|  | ||||
| def test_insert2(): | ||||
|     # GH9250 | ||||
|     idx = ( | ||||
|         [("test1", i) for i in range(5)] | ||||
|         + [("test2", i) for i in range(6)] | ||||
|         + [("test", 17), ("test", 18)] | ||||
|     ) | ||||
|  | ||||
|     left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(idx[:-2])) | ||||
|  | ||||
|     left.loc[("test", 17)] = 11 | ||||
|     left.loc[("test", 18)] = 12 | ||||
|  | ||||
|     right = pd.Series(np.linspace(0, 12, 13), MultiIndex.from_tuples(idx)) | ||||
|  | ||||
|     tm.assert_series_equal(left, right) | ||||
|  | ||||
|  | ||||
| def test_append(idx): | ||||
|     result = idx[:3].append(idx[3:]) | ||||
|     assert result.equals(idx) | ||||
|  | ||||
|     foos = [idx[:1], idx[1:3], idx[3:]] | ||||
|     result = foos[0].append(foos[1:]) | ||||
|     assert result.equals(idx) | ||||
|  | ||||
|     # empty | ||||
|     result = idx.append([]) | ||||
|     assert result.equals(idx) | ||||
|  | ||||
|  | ||||
| def test_append_index(): | ||||
|     # Appending between a flat float Index and MultiIndexes with two and three | ||||
|     # levels (float / tz-aware datetime / string), in every combination below. | ||||
|     idx1 = Index([1.1, 1.2, 1.3]) | ||||
|     idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo") | ||||
|     idx3 = Index(["A", "B", "C"]) | ||||
|  | ||||
|     midx_lv2 = MultiIndex.from_arrays([idx1, idx2]) | ||||
|     midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3]) | ||||
|  | ||||
|     # flat Index + MultiIndex: expected is a flat Index holding the scalars | ||||
|     # followed by the 2-tuples | ||||
|     result = idx1.append(midx_lv2) | ||||
|  | ||||
|     # see gh-7112 | ||||
|     tz = pytz.timezone("Asia/Tokyo") | ||||
|     expected_tuples = [ | ||||
|         (1.1, tz.localize(datetime(2011, 1, 1))), | ||||
|         (1.2, tz.localize(datetime(2011, 1, 2))), | ||||
|         (1.3, tz.localize(datetime(2011, 1, 3))), | ||||
|     ] | ||||
|     expected = Index([1.1, 1.2, 1.3] + expected_tuples) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # MultiIndex + flat Index: same contents with the tuples first | ||||
|     result = midx_lv2.append(idx1) | ||||
|     expected = Index(expected_tuples + [1.1, 1.2, 1.3]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # 2-level + 2-level: stays a MultiIndex | ||||
|     result = midx_lv2.append(midx_lv2) | ||||
|     expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # 2-level + 3-level: `expected` is reused from the previous step, i.e. the | ||||
|     # result must equal midx_lv2 appended to itself | ||||
|     # NOTE(review): presumably the third level of midx_lv3 is ignored here - | ||||
|     # confirm against MultiIndex.append | ||||
|     result = midx_lv2.append(midx_lv3) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # 3-level + 2-level: expected is a flat object Index of 3-tuples followed | ||||
|     # by 2-tuples, with no name | ||||
|     result = midx_lv3.append(midx_lv2) | ||||
|     expected = Index._simple_new( | ||||
|         np.array( | ||||
|             [ | ||||
|                 (1.1, tz.localize(datetime(2011, 1, 1)), "A"), | ||||
|                 (1.2, tz.localize(datetime(2011, 1, 2)), "B"), | ||||
|                 (1.3, tz.localize(datetime(2011, 1, 3)), "C"), | ||||
|             ] | ||||
|             + expected_tuples, | ||||
|             dtype=object, | ||||
|         ), | ||||
|         None, | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)]) | ||||
| def test_append_names_match(name, exp): | ||||
|     # GH#48288 | ||||
|     midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) | ||||
|     midx2 = MultiIndex.from_arrays([[3], [5]], names=["a", name]) | ||||
|     result = midx.append(midx2) | ||||
|     expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=["a", exp]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_append_names_dont_match(): | ||||
|     # GH#48288 | ||||
|     midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) | ||||
|     midx2 = MultiIndex.from_arrays([[3], [5]], names=["x", "y"]) | ||||
|     result = midx.append(midx2) | ||||
|     expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=None) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_append_overlapping_interval_levels(): | ||||
|     # GH 54934 | ||||
|     ivl1 = pd.IntervalIndex.from_breaks([0.0, 1.0, 2.0]) | ||||
|     ivl2 = pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5]) | ||||
|     mi1 = MultiIndex.from_product([ivl1, ivl1]) | ||||
|     mi2 = MultiIndex.from_product([ivl2, ivl2]) | ||||
|     result = mi1.append(mi2) | ||||
|     expected = MultiIndex.from_tuples( | ||||
|         [ | ||||
|             (pd.Interval(0.0, 1.0), pd.Interval(0.0, 1.0)), | ||||
|             (pd.Interval(0.0, 1.0), pd.Interval(1.0, 2.0)), | ||||
|             (pd.Interval(1.0, 2.0), pd.Interval(0.0, 1.0)), | ||||
|             (pd.Interval(1.0, 2.0), pd.Interval(1.0, 2.0)), | ||||
|             (pd.Interval(0.5, 1.5), pd.Interval(0.5, 1.5)), | ||||
|             (pd.Interval(0.5, 1.5), pd.Interval(1.5, 2.5)), | ||||
|             (pd.Interval(1.5, 2.5), pd.Interval(0.5, 1.5)), | ||||
|             (pd.Interval(1.5, 2.5), pd.Interval(1.5, 2.5)), | ||||
|         ] | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_repeat(): | ||||
|     reps = 2 | ||||
|     numbers = [1, 2, 3] | ||||
|     names = np.array(["foo", "bar"]) | ||||
|  | ||||
|     m = MultiIndex.from_product([numbers, names], names=names) | ||||
|     expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names) | ||||
|     tm.assert_index_equal(m.repeat(reps), expected) | ||||
|  | ||||
|  | ||||
| def test_insert_base(idx): | ||||
|     result = idx[1:4] | ||||
|  | ||||
|     # test 0th element | ||||
|     assert idx[0:4].equals(result.insert(0, idx[0])) | ||||
|  | ||||
|  | ||||
| def test_delete_base(idx): | ||||
|     expected = idx[1:] | ||||
|     result = idx.delete(0) | ||||
|     assert result.equals(expected) | ||||
|     assert result.name == expected.name | ||||
|  | ||||
|     expected = idx[:-1] | ||||
|     result = idx.delete(-1) | ||||
|     assert result.equals(expected) | ||||
|     assert result.name == expected.name | ||||
|  | ||||
|     msg = "index 6 is out of bounds for axis 0 with size 6" | ||||
|     with pytest.raises(IndexError, match=msg): | ||||
|         idx.delete(len(idx)) | ||||
| @ -0,0 +1,772 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import ( | ||||
|     is_float_dtype, | ||||
|     is_unsigned_integer_dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("case", [0.5, "xxx"]) | ||||
| @pytest.mark.parametrize( | ||||
|     "method", ["intersection", "union", "difference", "symmetric_difference"] | ||||
| ) | ||||
| def test_set_ops_error_cases(idx, case, sort, method): | ||||
|     # non-iterable input | ||||
|     msg = "Input must be Index or array-like" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         getattr(idx, method)(case, sort=sort) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) | ||||
| def test_intersection_base(idx, sort, klass): | ||||
|     first = idx[2::-1]  # first 3 elements reversed | ||||
|     second = idx[:5] | ||||
|  | ||||
|     if klass is not MultiIndex: | ||||
|         second = klass(second.values) | ||||
|  | ||||
|     intersect = first.intersection(second, sort=sort) | ||||
|     if sort is None: | ||||
|         expected = first.sort_values() | ||||
|     else: | ||||
|         expected = first | ||||
|     tm.assert_index_equal(intersect, expected) | ||||
|  | ||||
|     msg = "other must be a MultiIndex or a list of tuples" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         first.intersection([1, 2, 3], sort=sort) | ||||
|  | ||||
|  | ||||
| @pytest.mark.arm_slow | ||||
| @pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list]) | ||||
| def test_union_base(idx, sort, klass): | ||||
|     first = idx[::-1] | ||||
|     second = idx[:5] | ||||
|  | ||||
|     if klass is not MultiIndex: | ||||
|         second = klass(second.values) | ||||
|  | ||||
|     union = first.union(second, sort=sort) | ||||
|     if sort is None: | ||||
|         expected = first.sort_values() | ||||
|     else: | ||||
|         expected = first | ||||
|     tm.assert_index_equal(union, expected) | ||||
|  | ||||
|     msg = "other must be a MultiIndex or a list of tuples" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         first.union([1, 2, 3], sort=sort) | ||||
|  | ||||
|  | ||||
| def test_difference_base(idx, sort): | ||||
|     second = idx[4:] | ||||
|     answer = idx[:4] | ||||
|     result = idx.difference(second, sort=sort) | ||||
|  | ||||
|     if sort is None: | ||||
|         answer = answer.sort_values() | ||||
|  | ||||
|     assert result.equals(answer) | ||||
|     tm.assert_index_equal(result, answer) | ||||
|  | ||||
|     # GH 10149 | ||||
|     cases = [klass(second.values) for klass in [np.array, Series, list]] | ||||
|     for case in cases: | ||||
|         result = idx.difference(case, sort=sort) | ||||
|         tm.assert_index_equal(result, answer) | ||||
|  | ||||
|     msg = "other must be a MultiIndex or a list of tuples" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         idx.difference([1, 2, 3], sort=sort) | ||||
|  | ||||
|  | ||||
| def test_symmetric_difference(idx, sort): | ||||
|     first = idx[1:] | ||||
|     second = idx[:-1] | ||||
|     answer = idx[[-1, 0]] | ||||
|     result = first.symmetric_difference(second, sort=sort) | ||||
|  | ||||
|     if sort is None: | ||||
|         answer = answer.sort_values() | ||||
|  | ||||
|     tm.assert_index_equal(result, answer) | ||||
|  | ||||
|     # GH 10149 | ||||
|     cases = [klass(second.values) for klass in [np.array, Series, list]] | ||||
|     for case in cases: | ||||
|         result = first.symmetric_difference(case, sort=sort) | ||||
|         tm.assert_index_equal(result, answer) | ||||
|  | ||||
|     msg = "other must be a MultiIndex or a list of tuples" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         first.symmetric_difference([1, 2, 3], sort=sort) | ||||
|  | ||||
|  | ||||
| def test_multiindex_symmetric_difference(): | ||||
|     # GH 13490 | ||||
|     idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"]) | ||||
|     result = idx.symmetric_difference(idx) | ||||
|     assert result.names == idx.names | ||||
|  | ||||
|     idx2 = idx.copy().rename(["A", "B"]) | ||||
|     result = idx.symmetric_difference(idx2) | ||||
|     assert result.names == [None, None] | ||||
|  | ||||
|  | ||||
| def test_empty(idx): | ||||
|     # GH 15270 | ||||
|     assert not idx.empty | ||||
|     assert idx[:0].empty | ||||
|  | ||||
|  | ||||
| def test_difference(idx, sort): | ||||
|     first = idx | ||||
|     result = first.difference(idx[-3:], sort=sort) | ||||
|     vals = idx[:-3].values | ||||
|  | ||||
|     if sort is None: | ||||
|         vals = sorted(vals) | ||||
|  | ||||
|     expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names) | ||||
|  | ||||
|     assert isinstance(result, MultiIndex) | ||||
|     assert result.equals(expected) | ||||
|     assert result.names == idx.names | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # empty difference: reflexive | ||||
|     result = idx.difference(idx, sort=sort) | ||||
|     expected = idx[:0] | ||||
|     assert result.equals(expected) | ||||
|     assert result.names == idx.names | ||||
|  | ||||
|     # empty difference: superset | ||||
|     result = idx[-3:].difference(idx, sort=sort) | ||||
|     expected = idx[:0] | ||||
|     assert result.equals(expected) | ||||
|     assert result.names == idx.names | ||||
|  | ||||
|     # empty difference: degenerate | ||||
|     result = idx[:0].difference(idx, sort=sort) | ||||
|     expected = idx[:0] | ||||
|     assert result.equals(expected) | ||||
|     assert result.names == idx.names | ||||
|  | ||||
|     # names not the same | ||||
|     chunklet = idx[-3:] | ||||
|     chunklet.names = ["foo", "baz"] | ||||
|     result = first.difference(chunklet, sort=sort) | ||||
|     assert result.names == (None, None) | ||||
|  | ||||
|     # empty, but non-equal | ||||
|     result = idx.difference(idx.sortlevel(1)[0], sort=sort) | ||||
|     assert len(result) == 0 | ||||
|  | ||||
|     # raise Exception called with non-MultiIndex | ||||
|     result = first.difference(first.values, sort=sort) | ||||
|     assert result.equals(first[:0]) | ||||
|  | ||||
|     # name from empty array | ||||
|     result = first.difference([], sort=sort) | ||||
|     assert first.equals(result) | ||||
|     assert first.names == result.names | ||||
|  | ||||
|     # name from non-empty array | ||||
|     result = first.difference([("foo", "one")], sort=sort) | ||||
|     expected = MultiIndex.from_tuples( | ||||
|         [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")] | ||||
|     ) | ||||
|     expected.names = first.names | ||||
|     assert first.names == result.names | ||||
|  | ||||
|     msg = "other must be a MultiIndex or a list of tuples" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         first.difference([1, 2, 3, 4, 5], sort=sort) | ||||
|  | ||||
|  | ||||
| def test_difference_sort_special(): | ||||
|     # GH-24959 | ||||
|     idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) | ||||
|     # sort=None, the default | ||||
|     result = idx.difference([]) | ||||
|     tm.assert_index_equal(result, idx) | ||||
|  | ||||
|  | ||||
| def test_difference_sort_special_true(): | ||||
|     idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) | ||||
|     result = idx.difference([], sort=True) | ||||
|     expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_difference_sort_incomparable(): | ||||
|     # GH-24959 | ||||
|     idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) | ||||
|  | ||||
|     other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) | ||||
|     # sort=None, the default | ||||
|     msg = "sort order is undefined for incomparable objects" | ||||
|     with tm.assert_produces_warning(RuntimeWarning, match=msg): | ||||
|         result = idx.difference(other) | ||||
|     tm.assert_index_equal(result, idx) | ||||
|  | ||||
|     # sort=False | ||||
|     result = idx.difference(other, sort=False) | ||||
|     tm.assert_index_equal(result, idx) | ||||
|  | ||||
|  | ||||
| def test_difference_sort_incomparable_true(): | ||||
|     idx = MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) | ||||
|     other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) | ||||
|  | ||||
|     # TODO: this is raising in constructing a Categorical when calling | ||||
|     #  algos.safe_sort. Should we catch and re-raise with a better message? | ||||
|     msg = "'values' is not ordered, please explicitly specify the categories order " | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         idx.difference(other, sort=True) | ||||
|  | ||||
|  | ||||
| def test_union(idx, sort): | ||||
|     piece1 = idx[:5][::-1] | ||||
|     piece2 = idx[3:] | ||||
|  | ||||
|     the_union = piece1.union(piece2, sort=sort) | ||||
|  | ||||
|     if sort in (None, False): | ||||
|         tm.assert_index_equal(the_union.sort_values(), idx.sort_values()) | ||||
|     else: | ||||
|         tm.assert_index_equal(the_union, idx) | ||||
|  | ||||
|     # corner case, pass self or empty thing: | ||||
|     the_union = idx.union(idx, sort=sort) | ||||
|     tm.assert_index_equal(the_union, idx) | ||||
|  | ||||
|     the_union = idx.union(idx[:0], sort=sort) | ||||
|     tm.assert_index_equal(the_union, idx) | ||||
|  | ||||
|     tuples = idx.values | ||||
|     result = idx[:4].union(tuples[4:], sort=sort) | ||||
|     if sort is None: | ||||
|         tm.assert_index_equal(result.sort_values(), idx.sort_values()) | ||||
|     else: | ||||
|         assert result.equals(idx) | ||||
|  | ||||
|  | ||||
| def test_union_with_regular_index(idx, using_infer_string): | ||||
|     other = Index(["A", "B", "C"]) | ||||
|  | ||||
|     result = other.union(idx) | ||||
|     assert ("foo", "one") in result | ||||
|     assert "B" in result | ||||
|  | ||||
|     if using_infer_string: | ||||
|         with pytest.raises(NotImplementedError, match="Can only union"): | ||||
|             idx.union(other) | ||||
|     else: | ||||
|         msg = "The values in the array are unorderable" | ||||
|         with tm.assert_produces_warning(RuntimeWarning, match=msg): | ||||
|             result2 = idx.union(other) | ||||
|         # This is more consistent now, if sorting fails then we don't sort at all | ||||
|         # in the MultiIndex case. | ||||
|         assert not result.equals(result2) | ||||
|  | ||||
|  | ||||
| def test_intersection(idx, sort): | ||||
|     piece1 = idx[:5][::-1] | ||||
|     piece2 = idx[3:] | ||||
|  | ||||
|     the_int = piece1.intersection(piece2, sort=sort) | ||||
|  | ||||
|     if sort in (None, True): | ||||
|         tm.assert_index_equal(the_int, idx[3:5]) | ||||
|     else: | ||||
|         tm.assert_index_equal(the_int.sort_values(), idx[3:5]) | ||||
|  | ||||
|     # corner case, pass self | ||||
|     the_int = idx.intersection(idx, sort=sort) | ||||
|     tm.assert_index_equal(the_int, idx) | ||||
|  | ||||
|     # empty intersection: disjoint | ||||
|     empty = idx[:2].intersection(idx[2:], sort=sort) | ||||
|     expected = idx[:0] | ||||
|     assert empty.equals(expected) | ||||
|  | ||||
|     tuples = idx.values | ||||
|     result = idx.intersection(tuples) | ||||
|     assert result.equals(idx) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "method", ["intersection", "union", "difference", "symmetric_difference"] | ||||
| ) | ||||
| def test_setop_with_categorical(idx, sort, method): | ||||
|     other = idx.to_flat_index().astype("category") | ||||
|     res_names = [None] * idx.nlevels | ||||
|  | ||||
|     result = getattr(idx, method)(other, sort=sort) | ||||
|     expected = getattr(idx, method)(idx, sort=sort).rename(res_names) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     result = getattr(idx, method)(other[:5], sort=sort) | ||||
|     expected = getattr(idx, method)(idx[:5], sort=sort).rename(res_names) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_intersection_non_object(idx, sort): | ||||
|     other = Index(range(3), name="foo") | ||||
|  | ||||
|     result = idx.intersection(other, sort=sort) | ||||
|     expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None) | ||||
|     tm.assert_index_equal(result, expected, exact=True) | ||||
|  | ||||
|     # if we pass a length-0 ndarray (i.e. no name, we retain our idx.name) | ||||
|     result = idx.intersection(np.asarray(other)[:0], sort=sort) | ||||
|     expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names) | ||||
|     tm.assert_index_equal(result, expected, exact=True) | ||||
|  | ||||
|     msg = "other must be a MultiIndex or a list of tuples" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         # With non-zero length non-index, we try and fail to convert to tuples | ||||
|         idx.intersection(np.asarray(other), sort=sort) | ||||
|  | ||||
|  | ||||
| def test_intersect_equal_sort(): | ||||
|     # GH-24959 | ||||
|     idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) | ||||
|     tm.assert_index_equal(idx.intersection(idx, sort=False), idx) | ||||
|     tm.assert_index_equal(idx.intersection(idx, sort=None), idx) | ||||
|  | ||||
|  | ||||
| def test_intersect_equal_sort_true(): | ||||
|     idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) | ||||
|     expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) | ||||
|     result = idx.intersection(idx, sort=True) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) | ||||
| def test_union_sort_other_empty(slice_): | ||||
|     # https://github.com/pandas-dev/pandas/issues/24959 | ||||
|     idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) | ||||
|  | ||||
|     # default, sort=None | ||||
|     other = idx[slice_] | ||||
|     tm.assert_index_equal(idx.union(other), idx) | ||||
|     tm.assert_index_equal(other.union(idx), idx) | ||||
|  | ||||
|     # sort=False | ||||
|     tm.assert_index_equal(idx.union(other, sort=False), idx) | ||||
|  | ||||
|  | ||||
| def test_union_sort_other_empty_sort(): | ||||
|     idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) | ||||
|     other = idx[:0] | ||||
|     result = idx.union(other, sort=True) | ||||
|     expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_union_sort_other_incomparable(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/24959 | ||||
|     idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) | ||||
|  | ||||
|     # default, sort=None | ||||
|     with tm.assert_produces_warning(RuntimeWarning): | ||||
|         result = idx.union(idx[:1]) | ||||
|     tm.assert_index_equal(result, idx) | ||||
|  | ||||
|     # sort=False | ||||
|     result = idx.union(idx[:1], sort=False) | ||||
|     tm.assert_index_equal(result, idx) | ||||
|  | ||||
|  | ||||
| def test_union_sort_other_incomparable_sort(): | ||||
|     idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) | ||||
|     msg = "'<' not supported between instances of 'Timestamp' and 'int'" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         idx.union(idx[:1], sort=True) | ||||
|  | ||||
|  | ||||
| def test_union_non_object_dtype_raises(): | ||||
|     # GH#32646 raise NotImplementedError instead of less-informative error | ||||
|     mi = MultiIndex.from_product([["a", "b"], [1, 2]]) | ||||
|  | ||||
|     idx = mi.levels[1] | ||||
|  | ||||
|     msg = "Can only union MultiIndex with MultiIndex or Index of tuples" | ||||
|     with pytest.raises(NotImplementedError, match=msg): | ||||
|         mi.union(idx) | ||||
|  | ||||
|  | ||||
| def test_union_empty_self_different_names(): | ||||
|     # GH#38423 | ||||
|     mi = MultiIndex.from_arrays([[]]) | ||||
|     mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) | ||||
|     result = mi.union(mi2) | ||||
|     expected = MultiIndex.from_arrays([[1, 2], [3, 4]]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_union_multiindex_empty_rangeindex(): | ||||
|     # GH#41234 | ||||
|     mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) | ||||
|     ri = pd.RangeIndex(0) | ||||
|  | ||||
|     result_left = mi.union(ri) | ||||
|     tm.assert_index_equal(mi, result_left, check_names=False) | ||||
|  | ||||
|     result_right = ri.union(mi) | ||||
|     tm.assert_index_equal(mi, result_right, check_names=False) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "method", ["union", "intersection", "difference", "symmetric_difference"] | ||||
| ) | ||||
| def test_setops_sort_validation(method): | ||||
|     idx1 = MultiIndex.from_product([["a", "b"], [1, 2]]) | ||||
|     idx2 = MultiIndex.from_product([["b", "c"], [1, 2]]) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="The 'sort' keyword only takes"): | ||||
|         getattr(idx1, method)(idx2, sort=2) | ||||
|  | ||||
|     # sort=True is supported as of GH#? | ||||
|     getattr(idx1, method)(idx2, sort=True) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("val", [pd.NA, 100]) | ||||
| def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val): | ||||
|     # GH#48606 | ||||
|     midx = MultiIndex.from_arrays( | ||||
|         [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None] | ||||
|     ) | ||||
|     midx2 = MultiIndex.from_arrays( | ||||
|         [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]] | ||||
|     ) | ||||
|     result = midx.difference(midx2) | ||||
|     expected = MultiIndex.from_arrays([Series([1], dtype=any_numeric_ea_dtype), [2]]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     result = midx.difference(midx.sort_values(ascending=False)) | ||||
|     expected = MultiIndex.from_arrays( | ||||
|         [Series([], dtype=any_numeric_ea_dtype), Series([], dtype=np.int64)], | ||||
|         names=["a", None], | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("val", [pd.NA, 5]) | ||||
| def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val): | ||||
|     # GH#48607 | ||||
|     midx = MultiIndex.from_arrays( | ||||
|         [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None] | ||||
|     ) | ||||
|     midx2 = MultiIndex.from_arrays( | ||||
|         [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]] | ||||
|     ) | ||||
|     result = midx.symmetric_difference(midx2) | ||||
|     expected = MultiIndex.from_arrays( | ||||
|         [Series([1, 1, val], dtype=any_numeric_ea_dtype), [1, 2, 3]] | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("tuples", "exp_tuples"), | ||||
|     [ | ||||
|         ([("val1", "test1")], [("val1", "test1")]), | ||||
|         ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]), | ||||
|         ( | ||||
|             [("val2", "test2"), ("val1", "test1")], | ||||
|             [("val2", "test2"), ("val1", "test1")], | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_intersect_with_duplicates(tuples, exp_tuples): | ||||
|     # GH#36915 | ||||
|     left = MultiIndex.from_tuples(tuples, names=["first", "second"]) | ||||
|     right = MultiIndex.from_tuples( | ||||
|         [("val1", "test1"), ("val1", "test1"), ("val2", "test2")], | ||||
|         names=["first", "second"], | ||||
|     ) | ||||
|     result = left.intersection(right) | ||||
|     expected = MultiIndex.from_tuples(exp_tuples, names=["first", "second"]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, names, expected", | ||||
|     [ | ||||
|         ((1,), None, [None, None]), | ||||
|         ((1,), ["a"], [None, None]), | ||||
|         ((1,), ["b"], [None, None]), | ||||
|         ((1, 2), ["c", "d"], [None, None]), | ||||
|         ((1, 2), ["b", "a"], [None, None]), | ||||
|         ((1, 2, 3), ["a", "b", "c"], [None, None]), | ||||
|         ((1, 2), ["a", "c"], ["a", None]), | ||||
|         ((1, 2), ["c", "b"], [None, "b"]), | ||||
|         ((1, 2), ["a", "b"], ["a", "b"]), | ||||
|         ((1, 2), [None, "b"], [None, "b"]), | ||||
|     ], | ||||
| ) | ||||
| def test_maybe_match_names(data, names, expected): | ||||
|     # GH#38323 | ||||
|     mi = MultiIndex.from_tuples([], names=["a", "b"]) | ||||
|     mi2 = MultiIndex.from_tuples([data], names=names) | ||||
|     result = mi._maybe_match_names(mi2) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_intersection_equal_different_names(): | ||||
|     # GH#30302 | ||||
|     mi1 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["c", "b"]) | ||||
|     mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) | ||||
|  | ||||
|     result = mi1.intersection(mi2) | ||||
|     expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=[None, "b"]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_intersection_different_names(): | ||||
|     # GH#38323 | ||||
|     mi = MultiIndex.from_arrays([[1], [3]], names=["c", "b"]) | ||||
|     mi2 = MultiIndex.from_arrays([[1], [3]]) | ||||
|     result = mi.intersection(mi2) | ||||
|     tm.assert_index_equal(result, mi2) | ||||
|  | ||||
|  | ||||
| def test_intersection_with_missing_values_on_both_sides(nulls_fixture): | ||||
|     # GH#38623 | ||||
|     mi1 = MultiIndex.from_arrays([[3, nulls_fixture, 4, nulls_fixture], [1, 2, 4, 2]]) | ||||
|     mi2 = MultiIndex.from_arrays([[3, nulls_fixture, 3], [1, 2, 4]]) | ||||
|     result = mi1.intersection(mi2) | ||||
|     expected = MultiIndex.from_arrays([[3, nulls_fixture], [1, 2]]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_union_with_missing_values_on_both_sides(nulls_fixture): | ||||
|     # GH#38623 | ||||
|     mi1 = MultiIndex.from_arrays([[1, nulls_fixture]]) | ||||
|     mi2 = MultiIndex.from_arrays([[1, nulls_fixture, 3]]) | ||||
|     result = mi1.union(mi2) | ||||
|     expected = MultiIndex.from_arrays([[1, 3, nulls_fixture]]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", ["float64", "Float64"]) | ||||
| @pytest.mark.parametrize("sort", [None, False]) | ||||
| def test_union_nan_got_duplicated(dtype, sort): | ||||
|     # GH#38977, GH#49010 | ||||
|     mi1 = MultiIndex.from_arrays([pd.array([1.0, np.nan], dtype=dtype), [2, 3]]) | ||||
|     mi2 = MultiIndex.from_arrays([pd.array([1.0, np.nan, 3.0], dtype=dtype), [2, 3, 4]]) | ||||
|     result = mi1.union(mi2, sort=sort) | ||||
|     if sort is None: | ||||
|         expected = MultiIndex.from_arrays( | ||||
|             [pd.array([1.0, 3.0, np.nan], dtype=dtype), [2, 4, 3]] | ||||
|         ) | ||||
|     else: | ||||
|         expected = mi2 | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("val", [4, 1]) | ||||
| def test_union_keep_ea_dtype(any_numeric_ea_dtype, val): | ||||
|     # GH#48505 | ||||
|  | ||||
|     arr1 = Series([val, 2], dtype=any_numeric_ea_dtype) | ||||
|     arr2 = Series([2, 1], dtype=any_numeric_ea_dtype) | ||||
|     midx = MultiIndex.from_arrays([arr1, [1, 2]], names=["a", None]) | ||||
|     midx2 = MultiIndex.from_arrays([arr2, [2, 1]]) | ||||
|     result = midx.union(midx2) | ||||
|     if val == 4: | ||||
|         expected = MultiIndex.from_arrays( | ||||
|             [Series([1, 2, 4], dtype=any_numeric_ea_dtype), [1, 2, 1]] | ||||
|         ) | ||||
|     else: | ||||
|         expected = MultiIndex.from_arrays( | ||||
|             [Series([1, 2], dtype=any_numeric_ea_dtype), [1, 2]] | ||||
|         ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dupe_val", [3, pd.NA]) | ||||
| def test_union_with_duplicates_keep_ea_dtype(dupe_val, any_numeric_ea_dtype): | ||||
|     # GH48900 | ||||
|     mi1 = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             Series([1, dupe_val, 2], dtype=any_numeric_ea_dtype), | ||||
|             Series([1, dupe_val, 2], dtype=any_numeric_ea_dtype), | ||||
|         ] | ||||
|     ) | ||||
|     mi2 = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             Series([2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype), | ||||
|             Series([2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype), | ||||
|         ] | ||||
|     ) | ||||
|     result = mi1.union(mi2) | ||||
|     expected = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             Series([1, 2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype), | ||||
|             Series([1, 2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype), | ||||
|         ] | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") | ||||
| def test_union_duplicates(index, request): | ||||
|     # GH#38977 | ||||
|     if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)): | ||||
|         pytest.skip(f"No duplicates in an empty {type(index).__name__}") | ||||
|  | ||||
|     values = index.unique().values.tolist() | ||||
|     mi1 = MultiIndex.from_arrays([values, [1] * len(values)]) | ||||
|     mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)]) | ||||
|     result = mi2.union(mi1) | ||||
|     expected = mi2.sort_values() | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     if ( | ||||
|         is_unsigned_integer_dtype(mi2.levels[0]) | ||||
|         and (mi2.get_level_values(0) < 2**63).all() | ||||
|     ): | ||||
|         # GH#47294 - union uses lib.fast_zip, converting data to Python integers | ||||
|         # and loses type information. Result is then unsigned only when values are | ||||
|         # sufficiently large to require unsigned dtype. This happens only if other | ||||
|         # has dups or one of both have missing values | ||||
|         expected = expected.set_levels( | ||||
|             [expected.levels[0].astype(np.int64), expected.levels[1]] | ||||
|         ) | ||||
|     elif is_float_dtype(mi2.levels[0]): | ||||
|         # mi2 has duplicates witch is a different path than above, Fix that path | ||||
|         # to use correct float dtype? | ||||
|         expected = expected.set_levels( | ||||
|             [expected.levels[0].astype(float), expected.levels[1]] | ||||
|         ) | ||||
|  | ||||
|     result = mi1.union(mi2) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_union_keep_dtype_precision(any_real_numeric_dtype): | ||||
|     # GH#48498 | ||||
|     arr1 = Series([4, 1, 1], dtype=any_real_numeric_dtype) | ||||
|     arr2 = Series([1, 4], dtype=any_real_numeric_dtype) | ||||
|     midx = MultiIndex.from_arrays([arr1, [2, 1, 1]], names=["a", None]) | ||||
|     midx2 = MultiIndex.from_arrays([arr2, [1, 2]], names=["a", None]) | ||||
|  | ||||
|     result = midx.union(midx2) | ||||
|     expected = MultiIndex.from_arrays( | ||||
|         ([Series([1, 1, 4], dtype=any_real_numeric_dtype), [1, 1, 2]]), | ||||
|         names=["a", None], | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_union_keep_ea_dtype_with_na(any_numeric_ea_dtype): | ||||
|     # GH#48498 | ||||
|     arr1 = Series([4, pd.NA], dtype=any_numeric_ea_dtype) | ||||
|     arr2 = Series([1, pd.NA], dtype=any_numeric_ea_dtype) | ||||
|     midx = MultiIndex.from_arrays([arr1, [2, 1]], names=["a", None]) | ||||
|     midx2 = MultiIndex.from_arrays([arr2, [1, 2]]) | ||||
|     result = midx.union(midx2) | ||||
|     expected = MultiIndex.from_arrays( | ||||
|         [Series([1, 4, pd.NA, pd.NA], dtype=any_numeric_ea_dtype), [1, 2, 1, 2]] | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "levels1, levels2, codes1, codes2, names", | ||||
|     [ | ||||
|         ( | ||||
|             [["a", "b", "c"], [0, ""]], | ||||
|             [["c", "d", "b"], [""]], | ||||
|             [[0, 1, 2], [1, 1, 1]], | ||||
|             [[0, 1, 2], [0, 0, 0]], | ||||
|             ["name1", "name2"], | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names): | ||||
|     # GH#25169 | ||||
|     mi1 = MultiIndex(levels=levels1, codes=codes1, names=names) | ||||
|     mi2 = MultiIndex(levels=levels2, codes=codes2, names=names) | ||||
|     mi_int = mi1.intersection(mi2) | ||||
|     assert mi_int._lexsort_depth == 2 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "a", | ||||
|     [pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]], | ||||
| ) | ||||
| @pytest.mark.parametrize( | ||||
|     "b", | ||||
|     [ | ||||
|         pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True), | ||||
|         pd.Categorical(["a", "b"], categories=["b", "a"]), | ||||
|     ], | ||||
| ) | ||||
| def test_intersection_with_non_lex_sorted_categories(a, b): | ||||
|     # GH#49974 | ||||
|     other = ["1", "2"] | ||||
|  | ||||
|     df1 = DataFrame({"x": a, "y": other}) | ||||
|     df2 = DataFrame({"x": b, "y": other}) | ||||
|  | ||||
|     expected = MultiIndex.from_arrays([a, other], names=["x", "y"]) | ||||
|  | ||||
|     res1 = MultiIndex.from_frame(df1).intersection( | ||||
|         MultiIndex.from_frame(df2.sort_values(["x", "y"])) | ||||
|     ) | ||||
|     res2 = MultiIndex.from_frame(df1).intersection(MultiIndex.from_frame(df2)) | ||||
|     res3 = MultiIndex.from_frame(df1.sort_values(["x", "y"])).intersection( | ||||
|         MultiIndex.from_frame(df2) | ||||
|     ) | ||||
|     res4 = MultiIndex.from_frame(df1.sort_values(["x", "y"])).intersection( | ||||
|         MultiIndex.from_frame(df2.sort_values(["x", "y"])) | ||||
|     ) | ||||
|  | ||||
|     tm.assert_index_equal(res1, expected) | ||||
|     tm.assert_index_equal(res2, expected) | ||||
|     tm.assert_index_equal(res3, expected) | ||||
|     tm.assert_index_equal(res4, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("val", [pd.NA, 100]) | ||||
| def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype): | ||||
|     # GH#48604 | ||||
|     midx = MultiIndex.from_arrays( | ||||
|         [Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None] | ||||
|     ) | ||||
|     midx2 = MultiIndex.from_arrays( | ||||
|         [Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]] | ||||
|     ) | ||||
|     result = midx.intersection(midx2) | ||||
|     expected = MultiIndex.from_arrays([Series([2], dtype=any_numeric_ea_dtype), [1]]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_union_with_na_when_constructing_dataframe(): | ||||
|     # GH43222 | ||||
|     series1 = Series( | ||||
|         (1,), | ||||
|         index=MultiIndex.from_arrays( | ||||
|             [Series([None], dtype="str"), Series([None], dtype="str")] | ||||
|         ), | ||||
|     ) | ||||
|     series2 = Series((10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b")))) | ||||
|     result = DataFrame([series1, series2]) | ||||
|     expected = DataFrame({(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,349 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import ( | ||||
|     PerformanceWarning, | ||||
|     UnsortedIndexError, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     RangeIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.indexes.frozen import FrozenList | ||||
|  | ||||
|  | ||||
| def test_sortlevel(idx): | ||||
|     tuples = list(idx) | ||||
|     np.random.default_rng(2).shuffle(tuples) | ||||
|  | ||||
|     index = MultiIndex.from_tuples(tuples) | ||||
|  | ||||
|     sorted_idx, _ = index.sortlevel(0) | ||||
|     expected = MultiIndex.from_tuples(sorted(tuples)) | ||||
|     assert sorted_idx.equals(expected) | ||||
|  | ||||
|     sorted_idx, _ = index.sortlevel(0, ascending=False) | ||||
|     assert sorted_idx.equals(expected[::-1]) | ||||
|  | ||||
|     sorted_idx, _ = index.sortlevel(1) | ||||
|     by1 = sorted(tuples, key=lambda x: (x[1], x[0])) | ||||
|     expected = MultiIndex.from_tuples(by1) | ||||
|     assert sorted_idx.equals(expected) | ||||
|  | ||||
|     sorted_idx, _ = index.sortlevel(1, ascending=False) | ||||
|     assert sorted_idx.equals(expected[::-1]) | ||||
|  | ||||
|  | ||||
| def test_sortlevel_not_sort_remaining(): | ||||
|     mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) | ||||
|     sorted_idx, _ = mi.sortlevel("A", sort_remaining=False) | ||||
|     assert sorted_idx.equals(mi) | ||||
|  | ||||
|  | ||||
| def test_sortlevel_deterministic(): | ||||
|     tuples = [ | ||||
|         ("bar", "one"), | ||||
|         ("foo", "two"), | ||||
|         ("qux", "two"), | ||||
|         ("foo", "one"), | ||||
|         ("baz", "two"), | ||||
|         ("qux", "one"), | ||||
|     ] | ||||
|  | ||||
|     index = MultiIndex.from_tuples(tuples) | ||||
|  | ||||
|     sorted_idx, _ = index.sortlevel(0) | ||||
|     expected = MultiIndex.from_tuples(sorted(tuples)) | ||||
|     assert sorted_idx.equals(expected) | ||||
|  | ||||
|     sorted_idx, _ = index.sortlevel(0, ascending=False) | ||||
|     assert sorted_idx.equals(expected[::-1]) | ||||
|  | ||||
|     sorted_idx, _ = index.sortlevel(1) | ||||
|     by1 = sorted(tuples, key=lambda x: (x[1], x[0])) | ||||
|     expected = MultiIndex.from_tuples(by1) | ||||
|     assert sorted_idx.equals(expected) | ||||
|  | ||||
|     sorted_idx, _ = index.sortlevel(1, ascending=False) | ||||
|     assert sorted_idx.equals(expected[::-1]) | ||||
|  | ||||
|  | ||||
| def test_sortlevel_na_position(): | ||||
|     # GH#51612 | ||||
|     midx = MultiIndex.from_tuples([(1, np.nan), (1, 1)]) | ||||
|     result = midx.sortlevel(level=[0, 1], na_position="last")[0] | ||||
|     expected = MultiIndex.from_tuples([(1, 1), (1, np.nan)]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_numpy_argsort(idx): | ||||
|     result = np.argsort(idx) | ||||
|     expected = idx.argsort() | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # these are the only two types that perform | ||||
|     # pandas compatibility input validation - the | ||||
|     # rest already perform separate (or no) such | ||||
|     # validation via their 'values' attribute as | ||||
|     # defined in pandas.core.indexes/base.py - they | ||||
|     # cannot be changed at the moment due to | ||||
|     # backwards compatibility concerns | ||||
|     if isinstance(type(idx), (CategoricalIndex, RangeIndex)): | ||||
|         msg = "the 'axis' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.argsort(idx, axis=1) | ||||
|  | ||||
|         msg = "the 'kind' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.argsort(idx, kind="mergesort") | ||||
|  | ||||
|         msg = "the 'order' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.argsort(idx, order=("a", "b")) | ||||
|  | ||||
|  | ||||
| def test_unsortedindex(): | ||||
|     # GH 11897 | ||||
|     mi = MultiIndex.from_tuples( | ||||
|         [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], | ||||
|         names=["one", "two"], | ||||
|     ) | ||||
|     df = DataFrame([[i, 10 * i] for i in range(6)], index=mi, columns=["one", "two"]) | ||||
|  | ||||
|     # GH 16734: not sorted, but no real slicing | ||||
|     result = df.loc(axis=0)["z", "a"] | ||||
|     expected = df.iloc[0] | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     msg = ( | ||||
|         "MultiIndex slicing requires the index to be lexsorted: " | ||||
|         r"slicing on levels \[1\], lexsort depth 0" | ||||
|     ) | ||||
|     with pytest.raises(UnsortedIndexError, match=msg): | ||||
|         df.loc(axis=0)["z", slice("a")] | ||||
|     df.sort_index(inplace=True) | ||||
|     assert len(df.loc(axis=0)["z", :]) == 2 | ||||
|  | ||||
|     with pytest.raises(KeyError, match="'q'"): | ||||
|         df.loc(axis=0)["q", :] | ||||
|  | ||||
|  | ||||
| def test_unsortedindex_doc_examples(): | ||||
|     # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex | ||||
|     dfm = DataFrame( | ||||
|         { | ||||
|             "jim": [0, 0, 1, 1], | ||||
|             "joe": ["x", "x", "z", "y"], | ||||
|             "jolie": np.random.default_rng(2).random(4), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     dfm = dfm.set_index(["jim", "joe"]) | ||||
|     with tm.assert_produces_warning(PerformanceWarning): | ||||
|         dfm.loc[(1, "z")] | ||||
|  | ||||
|     msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)" | ||||
|     with pytest.raises(UnsortedIndexError, match=msg): | ||||
|         dfm.loc[(0, "y"):(1, "z")] | ||||
|  | ||||
|     assert not dfm.index._is_lexsorted() | ||||
|     assert dfm.index._lexsort_depth == 1 | ||||
|  | ||||
|     # sort it | ||||
|     dfm = dfm.sort_index() | ||||
|     dfm.loc[(1, "z")] | ||||
|     dfm.loc[(0, "y"):(1, "z")] | ||||
|  | ||||
|     assert dfm.index._is_lexsorted() | ||||
|     assert dfm.index._lexsort_depth == 2 | ||||
|  | ||||
|  | ||||
| def test_reconstruct_sort(): | ||||
|     # starts off lexsorted & monotonic | ||||
|     mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) | ||||
|     assert mi.is_monotonic_increasing | ||||
|     recons = mi._sort_levels_monotonic() | ||||
|     assert recons.is_monotonic_increasing | ||||
|     assert mi is recons | ||||
|  | ||||
|     assert mi.equals(recons) | ||||
|     assert Index(mi.values).equals(Index(recons.values)) | ||||
|  | ||||
|     # cannot convert to lexsorted | ||||
|     mi = MultiIndex.from_tuples( | ||||
|         [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], | ||||
|         names=["one", "two"], | ||||
|     ) | ||||
|     assert not mi.is_monotonic_increasing | ||||
|     recons = mi._sort_levels_monotonic() | ||||
|     assert not recons.is_monotonic_increasing | ||||
|     assert mi.equals(recons) | ||||
|     assert Index(mi.values).equals(Index(recons.values)) | ||||
|  | ||||
|     # cannot convert to lexsorted | ||||
|     mi = MultiIndex( | ||||
|         levels=[["b", "d", "a"], [1, 2, 3]], | ||||
|         codes=[[0, 1, 0, 2], [2, 0, 0, 1]], | ||||
|         names=["col1", "col2"], | ||||
|     ) | ||||
|     assert not mi.is_monotonic_increasing | ||||
|     recons = mi._sort_levels_monotonic() | ||||
|     assert not recons.is_monotonic_increasing | ||||
|     assert mi.equals(recons) | ||||
|     assert Index(mi.values).equals(Index(recons.values)) | ||||
|  | ||||
|  | ||||
| def test_reconstruct_remove_unused(): | ||||
|     # xref to GH 2770 | ||||
|     df = DataFrame( | ||||
|         [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]], | ||||
|         columns=["first", "second", "third"], | ||||
|     ) | ||||
|     df2 = df.set_index(["first", "second"], drop=False) | ||||
|     df2 = df2[df2["first"] != "deleteMe"] | ||||
|  | ||||
|     # removed levels are there | ||||
|     expected = MultiIndex( | ||||
|         levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]], | ||||
|         codes=[[1, 2], [1, 2]], | ||||
|         names=["first", "second"], | ||||
|     ) | ||||
|     result = df2.index | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     expected = MultiIndex( | ||||
|         levels=[["keepMe", "keepMeToo"], [2, 3]], | ||||
|         codes=[[0, 1], [0, 1]], | ||||
|         names=["first", "second"], | ||||
|     ) | ||||
|     result = df2.index.remove_unused_levels() | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # idempotent | ||||
|     result2 = result.remove_unused_levels() | ||||
|     tm.assert_index_equal(result2, expected) | ||||
|     assert result2.is_(result) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")] | ||||
| ) | ||||
| def test_remove_unused_levels_large(first_type, second_type): | ||||
|     # GH16556 | ||||
|  | ||||
|     # because tests should be deterministic (and this test in particular | ||||
|     # checks that levels are removed, which is not the case for every | ||||
|     # random input): | ||||
|     rng = np.random.default_rng(10)  # seed is arbitrary value that works | ||||
|  | ||||
|     size = 1 << 16 | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "first": rng.integers(0, 1 << 13, size).astype(first_type), | ||||
|             "second": rng.integers(0, 1 << 10, size).astype(second_type), | ||||
|             "third": rng.random(size), | ||||
|         } | ||||
|     ) | ||||
|     df = df.groupby(["first", "second"]).sum() | ||||
|     df = df[df.third < 0.1] | ||||
|  | ||||
|     result = df.index.remove_unused_levels() | ||||
|     assert len(result.levels[0]) < len(df.index.levels[0]) | ||||
|     assert len(result.levels[1]) < len(df.index.levels[1]) | ||||
|     assert result.equals(df.index) | ||||
|  | ||||
|     expected = df.reset_index().set_index(["first", "second"]).index | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]]) | ||||
| @pytest.mark.parametrize( | ||||
|     "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]] | ||||
| ) | ||||
| def test_remove_unused_nan(level0, level1): | ||||
|     # GH 18417 | ||||
|     mi = MultiIndex(levels=[level0, level1], codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) | ||||
|  | ||||
|     result = mi.remove_unused_levels() | ||||
|     tm.assert_index_equal(result, mi) | ||||
|     for level in 0, 1: | ||||
|         assert "unused" not in result.levels[level] | ||||
|  | ||||
|  | ||||
| def test_argsort(idx): | ||||
|     result = idx.argsort() | ||||
|     expected = idx.values.argsort() | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_remove_unused_levels_with_nan(): | ||||
|     # GH 37510 | ||||
|     idx = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"]) | ||||
|     idx = idx.set_levels(["a", np.nan], level="id1") | ||||
|     idx = idx.remove_unused_levels() | ||||
|     result = idx.levels | ||||
|     expected = FrozenList([["a", np.nan], [4]]) | ||||
|     assert str(result) == str(expected) | ||||
|  | ||||
|  | ||||
| def test_sort_values_nan(): | ||||
|     # GH48495, GH48626 | ||||
|     midx = MultiIndex(levels=[["A", "B", "C"], ["D"]], codes=[[1, 0, 2], [-1, -1, 0]]) | ||||
|     result = midx.sort_values() | ||||
|     expected = MultiIndex( | ||||
|         levels=[["A", "B", "C"], ["D"]], codes=[[0, 1, 2], [-1, -1, 0]] | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_sort_values_incomparable(): | ||||
|     # GH48495 | ||||
|     mi = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             [1, Timestamp("2000-01-01")], | ||||
|             [3, 4], | ||||
|         ] | ||||
|     ) | ||||
|     match = "'<' not supported between instances of 'Timestamp' and 'int'" | ||||
|     with pytest.raises(TypeError, match=match): | ||||
|         mi.sort_values() | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("na_position", ["first", "last"]) | ||||
| @pytest.mark.parametrize("dtype", ["float64", "Int64", "Float64"]) | ||||
| def test_sort_values_with_na_na_position(dtype, na_position): | ||||
|     # 51612 | ||||
|     arrays = [ | ||||
|         Series([1, 1, 2], dtype=dtype), | ||||
|         Series([1, None, 3], dtype=dtype), | ||||
|     ] | ||||
|     index = MultiIndex.from_arrays(arrays) | ||||
|     result = index.sort_values(na_position=na_position) | ||||
|     if na_position == "first": | ||||
|         arrays = [ | ||||
|             Series([1, 1, 2], dtype=dtype), | ||||
|             Series([None, 1, 3], dtype=dtype), | ||||
|         ] | ||||
|     else: | ||||
|         arrays = [ | ||||
|             Series([1, 1, 2], dtype=dtype), | ||||
|             Series([1, None, 3], dtype=dtype), | ||||
|         ] | ||||
|     expected = MultiIndex.from_arrays(arrays) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_sort_unnecessary_warning(): | ||||
|     # GH#55386 | ||||
|     midx = MultiIndex.from_tuples([(1.5, 2), (3.5, 3), (0, 1)]) | ||||
|     midx = midx.set_levels([2.5, np.nan, 1], level=0) | ||||
|     result = midx.sort_values() | ||||
|     expected = MultiIndex.from_tuples([(1, 3), (2.5, 1), (np.nan, 2)]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,78 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_take(idx): | ||||
|     indexer = [4, 3, 0, 2] | ||||
|     result = idx.take(indexer) | ||||
|     expected = idx[indexer] | ||||
|     assert result.equals(expected) | ||||
|  | ||||
|     # GH 10791 | ||||
|     msg = "'MultiIndex' object has no attribute 'freq'" | ||||
|     with pytest.raises(AttributeError, match=msg): | ||||
|         idx.freq | ||||
|  | ||||
|  | ||||
| def test_take_invalid_kwargs(idx): | ||||
|     indices = [1, 2] | ||||
|  | ||||
|     msg = r"take\(\) got an unexpected keyword argument 'foo'" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         idx.take(indices, foo=2) | ||||
|  | ||||
|     msg = "the 'out' parameter is not supported" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         idx.take(indices, out=indices) | ||||
|  | ||||
|     msg = "the 'mode' parameter is not supported" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         idx.take(indices, mode="clip") | ||||
|  | ||||
|  | ||||
| def test_take_fill_value(): | ||||
|     # GH 12631 | ||||
|     vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] | ||||
|     idx = pd.MultiIndex.from_product(vals, names=["str", "dt"]) | ||||
|  | ||||
|     result = idx.take(np.array([1, 0, -1])) | ||||
|     exp_vals = [ | ||||
|         ("A", pd.Timestamp("2011-01-02")), | ||||
|         ("A", pd.Timestamp("2011-01-01")), | ||||
|         ("B", pd.Timestamp("2011-01-02")), | ||||
|     ] | ||||
|     expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # fill_value | ||||
|     result = idx.take(np.array([1, 0, -1]), fill_value=True) | ||||
|     exp_vals = [ | ||||
|         ("A", pd.Timestamp("2011-01-02")), | ||||
|         ("A", pd.Timestamp("2011-01-01")), | ||||
|         (np.nan, pd.NaT), | ||||
|     ] | ||||
|     expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # allow_fill=False | ||||
|     result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) | ||||
|     exp_vals = [ | ||||
|         ("A", pd.Timestamp("2011-01-02")), | ||||
|         ("A", pd.Timestamp("2011-01-01")), | ||||
|         ("B", pd.Timestamp("2011-01-02")), | ||||
|     ] | ||||
|     expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         idx.take(np.array([1, 0, -2]), fill_value=True) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         idx.take(np.array([1, 0, -5]), fill_value=True) | ||||
|  | ||||
|     msg = "index -5 is out of bounds for( axis 0 with)? size 4" | ||||
|     with pytest.raises(IndexError, match=msg): | ||||
|         idx.take(np.array([1, -5])) | ||||
		Reference in New Issue
	
	Block a user