done
This commit is contained in:
		| @ -0,0 +1,78 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestIndexConstructor:
    # Tests for the Index constructor, specifically for cases that do
    #  not return a subclass

    @pytest.mark.parametrize("value", [1, np.int64(1)])
    def test_constructor_corner(self, value):
        # Scalars are rejected: Index requires a collection.
        expected_msg = (
            r"Index\(\.\.\.\) must be called with a collection of some "
            f"kind, {value} was passed"
        )
        with pytest.raises(TypeError, match=expected_msg):
            Index(value)

    @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
    def test_construction_list_mixed_tuples(self, index_vals):
        # see gh-10697: a mixed list of tuples and scalars must yield a
        # plain Index regardless of element order.
        idx = Index(index_vals)
        assert isinstance(idx, Index)
        assert not isinstance(idx, MultiIndex)

    def test_constructor_cast(self):
        # Non-numeric strings cannot be cast to a float dtype.
        with pytest.raises(ValueError, match="could not convert string to float"):
            Index(["a", "b", "c"], dtype=float)

    @pytest.mark.parametrize("tuple_list", [[()], [(), ()]])
    def test_construct_empty_tuples(self, tuple_list):
        # GH #45608: lists of (possibly empty) tuples build a MultiIndex.
        tm.assert_index_equal(Index(tuple_list), MultiIndex.from_tuples(tuple_list))

    def test_index_string_inference(self):
        # GH#54430: with future.infer_string, all-string data infers the
        # string dtype while mixed data stays object.
        expected = Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan))
        with pd.option_context("future.infer_string", True):
            result = Index(["a", "b"])
        tm.assert_index_equal(result, expected)

        expected = Index(["a", 1], dtype="object")
        with pd.option_context("future.infer_string", True):
            result = Index(["a", 1])
        tm.assert_index_equal(result, expected)

    def test_inference_on_pandas_objects(self):
        # GH#56012: re-wrapping an object-dtype pandas container triggers
        # dtype inference (with a deprecation warning) instead of keeping
        # object dtype.
        for container in (
            Index([pd.Timestamp("2019-12-31")], dtype=object),
            Series([pd.Timestamp("2019-12-31")], dtype=object),
        ):
            with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
                result = Index(container)
            assert result.dtype != np.object_

    def test_constructor_not_read_only(self):
        # GH#57130: under copy-on-write the constructed values stay writeable.
        ser = Series([1, 2], dtype=object)
        with pd.option_context("mode.copy_on_write", True):
            idx = Index(ser)
            assert idx._values.flags.writeable
| @ -0,0 +1,163 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
| import pandas._config.config as cf | ||||
|  | ||||
| from pandas import Index | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestIndexRendering:
    # Tests for repr/formatting of the base Index class, including
    # wrapping, truncation, and East-Asian-width display options.

    def test_repr_is_valid_construction_code(self):
        # for the case of Index, where the repr is traditional rather than
        # stylized
        idx = Index(["a", "b"])
        # round-trip: the repr must be executable construction code
        res = eval(repr(idx))
        tm.assert_index_equal(res, idx)

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # ASCII
            # short
            (
                Index(["a", "bb", "ccc"]),
                """Index(['a', 'bb', 'ccc'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["a", "bb", "ccc"] * 10),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object')",
            ),
            # truncated
            (
                Index(["a", "bb", "ccc"] * 100),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n"
                "       ...\n"
                "       'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object', length=300)",
            ),
            # Non-ASCII
            # short
            (
                Index(["あ", "いい", "ううう"]),
                """Index(['あ', 'いい', 'ううう'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう'],\n"
                    "      dtype='object')"
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr(self, index, expected):
        # Default display options: wraps long output and truncates past
        # display.max_seq_items.
        result = repr(index)
        assert result == expected

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # short
            (
                Index(["あ", "いい", "ううう"]),
                ("Index(['あ', 'いい', 'ううう'], dtype='object')"),
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう'],\n"
                    "      dtype='object')"
                    ""
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', "
                    "'いい', 'ううう', 'あ', 'いい',\n"
                    "       'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr_with_unicode_option(self, index, expected):
        # With east_asian_width enabled, wide characters count as two
        # columns, so lines wrap earlier than in the default repr above.
        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            result = repr(index)
            assert result == expected

    def test_repr_summary(self):
        # A low max_seq_items forces an elided ("...") summary repr.
        with cf.option_context("display.max_seq_items", 10):
            result = repr(Index(np.arange(1000)))
            assert len(result) < 200
            assert "..." in result

    def test_summary_bug(self):
        # GH#3869: values containing %/{} placeholders must appear verbatim
        ind = Index(["{other}%s", "~:{range}:0"], name="A")
        result = ind._summary()
        # shouldn't be formatted accidentally.
        assert "~:{range}:0" in result
        assert "{other}%s" in result

    def test_index_repr_bool_nan(self):
        # GH32146: bool/NaN mix formats NaN as "NaN" via format() but as
        # "nan" in the repr.
        arr = Index([True, False, np.nan], dtype=object)
        msg = "Index.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            exp1 = arr.format()
        out1 = ["True", "False", "NaN"]
        assert out1 == exp1

        exp2 = repr(arr)
        out2 = "Index([True, False, nan], dtype='object')"
        assert out2 == exp2

    def test_format_different_scalar_lengths(self):
        # GH#35439: format() must not pad entries to a common width
        idx = Index(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"Index\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected
| @ -0,0 +1,104 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import index as libindex | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     NaT, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestGetSliceBounds:
    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_within(self, side, expected):
        # For a label present in the index: the bound is its position
        # (side="left") or position + 1 (side="right").
        idx = Index(list("abcdef"))
        assert idx.get_slice_bound("e", side=side) == expected

    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize(
        "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)]
    )
    def test_get_slice_bounds_outside(self, side, expected, data, bound):
        # Labels beyond either end clip to len(index) or 0, whatever the side.
        idx = Index(data)
        assert idx.get_slice_bound(bound, side=side) == expected

    def test_get_slice_bounds_invalid_side(self):
        # Only "left"/"right" are accepted for the side keyword.
        with pytest.raises(ValueError, match="Invalid value for side kwarg"):
            Index([]).get_slice_bound("a", side="middle")
|  | ||||
|  | ||||
class TestGetIndexerNonUnique:
    def test_get_indexer_non_unique_dtype_mismatch(self):
        # GH#25459: with no common dtype, every target is reported missing.
        indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0]))
        tm.assert_numpy_array_equal(indexes, np.array([-1], dtype=np.intp))
        tm.assert_numpy_array_equal(missing, np.array([0], dtype=np.intp))

    @pytest.mark.parametrize(
        "idx_values,idx_non_unique",
        [
            ([np.nan, 100, 200, 100], [np.nan, 100]),
            ([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]),
        ],
    )
    def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique):
        # NaN targets are matched by position; nothing is missing.
        idx = Index(idx_values)

        indexes, missing = idx.get_indexer_non_unique(Index([np.nan]))
        tm.assert_numpy_array_equal(indexes, np.array([0], dtype=np.intp))
        tm.assert_numpy_array_equal(missing, np.array([], dtype=np.intp))

        # A non-unique target hits every matching position (100 twice).
        indexes, missing = idx.get_indexer_non_unique(Index(idx_non_unique))
        tm.assert_numpy_array_equal(indexes, np.array([0, 1, 3], dtype=np.intp))
        tm.assert_numpy_array_equal(missing, np.array([], dtype=np.intp))
|  | ||||
|  | ||||
class TestGetLoc:
    @pytest.mark.slow  # to_flat_index takes a while
    def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
        # Shrink _SIZE_CUTOFF so the libindex path where _bin_search (vs
        # ndarray.searchsorted) matters is exercised.
        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
            letters = list("ABCD")
            dates = pd.date_range("2016-01-01", periods=10)

            flat = pd.MultiIndex.from_product(
                [letters, range(5), dates]
            ).to_flat_index()

            middle = len(flat) // 2
            key = flat[middle]

            found = flat.get_loc(key)
        assert found == middle

    def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
        # case that goes through _maybe_get_bool_indexer
        idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)

        # np.nan is located without raising KeyError
        assert idx.get_loc(np.nan) == 1

        # None matches only None (never np.nan); duplicates yield a bool mask
        mask = idx.get_loc(None)
        tm.assert_numpy_array_equal(
            mask, np.array([False, False, True, False, False, True])
        )

        # a mismatched NA kind (NaT) does not match at all
        with pytest.raises(KeyError, match="NaT"):
            idx.get_loc(NaT)
|  | ||||
|  | ||||
def test_getitem_boolean_ea_indexer():
    # GH#45806: a boolean-extension-dtype Series used as an indexer selects
    # only the True positions; pd.NA is treated as False, not an error.
    mask = pd.Series([True, False, pd.NA], dtype="boolean")
    selected = mask.index[mask]
    tm.assert_index_equal(selected, Index([0]))
| @ -0,0 +1,11 @@ | ||||
| from pandas import Index | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_pickle_preserves_object_dtype():
    # GH#43188, GH#43155: unpickling must not re-infer a numeric dtype for
    # an index that was explicitly constructed with object dtype.
    original = Index([1, 2, 3], dtype=object)

    unpickled = tm.round_trip_pickle(original)
    assert unpickled.dtype == object
    tm.assert_index_equal(original, unpickled)
| @ -0,0 +1,97 @@ | ||||
| """ | ||||
| Tests for ndarray-like method on the base Index class | ||||
| """ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import Index | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestReshape:
    def test_repeat(self):
        # Each element is repeated `repeats` times, preserving order.
        idx = Index([1, 2, 3])
        tm.assert_index_equal(idx.repeat(2), Index([1, 1, 2, 2, 3, 3]))

    def test_insert(self):
        # GH 7256: insert follows Python-list semantics for positive and
        # negative positions.
        base = Index(["b", "c", "d"])

        # insert at the front
        tm.assert_index_equal(Index(["a", "b", "c", "d"]), base.insert(0, "a"))

        # negative position inserts before that element, like list.insert
        tm.assert_index_equal(Index(["b", "c", "e", "d"]), base.insert(-1, "e"))

        # equivalent positive/negative positions (neither 0 nor -1) agree
        tm.assert_index_equal(base.insert(1, "z"), base.insert(-2, "z"))

        # inserting into an empty index
        empty = Index([])
        tm.assert_index_equal(Index(["a"]), empty.insert(0, "a"))

    def test_insert_missing(self, request, nulls_fixture, using_infer_string):
        if using_infer_string and nulls_fixture is pd.NA:
            request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
        # GH#22295: NA values must be inserted unchanged, not mangled.
        base = Index(list("abc"), dtype=object)
        result = base.insert(1, Index([nulls_fixture], dtype=object))
        expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")]
    )
    @pytest.mark.parametrize("loc", [-1, 2])
    def test_insert_datetime_into_object(self, loc, val):
        # GH#44509: inserting a non-string scalar keeps the inserted
        # value's exact type rather than coercing it.
        result = Index(["1", "2", "3"]).insert(loc, val)
        expected = Index(["1", "2", val, "3"])
        tm.assert_index_equal(result, expected)
        assert type(expected[2]) is type(val)

    def test_insert_none_into_string_numpy(self, string_dtype_no_object):
        # GH#55365: None inserts as a missing value in string dtypes.
        idx = Index(["a", "b", "c"], dtype=string_dtype_no_object)
        result = idx.insert(-1, None)
        tm.assert_index_equal(
            result, Index(["a", "b", None, "c"], dtype=string_dtype_no_object)
        )

    @pytest.mark.parametrize(
        "pos,expected",
        [
            (0, Index(["b", "c", "d"], name="index")),
            (-1, Index(["a", "b", "c"], name="index")),
        ],
    )
    def test_delete(self, pos, expected):
        # delete removes the element at `pos` and preserves the name.
        idx = Index(["a", "b", "c", "d"], name="index")
        result = idx.delete(pos)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    def test_delete_raises(self):
        # Out-of-bounds positions raise IndexError, mirroring numpy.
        idx = Index(["a", "b", "c", "d"], name="index")
        msg = "index 5 is out of bounds for axis 0 with size 4"
        with pytest.raises(IndexError, match=msg):
            idx.delete(5)

    def test_append_multiple(self):
        # append accepts a list of indexes and concatenates them in order.
        idx = Index(["a", "b", "c", "d", "e", "f"])

        pieces = [idx[:2], idx[2:4], idx[4:]]
        tm.assert_index_equal(pieces[0].append(pieces[1:]), idx)

        # appending an empty list is a no-op
        tm.assert_index_equal(idx.append([]), idx)
| @ -0,0 +1,266 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.algorithms import safe_sort | ||||
|  | ||||
|  | ||||
def equal_contents(arr1, arr2) -> bool:
    """
    Return True when arr1 and arr2 hold the same set of unique elements.

    Order and multiplicity are ignored; only membership matters.
    """
    return set(arr1) == set(arr2)
|  | ||||
|  | ||||
class TestIndexSetOps:
    # Set-operation tests (union/intersection/difference/
    # symmetric_difference) for object-dtype Index. The bare `sort`
    # parameter used by several tests is a pytest fixture (presumably
    # yielding None/False — defined in a conftest outside this file).

    @pytest.mark.parametrize(
        "method", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_setops_sort_validation(self, method):
        # The `sort` keyword accepts only a fixed set of values; others raise.
        idx1 = Index(["a", "b"])
        idx2 = Index(["b", "c"])

        with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
            getattr(idx1, method)(idx2, sort=2)

        # sort=True is an accepted value and must not raise
        getattr(idx1, method)(idx2, sort=True)

    def test_setops_preserve_object_dtype(self):
        # object dtype must survive set ops, on both the monotonic and
        # non-monotonic code paths.
        idx = Index([1, 2, 3], dtype=object)
        result = idx.intersection(idx[1:])
        expected = idx[1:]
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, intersection goes through
        #  a different route
        result = idx.intersection(idx[1:][::-1])
        tm.assert_index_equal(result, expected)

        # _union returns raw values (ndarray), not an Index
        result = idx._union(idx[1:], sort=None)
        expected = idx
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:], sort=None)
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, _union goes through
        #  a different route
        result = idx._union(idx[1:][::-1], sort=None)
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:][::-1], sort=None)
        tm.assert_index_equal(result, expected)

    def test_union_base(self):
        # mixed int/str values: default union sorts ints before strings
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(second)

        expected = Index([0, 1, 2, "a", "b", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_union_different_type_base(self, klass):
        # GH 10149: union with a non-Index collection of the same values
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(klass(second.values))

        assert equal_contents(result, index)

    def test_union_sort_other_incomparable(self):
        # https://github.com/pandas-dev/pandas/issues/24959
        # int vs Timestamp cannot be ordered: sort=None warns and keeps
        # the original order, sort=False does not warn.
        idx = Index([1, pd.Timestamp("2000")])
        # default (sort=None)
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1])

        tm.assert_index_equal(result, idx)

        # sort=None
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1], sort=None)
        tm.assert_index_equal(result, idx)

        # sort=False
        result = idx.union(idx[:1], sort=False)
        tm.assert_index_equal(result, idx)

    def test_union_sort_other_incomparable_true(self):
        # sort=True on incomparable values raises instead of warning
        idx = Index([1, pd.Timestamp("2000")])
        with pytest.raises(TypeError, match=".*"):
            idx.union(idx[:1], sort=True)

    def test_intersection_equal_sort_true(self):
        # even a self-intersection is sorted when sort=True
        idx = Index(["c", "a", "b"])
        sorted_ = Index(["a", "b", "c"])
        tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)

    def test_intersection_base(self, sort):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        # sort=None sorts the result; sort=False keeps first's order
        expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
        result = first.intersection(second, sort=sort)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_intersection_different_type_base(self, klass, sort):
        # GH 10149: intersection with a non-Index collection
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        result = first.intersection(klass(second.values), sort=sort)
        assert equal_contents(result, second)

    def test_intersection_nosort(self):
        # default intersection preserves the calling index's order
        result = Index(["c", "b", "a"]).intersection(["b", "a"])
        expected = Index(["b", "a"])
        tm.assert_index_equal(result, expected)

    def test_intersection_equal_sort(self):
        # self-intersection without sort=True keeps the original order
        idx = Index(["c", "a", "b"])
        tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
        tm.assert_index_equal(idx.intersection(idx, sort=None), idx)

    def test_intersection_str_dates(self, sort):
        # disjoint object-dtype indexes (dates vs strings) -> empty result
        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

        i1 = Index(dt_dates, dtype=object)
        i2 = Index(["aa"], dtype=object)
        result = i2.intersection(i1, sort=sort)

        assert len(result) == 0

    @pytest.mark.parametrize(
        "index2,expected_arr",
        [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
    )
    def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
        # non-monotonic non-unique
        index1 = Index(["A", "B", "A", "C"])
        expected = Index(expected_arr)
        result = index1.intersection(index2, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

    def test_difference_base(self, sort):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        # note: `sort` is passed positionally here
        result = first.difference(second, sort)
        expected = Index([0, "a", 1])
        if sort is None:
            # safe_sort handles the mixed int/str values
            expected = Index(safe_sort(expected))
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        # elements in exactly one of the two ("b" is shared and dropped)
        result = first.symmetric_difference(second)
        expected = Index([0, 1, 2, "a", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "method,expected,sort",
        [
            (
                "intersection",
                np.array(
                    [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                False,
            ),
            (
                "intersection",
                np.array(
                    [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
            (
                "union",
                np.array(
                    [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
        ],
    )
    def test_tuple_union_bug(self, method, expected, sort):
        # set ops on structured-dtype arrays must stay 1-dimensional
        index1 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                dtype=[("num", int), ("let", "S1")],
            )
        )
        index2 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
                dtype=[("num", int), ("let", "S1")],
            )
        )

        result = getattr(index1, method)(index2, sort=sort)
        assert result.ndim == 1

        expected = Index(expected)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("first_list", [["b", "a"], []])
    @pytest.mark.parametrize("second_list", [["a", "b"], []])
    @pytest.mark.parametrize(
        "first_name, second_name, expected_name",
        [("A", "B", None), (None, "B", None), ("A", None, None)],
    )
    def test_union_name_preservation(
        self, first_list, second_list, first_name, second_name, expected_name, sort
    ):
        # mismatched names always drop to None on union
        first = Index(first_list, name=first_name)
        second = Index(second_list, name=second_name)
        union = first.union(second, sort=sort)

        vals = set(first_list).union(second_list)

        if sort is None and len(first_list) > 0 and len(second_list) > 0:
            # both non-empty and sort=None: result comes back sorted
            expected = Index(sorted(vals), name=expected_name)
            tm.assert_index_equal(union, expected)
        else:
            # otherwise compare contents only, ignoring order
            expected = Index(vals, name=expected_name)
            tm.assert_index_equal(union.sort_values(), expected.sort_values())

    @pytest.mark.parametrize(
        "diff_type, expected",
        [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
    )
    def test_difference_object_type(self, diff_type, expected):
        # GH 13432: difference ops work on mixed int/str object indexes
        idx1 = Index([0, 1, "A", "B"])
        idx2 = Index([0, 2, "A", "C"])
        result = getattr(idx1, diff_type)(idx2)
        expected = Index(expected)
        tm.assert_index_equal(result, expected)
| @ -0,0 +1,13 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import Index | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestWhere:
    def test_where_intlike_str_doesnt_cast_ints(self):
        # The replacement string "2" must not be coerced to the int 2;
        # masked-out positions keep the string verbatim.
        idx = Index(range(3))
        cond = np.array([True, False, True])
        replaced = idx.where(cond, "2")
        tm.assert_index_equal(replaced, Index([0, "2", 2]))
| @ -0,0 +1,62 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestAppend:
    @pytest.fixture
    def ci(self):
        # unordered categorical with a fixed, non-sorted category order
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_append(self, ci):
        # appending pieces that share categories reassembles the original
        result = ci[:3].append(ci[3:])
        tm.assert_index_equal(result, ci, exact=True)

        pieces = [ci[:1], ci[1:3], ci[3:]]
        result = pieces[0].append(pieces[1:])
        tm.assert_index_equal(result, ci, exact=True)

    def test_append_empty(self, ci):
        # appending an empty list is a no-op
        tm.assert_index_equal(ci.append([]), ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        # appending raw Categoricals with different or reordered categories
        # is rejected
        msg = "all inputs must be Index"
        for other in [
            ci.values.set_categories(list("abcd")),
            ci.values.reorder_categories(list("abc")),
        ]:
            with pytest.raises(TypeError, match=msg):
                ci.append(other)

    def test_append_category_objects(self, ci):
        # a plain Index whose values are all existing categories stays
        # categorical
        result = ci.append(Index(["c", "a"]))
        expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_non_categories(self, ci):
        # values outside the categories -> cast to object via concat_compat
        result = ci.append(Index(["a", "d"]))
        expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_object(self, ci):
        # GH#14298 - if base object is not categorical -> coerce to object
        result = Index(["c", "a"]).append(ci)
        tm.assert_index_equal(result, Index(list("caaabbca")), exact=True)

    def test_append_to_another(self):
        # hits Index._concat
        head = Index(["a", "b"])
        tail = CategoricalIndex(["d", "e"])
        tm.assert_index_equal(head.append(tail), Index(["a", "b", "d", "e"]))
| @ -0,0 +1,90 @@ | ||||
| from datetime import date | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestAstype:
    def test_astype(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = ci.astype(object)
        tm.assert_index_equal(result, Index(np.array(ci), dtype=object))

        # equal contents, but no longer a CategoricalIndex
        assert result.equals(ci)
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

        # interval-backed categorical -> cast back to an IntervalIndex,
        # with the missing code (-1) becoming NaN
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")
        ci = CategoricalIndex(
            Categorical.from_codes([0, 1, -1], categories=ii, ordered=True)
        )

        result = ci.astype("interval")
        expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(result, expected)

        tm.assert_index_equal(IntervalIndex(result.values), expected)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        # GH#18630
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name:
            index = index.rename(name)

        # standard categories: only the orderedness changes
        result = index.astype(CategoricalDtype(ordered=dtype_ordered))
        expected = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(result, expected)

        # non-standard categories: drop the last unique value from the dtype
        dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            tm.assert_index_equal(index.astype("category"), index)

    @pytest.mark.parametrize("box", [True, False])
    def test_categorical_date_roundtrip(self, box):
        # astype to categorical and back should preserve date objects
        today = date.today()

        obj = Index([today, today])
        assert obj.dtype == object
        if box:
            obj = obj.array

        roundtripped = obj.astype("category").astype(object)
        assert roundtripped.dtype == object
        assert type(roundtripped[0]) is date
| @ -0,0 +1,391 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import index as libindex | ||||
| from pandas._libs.arrays import NDArrayBacked | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.indexes.api import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestCategoricalIndex:
    """Behavioral tests for CategoricalIndex: insert/delete, monotonicity,
    duplicate handling, uniqueness, repr, isin, identical, and copy semantics.
    """

    @pytest.fixture
    def simple_index(self) -> CategoricalIndex:
        # Unordered, with categories in a deliberately non-sorted order.
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_can_hold_identifiers(self):
        # A category value pulled from the index is accepted by the
        # identifier-holding predicate (used for attribute-style access).
        idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is True

    def test_insert(self, simple_index):
        ci = simple_index
        categories = ci.categories

        # test 0th element
        result = ci.insert(0, "a")
        expected = CategoricalIndex(list("aaabbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test Nth element that follows Python list behavior
        result = ci.insert(-1, "a")
        expected = CategoricalIndex(list("aabbcaa"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test empty
        result = CategoricalIndex([], categories=categories).insert(0, "a")
        expected = CategoricalIndex(["a"], categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # invalid -> cast to object
        expected = ci.astype(object).insert(0, "d")
        result = ci.insert(0, "d").astype(object)
        tm.assert_index_equal(result, expected, exact=True)

        # GH 18295 (test missing)
        # any NA sentinel inserted into a categorical becomes NaN
        expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
        for na in (np.nan, pd.NaT, None):
            result = CategoricalIndex(list("aabcb")).insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_insert_na_mismatched_dtype(self):
        # NaT is not compatible with integer categories -> fall back to object
        ci = CategoricalIndex([0, 1, 1])
        result = ci.insert(0, pd.NaT)
        expected = Index([pd.NaT, 0, 1, 1], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_delete(self, simple_index):
        ci = simple_index
        categories = ci.categories

        result = ci.delete(0)
        expected = CategoricalIndex(list("abbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        result = ci.delete(-1)
        expected = CategoricalIndex(list("aabbc"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # out-of-bounds position must raise
        with tm.external_error_raised((IndexError, ValueError)):
            # Either depending on NumPy version
            ci.delete(10)

    @pytest.mark.parametrize(
        "data, non_lexsorted_data",
        [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
    )
    def test_is_monotonic(self, data, non_lexsorted_data):
        # Monotonicity is defined by category order, not by value order.
        c = CategoricalIndex(data)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, ordered=True)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        # reversing the categories flips the direction
        c = CategoricalIndex(data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        c = CategoricalIndex(data, categories=reversed(data), ordered=True)
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        # test when data is neither monotonic increasing nor decreasing
        reordered_data = [data[0], data[2], data[1]]
        c = CategoricalIndex(reordered_data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is False

        # non lexsorted categories
        categories = non_lexsorted_data

        c = CategoricalIndex(categories[:2], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(categories[1:3], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

    def test_has_duplicates(self):
        idx = CategoricalIndex([0, 0, 0], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        # values absent from the categories all become NaN -> duplicated NaNs
        idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
        assert idx.is_unique is True
        assert idx.has_duplicates is False

    @pytest.mark.parametrize(
        "data, categories, expected",
        [
            (
                [1, 1, 1],
                [1, 2, 3],
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                # all values outside the categories -> all NaN, still duplicated
                [1, 1, 1],
                list("abc"),
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                # all distinct (after NaN-coercion of 2) -> nothing duplicated
                [2, "a", "b"],
                list("abc"),
                {
                    "first": np.zeros(shape=(3), dtype=np.bool_),
                    "last": np.zeros(shape=(3), dtype=np.bool_),
                    False: np.zeros(shape=(3), dtype=np.bool_),
                },
            ),
            (
                list("abb"),
                list("abc"),
                {
                    "first": np.array([False, False, True]),
                    "last": np.array([False, True, False]),
                    False: np.array([False, True, True]),
                },
            ),
        ],
    )
    def test_drop_duplicates(self, data, categories, expected):
        # duplicated() and drop_duplicates() must agree for every `keep` mode
        idx = CategoricalIndex(data, categories=categories, name="foo")
        for keep, e in expected.items():
            tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
            e = idx[~e]
            result = idx.drop_duplicates(keep=keep)
            tm.assert_index_equal(result, e)

    @pytest.mark.parametrize(
        "data, categories, expected_data",
        [
            ([1, 1, 1], [1, 2, 3], [1]),
            ([1, 1, 1], list("abc"), [np.nan]),
            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
        ],
    )
    def test_unique(self, data, categories, expected_data, ordered):
        # `ordered` is a shared pytest fixture supplying True/False;
        # unique() keeps the dtype (categories and orderedness) intact
        dtype = CategoricalDtype(categories, ordered=ordered)

        idx = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(expected_data, dtype=dtype)
        tm.assert_index_equal(idx.unique(), expected)

    def test_repr_roundtrip(self):
        # eval(repr(ci)) reconstructs an identical index for short inputs
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        str(ci)
        tm.assert_index_equal(eval(repr(ci)), ci, exact=True)

        # formatting
        str(ci)

        # long format
        # this is not reprable
        ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
        str(ci)

    def test_isin(self):
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        tm.assert_numpy_array_equal(
            ci.isin(["c"]), np.array([False, False, False, True, False, False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
        )
        # NaN in the probe values matches the NaN entry
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
        )

        # mismatched categorical -> coerced to ndarray so doesn't matter
        result = ci.isin(ci.set_categories(list("abcdefghi")))
        expected = np.array([True] * 6)
        tm.assert_numpy_array_equal(result, expected)

        result = ci.isin(ci.set_categories(list("defghi")))
        expected = np.array([False] * 5 + [True])
        tm.assert_numpy_array_equal(result, expected)

    def test_isin_overlapping_intervals(self):
        # GH 34974
        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
        result = CategoricalIndex(idx).isin(idx)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)

    def test_identical(self):
        # identical() is stricter than equals(): the categories must match too
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
        assert ci1.identical(ci1)
        assert ci1.identical(ci1.copy())
        assert not ci1.identical(ci2)

    def test_ensure_copied_data(self):
        # gh-12309: Check the "copy" argument of each
        # Index.__new__ is honored.
        #
        # Must be tested separately from other indexes because
        # self.values is not an ndarray.
        index = CategoricalIndex(list("ab") * 5)

        result = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, result)
        assert not np.shares_memory(result._data._codes, index._data._codes)

        # copy=False shares the underlying codes array
        result = CategoricalIndex(index.values, copy=False)
        assert result._data._codes is index._data._codes
|  | ||||
|  | ||||
class TestCategoricalIndex2:
    """Further CategoricalIndex tests: i8 views, engine dtype selection,
    disallowed arithmetic, and delegation of Categorical methods."""

    def test_view_i8(self):
        # GH#25464
        # viewing int8 codes as int64 requires a length divisible by 8
        ci = CategoricalIndex(list("ab") * 50)
        msg = "When changing to a larger dtype, its size must be a divisor"
        with pytest.raises(ValueError, match=msg):
            ci.view("i8")
        with pytest.raises(ValueError, match=msg):
            ci._data.view("i8")

        ci = ci[:-4]  # length divisible by 8

        res = ci.view("i8")
        expected = ci._data.codes.view("i8")
        tm.assert_numpy_array_equal(res, expected)

        # the same view is available directly on the backing Categorical
        cat = ci._data
        tm.assert_numpy_array_equal(cat.view("i8"), expected)

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        # The lookup engine class must match the width of the codes dtype.
        if dtype != np.int64:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
            ci = CategoricalIndex(range(num_uniques))
        else:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            arr = ci.values._ndarray.astype("int64")
            # swap in int64 codes in place to simulate the huge case
            NDArrayBacked.__init__(ci._data, arr, ci.dtype)
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        # the exact message differs per operand combination; accept any of them
        msg = "|".join(
            [
                f"cannot perform {op_name} with this index type: CategoricalIndex",
                "can only concatenate list",
                rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            func(idx)

    def test_method_delegation(self):
        # Categorical methods are delegated through the index and return
        # a new CategoricalIndex rather than mutating in place.
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.set_categories(list("cab"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cab"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.rename_categories(list("efg"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("ffggef"), categories=list("efg"))
        )

        # GH18862 (let rename_categories take callables)
        result = ci.rename_categories(lambda x: x.upper())
        tm.assert_index_equal(
            result, CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.add_categories(["d"])
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cabd"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_unordered()
        tm.assert_index_equal(result, ci)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_ordered()
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True),
        )

        # invalid
        # NOTE(review): assumes this pandas version still accepts an `inplace`
        # kwarg on set_categories and rejects it with ValueError — confirm
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)

    def test_remove_maintains_order(self):
        # removing a category keeps the relative order of the remaining ones
        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))
        result = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True),
        )
        result = result.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(
                ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
            ),
        )
| @ -0,0 +1,142 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestCategoricalIndexConstructors:
    """Constructor tests for CategoricalIndex: scalar rejection, category
    inference/overrides, dtype= handling, and CategoricalDtype interaction."""

    def test_construction_disallows_scalar(self):
        msg = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        # omitting `data` entirely is equivalent to passing a scalar
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        categories = ci.categories

        # round-tripping through Index preserves the categorical
        result = Index(ci)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        result = Index(ci.values)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        # empty
        result = CategoricalIndex([], categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8"))
        assert not result.ordered

        # passing categories
        result = CategoricalIndex(list("aabbca"), categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )

        # categories inferred (sorted) when not supplied
        c = Categorical(list("aabbca"))
        result = CategoricalIndex(c)
        tm.assert_index_equal(result.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        # explicit categories override the Categorical's own
        result = CategoricalIndex(c, categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        ci = CategoricalIndex(c, categories=list("abcd"))
        result = CategoricalIndex(ci)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        # narrowing the categories turns missing values into code -1
        result = CategoricalIndex(ci, categories=list("ab"))
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert result.ordered

        # dtype="category" is redundant but harmless alongside kwargs
        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        expected = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

    def test_construction_with_dtype(self):
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)

        result = Index(np.array(ci), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        result = Index(np.array(ci).tolist(), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
        tm.assert_index_equal(result, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        result = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(result, expected, exact=True)

    def test_construction_empty_with_bool_categories(self):
        # see GH#22702
        cat = CategoricalIndex([], categories=[True, False])
        categories = sorted(cat.categories.tolist())
        assert categories == [False, True]

    def test_construction_with_categorical_dtype(self):
        # construction with CategoricalDtype
        # GH#18109
        data, cats, ordered = "a a b b".split(), "c b a".split(), True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        result = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(result, expected, exact=True)

        # GH#19032
        result = Index(data, dtype=dtype)
        tm.assert_index_equal(result, expected, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)
| @ -0,0 +1,96 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestEquals:
    """Tests for CategoricalIndex.equals and elementwise comparisons."""

    def test_equals_categorical(self):
        # equals() ignores the category/object dtype distinction but is
        # sensitive to the category set and (for ordered) to orderedness.
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)

        assert ci1.equals(ci1)
        assert not ci1.equals(ci2)
        assert ci1.equals(ci1.astype(object))
        assert ci1.astype(object).equals(ci1)

        # elementwise comparison operators against itself
        assert (ci1 == ci1).all()
        assert not (ci1 != ci1).all()
        assert not (ci1 > ci1).all()
        assert not (ci1 < ci1).all()
        assert (ci1 <= ci1).all()
        assert (ci1 >= ci1).all()

        # comparisons against non-categorical right-hand sides
        assert not (ci1 == 1).all()
        assert (ci1 == Index(["a", "b"])).all()
        assert (ci1 == ci1.values).all()

        # invalid comparisons
        with pytest.raises(ValueError, match="Lengths must match"):
            ci1 == Index(["a", "b", "c"])

        # == raises when the two categorical dtypes are incompatible
        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            ci1 == ci2
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, ordered=False)
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, categories=list("abc"))

        # tests
        # make sure that we are testing for category inclusion properly
        ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        # Same categories, but different order
        # Unordered
        assert ci.equals(CategoricalIndex(list("aabca")))
        # Ordered
        assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
        assert ci.equals(ci.copy())

        # a NaN entry makes the index unequal to its NaN-free counterpart
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        assert not ci.equals(CategoricalIndex(list("aabca")))
        assert ci.equals(ci.copy())

        # but equal to another index with NaN in the same position
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca") + [np.nan])
        assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
        assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
        assert ci.equals(ci.copy())

    def test_equals_categorical_unordered(self):
        # https://github.com/pandas-dev/pandas/issues/16603
        # for unordered categoricals the order of the categories is irrelevant
        a = CategoricalIndex(["A"], categories=["A", "B"])
        b = CategoricalIndex(["A"], categories=["B", "A"])
        c = CategoricalIndex(["C"], categories=["B", "A"])
        assert a.equals(b)
        assert not a.equals(c)
        assert not b.equals(c)

    def test_equals_non_category(self):
        # GH#37667 Case where other contains a value not among ci's
        #  categories ("D") and also contains np.nan
        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
        other = Index(["A", "B", "D", np.nan])

        assert not ci.equals(other)

    def test_equals_multiindex(self):
        # dont raise NotImplementedError when calling is_dtype_compat

        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
        ci = mi.to_flat_index().astype("category")

        assert not ci.equals(mi)

    def test_equals_string_dtype(self, any_string_dtype):
        # GH#55364
        # equals() compares values, so an equal-content string-dtype Index
        # with the same name compares equal regardless of its exact dtype
        idx = CategoricalIndex(list("abc"), name="B")
        other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
        assert idx.equals(other)
| @ -0,0 +1,54 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import CategoricalIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestFillNA:
    """Tests for CategoricalIndex.fillna and the underlying Categorical."""

    def test_fillna_categorical(self):
        # GH#11343
        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
        # fill by value in categories
        exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
        tm.assert_index_equal(idx.fillna(1.0), exp)

        cat = idx._data

        # fill by value not in categories raises TypeError on EA, casts on CI
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            cat.fillna(2.0)

        # the Index-level method falls back to object dtype instead of raising
        result = idx.fillna(2.0)
        expected = idx.astype(object).fillna(2.0)
        tm.assert_index_equal(result, expected)

    def test_fillna_copies_with_no_nas(self):
        # Nothing to fill, should still get a copy for the Categorical method,
        #  but OK to get a view on CategoricalIndex method
        ci = CategoricalIndex([0, 1, 1])
        result = ci.fillna(0)
        assert result is not ci
        assert tm.shares_memory(result, ci)

        # But at the EA level we always get a copy.
        cat = ci._data
        result = cat.fillna(0)
        assert result._ndarray is not cat._ndarray
        assert result._ndarray.base is None
        assert not tm.shares_memory(result, cat)

    def test_fillna_validates_with_no_nas(self):
        # We validate the fill value even if fillna is a no-op
        ci = CategoricalIndex([2, 3, 3])
        cat = ci._data

        msg = "Cannot setitem on a Categorical with a new category"
        res = ci.fillna(False)
        # nothing to fill, so we dont cast
        tm.assert_index_equal(res, ci)

        # Same check directly on the Categorical
        with pytest.raises(TypeError, match=msg):
            cat.fillna(False)
| @ -0,0 +1,120 @@ | ||||
| """ | ||||
| Tests for CategoricalIndex.__repr__ and related methods. | ||||
| """ | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
| import pandas._config.config as cf | ||||
|  | ||||
| from pandas import CategoricalIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestCategoricalIndexRepr:
    """Pin the exact repr output of CategoricalIndex.

    The expected strings encode line wrapping, truncation (length > display
    width), category-list abbreviation, and east-asian-width handling, so
    they must match byte-for-byte.
    """

    def test_format_different_scalar_lengths(self):
        # GH#35439: entries of differing widths are not padded by format()
        idx = CategoricalIndex(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"CategoricalIndex\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    def test_string_categorical_index_repr(self):
        # short
        idx = CategoricalIndex(["a", "bb", "ccc"])
        expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
                  'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
                  ...
                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)"""  # noqa: E501

        assert repr(idx) == expected

        # larger categories
        # note: input deliberately repeats 'm' and skips 'n'; the expected
        # repr below matches that input
        idx = CategoricalIndex(list("abcdefghijklmmo"))
        expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
                  'm', 'm', 'o'],
                 categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # short
        idx = CategoricalIndex(["あ", "いい", "ううう"])
        expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
                  'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
                  ...
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501

        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
        expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
                  'す', 'せ', 'そ'],
                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # Enable Unicode option -----------------------------------------
        # wide characters count as two columns, so lines wrap earlier
        with cf.option_context("display.unicode.east_asian_width", True):
            # short
            idx = CategoricalIndex(["あ", "いい", "ううう"])
            expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
            assert repr(idx) == expected

            # multiple lines
            idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
            expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
                  'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501

            assert repr(idx) == expected

            # truncated
            idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
            expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ',
                  ...
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
                  'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501

            assert repr(idx) == expected

            # larger categories
            idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
            expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
                  'さ', 'し', 'す', 'せ', 'そ'],
                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501

            assert repr(idx) == expected
| @ -0,0 +1,420 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import InvalidIndexError | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestTake:
    """Tests for CategoricalIndex.take with allow_fill/fill_value."""

    def test_take_fill_value(self):
        # GH 12631

        # numeric category
        idx = CategoricalIndex([1, 2, 3], name="xxx")
        result = idx.take(np.array([1, 0, -1]))
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value: with filling enabled, -1 means "missing" -> NaN
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False: -1 is ordinary negative indexing (last element)
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # object category
        idx = CategoricalIndex(
            list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        result = idx.take(np.array([1, 0, -1]))
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = CategoricalIndex(
            ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # indices below -1 are invalid when filling is enabled
        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        # out-of-bounds index without filling -> IndexError
        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_datetime(self):
        # datetime category: same take semantics as above
        idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        idx = CategoricalIndex(idx)
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        expected = CategoricalIndex(expected)
        tm.assert_index_equal(result, expected)

        # fill_value: missing slot becomes NaT, categories unchanged
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
        expected = CategoricalIndex(expected, categories=exp_cats)
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        expected = CategoricalIndex(expected)
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_invalid_kwargs(self):
        # numpy-compat kwargs are accepted in signature but rejected at runtime
        idx = CategoricalIndex([1, 2, 3], name="foo")
        indices = [1, 0, -1]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            idx.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, mode="clip")
|  | ||||
|  | ||||
| class TestGetLoc: | ||||
|     def test_get_loc(self): | ||||
|         # GH 12531 | ||||
|         cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) | ||||
|         idx1 = Index(list("abcde")) | ||||
|         assert cidx1.get_loc("a") == idx1.get_loc("a") | ||||
|         assert cidx1.get_loc("e") == idx1.get_loc("e") | ||||
|  | ||||
|         for i in [cidx1, idx1]: | ||||
|             with pytest.raises(KeyError, match="'NOT-EXIST'"): | ||||
|                 i.get_loc("NOT-EXIST") | ||||
|  | ||||
|         # non-unique | ||||
|         cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc")) | ||||
|         idx2 = Index(list("aacded")) | ||||
|  | ||||
|         # results in bool array | ||||
|         res = cidx2.get_loc("d") | ||||
|         tm.assert_numpy_array_equal(res, idx2.get_loc("d")) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             res, np.array([False, False, False, True, False, True]) | ||||
|         ) | ||||
|         # unique element results in scalar | ||||
|         res = cidx2.get_loc("e") | ||||
|         assert res == idx2.get_loc("e") | ||||
|         assert res == 4 | ||||
|  | ||||
|         for i in [cidx2, idx2]: | ||||
|             with pytest.raises(KeyError, match="'NOT-EXIST'"): | ||||
|                 i.get_loc("NOT-EXIST") | ||||
|  | ||||
|         # non-unique, sliceable | ||||
|         cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) | ||||
|         idx3 = Index(list("aabbb")) | ||||
|  | ||||
|         # results in slice | ||||
|         res = cidx3.get_loc("a") | ||||
|         assert res == idx3.get_loc("a") | ||||
|         assert res == slice(0, 2, None) | ||||
|  | ||||
|         res = cidx3.get_loc("b") | ||||
|         assert res == idx3.get_loc("b") | ||||
|         assert res == slice(2, 5, None) | ||||
|  | ||||
|         for i in [cidx3, idx3]: | ||||
|             with pytest.raises(KeyError, match="'c'"): | ||||
|                 i.get_loc("c") | ||||
|  | ||||
|     def test_get_loc_unique(self): | ||||
|         cidx = CategoricalIndex(list("abc")) | ||||
|         result = cidx.get_loc("b") | ||||
|         assert result == 1 | ||||
|  | ||||
|     def test_get_loc_monotonic_nonunique(self): | ||||
|         cidx = CategoricalIndex(list("abbc")) | ||||
|         result = cidx.get_loc("b") | ||||
|         expected = slice(1, 3, None) | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_get_loc_nonmonotonic_nonunique(self): | ||||
|         cidx = CategoricalIndex(list("abcb")) | ||||
|         result = cidx.get_loc("b") | ||||
|         expected = np.array([False, True, False, True], dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_loc_nan(self): | ||||
|         # GH#41933 | ||||
|         ci = CategoricalIndex(["A", "B", np.nan]) | ||||
|         res = ci.get_loc(np.nan) | ||||
|  | ||||
|         assert res == 2 | ||||
|  | ||||
|  | ||||
class TestGetIndexer:
    """Tests for CategoricalIndex.get_indexer / get_indexer_non_unique."""

    def test_get_indexer_base(self):
        # Determined by cat ordering.
        idx = CategoricalIndex(list("cab"), categories=list("cab"))
        expected = np.arange(len(idx), dtype=np.intp)

        actual = idx.get_indexer(idx)
        tm.assert_numpy_array_equal(expected, actual)

        # unknown fill method is rejected up front
        with pytest.raises(ValueError, match="Invalid fill method"):
            idx.get_indexer(idx, method="invalid")

    def test_get_indexer_requires_unique(self):
        # get_indexer on a non-unique index raises InvalidIndexError
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        oidx = Index(np.array(ci))

        msg = "Reindexing only valid with uniquely valued Index objects"

        for n in [1, 2, 5, len(ci)]:
            finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)]

            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for finder in [list("aabbca"), list("aababca")]:
            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

    def test_get_indexer_non_unique(self):
        # get_indexer_non_unique works where get_indexer raises;
        # unmatched targets ("f") map to -1
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        for indexer in [idx2, list("abf"), Index(list("abf"))]:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                idx1.get_indexer(indexer)

            r1, _ = idx1.get_indexer_non_unique(indexer)
            expected = np.array([0, 1, 2, -1], dtype=np.intp)
            tm.assert_almost_equal(r1, expected)

    def test_get_indexer_method(self):
        # fill methods are not implemented for CategoricalIndex targets
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        msg = "method pad not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="pad")
        msg = "method backfill not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="backfill")

        msg = "method nearest not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="nearest")

    def test_get_indexer_array(self):
        # object-dtype array of Timestamps matches Timestamp categories
        arr = np.array(
            [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
            dtype=object,
        )
        cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
        ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
        result = ci.get_indexer(arr)
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_same_order(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19551
        # category order of the target does not change the result
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_nans_in_index_and_target(self):
        # GH 45361
        # NaN in the index matches NaN in the target; values absent from
        # the index (4) map to -1
        ci = CategoricalIndex([1, 2, np.nan, 3])
        other1 = [2, 3, 4, np.nan]
        res1 = ci.get_indexer(other1)
        expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(res1, expected1)
        other2 = [1, 4, 2, 3]
        res2 = ci.get_indexer(other2)
        expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(res2, expected2)
|  | ||||
|  | ||||
| class TestWhere: | ||||
|     def test_where(self, listlike_box): | ||||
|         klass = listlike_box | ||||
|  | ||||
|         i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) | ||||
|         cond = [True] * len(i) | ||||
|         expected = i | ||||
|         result = i.where(klass(cond)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         cond = [False] + [True] * (len(i) - 1) | ||||
|         expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories) | ||||
|         result = i.where(klass(cond)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_where_non_categories(self): | ||||
|         ci = CategoricalIndex(["a", "b", "c", "d"]) | ||||
|         mask = np.array([True, False, True, False]) | ||||
|  | ||||
|         result = ci.where(mask, 2) | ||||
|         expected = Index(["a", 2, "c", 2], dtype=object) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         msg = "Cannot setitem on a Categorical with a new category" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             # Test the Categorical method directly | ||||
|             ci._data._where(mask, 2) | ||||
|  | ||||
|  | ||||
class TestContains:
    """Tests for __contains__ on CategoricalIndex (and its Categorical)."""

    def test_contains(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)

        assert "a" in ci
        assert "z" not in ci
        # "e" is a category but not a value, so it is not contained
        assert "e" not in ci
        assert np.nan not in ci

        # assert codes NOT in index
        assert 0 not in ci
        assert 1 not in ci

    def test_contains_nan(self):
        ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
        assert np.nan in ci

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        # NaN-likes match only when compatible with the categories' dtype:
        # e.g. np.timedelta64("NaT") is not "in" a datetime-based categorical
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]
        ci = CategoricalIndex(dti)

        obj = ci
        if unwrap:
            # exercise the Categorical directly as well as the index
            obj = ci._data

        assert np.nan in obj
        assert None in obj
        assert pd.NaT in obj
        assert np.datetime64("NaT") in obj
        assert np.timedelta64("NaT") not in obj

        obj2 = CategoricalIndex(tdi)
        if unwrap:
            obj2 = obj2._data

        assert np.nan in obj2
        assert None in obj2
        assert pd.NaT in obj2
        assert np.datetime64("NaT") not in obj2
        assert np.timedelta64("NaT") in obj2

        obj3 = CategoricalIndex(pi)
        if unwrap:
            obj3 = obj3._data

        assert np.nan in obj3
        assert None in obj3
        assert pd.NaT in obj3
        assert np.datetime64("NaT") not in obj3
        assert np.timedelta64("NaT") not in obj3

    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        # GH 23705
        # scalars falling inside an interval category count as contained
        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        result = item in ci
        assert result is expected

    def test_contains_list(self):
        # GH#21729
        # unhashable (list) probes raise rather than returning False
        idx = CategoricalIndex([1, 2, 3])

        assert "a" not in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in idx
| @ -0,0 +1,144 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data, categories",
    [
        (list("abcbca"), list("cab")),
        (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
    ],
    ids=["string", "interval"],
)
def test_map_str(data, categories, ordered):
    # GH 31202 - override base class since we want to maintain categorical/ordered:
    # mapping str over a CategoricalIndex keeps the categorical dtype and
    # the ordered flag, with the categories mapped as well.
    index = CategoricalIndex(data, categories=categories, ordered=ordered)
    expected = CategoricalIndex(
        [str(x) for x in data],
        categories=[str(c) for c in categories],
        ordered=ordered,
    )
    tm.assert_index_equal(index.map(str), expected)
|  | ||||
|  | ||||
def test_map():
    # Lowercasing an ordered CategoricalIndex keeps the ordering and
    # lowercases the categories too.
    idx = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
    expected = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
    tm.assert_index_equal(idx.map(lambda x: x.lower()), expected)

    # Unordered, named index: the name must survive the map.
    idx = CategoricalIndex(
        list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
    )
    expected = CategoricalIndex(
        list("ababc"), categories=list("bac"), ordered=False, name="XXX"
    )
    tm.assert_index_equal(idx.map(lambda x: x.lower()), expected)

    # GH 12766: Return an index not an array
    tm.assert_index_equal(
        idx.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
    )

    # change categories dtype
    idx = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)
    mapping = {"A": 10, "B": 20, "C": 30}
    expected = CategoricalIndex(
        [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False
    )

    # A callable, a Series, and a dict mapper must all agree.
    tm.assert_index_equal(idx.map(mapping.get), expected)
    tm.assert_index_equal(
        idx.map(Series([10, 20, 30], index=["A", "B", "C"])), expected
    )
    tm.assert_index_equal(idx.map(mapping), expected)
|  | ||||
|  | ||||
def test_map_with_categorical_series():
    # GH 12756: mapping through a categorical Series yields a
    # CategoricalIndex, a plain Series yields a plain Index; keys missing
    # from the mapper become NaN either way.
    idx = Index([1, 2, 3, 4])
    cat_mapper = Series(["even", "odd", "even", "odd"], dtype="category")
    plain_mapper = Series(["even", "odd", "even", "odd"])

    tm.assert_index_equal(
        idx.map(cat_mapper), CategoricalIndex(["odd", "even", "odd", np.nan])
    )
    tm.assert_index_equal(
        idx.map(plain_mapper), Index(["odd", "even", "odd", np.nan])
    )
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("data", "f", "expected"), | ||||
|     ( | ||||
|         ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), | ||||
|         ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), | ||||
|         ( | ||||
|             [1, 1, np.nan], | ||||
|             Series([False, False]), | ||||
|             CategoricalIndex([False, False, np.nan]), | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2, np.nan], | ||||
|             Series([False, False, False]), | ||||
|             Index([False, False, np.nan]), | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_map_with_nan_ignore(data, f, expected):  # GH 24241 | ||||
|     values = CategoricalIndex(data) | ||||
|     result = values.map(f, na_action="ignore") | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("data", "f", "expected"), | ||||
|     ( | ||||
|         ([1, 1, np.nan], pd.isna, Index([False, False, True])), | ||||
|         ([1, 2, np.nan], pd.isna, Index([False, False, True])), | ||||
|         ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), | ||||
|         ( | ||||
|             [1, 1, np.nan], | ||||
|             Series([False, False]), | ||||
|             CategoricalIndex([False, False, np.nan]), | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2, np.nan], | ||||
|             Series([False, False, False]), | ||||
|             Index([False, False, np.nan]), | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_map_with_nan_none(data, f, expected):  # GH 24241 | ||||
|     values = CategoricalIndex(data) | ||||
|     result = values.map(f, na_action=None) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_map_with_dict_or_series():
    # Dict and Series mappers must produce the same categorical result,
    # including the index name.
    orig_values = ["a", "B", 1, "a"]
    new_values = ["one", 2, 3.0, "one"]
    cur_index = CategoricalIndex(orig_values, name="XXX")
    expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])

    for mapper in (
        Series(new_values[:-1], index=orig_values[:-1]),
        dict(zip(orig_values[:-1], new_values[:-1])),
    ):
        # Order of categories in result can be different
        tm.assert_index_equal(cur_index.map(mapper), expected)
| @ -0,0 +1,78 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestReindex:
    """Reindexing behavior of CategoricalIndex."""

    def test_reindex_list_non_unique(self):
        # GH#11586: reindexing an index with duplicate labels must raise.
        idx = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(
            ValueError, match="cannot reindex on an axis with duplicate labels"
        ):
            idx.reindex(["a", "c"])

    def test_reindex_categorical_non_unique(self):
        # Same failure when the target is a Categorical.
        idx = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(
            ValueError, match="cannot reindex on an axis with duplicate labels"
        ):
            idx.reindex(Categorical(["a", "c"]))

    def test_reindex_list_non_unique_unused_category(self):
        # Unused categories do not make a duplicated index reindexable.
        idx = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        with pytest.raises(
            ValueError, match="cannot reindex on an axis with duplicate labels"
        ):
            idx.reindex(["a", "c"])

    def test_reindex_categorical_non_unique_unused_category(self):
        # Categorical target plus unused categories still raises.
        idx = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        with pytest.raises(
            ValueError, match="cannot reindex on an axis with duplicate labels"
        ):
            idx.reindex(Categorical(["a", "c"]))

    def test_reindex_duplicate_target(self):
        # See GH25459: duplicates in the *target* are allowed.
        cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])

        res, indexer = cat.reindex(["a", "c", "c"])
        tm.assert_index_equal(res, Index(["a", "c", "c"]), exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

        target = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
        res, indexer = cat.reindex(target)
        tm.assert_index_equal(res, target, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

    def test_reindex_empty_index(self):
        # See GH16770: reindexing an empty CategoricalIndex works.
        empty = CategoricalIndex([])
        res, indexer = empty.reindex(["a", "b"])
        tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))

    def test_reindex_categorical_added_category(self):
        # GH 42424: a target that extends the original categories is
        # returned as-is.
        intervals = [Interval(i, i + 1, closed="right") for i in range(4)]
        ci = CategoricalIndex(intervals[:2], ordered=True)
        ci_add = CategoricalIndex(intervals, ordered=True)
        result, _ = ci.reindex(ci_add)
        tm.assert_index_equal(ci_add, result)
| @ -0,0 +1,18 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("na_value", [None, np.nan])
def test_difference_with_na(na_value):
    # GH 57318: any NA flavor in `other` matches the null in the
    # CategoricalIndex, so both "c" and the null are removed.
    ci = CategoricalIndex(["a", "b", "c", None])
    other = Index(["c", na_value])
    tm.assert_index_equal(
        ci.difference(other),
        CategoricalIndex(["a", "b"], categories=["a", "b", "c"]),
    )
| @ -0,0 +1,41 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     array, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[None, False])
def sort(request):
    """
    Valid values for the 'sort' parameter used in the Index
    setops methods (intersection, union, etc.)

    Caution:
        Don't confuse this one with the "sort" fixture used
        for DataFrame.append or concat. That one has
        parameters [True, False].

        We can't combine them as sort=True is not permitted
        in the Index setops methods.
    """
    # Plain passthrough of the parametrized value.
    return request.param
|  | ||||
|  | ||||
@pytest.fixture(params=["D", "3D", "-3D", "h", "2h", "-2h", "min", "2min", "s", "-3s"])
def freq_sample(request):
    """
    Valid values for the 'freq' parameter used to create date_range and
    timedelta_range.

    Includes multiples and negative steps (e.g. "-3D") to exercise
    descending ranges.
    """
    return request.param
|  | ||||
|  | ||||
@pytest.fixture(params=[list, tuple, np.array, array, Series])
def listlike_box(request):
    """
    Types that may be passed as the indexer to searchsorted.

    Covers builtin sequences plus the numpy and pandas array constructors.
    """
    return request.param
| @ -0,0 +1,89 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class DropDuplicates:
    """Shared drop_duplicates/duplicated tests for datetime-like indexes."""

    def test_drop_duplicates_metadata(self, idx):
        # GH#10115: a no-op drop_duplicates preserves the freq.
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        result = idx_dup.drop_duplicates()

        if isinstance(idx, PeriodIndex):
            # PeriodIndex keeps its freq through append/drop_duplicates.
            expected = idx
            assert result.freq == expected.freq
        else:
            # freq is reset except for PeriodIndex
            assert idx_dup.freq is None
            assert result.freq is None
            expected = idx._with_freq(None)

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            (
                "first",
                np.concatenate(([False] * 10, [True] * 5)),
                np.arange(0, 10, dtype=np.int64),
            ),
            (
                "last",
                np.concatenate(([True] * 5, [False] * 10)),
                np.arange(5, 15, dtype=np.int64),
            ),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10, dtype=np.int64),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        # to check Index/Series compat
        idx = idx.append(idx[:5])

        # `expected` is the duplicated-mask; `index` the surviving labels.
        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        kept = idx[~expected]

        tm.assert_index_equal(idx.drop_duplicates(keep=keep), kept)

        tm.assert_series_equal(
            Series(idx).drop_duplicates(keep=keep), Series(kept, index=index)
        )
|  | ||||
|  | ||||
class TestDropDuplicatesPeriodIndex(DropDuplicates):
    # Runs the shared DropDuplicates tests against a PeriodIndex.

    @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
    def freq(self, request):
        # Period frequencies exercised by the shared tests.
        return request.param

    @pytest.fixture
    def idx(self, freq):
        # period_range-backed index of 10 consecutive periods.
        return period_range("2011-01-01", periods=10, freq=freq, name="idx")
|  | ||||
|  | ||||
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    # Runs the shared DropDuplicates tests against a DatetimeIndex.

    @pytest.fixture
    def idx(self, freq_sample):
        # Frequencies come from the shared freq_sample fixture.
        return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
|  | ||||
|  | ||||
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    # Runs the shared DropDuplicates tests against a TimedeltaIndex.

    @pytest.fixture
    def idx(self, freq_sample):
        # Frequencies come from the shared freq_sample fixture.
        return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
| @ -0,0 +1,181 @@ | ||||
| """ | ||||
| Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex | ||||
| """ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     PeriodIndex, | ||||
|     TimedeltaIndex, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class EqualsTests:
    """Equality checks shared by the datetime-like index types."""

    def test_not_equals_numeric(self, index):
        # The raw integer view of the data never compares equal.
        for other in (
            Index(index.asi8),
            Index(index.asi8.astype("u8")),
            Index(index.asi8).astype("f8"),
        ):
            assert not index.equals(other)

    def test_equals(self, index):
        # Equality holds across object-dtype and categorical recastings.
        assert index.equals(index)
        assert index.equals(index.astype(object))
        assert index.equals(CategoricalIndex(index))
        assert index.equals(CategoricalIndex(index.astype(object)))

    def test_not_equals_non_arraylike(self, index):
        # A plain list is never equal to an Index.
        assert not index.equals(list(index))

    def test_not_equals_strings(self, index):
        # String renderings of the values do not compare equal.
        str_idx = Index([str(x) for x in index], dtype=object)
        assert not index.equals(str_idx)
        assert not index.equals(CategoricalIndex(str_idx))

    def test_not_equals_misc_strs(self, index):
        assert not index.equals(Index(list("abc")))
|  | ||||
|  | ||||
class TestPeriodIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        # Base index for the shared EqualsTests.
        return period_range("2013-01-01", periods=5, freq="D")

    # TODO: de-duplicate with other test_equals2 methods
    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals2(self, freq):
        # GH#13107
        idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)
        # An index equals itself, its copy, and its object-dtype cast...
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        # ...but never a plain list or a Series.
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # The same labels at a different freq never compare equal.
        idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="h")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        # NOTE(review): despite the "tz" wording above, this builds an index
        #  sharing idx's ordinals but with freq "h" via _simple_new — likely
        #  copied from the datetime variant; confirm the intent.
        idx3 = PeriodIndex._simple_new(
            idx._values._simple_new(idx._values.asi8, dtype=pd.PeriodDtype("h"))
        )
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))
|  | ||||
|  | ||||
class TestDatetimeIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        # Base index for the shared EqualsTests.
        return date_range("2013-01-01", periods=5)

    def test_equals2(self):
        # GH#13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        # An index equals itself, its copy, and its object-dtype cast...
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        # ...but never a plain list or a Series.
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # Localizing to a tz makes the index unequal to the naive one.
        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_not_equals_bday(self, freq):
        # Business-day indexes also refuse to equal plain lists.
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        assert not rng.equals(list(rng))
|  | ||||
|  | ||||
class TestTimedeltaIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        # Base index for the shared EqualsTests.
        return timedelta_range("1 day", periods=10)

    def test_equals2(self):
        # GH#13107
        idx = TimedeltaIndex(["1 days", "2 days", "NaT"])
        # An index equals itself, its copy, and its object-dtype cast...
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        # ...but never a plain list or a Series.
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # Same values in a different order are unequal.
        idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # Check that we dont raise OverflowError on comparisons outside the
        #  implementation range GH#28532
        oob = Index([timedelta(days=10**6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        # Same out-of-bounds values as np.timedelta64 (explicit list)...
        oob2 = Index([np.timedelta64(x) for x in oob], dtype=object)
        assert (oob == oob2).all()
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

        # ...and via Index.map.
        oob3 = oob.map(np.timedelta64)
        assert (oob3 == oob).all()
        assert not idx.equals(oob3)
        assert not idx2.equals(oob3)
| @ -0,0 +1,45 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Datetime-like dtypes whose cross-dtype get_indexer behavior we exercise.
dtlike_dtypes = [
    np.dtype("timedelta64[ns]"),
    np.dtype("datetime64[ns]"),
    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
    pd.PeriodDtype("ns"),
]


@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
    # Matching dtypes resolve positions; mismatched datetime-like dtypes
    # find no matches (rather than raising).
    vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)

    def construct(dtype):
        if dtype is dtlike_dtypes[-1]:
            # PeriodArray will try to cast ints to strings
            return DatetimeIndex(vals).astype(dtype)
        return Index(vals, dtype=dtype)

    result = construct(ldtype).get_indexer_non_unique(construct(rdtype))

    if ldtype is rdtype:
        tm.assert_numpy_array_equal(
            result[0], np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
        )
        tm.assert_numpy_array_equal(result[1], np.array([], dtype=np.intp))
    else:
        tm.assert_numpy_array_equal(result[0], np.array([-1] * 6, dtype=np.intp))
        tm.assert_numpy_array_equal(result[1], np.arange(6, dtype=np.intp))
| @ -0,0 +1,46 @@ | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     NaT, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_is_monotonic_with_nat():
    # GH#31437
    # PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex,
    #  in particular never be monotonic when we have NaT
    dti = date_range("2016-01-01", periods=3)
    pi = dti.to_period("D")
    tdi = Index(dti.view("timedelta64[ns]"))

    # Without NaT everything is monotonic increasing and unique,
    # both at the Index level and at the engine level.
    for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            # NOTE(review): identical to the unconditional assert below;
            #  looks redundant — confirm whether an Index-specific check
            #  was intended here.
            assert obj.is_monotonic_increasing
        assert obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT at the front breaks monotonicity in both directions.
    dti1 = dti.insert(0, NaT)
    pi1 = dti1.to_period("D")
    tdi1 = Index(dti1.view("timedelta64[ns]"))

    for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT at the end breaks monotonicity as well.
    dti2 = dti.insert(3, NaT)
    pi2 = dti2.to_period("h")
    tdi2 = Index(dti2.view("timedelta64[ns]"))

    for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique
| @ -0,0 +1,53 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     TimedeltaIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class NATests:
    """NaT-related behavior shared by the datetime-like index types."""

    def test_nat(self, index_without_na):
        empty_index = index_without_na[:0]

        # Inject a missing value directly into the backing array.
        index_with_na = index_without_na.copy(deep=True)
        index_with_na._data[1] = NaT

        # NaT is the NA sentinel whether the index is empty, has a
        # missing value, or is fully populated.
        assert empty_index._na_value is NaT
        assert index_with_na._na_value is NaT
        assert index_without_na._na_value is NaT

        # _isnan/hasnans reflect the presence of the injected NaT.
        for idx, mask, has in (
            (index_without_na, [False, False], False),
            (index_with_na, [False, True], True),
        ):
            assert idx._can_hold_na
            tm.assert_numpy_array_equal(idx._isnan, np.array(mask))
            assert idx.hasnans is has
|  | ||||
|  | ||||
class TestDatetimeIndexNA(NATests):
    # Runs the shared NaT tests against a DatetimeIndex.

    @pytest.fixture
    def index_without_na(self, tz_naive_fixture):
        # tz comes from the shared tz_naive_fixture — presumably None plus
        # a selection of timezones; confirm in the project conftest.
        tz = tz_naive_fixture
        return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
|  | ||||
|  | ||||
class TestTimedeltaIndexNA(NATests):
    # Runs the shared NaT tests against a TimedeltaIndex.

    @pytest.fixture
    def index_without_na(self):
        return TimedeltaIndex(["1 days", "2 days"])
|  | ||||
|  | ||||
class TestPeriodIndexNA(NATests):
    # Runs the shared NaT tests against a PeriodIndex.

    @pytest.fixture
    def index_without_na(self):
        return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
| @ -0,0 +1,315 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     TimedeltaIndex, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def check_freq_ascending(ordered, orig, ascending):
    """
    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
    when the original index is generated (or generate-able) with
    period_range/date_range/timedelta_range.
    """
    if isinstance(ordered, PeriodIndex):
        # Sorting never changes a PeriodIndex freq.
        assert ordered.freq == orig.freq
    elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        # A descending sort flips the sign of the freq's step.
        expected_n = orig.freq.n if ascending else -1 * orig.freq.n
        assert ordered.freq.n == expected_n
|  | ||||
|  | ||||
def check_freq_nonmonotonic(ordered, orig):
    """
    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
    when the original index is _not_ generated (or generate-able) with
    period_range/date_range/timedelta_range.
    """
    if isinstance(ordered, PeriodIndex):
        # PeriodIndex always carries its freq.
        assert ordered.freq == orig.freq
    elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        # Non-range-like datetime/timedelta data cannot carry a freq.
        assert ordered.freq is None
|  | ||||
|  | ||||
| class TestSortValues: | ||||
    @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
    def non_monotonic_idx(self, request):
        # Three-element index of each datetime-like flavor with values out
        # of order: largest first, smallest second.
        if request.param is DatetimeIndex:
            return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
        elif request.param is PeriodIndex:
            dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
            return dti.to_period("D")
        else:
            return TimedeltaIndex(
                ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
            )
|  | ||||
|     def test_argmin_argmax(self, non_monotonic_idx): | ||||
|         assert non_monotonic_idx.argmin() == 1 | ||||
|         assert non_monotonic_idx.argmax() == 0 | ||||
|  | ||||
|     def test_sort_values(self, non_monotonic_idx): | ||||
|         idx = non_monotonic_idx | ||||
|         ordered = idx.sort_values() | ||||
|         assert ordered.is_monotonic_increasing | ||||
|         ordered = idx.sort_values(ascending=False) | ||||
|         assert ordered[::-1].is_monotonic_increasing | ||||
|  | ||||
|         ordered, dexer = idx.sort_values(return_indexer=True) | ||||
|         assert ordered.is_monotonic_increasing | ||||
|         tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) | ||||
|  | ||||
|         ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) | ||||
|         assert ordered[::-1].is_monotonic_increasing | ||||
|         tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) | ||||
|  | ||||
|     def check_sort_values_with_freq(self, idx): | ||||
|         ordered = idx.sort_values() | ||||
|         tm.assert_index_equal(ordered, idx) | ||||
|         check_freq_ascending(ordered, idx, True) | ||||
|  | ||||
|         ordered = idx.sort_values(ascending=False) | ||||
|         expected = idx[::-1] | ||||
|         tm.assert_index_equal(ordered, expected) | ||||
|         check_freq_ascending(ordered, idx, False) | ||||
|  | ||||
|         ordered, indexer = idx.sort_values(return_indexer=True) | ||||
|         tm.assert_index_equal(ordered, idx) | ||||
|         tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp)) | ||||
|         check_freq_ascending(ordered, idx, True) | ||||
|  | ||||
|         ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) | ||||
|         expected = idx[::-1] | ||||
|         tm.assert_index_equal(ordered, expected) | ||||
|         tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp)) | ||||
|         check_freq_ascending(ordered, idx, False) | ||||
|  | ||||
|     @pytest.mark.parametrize("freq", ["D", "h"]) | ||||
|     def test_sort_values_with_freq_timedeltaindex(self, freq): | ||||
|         # GH#10295 | ||||
|         idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx") | ||||
|  | ||||
|         self.check_sort_values_with_freq(idx) | ||||
|  | ||||
    @pytest.mark.parametrize(
        "idx",
        [
            # tz-naive daily index
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            # tz-aware hourly index
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="h",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_sort_values_with_freq_datetimeindex(self, idx):
        # Sorting a monotonic DatetimeIndex should preserve its freq.
        self.check_sort_values_with_freq(idx)
|  | ||||
|     @pytest.mark.parametrize("freq", ["D", "2D", "4D"]) | ||||
|     def test_sort_values_with_freq_periodindex(self, freq): | ||||
|         # here with_freq refers to being period_range-like | ||||
|         idx = PeriodIndex( | ||||
|             ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" | ||||
|         ) | ||||
|         self.check_sort_values_with_freq(idx) | ||||
|  | ||||
    @pytest.mark.parametrize(
        "idx",
        [
            PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"),
            Index([2011, 2012, 2013], name="idx"),  # for compatibility check
        ],
    )
    def test_sort_values_with_freq_periodindex2(self, idx):
        # here with_freq indicates this is period_range-like
        # Yearly PeriodIndex and a plain integer Index should behave the same.
        self.check_sort_values_with_freq(idx)
|  | ||||
|     def check_sort_values_without_freq(self, idx, expected): | ||||
|         ordered = idx.sort_values(na_position="first") | ||||
|         tm.assert_index_equal(ordered, expected) | ||||
|         check_freq_nonmonotonic(ordered, idx) | ||||
|  | ||||
|         if not idx.isna().any(): | ||||
|             ordered = idx.sort_values() | ||||
|             tm.assert_index_equal(ordered, expected) | ||||
|             check_freq_nonmonotonic(ordered, idx) | ||||
|  | ||||
|         ordered = idx.sort_values(ascending=False) | ||||
|         tm.assert_index_equal(ordered, expected[::-1]) | ||||
|         check_freq_nonmonotonic(ordered, idx) | ||||
|  | ||||
|         ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") | ||||
|         tm.assert_index_equal(ordered, expected) | ||||
|  | ||||
|         exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(indexer, exp) | ||||
|         check_freq_nonmonotonic(ordered, idx) | ||||
|  | ||||
|         if not idx.isna().any(): | ||||
|             ordered, indexer = idx.sort_values(return_indexer=True) | ||||
|             tm.assert_index_equal(ordered, expected) | ||||
|  | ||||
|             exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) | ||||
|             tm.assert_numpy_array_equal(indexer, exp) | ||||
|             check_freq_nonmonotonic(ordered, idx) | ||||
|  | ||||
|         ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) | ||||
|         tm.assert_index_equal(ordered, expected[::-1]) | ||||
|  | ||||
|         exp = np.array([2, 1, 3, 0, 4], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(indexer, exp) | ||||
|         check_freq_nonmonotonic(ordered, idx) | ||||
|  | ||||
|     def test_sort_values_without_freq_timedeltaindex(self): | ||||
|         # GH#10295 | ||||
|  | ||||
|         idx = TimedeltaIndex( | ||||
|             ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" | ||||
|         ) | ||||
|         expected = TimedeltaIndex( | ||||
|             ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" | ||||
|         ) | ||||
|         self.check_sort_values_without_freq(idx, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index_dates,expected_dates", | ||||
|         [ | ||||
|             ( | ||||
|                 ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], | ||||
|                 ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], | ||||
|             ), | ||||
|             ( | ||||
|                 ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], | ||||
|                 ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], | ||||
|             ), | ||||
|             ( | ||||
|                 [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], | ||||
|                 [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_sort_values_without_freq_datetimeindex( | ||||
|         self, index_dates, expected_dates, tz_naive_fixture | ||||
|     ): | ||||
|         tz = tz_naive_fixture | ||||
|  | ||||
|         # without freq | ||||
|         idx = DatetimeIndex(index_dates, tz=tz, name="idx") | ||||
|         expected = DatetimeIndex(expected_dates, tz=tz, name="idx") | ||||
|  | ||||
|         self.check_sort_values_without_freq(idx, expected) | ||||
|  | ||||
    @pytest.mark.parametrize(
        "idx,expected",
        [
            # NOTE(review): the idx1 and idx2 cases below appear to differ
            # only in the index name — presumably checking name propagation;
            # confirm before consolidating.
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx1",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx1",
                ),
            ),
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx2",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx2",
                ),
            ),
            # NaT values sort to the front (na_position="first").
            (
                PeriodIndex(
                    [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                    freq="D",
                    name="idx3",
                ),
                PeriodIndex(
                    [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
                    freq="D",
                    name="idx3",
                ),
            ),
            # Yearly frequency.
            (
                PeriodIndex(
                    ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
                ),
                PeriodIndex(
                    ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y"
                ),
            ),
            (
                # For compatibility check
                Index([2011, 2013, 2015, 2012, 2011], name="idx"),
                Index([2011, 2011, 2012, 2013, 2015], name="idx"),
            ),
        ],
    )
    def test_sort_values_without_freq_periodindex(self, idx, expected):
        # here without_freq means not generateable by period_range
        self.check_sort_values_without_freq(idx, expected)
|  | ||||
|     def test_sort_values_without_freq_periodindex_nat(self): | ||||
|         # doesn't quite fit into check_sort_values_without_freq | ||||
|         idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") | ||||
|         expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") | ||||
|  | ||||
|         ordered = idx.sort_values(na_position="first") | ||||
|         tm.assert_index_equal(ordered, expected) | ||||
|         check_freq_nonmonotonic(ordered, idx) | ||||
|  | ||||
|         ordered = idx.sort_values(ascending=False) | ||||
|         tm.assert_index_equal(ordered, expected[::-1]) | ||||
|         check_freq_nonmonotonic(ordered, idx) | ||||
|  | ||||
|  | ||||
def test_order_stability_compat():
    # GH#35922. sort_values is stable both for normal and datetime-like Index
    values = [2011, 2013, 2015, 2012, 2011]
    pidx = PeriodIndex([str(v) for v in values], name="pidx", freq="Y")
    iidx = Index(values, name="idx")
    # A stable descending sort must break the tie between the duplicate
    # 2011 entries identically for both index types.
    _, period_indexer = pidx.sort_values(return_indexer=True, ascending=False)
    _, int_indexer = iidx.sort_values(return_indexer=True, ascending=False)
    tm.assert_numpy_array_equal(period_indexer, int_indexer)
| @ -0,0 +1,103 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     TimedeltaIndex, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestValueCounts: | ||||
|     # GH#7735 | ||||
|  | ||||
|     def test_value_counts_unique_datetimeindex(self, tz_naive_fixture): | ||||
|         tz = tz_naive_fixture | ||||
|         orig = date_range("2011-01-01 09:00", freq="h", periods=10, tz=tz) | ||||
|         self._check_value_counts_with_repeats(orig) | ||||
|  | ||||
|     def test_value_counts_unique_timedeltaindex(self): | ||||
|         orig = timedelta_range("1 days 09:00:00", freq="h", periods=10) | ||||
|         self._check_value_counts_with_repeats(orig) | ||||
|  | ||||
|     def test_value_counts_unique_periodindex(self): | ||||
|         orig = period_range("2011-01-01 09:00", freq="h", periods=10) | ||||
|         self._check_value_counts_with_repeats(orig) | ||||
|  | ||||
|     def _check_value_counts_with_repeats(self, orig): | ||||
|         # create repeated values, 'n'th element is repeated by n+1 times | ||||
|         idx = type(orig)( | ||||
|             np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype | ||||
|         ) | ||||
|  | ||||
|         exp_idx = orig[::-1] | ||||
|         if not isinstance(exp_idx, PeriodIndex): | ||||
|             exp_idx = exp_idx._with_freq(None) | ||||
|         expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64", name="count") | ||||
|  | ||||
|         for obj in [idx, Series(idx)]: | ||||
|             tm.assert_series_equal(obj.value_counts(), expected) | ||||
|  | ||||
|         tm.assert_index_equal(idx.unique(), orig) | ||||
|  | ||||
|     def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture): | ||||
|         tz = tz_naive_fixture | ||||
|         idx = DatetimeIndex( | ||||
|             [ | ||||
|                 "2013-01-01 09:00", | ||||
|                 "2013-01-01 09:00", | ||||
|                 "2013-01-01 09:00", | ||||
|                 "2013-01-01 08:00", | ||||
|                 "2013-01-01 08:00", | ||||
|                 NaT, | ||||
|             ], | ||||
|             tz=tz, | ||||
|         ) | ||||
|         self._check_value_counts_dropna(idx) | ||||
|  | ||||
|     def test_value_counts_unique_timedeltaindex2(self): | ||||
|         idx = TimedeltaIndex( | ||||
|             [ | ||||
|                 "1 days 09:00:00", | ||||
|                 "1 days 09:00:00", | ||||
|                 "1 days 09:00:00", | ||||
|                 "1 days 08:00:00", | ||||
|                 "1 days 08:00:00", | ||||
|                 NaT, | ||||
|             ] | ||||
|         ) | ||||
|         self._check_value_counts_dropna(idx) | ||||
|  | ||||
|     def test_value_counts_unique_periodindex2(self): | ||||
|         idx = PeriodIndex( | ||||
|             [ | ||||
|                 "2013-01-01 09:00", | ||||
|                 "2013-01-01 09:00", | ||||
|                 "2013-01-01 09:00", | ||||
|                 "2013-01-01 08:00", | ||||
|                 "2013-01-01 08:00", | ||||
|                 NaT, | ||||
|             ], | ||||
|             freq="h", | ||||
|         ) | ||||
|         self._check_value_counts_dropna(idx) | ||||
|  | ||||
|     def _check_value_counts_dropna(self, idx): | ||||
|         exp_idx = idx[[2, 3]] | ||||
|         expected = Series([3, 2], index=exp_idx, name="count") | ||||
|  | ||||
|         for obj in [idx, Series(idx)]: | ||||
|             tm.assert_series_equal(obj.value_counts(), expected) | ||||
|  | ||||
|         exp_idx = idx[[2, 3, -1]] | ||||
|         expected = Series([3, 2, 1], index=exp_idx, name="count") | ||||
|  | ||||
|         for obj in [idx, Series(idx)]: | ||||
|             tm.assert_series_equal(obj.value_counts(dropna=False), expected) | ||||
|  | ||||
|         tm.assert_index_equal(idx.unique(), exp_idx) | ||||
| @ -0,0 +1,30 @@ | ||||
| from datetime import timedelta | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     isna, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestAsOf: | ||||
|     def test_asof_partial(self): | ||||
|         index = date_range("2010-01-01", periods=2, freq="ME") | ||||
|         expected = Timestamp("2010-02-28") | ||||
|         result = index.asof("2010-02") | ||||
|         assert result == expected | ||||
|         assert not isinstance(result, Index) | ||||
|  | ||||
|     def test_asof(self): | ||||
|         index = date_range("2020-01-01", periods=10) | ||||
|  | ||||
|         dt = index[0] | ||||
|         assert index.asof(dt) == dt | ||||
|         assert isna(index.asof(dt - timedelta(1))) | ||||
|  | ||||
|         dt = index[-1] | ||||
|         assert index.asof(dt + timedelta(1)) == dt | ||||
|  | ||||
|         dt = index[0].to_pydatetime() | ||||
|         assert isinstance(index.asof(dt), Timestamp) | ||||
| @ -0,0 +1,338 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import dateutil | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDatetimeIndex:
    """astype / object-conversion behavior of DatetimeIndex.

    Covers conversions to object, int64, str, period, tz-aware/naive
    datetime64 targets, and ``to_pydatetime`` arrays, including NaT
    handling and the TypeErrors raised for disallowed conversions.
    """

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_astype_asobject_around_dst_transition(self, tzstr):
        # GH#1345

        # dates around a dst transition
        rng = date_range("2/13/2010", "5/6/2010", tz=tzstr)

        objs = rng.astype(object)
        for i, x in enumerate(objs):
            exval = rng[i]
            assert x == exval
            assert x.tzinfo == exval.tzinfo

        # repeated to confirm the conversion is not a one-shot state change
        objs = rng.astype(object)
        for i, x in enumerate(objs):
            exval = rng[i]
            assert x == exval
            assert x.tzinfo == exval.tzinfo

    def test_astype(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(
            ["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx"
        )

        result = idx.astype(object)
        expected = Index(
            [Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx"
        )
        tm.assert_index_equal(result, expected)

        # NaT converts to iNaT (the int64 sentinel) under int64 astype
        result = idx.astype(np.int64)
        expected = Index(
            [1463356800000000000] + [-9223372036854775808] * 3,
            dtype=np.int64,
            name="idx",
        )
        tm.assert_index_equal(result, expected)

    def test_astype2(self):
        rng = date_range("1/1/2000", periods=10, name="idx")
        result = rng.astype("i8")
        tm.assert_index_equal(result, Index(rng.asi8, name="idx"))
        tm.assert_numpy_array_equal(result.values, rng.asi8)

    def test_astype_uint(self):
        # unsigned conversions are rejected with a hint toward int64
        arr = date_range("2000", periods=2, name="idx")

        with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
            arr.astype("uint64")
        with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
            arr.astype("uint32")

    def test_astype_with_tz(self):
        # with tz
        rng = date_range("1/1/2000", periods=10, tz="US/Eastern")
        msg = "Cannot use .astype to convert from timezone-aware"
        with pytest.raises(TypeError, match=msg):
            # deprecated
            rng.astype("datetime64[ns]")
        with pytest.raises(TypeError, match=msg):
            # check DatetimeArray while we're here deprecated
            rng._data.astype("datetime64[ns]")

    def test_astype_tzaware_to_tzaware(self):
        # GH 18951: tz-aware to tz-aware
        idx = date_range("20170101", periods=4, tz="US/Pacific")
        result = idx.astype("datetime64[ns, US/Eastern]")
        expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern")
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    def test_astype_tznaive_to_tzaware(self):
        # GH 18951: tz-naive to tz-aware
        idx = date_range("20170101", periods=4)
        idx = idx._with_freq(None)  # tz_localize does not preserve freq
        msg = "Cannot use .astype to convert from timezone-naive"
        with pytest.raises(TypeError, match=msg):
            # dt64->dt64tz deprecated
            idx.astype("datetime64[ns, US/Eastern]")
        with pytest.raises(TypeError, match=msg):
            # dt64->dt64tz deprecated
            idx._data.astype("datetime64[ns, US/Eastern]")

    def test_astype_str_nat(self, using_infer_string):
        # GH 13149, GH 13209
        # verify that we are returning NaT as a string (and not unicode)

        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
        result = idx.astype(str)
        if using_infer_string:
            # the str dtype represents missing values as None, not "NaT"
            expected = Index(["2016-05-16", None, None, None], dtype="str")
        else:
            expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_astype_str(self):
        # test astype string - #10442
        dti = date_range("2012-01-01", periods=4, name="test_name")
        result = dti.astype(str)
        expected = Index(
            ["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
            name="test_name",
            dtype="str",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_str_tz_and_name(self):
        # test astype string with tz and name
        dti = date_range("2012-01-01", periods=3, name="test_name", tz="US/Eastern")
        result = dti.astype(str)
        expected = Index(
            [
                "2012-01-01 00:00:00-05:00",
                "2012-01-02 00:00:00-05:00",
                "2012-01-03 00:00:00-05:00",
            ],
            name="test_name",
            dtype="str",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_str_freq_and_name(self):
        # test astype string with freqH and name
        dti = date_range("1/1/2011", periods=3, freq="h", name="test_name")
        result = dti.astype(str)
        expected = Index(
            ["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
            name="test_name",
            dtype="str",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_str_freq_and_tz(self):
        # test astype string with freqH and timezone
        dti = date_range(
            "3/6/2012 00:00", periods=2, freq="h", tz="Europe/London", name="test_name"
        )
        result = dti.astype(str)
        expected = Index(
            ["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
            dtype="str",
            name="test_name",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_datetime64(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(
            ["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx"
        )

        # same-dtype astype copies by default ...
        result = idx.astype("datetime64[ns]")
        tm.assert_index_equal(result, idx)
        assert result is not idx

        # ... and returns the same object with copy=False
        result = idx.astype("datetime64[ns]", copy=False)
        tm.assert_index_equal(result, idx)
        assert result is idx

        idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan], tz="EST", name="idx")
        msg = "Cannot use .astype to convert from timezone-aware"
        with pytest.raises(TypeError, match=msg):
            # dt64tz->dt64 deprecated
            result = idx_tz.astype("datetime64[ns]")

    def test_astype_object(self):
        rng = date_range("1/1/2000", periods=20)

        casted = rng.astype("O")
        exp_values = list(rng)

        tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_))
        assert casted.tolist() == exp_values

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_astype_object_tz(self, tz):
        idx = date_range(start="2013-01-01", periods=4, freq="ME", name="idx", tz=tz)
        expected_list = [
            Timestamp("2013-01-31", tz=tz),
            Timestamp("2013-02-28", tz=tz),
            Timestamp("2013-03-31", tz=tz),
            Timestamp("2013-04-30", tz=tz),
        ]
        expected = Index(expected_list, dtype=object, name="idx")
        result = idx.astype(object)
        tm.assert_index_equal(result, expected)
        assert idx.tolist() == expected_list

    def test_astype_object_with_nat(self):
        idx = DatetimeIndex(
            [datetime(2013, 1, 1), datetime(2013, 1, 2), NaT, datetime(2013, 1, 4)],
            name="idx",
        )
        expected_list = [
            Timestamp("2013-01-01"),
            Timestamp("2013-01-02"),
            NaT,
            Timestamp("2013-01-04"),
        ]
        expected = Index(expected_list, dtype=object, name="idx")
        result = idx.astype(object)
        tm.assert_index_equal(result, expected)
        assert idx.tolist() == expected_list

    @pytest.mark.parametrize(
        "dtype",
        [float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"],
    )
    def test_astype_raises(self, dtype):
        # GH 13149, GH 13209
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
        msg = "Cannot cast DatetimeIndex to dtype"
        if dtype == "datetime64":
            msg = "Casting to unit-less dtype 'datetime64' is not supported"
        with pytest.raises(TypeError, match=msg):
            idx.astype(dtype)

    def test_index_convert_to_datetime_array(self):
        # to_pydatetime yields an ndarray of tz-faithful datetime objects
        def _check_rng(rng):
            converted = rng.to_pydatetime()
            assert isinstance(converted, np.ndarray)
            for x, stamp in zip(converted, rng):
                assert isinstance(x, datetime)
                assert x == stamp.to_pydatetime()
                assert x.tzinfo == stamp.tzinfo

        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz="US/Eastern")
        rng_utc = date_range("20090415", "20090519", tz="utc")

        _check_rng(rng)
        _check_rng(rng_eastern)
        _check_rng(rng_utc)

    def test_index_convert_to_datetime_array_explicit_pytz(self):
        # same as above, with tz objects constructed explicitly via pytz
        def _check_rng(rng):
            converted = rng.to_pydatetime()
            assert isinstance(converted, np.ndarray)
            for x, stamp in zip(converted, rng):
                assert isinstance(x, datetime)
                assert x == stamp.to_pydatetime()
                assert x.tzinfo == stamp.tzinfo

        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
        rng_utc = date_range("20090415", "20090519", tz=pytz.utc)

        _check_rng(rng)
        _check_rng(rng_eastern)
        _check_rng(rng_utc)

    def test_index_convert_to_datetime_array_dateutil(self):
        # same as above, with dateutil-backed timezones
        def _check_rng(rng):
            converted = rng.to_pydatetime()
            assert isinstance(converted, np.ndarray)
            for x, stamp in zip(converted, rng):
                assert isinstance(x, datetime)
                assert x == stamp.to_pydatetime()
                assert x.tzinfo == stamp.tzinfo

        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
        rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc())

        _check_rng(rng)
        _check_rng(rng_eastern)
        _check_rng(rng_utc)

    @pytest.mark.parametrize(
        "tz, dtype",
        [["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]],
    )
    def test_integer_index_astype_datetime(self, tz, dtype):
        # GH 20997, 20964, 24559
        val = [Timestamp("2018-01-01", tz=tz).as_unit("ns")._value]
        result = Index(val, name="idx").astype(dtype)
        expected = DatetimeIndex(["2018-01-01"], tz=tz, name="idx").as_unit("ns")
        tm.assert_index_equal(result, expected)

    def test_dti_astype_period(self):
        idx = DatetimeIndex([NaT, "2011-01-01", "2011-02-01"], name="idx")

        res = idx.astype("period[M]")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")
        tm.assert_index_equal(res, exp)

        # multiples of a freq are also supported
        res = idx.astype("period[3M]")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
        tm.assert_index_equal(res, exp)
|  | ||||
|  | ||||
class TestAstype:
    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_astype_category(self, tz):
        obj = date_range("2000", periods=2, tz=tz, name="idx")
        dti = DatetimeIndex(["2000-01-01", "2000-01-02"], tz=tz).as_unit("ns")
        expected = pd.CategoricalIndex(dti, name="idx")

        result = obj.astype("category")
        tm.assert_index_equal(result, expected)

        # The underlying DatetimeArray converts the same way.
        tm.assert_categorical_equal(obj._data.astype("category"), expected.values)

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_astype_array_fallback(self, tz):
        obj = date_range("2000", periods=2, tz=tz, name="idx")

        result = obj.astype(bool)
        tm.assert_index_equal(result, Index(np.array([True, True]), name="idx"))

        # Array-level astype falls back to a plain ndarray.
        tm.assert_numpy_array_equal(obj._data.astype(bool), np.array([True, True]))
| @ -0,0 +1,141 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDelete:
    def test_delete(self, unit):
        # Deleting an endpoint keeps the freq; deleting from the middle
        # resets it to None.
        idx = date_range(
            start="2000-01-01", periods=5, freq="ME", name="idx", unit=unit
        )

        # freq survives when an endpoint is removed
        exp_front = date_range(
            start="2000-02-01", periods=4, freq="ME", name="idx", unit=unit
        )
        exp_back = date_range(
            start="2000-01-01", periods=4, freq="ME", name="idx", unit=unit
        )

        # interior deletion resets freq to None
        exp_middle = DatetimeIndex(
            ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"],
            freq=None,
            name="idx",
        ).as_unit(unit)

        for loc, expected in [
            (0, exp_front),
            (-5, exp_front),
            (-1, exp_back),
            (4, exp_back),
            (1, exp_middle),
        ]:
            result = idx.delete(loc)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        # either exception type, depending on numpy version
        with pytest.raises((IndexError, ValueError), match="out of bounds"):
            idx.delete(5)

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
    def test_delete2(self, tz):
        # freq and tz survive deleting the first or last element
        idx = date_range(
            start="2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz
        )

        for loc, start in [(0, "2000-01-01 10:00"), (-1, "2000-01-01 09:00")]:
            expected = date_range(
                start=start, periods=9, freq="h", name="idx", tz=tz
            )
            result = idx.delete(loc)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freqstr == "h"
            assert result.tz == expected.tz

    def test_delete_slice(self, unit):
        idx = date_range(
            start="2000-01-01", periods=10, freq="D", name="idx", unit=unit
        )

        # removing a run at either end preserves freq
        exp_front = date_range(
            start="2000-01-04", periods=7, freq="D", name="idx", unit=unit
        )
        exp_back = date_range(
            start="2000-01-01", periods=7, freq="D", name="idx", unit=unit
        )

        # removing an interior run resets freq to None
        exp_middle = DatetimeIndex(
            [
                "2000-01-01",
                "2000-01-02",
                "2000-01-03",
                "2000-01-07",
                "2000-01-08",
                "2000-01-09",
                "2000-01-10",
            ],
            freq=None,
            name="idx",
        ).as_unit(unit)

        for locs, expected in [
            ((0, 1, 2), exp_front),
            ((7, 8, 9), exp_back),
            ((3, 4, 5), exp_middle),
        ]:
            # deleting by a sequence of positions ...
            result = idx.delete(locs)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

            # ... and by the equivalent slice behave the same
            result = idx.delete(slice(locs[0], locs[-1] + 1))
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

    # TODO: belongs in Series.drop tests?
    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
    def test_delete_slice2(self, tz, unit):
        dti = date_range(
            "2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz, unit=unit
        )
        ser = Series(
            1,
            index=dti,
        )

        # dropping a contiguous leading block preserves freq
        result = ser.drop(ser.index[:5]).index
        expected = dti[5:]
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq
        assert result.tz == expected.tz

        # dropping every other element resets freq to None
        result = ser.drop(ser.index[[1, 3, 5, 7, 9]]).index
        expected = dti[::2]._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq
        assert result.tz == expected.tz
| @ -0,0 +1,125 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     date_range, | ||||
|     factorize, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexFactorize:
    def test_factorize(self):
        # already-ordered input: default and sort=True give the same result
        idx1 = DatetimeIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
        )

        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])

        for sort in [False, True]:
            arr, idx = idx1.factorize(sort=sort)
            tm.assert_numpy_array_equal(arr, exp_arr)
            tm.assert_index_equal(idx, exp_idx)
            assert idx.freq == exp_idx.freq

        # tz must be preserved
        idx1 = idx1.tz_localize("Asia/Tokyo")
        exp_idx = exp_idx.tz_localize("Asia/Tokyo")

        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq

        # unordered input
        idx2 = DatetimeIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
        )

        # sort=True codes against the sorted uniques
        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
        arr, idx = idx2.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq

        # default keeps uniques in first-appearance order
        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
        arr, idx = idx2.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq

    def test_factorize_preserves_freq(self):
        # GH#38120 freq should be preserved
        dti = date_range("2000-01", periods=4, freq="ME", tz="Asia/Tokyo")
        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)

        # the method and the module-level function must agree
        for codes, uniques in [dti.factorize(), factorize(dti)]:
            tm.assert_numpy_array_equal(codes, exp_arr)
            tm.assert_index_equal(uniques, dti)
            assert uniques.freq == dti.freq

    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
        # GH#13750
        tz = tz_naive_fixture
        base = date_range("2016-11-05", freq="h", periods=100, tz=tz)
        obj = index_or_series(base.repeat(5))

        codes, uniques = obj.factorize()
        tm.assert_numpy_array_equal(
            codes, np.arange(100, dtype=np.intp).repeat(5)
        )
        # uniques come back without the original freq
        expected = base._with_freq(None)
        tm.assert_index_equal(uniques, expected)
        assert uniques.freq == expected.freq

    def test_factorize_dst(self, index_or_series):
        # GH#13750: ranges that do and do not span a DST transition
        for start in ["2016-11-06", "2016-06-13"]:
            idx = date_range(start, freq="h", periods=12, tz="US/Eastern")
            obj = index_or_series(idx)

            codes, uniques = obj.factorize()
            tm.assert_numpy_array_equal(codes, np.arange(12, dtype=np.intp))
            tm.assert_index_equal(uniques, idx)
            if index_or_series is Index:
                assert uniques.freq == idx.freq

    @pytest.mark.parametrize("sort", [True, False])
    def test_factorize_no_freq_non_nano(self, tz_naive_fixture, sort):
        # GH#51978 case that does not go through the fastpath based on
        #  non-None freq
        tz = tz_naive_fixture
        idx = date_range("2016-11-06", freq="h", periods=5, tz=tz)[[0, 4, 1, 3, 2]]
        exp_codes, exp_uniques = idx.factorize(sort=sort)

        non_nano = idx.as_unit("s")

        # factorizing the second-resolution index ...
        res_codes, res_uniques = non_nano.factorize(sort=sort)
        tm.assert_numpy_array_equal(res_codes, exp_codes)
        tm.assert_index_equal(res_uniques, exp_uniques.as_unit("s"))

        # ... or a Series wrapping it matches the nanosecond result
        res_codes, res_uniques = non_nano.to_series().factorize(sort=sort)
        tm.assert_numpy_array_equal(res_codes, exp_codes)
        tm.assert_index_equal(res_uniques, exp_uniques.as_unit("s"))
| @ -0,0 +1,62 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexFillNA:
    @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
    def test_fillna_datetime64(self, tz):
        # GH 11343
        ts = pd.Timestamp

        naive = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"])
        aware = pd.DatetimeIndex(
            ["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz
        )

        # (index, fill value, expected result)
        cases = [
            # fill with matching tz-awareness keeps datetime64 dtype
            (
                naive,
                ts("2011-01-01 10:00"),
                pd.DatetimeIndex(
                    ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"]
                ),
            ),
            # tz mismatch casts to object
            (
                naive,
                ts("2011-01-01 10:00", tz=tz),
                pd.Index(
                    [
                        ts("2011-01-01 09:00"),
                        ts("2011-01-01 10:00", tz=tz),
                        ts("2011-01-01 11:00"),
                    ],
                    dtype=object,
                ),
            ),
            # non-datetime fill casts to object
            (
                naive,
                "x",
                pd.Index(
                    [ts("2011-01-01 09:00"), "x", ts("2011-01-01 11:00")],
                    dtype=object,
                ),
            ),
            (
                aware,
                ts("2011-01-01 10:00", tz=tz),
                pd.DatetimeIndex(
                    ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                    tz=tz,
                ),
            ),
            # naive fill into an aware index casts to object
            (
                aware,
                ts("2011-01-01 10:00"),
                pd.Index(
                    [
                        ts("2011-01-01 09:00", tz=tz),
                        ts("2011-01-01 10:00"),
                        ts("2011-01-01 11:00", tz=tz),
                    ],
                    dtype=object,
                ),
            ),
            (
                aware,
                "x",
                pd.Index(
                    [
                        ts("2011-01-01 09:00", tz=tz),
                        "x",
                        ts("2011-01-01 11:00", tz=tz),
                    ],
                    dtype=object,
                ),
            ),
        ]

        for index, value, expected in cases:
            tm.assert_index_equal(index.fillna(value), expected)
| @ -0,0 +1,265 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     NaT, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestInsert:
    @pytest.mark.parametrize("null", [None, np.nan, np.datetime64("NaT"), NaT, NA])
    @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
    def test_insert_nat(self, tz, null):
        # GH#16537, GH#18295 (test missing)
        idx = DatetimeIndex(["2017-01-01"], tz=tz)

        if tz is not None and isinstance(null, np.datetime64):
            # a naive np.datetime64 NaT does not coerce into an aware index
            expected = Index([null, idx[0]], dtype=object)
        else:
            expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz)

        tm.assert_index_equal(idx.insert(0, null), expected)

    @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
    def test_insert_invalid_na(self, tz):
        # a timedelta64 NaT is the wrong type, so the result is object dtype
        idx = DatetimeIndex(["2017-01-01"], tz=tz)

        item = np.timedelta64("NaT")
        expected = Index([item] + list(idx), dtype=object)
        tm.assert_index_equal(idx.insert(0, item), expected)

    def test_insert_empty_preserves_freq(self, tz_naive_fixture):
        # GH#33573
        tz = tz_naive_fixture
        item = Timestamp("2017-04-05").tz_localize(tz)

        # inserting into an empty index keeps a freq the item conforms to ...
        dti = DatetimeIndex([], tz=tz, freq="D")
        assert dti.insert(0, item).freq == dti.freq

        # ... but not one it doesn't conform to
        dti = DatetimeIndex([], tz=tz, freq="W-THU")
        assert dti.insert(0, item).freq is None

    def test_insert(self, unit):
        idx = DatetimeIndex(
            ["2000-01-04", "2000-01-01", "2000-01-02"], name="idx"
        ).as_unit(unit)

        # inserting a datetime keeps the DatetimeIndex dtype
        expected = DatetimeIndex(
            ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx"
        ).as_unit(unit)
        tm.assert_index_equal(idx.insert(2, datetime(2000, 1, 5)), expected)

        # insertion of non-datetime should coerce to object index
        res = idx.insert(1, "inserted")
        expected = Index(
            [
                datetime(2000, 1, 4),
                "inserted",
                datetime(2000, 1, 1),
                datetime(2000, 1, 2),
            ],
            name="idx",
        )
        assert not isinstance(res, DatetimeIndex)
        tm.assert_index_equal(res, expected)
        assert res.name == expected.name

    def test_insert2(self, unit):
        idx = date_range("1/1/2000", periods=3, freq="ME", name="idx", unit=unit)

        # conforming insertions at either end preserve freq
        exp_front = DatetimeIndex(
            ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"],
            name="idx",
            freq="ME",
        ).as_unit(unit)
        exp_back = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"],
            name="idx",
            freq="ME",
        ).as_unit(unit)

        # non-conforming insertions reset freq to None
        exp_dup = DatetimeIndex(
            ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"],
            name="idx",
            freq=None,
        ).as_unit(unit)
        exp_nonconform = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
            name="idx",
            freq=None,
        ).as_unit(unit)

        cases = [
            (0, datetime(1999, 12, 31), exp_front),
            (-3, datetime(1999, 12, 31), exp_front),
            (3, datetime(2000, 4, 30), exp_back),
            (1, datetime(2000, 1, 31), exp_dup),
            (3, datetime(2000, 1, 2), exp_nonconform),
        ]

        for loc, item, expected in cases:
            result = idx.insert(loc, item)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

    def test_insert3(self, unit):
        idx = date_range("1/1/2000", periods=3, freq="ME", name="idx", unit=unit)

        # inserting a non-conforming value resets freq to None
        result = idx.insert(3, datetime(2000, 1, 2))
        expected = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
            name="idx",
            freq=None,
        ).as_unit(unit)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq is None

    def test_insert4(self, unit):
        for tz in ["US/Pacific", "Asia/Singapore"]:
            idx = date_range(
                "1/1/2000 09:00", periods=6, freq="h", tz=tz, name="idx", unit=unit
            )

            # a conforming tz-aware value preserves freq
            expected = date_range(
                "1/1/2000 09:00", periods=7, freq="h", tz=tz, name="idx", unit=unit
            )
            conforming = [
                Timestamp("2000-01-01 15:00", tz=tz),
                pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)),
            ]
            for item in conforming:
                result = idx.insert(6, item)
                tm.assert_index_equal(result, expected)
                assert result.name == expected.name
                assert result.freq == expected.freq
                assert result.tz == expected.tz

            # a non-conforming value resets freq to None
            expected = DatetimeIndex(
                [
                    "2000-01-01 09:00",
                    "2000-01-01 10:00",
                    "2000-01-01 11:00",
                    "2000-01-01 12:00",
                    "2000-01-01 13:00",
                    "2000-01-01 14:00",
                    "2000-01-01 10:00",
                ],
                name="idx",
                tz=tz,
                freq=None,
            ).as_unit(unit)
            nonconforming = [
                Timestamp("2000-01-01 10:00", tz=tz),
                pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)),
            ]
            for item in nonconforming:
                result = idx.insert(6, item)
                tm.assert_index_equal(result, expected)
                assert result.name == expected.name
                assert result.tz == expected.tz
                assert result.freq is None

    # TODO: also changes DataFrame.__setitem__ with expansion
    def test_insert_mismatched_tzawareness(self):
        # see GH#7299
        idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")

        # inserting a tz-naive value into an aware index casts to object,
        # whether it is a Timestamp or a stdlib datetime
        for item in [Timestamp("2000-01-04"), datetime(2000, 1, 4)]:
            result = idx.insert(3, item)
            expected = Index(
                list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
            )
            tm.assert_index_equal(result, expected)

    # TODO: also changes DataFrame.__setitem__ with expansion
    def test_insert_mismatched_tz(self):
        # see GH#7299
        # pre-2.0 with mismatched tzs we would cast to object
        idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")

        # mismatched tz: the value is converted to the index's tz and the
        # datetime64 dtype is preserved
        item = Timestamp("2000-01-04", tz="US/Eastern")
        expected = Index(
            list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]),
            name="idx",
        )
        assert expected.dtype == idx.dtype
        tm.assert_index_equal(idx.insert(3, item), expected)

        item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))
        expected = Index(
            list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]),
            name="idx",
        )
        assert expected.dtype == idx.dtype
        tm.assert_index_equal(idx.insert(3, item), expected)

    @pytest.mark.parametrize(
        "item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)]
    )
    def test_insert_mismatched_types_raises(self, tz_aware_fixture, item):
        # GH#33703 dont cast these to dt64
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz)

        result = dti.insert(1, item)

        if isinstance(item, np.ndarray):
            # a 0-dim array is unboxed to its scalar
            assert item.item() == 0
            inserted = 0
        else:
            inserted = item
        expected = Index([dti[0], inserted] + list(dti[1:]), dtype=object, name=9)

        tm.assert_index_equal(result, expected)

    def test_insert_castable_str(self, tz_aware_fixture):
        # GH#33703
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)

        # a parseable string is coerced to a Timestamp in the index's tz
        value = "2019-11-05"
        expected = DatetimeIndex(
            [Timestamp(value).tz_localize(tz)] + list(dti), dtype=dti.dtype, name=9
        )
        tm.assert_index_equal(dti.insert(0, value), expected)

    def test_insert_non_castable_str(self, tz_aware_fixture):
        # GH#33703
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)

        # an unparseable string falls back to object dtype
        value = "foo"
        expected = Index(["foo"] + list(dti), dtype=object, name=9)
        tm.assert_index_equal(dti.insert(0, value), expected)
| @ -0,0 +1,28 @@ | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_isocalendar_returns_correct_values_close_to_new_year_with_tz():
    # GH#6538: Check that DatetimeIndex and its TimeStamp elements
    # return the same weekofyear accessor close to new year w/ tz
    dates = DatetimeIndex(
        ["2013/12/29", "2013/12/30", "2013/12/31"], tz="Europe/Brussels"
    )
    expected = DataFrame(
        [[2013, 52, 7], [2014, 1, 1], [2014, 1, 2]],
        columns=["year", "week", "day"],
        index=dates,
        dtype="UInt32",
    )
    tm.assert_frame_equal(dates.isocalendar(), expected)
|  | ||||
|  | ||||
def test_dti_timestamp_isocalendar_fields():
    # the last Timestamp of the index reports the same isocalendar fields
    # as the DatetimeIndex accessor does for that position
    idx = date_range("2020-01-01", periods=10)
    expected = tuple(idx.isocalendar().iloc[-1].to_list())
    assert idx[-1].isocalendar() == expected
| @ -0,0 +1,47 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Period, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestMap:
    def test_map(self):
        # mapping a str-returning callable gives an object-dtype Index
        rng = date_range("1/1/2000", periods=10)

        fmt = lambda x: x.strftime("%Y%m%d")
        result = rng.map(fmt)
        expected = Index([fmt(x) for x in rng])
        tm.assert_index_equal(result, expected)

    def test_map_fallthrough(self, capsys):
        # GH#22067, check we don't get warnings about silently ignored errors
        dti = date_range("2017-01-01", "2018-01-01", freq="B")

        dti.map(lambda x: Period(year=x.year, month=x.month, freq="M"))

        assert capsys.readouterr().err == ""

    def test_map_bug_1677(self):
        index = DatetimeIndex(["2012-04-25 09:30:00.393000"])
        asof = index.asof

        result = index.map(asof)
        tm.assert_index_equal(result, Index([asof(index[0])]))

    @pytest.mark.parametrize("name", [None, "name"])
    def test_index_map(self, name):
        # see GH#20990: mapping to tuples produces a MultiIndex whose levels
        # both inherit the original name
        count = 6
        mapped = date_range("2018-01-01", periods=count, freq="ME", name=name).map(
            lambda x: (x.year, x.month)
        )
        expected = MultiIndex.from_product(((2018,), range(1, 7)), names=[name, name])
        tm.assert_index_equal(mapped, expected)
| @ -0,0 +1,95 @@ | ||||
| from dateutil.tz import tzlocal | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     NaT, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestNormalize:
    def test_normalize(self):
        rng = date_range("1/1/2000 9:30", periods=10, freq="D")

        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D")
        tm.assert_index_equal(result, expected)

        # nanosecond-resolution values are likewise floored to midnight
        raw = np.array([1380585623454345752, 1380585612343234312]).astype(
            "datetime64[ns]"
        )
        normalized = DatetimeIndex(raw).normalize()
        midnight = np.array([1380585600000000000, 1380585600000000000]).astype(
            "datetime64[ns]"
        )
        tm.assert_index_equal(normalized, DatetimeIndex(midnight))

        assert result.is_normalized
        assert not rng.is_normalized

    def test_normalize_nat(self):
        # NaT passes through normalize unchanged
        dti = DatetimeIndex([NaT, Timestamp("2018-01-01 01:00:00")])
        expected = DatetimeIndex([NaT, Timestamp("2018-01-01")])
        tm.assert_index_equal(dti.normalize(), expected)

    def test_normalize_tz(self):
        # US/Eastern: normalize does not preserve freq
        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="US/Eastern")
        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D", tz="US/Eastern")
        tm.assert_index_equal(result, expected._with_freq(None))
        assert result.is_normalized
        assert not rng.is_normalized

        # UTC: freq is preserved
        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="UTC")
        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D", tz="UTC")
        tm.assert_index_equal(result, expected)
        assert result.is_normalized
        assert not rng.is_normalized

        # local tz: freq is dropped again
        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal())
        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal())
        tm.assert_index_equal(result, expected._with_freq(None))
        assert result.is_normalized
        assert not rng.is_normalized

    @td.skip_if_windows
    @pytest.mark.parametrize(
        "timezone",
        [
            "US/Pacific",
            "US/Eastern",
            "UTC",
            "Asia/Kolkata",
            "Asia/Shanghai",
            "Australia/Canberra",
        ],
    )
    def test_normalize_tz_local(self, timezone):
        # GH#13459: normalize with the system timezone patched to `timezone`
        with tm.set_timezone(timezone):
            rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal())

            result = rng.normalize()
            expected = date_range(
                "1/1/2000", periods=10, freq="D", tz=tzlocal()
            )._with_freq(None)
            tm.assert_index_equal(result, expected)

            assert result.is_normalized
            assert not rng.is_normalized
| @ -0,0 +1,83 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestRepeat: | ||||
|     def test_repeat_range(self, tz_naive_fixture): | ||||
|         rng = date_range("1/1/2000", "1/1/2001") | ||||
|  | ||||
|         result = rng.repeat(5) | ||||
|         assert result.freq is None | ||||
|         assert len(result) == 5 * len(rng) | ||||
|  | ||||
|     def test_repeat_range2(self, tz_naive_fixture, unit): | ||||
|         tz = tz_naive_fixture | ||||
|         index = date_range("2001-01-01", periods=2, freq="D", tz=tz, unit=unit) | ||||
|         exp = DatetimeIndex( | ||||
|             ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz | ||||
|         ).as_unit(unit) | ||||
|         for res in [index.repeat(2), np.repeat(index, 2)]: | ||||
|             tm.assert_index_equal(res, exp) | ||||
|             assert res.freq is None | ||||
|  | ||||
|     def test_repeat_range3(self, tz_naive_fixture, unit): | ||||
|         tz = tz_naive_fixture | ||||
|         index = date_range("2001-01-01", periods=2, freq="2D", tz=tz, unit=unit) | ||||
|         exp = DatetimeIndex( | ||||
|             ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz | ||||
|         ).as_unit(unit) | ||||
|         for res in [index.repeat(2), np.repeat(index, 2)]: | ||||
|             tm.assert_index_equal(res, exp) | ||||
|             assert res.freq is None | ||||
|  | ||||
|     def test_repeat_range4(self, tz_naive_fixture, unit): | ||||
|         tz = tz_naive_fixture | ||||
|         index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz).as_unit(unit) | ||||
|         exp = DatetimeIndex( | ||||
|             [ | ||||
|                 "2001-01-01", | ||||
|                 "2001-01-01", | ||||
|                 "2001-01-01", | ||||
|                 "NaT", | ||||
|                 "NaT", | ||||
|                 "NaT", | ||||
|                 "2003-01-01", | ||||
|                 "2003-01-01", | ||||
|                 "2003-01-01", | ||||
|             ], | ||||
|             tz=tz, | ||||
|         ).as_unit(unit) | ||||
|         for res in [index.repeat(3), np.repeat(index, 3)]: | ||||
|             tm.assert_index_equal(res, exp) | ||||
|             assert res.freq is None | ||||
|  | ||||
|     def test_repeat(self, tz_naive_fixture, unit): | ||||
|         tz = tz_naive_fixture | ||||
|         reps = 2 | ||||
|         msg = "the 'axis' parameter is not supported" | ||||
|  | ||||
|         rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz, unit=unit) | ||||
|  | ||||
|         expected_rng = DatetimeIndex( | ||||
|             [ | ||||
|                 Timestamp("2016-01-01 00:00:00", tz=tz), | ||||
|                 Timestamp("2016-01-01 00:00:00", tz=tz), | ||||
|                 Timestamp("2016-01-01 00:30:00", tz=tz), | ||||
|                 Timestamp("2016-01-01 00:30:00", tz=tz), | ||||
|             ] | ||||
|         ).as_unit(unit) | ||||
|  | ||||
|         res = rng.repeat(reps) | ||||
|         tm.assert_index_equal(res, expected_rng) | ||||
|         assert res.freq is None | ||||
|  | ||||
|         tm.assert_index_equal(np.repeat(rng, reps), expected_rng) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.repeat(rng, reps, axis=1) | ||||
| @ -0,0 +1,31 @@ | ||||
| from dateutil.tz import tzlocal | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import IS64 | ||||
|  | ||||
| from pandas import date_range | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "freq,expected", | ||||
|     [ | ||||
|         ("YE", "day"), | ||||
|         ("QE", "day"), | ||||
|         ("ME", "day"), | ||||
|         ("D", "day"), | ||||
|         ("h", "hour"), | ||||
|         ("min", "minute"), | ||||
|         ("s", "second"), | ||||
|         ("ms", "millisecond"), | ||||
|         ("us", "microsecond"), | ||||
|     ], | ||||
| ) | ||||
| def test_dti_resolution(request, tz_naive_fixture, freq, expected): | ||||
|     tz = tz_naive_fixture | ||||
|     if freq == "YE" and not IS64 and isinstance(tz, tzlocal): | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038") | ||||
|         ) | ||||
|  | ||||
|     idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) | ||||
|     assert idx.resolution == expected | ||||
| @ -0,0 +1,221 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import to_offset | ||||
| from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexRound:
    """Tests for frequency-based rounding: DatetimeIndex.round/floor/ceil."""

    def test_round_daily(self):
        # Rounding to "D" drops the time-of-day; tz-awareness is preserved,
        # and rounding to a unit finer than the data ("s") is a no-op.
        dti = date_range("20130101 09:10:11", periods=5)
        result = dti.round("D")
        expected = date_range("20130101", periods=5)
        tm.assert_index_equal(result, expected)

        dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
        result = dti.round("D")
        expected = date_range("20130101", periods=5).tz_localize("US/Eastern")
        tm.assert_index_equal(result, expected)

        result = dti.round("s")
        tm.assert_index_equal(result, dti)

    @pytest.mark.parametrize(
        "freq, error_msg",
        [
            ("YE", "<YearEnd: month=12> is a non-fixed frequency"),
            ("ME", "<MonthEnd> is a non-fixed frequency"),
            ("foobar", "Invalid frequency: foobar"),
        ],
    )
    def test_round_invalid(self, freq, error_msg):
        # Non-fixed frequencies (year end, month end) and unknown aliases
        # are rejected with ValueError.
        dti = date_range("20130101 09:10:11", periods=5)
        dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
        with pytest.raises(ValueError, match=error_msg):
            dti.round(freq)

    def test_round(self, tz_naive_fixture, unit):
        # Half-hour points round to the nearest hour (the 00:30 tie lands on
        # the even hour 00:00); scalar Timestamp.round must agree elementwise.
        tz = tz_naive_fixture
        rng = date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz, unit=unit)
        elt = rng[1]

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 01:00:00", tz=tz),
                Timestamp("2016-01-01 02:00:00", tz=tz),
                Timestamp("2016-01-01 02:00:00", tz=tz),
            ]
        ).as_unit(unit)
        expected_elt = expected_rng[1]

        result = rng.round(freq="h")
        tm.assert_index_equal(result, expected_rng)
        assert elt.round(freq="h") == expected_elt

        # Invalid and non-fixed freqs raise for both the Index and the scalar.
        msg = INVALID_FREQ_ERR_MSG
        with pytest.raises(ValueError, match=msg):
            rng.round(freq="foo")
        with pytest.raises(ValueError, match=msg):
            elt.round(freq="foo")

        msg = "<MonthEnd> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            rng.round(freq="ME")
        with pytest.raises(ValueError, match=msg):
            elt.round(freq="ME")

    def test_round2(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH#14440 & GH#15578: .0015 s rounds to 2 ms (half-to-even);
        # rounding to "us"/"ns" leaves the values unchanged.
        index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz).as_unit("ns")
        result = index.round("ms")
        expected = DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz).as_unit("ns")
        tm.assert_index_equal(result, expected)

        for freq in ["us", "ns"]:
            tm.assert_index_equal(index, index.round(freq))

    def test_round3(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # .00149 s is below the half-millisecond mark, so it rounds down.
        index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz).as_unit("ns")
        result = index.round("ms")
        expected = DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz).as_unit("ns")
        tm.assert_index_equal(result, expected)

    def test_round4(self, tz_naive_fixture):
        # Rounding to small nanosecond multiples must be exact and must not
        # emit any warning (assert_produces_warning(False) forbids all).
        index = DatetimeIndex(["2016-10-17 12:00:00.001501031"], dtype="M8[ns]")
        result = index.round("10ns")
        expected = DatetimeIndex(["2016-10-17 12:00:00.001501030"], dtype="M8[ns]")
        tm.assert_index_equal(result, expected)

        ts = "2016-10-17 12:00:00.001501031"
        dti = DatetimeIndex([ts], dtype="M8[ns]")
        with tm.assert_produces_warning(False):
            dti.round("1010ns")

    def test_no_rounding_occurs(self, tz_naive_fixture):
        # GH 21262
        # Values that already sit on the rounding grid come back unchanged.
        tz = tz_naive_fixture
        rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz)

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 00:02:00", tz=tz),
                Timestamp("2016-01-01 00:04:00", tz=tz),
                Timestamp("2016-01-01 00:06:00", tz=tz),
                Timestamp("2016-01-01 00:08:00", tz=tz),
            ]
        ).as_unit("ns")

        result = rng.round(freq="2min")
        tm.assert_index_equal(result, expected_rng)

    @pytest.mark.parametrize(
        "test_input, rounder, freq, expected",
        [
            (["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]),
            (["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]),
            (
                ["2117-01-01 00:00:45.000000012"],
                "floor",
                "10ns",
                ["2117-01-01 00:00:45.000000010"],
            ),
            (
                ["1823-01-01 00:00:01.000000012"],
                "ceil",
                "10ns",
                ["1823-01-01 00:00:01.000000020"],
            ),
            (["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]),
            (["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]),
            (["2018-01-01 00:15:00"], "ceil", "15min", ["2018-01-01 00:15:00"]),
            (["2018-01-01 00:15:00"], "floor", "15min", ["2018-01-01 00:15:00"]),
            (["1823-01-01 03:00:00"], "ceil", "3h", ["1823-01-01 03:00:00"]),
            (["1823-01-01 03:00:00"], "floor", "3h", ["1823-01-01 03:00:00"]),
            (
                ("NaT", "1823-01-01 00:00:01"),
                "floor",
                "1s",
                ("NaT", "1823-01-01 00:00:01"),
            ),
            (
                ("NaT", "1823-01-01 00:00:01"),
                "ceil",
                "1s",
                ("NaT", "1823-01-01 00:00:01"),
            ),
        ],
    )
    def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
        # floor/ceil for far-past and far-future dates, on-grid values, and
        # NaT entries (which must pass through untouched).
        dt = DatetimeIndex(list(test_input))
        func = getattr(dt, rounder)
        result = func(freq)
        expected = DatetimeIndex(list(expected))
        assert expected.equals(result)

    @pytest.mark.parametrize(
        "start, index_freq, periods",
        [("2018-01-01", "12h", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)],
    )
    @pytest.mark.parametrize(
        "round_freq",
        [
            "2ns",
            "3ns",
            "4ns",
            "5ns",
            "6ns",
            "7ns",
            "250ns",
            "500ns",
            "750ns",
            "1us",
            "19us",
            "250us",
            "500us",
            "750us",
            "1s",
            "2s",
            "3s",
            "12h",
            "1D",
        ],
    )
    def test_round_int64(self, start, index_freq, periods, round_freq):
        # Property-style check of floor/ceil/round against the raw int64
        # nanosecond representation (asi8).
        dt = date_range(start=start, freq=index_freq, periods=periods)
        unit = to_offset(round_freq).nanos

        # test floor: a multiple of the unit, at most one unit below the input
        result = dt.floor(round_freq)
        diff = dt.asi8 - result.asi8
        mod = result.asi8 % unit
        assert (mod == 0).all(), f"floor not a {round_freq} multiple"
        assert (0 <= diff).all() and (diff < unit).all(), "floor error"

        # test ceil: symmetric to floor, at most one unit above the input
        result = dt.ceil(round_freq)
        diff = result.asi8 - dt.asi8
        mod = result.asi8 % unit
        assert (mod == 0).all(), f"ceil not a {round_freq} multiple"
        assert (0 <= diff).all() and (diff < unit).all(), "ceil error"

        # test round: moves by at most half a unit, and exact half-unit ties
        # must land on an even int64 value (round-half-to-even)
        result = dt.round(round_freq)
        diff = abs(result.asi8 - dt.asi8)
        mod = result.asi8 % unit
        assert (mod == 0).all(), f"round not a {round_freq} multiple"
        assert (diff <= unit // 2).all(), "round error"
        if unit % 2 == 0:
            assert (
                result.asi8[diff == unit // 2] % 2 == 0
            ).all(), "round half to even error"
| @ -0,0 +1,169 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| from pandas.errors import NullFrequencyError | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexShift:
    """Tests for ``DatetimeIndex.shift`` (shifting by periods of a frequency)."""

    # -------------------------------------------------------------
    # DatetimeIndex.shift is used in integer addition

    def test_dti_shift_tzaware(self, tz_naive_fixture, unit):
        # GH#9903
        # shift works on empty tz-aware indexes, and moves populated ones by
        # n * freq in either direction; shift(0) is a no-op.
        tz = tz_naive_fixture
        idx = DatetimeIndex([], name="xxx", tz=tz).as_unit(unit)
        tm.assert_index_equal(idx.shift(0, freq="h"), idx)
        tm.assert_index_equal(idx.shift(3, freq="h"), idx)

        idx = DatetimeIndex(
            ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(0, freq="h"), idx)
        exp = DatetimeIndex(
            ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(3, freq="h"), exp)
        exp = DatetimeIndex(
            ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(-3, freq="h"), exp)

    def test_dti_shift_freqs(self, unit):
        # test shift for DatetimeIndex and non DatetimeIndex
        # GH#8083
        drange = date_range("20130101", periods=5, unit=unit)
        result = drange.shift(1)
        expected = DatetimeIndex(
            ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)

        result = drange.shift(-1)
        expected = DatetimeIndex(
            ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)

        # With an explicit freq, values move by 3 * 2D while the result
        # keeps the index's own daily freq.
        result = drange.shift(3, freq="2D")
        expected = DatetimeIndex(
            ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)

    def test_dti_shift_int(self, unit):
        # Adding/subtracting n * freq is equivalent to shift(+/-n).
        rng = date_range("1/1/2000", periods=20, unit=unit)

        result = rng + 5 * rng.freq
        expected = rng.shift(5)
        tm.assert_index_equal(result, expected)

        result = rng - 5 * rng.freq
        expected = rng.shift(-5)
        tm.assert_index_equal(result, expected)

    def test_dti_shift_no_freq(self, unit):
        # GH#19147: shifting without a freq to shift by must raise.
        dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None).as_unit(unit)
        with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"):
            dti.shift(2)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_shift_localized(self, tzstr, unit):
        # Shifting preserves the timezone (both pytz and dateutil flavors).
        dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI", unit=unit)
        dr_tz = dr.tz_localize(tzstr)

        result = dr_tz.shift(1, "10min")
        assert result.tz == dr_tz.tz

    def test_dti_shift_across_dst(self, unit):
        # GH 8616
        # Series.shift(freq="h") over an index spanning a DST transition.
        idx = date_range(
            "2013-11-03", tz="America/Chicago", periods=7, freq="h", unit=unit
        )
        ser = Series(index=idx[:-1], dtype=object)
        result = ser.shift(freq="h")
        expected = Series(index=idx[1:], dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "shift, result_time",
        [
            [0, "2014-11-14 00:00:00"],
            [-1, "2014-11-13 23:00:00"],
            [1, "2014-11-14 01:00:00"],
        ],
    )
    def test_dti_shift_near_midnight(self, shift, result_time, unit):
        # GH 8616
        # Hourly shifts across midnight in the fixed-offset EST zone.
        dt = datetime(2014, 11, 14, 0)
        dt_est = pytz.timezone("EST").localize(dt)
        idx = DatetimeIndex([dt_est]).as_unit(unit)
        ser = Series(data=[1], index=idx)
        result = ser.shift(shift, freq="h")
        exp_index = DatetimeIndex([result_time], tz="EST").as_unit(unit)
        expected = Series(1, index=exp_index)
        tm.assert_series_equal(result, expected)

    def test_shift_periods(self, unit):
        # GH#22458 : argument 'n' was deprecated in favor of 'periods'
        idx = date_range(start=START, end=END, periods=3, unit=unit)
        tm.assert_index_equal(idx.shift(periods=0), idx)
        tm.assert_index_equal(idx.shift(0), idx)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_shift_bday(self, freq, unit):
        # Business-day and custom-business-day indexes shift along their own
        # freq, which is preserved on the result.
        rng = date_range(START, END, freq=freq, unit=unit)
        shifted = rng.shift(5)
        assert shifted[0] == rng[5]
        assert shifted.freq == rng.freq

        shifted = rng.shift(-5)
        assert shifted[5] == rng[0]
        assert shifted.freq == rng.freq

        shifted = rng.shift(0)
        assert shifted[0] == rng[0]
        assert shifted.freq == rng.freq

    def test_shift_bmonth(self, unit):
        # Shifting by a freq different from the index's own; shifting by
        # CDay emits a PerformanceWarning.
        rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit)
        shifted = rng.shift(1, freq=pd.offsets.BDay())
        assert shifted[0] == rng[0] + pd.offsets.BDay()

        rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit)
        with tm.assert_produces_warning(pd.errors.PerformanceWarning):
            shifted = rng.shift(1, freq=pd.offsets.CDay())
            assert shifted[0] == rng[0] + pd.offsets.CDay()

    def test_shift_empty(self, unit):
        # GH#14811: the start/end window contains no BME dates, so the range
        # is empty and shifting returns it unchanged.
        dti = date_range(start="2016-10-21", end="2016-10-21", freq="BME", unit=unit)
        result = dti.shift(1)
        tm.assert_index_equal(result, dti)
| @ -0,0 +1,47 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"])
@pytest.mark.parametrize("name", [None, "my_dti"])
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
def test_dti_snap(name, tz, unit):
    # snap() moves each date onto the nearest on-frequency date; the result
    # keeps name/tz/unit but always drops the freq.
    dti = DatetimeIndex(
        [f"1/{day}/2002" for day in range(1, 8)],
        name=name,
        tz=tz,
        freq="D",
    ).as_unit(unit)

    def check(snap_freq, range_freq, start, repeats):
        result = dti.snap(freq=snap_freq)
        expected = (
            date_range(start, "1/7/2002", name=name, tz=tz, freq=range_freq)
            .repeat(repeats)
            .as_unit(unit)
        )
        tm.assert_index_equal(result, expected)
        assert result.tz == expected.tz
        assert result.freq is None
        assert expected.freq is None

    # Mon-anchored weekly snap: first 3 dates go to 12/31, last 4 to 1/7.
    check("W-MON", "w-mon", "12/31/2001", [3, 4])
    # Business-day snap: the weekend days collapse onto adjacent weekdays.
    check("B", "b", "1/1/2002", [1, 1, 1, 2, 2])
| @ -0,0 +1,28 @@ | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestToFrame:
    def test_to_frame_datetime_tz(self):
        # GH#25809: tz-aware values survive the to_frame conversion intact.
        dti = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
        tm.assert_frame_equal(dti.to_frame(), DataFrame(dti, index=dti))

    def test_to_frame_respects_none_name(self):
        # GH#44212 if we explicitly pass name=None, then that should be respected,
        #  not changed to 0
        # GH-45448 this is first deprecated to only change in the future
        dti = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
        expected_cols = Index([None], dtype=object)

        tm.assert_index_equal(expected_cols, dti.to_frame(name=None).columns)
        tm.assert_index_equal(
            expected_cols, dti.rename("foo").to_frame(name=None).columns
        )
| @ -0,0 +1,45 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDateTimeIndexToJulianDate:
    """DatetimeIndex.to_julian_date must match scalar Timestamp conversions."""

    @staticmethod
    def _check_round_trip(start, freq):
        # The vectorized result is a float64 Index equal to converting each
        # Timestamp individually.
        dr = date_range(start=Timestamp(start), periods=5, freq=freq)
        expected = Index([ts.to_julian_date() for ts in dr])
        result = dr.to_julian_date()
        assert isinstance(result, Index)
        assert result.dtype == np.float64
        tm.assert_index_equal(expected, result)

    def test_1700(self):
        self._check_round_trip("1710-10-01", "D")

    def test_2000(self):
        self._check_round_trip("2000-02-27", "D")

    def test_hour(self):
        self._check_round_trip("2000-02-27", "h")

    def test_minute(self):
        self._check_round_trip("2000-02-27", "min")

    def test_second(self):
        self._check_round_trip("2000-02-27", "s")
| @ -0,0 +1,225 @@ | ||||
| import dateutil.tz | ||||
| from dateutil.tz import tzlocal | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| from pandas._libs.tslibs.ccalendar import MONTHS | ||||
| from pandas._libs.tslibs.offsets import MonthEnd | ||||
| from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Period, | ||||
|     PeriodIndex, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestToPeriod: | ||||
|     def test_dti_to_period(self): | ||||
|         dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME") | ||||
|         pi1 = dti.to_period() | ||||
|         pi2 = dti.to_period(freq="D") | ||||
|         pi3 = dti.to_period(freq="3D") | ||||
|  | ||||
|         assert pi1[0] == Period("Jan 2005", freq="M") | ||||
|         assert pi2[0] == Period("1/31/2005", freq="D") | ||||
|         assert pi3[0] == Period("1/31/2005", freq="3D") | ||||
|  | ||||
|         assert pi1[-1] == Period("Nov 2005", freq="M") | ||||
|         assert pi2[-1] == Period("11/30/2005", freq="D") | ||||
|         assert pi3[-1], Period("11/30/2005", freq="3D") | ||||
|  | ||||
|         tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M")) | ||||
|         tm.assert_index_equal( | ||||
|             pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D") | ||||
|         ) | ||||
|         tm.assert_index_equal( | ||||
|             pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D") | ||||
|         ) | ||||
|  | ||||
|     @pytest.mark.parametrize("month", MONTHS) | ||||
|     def test_to_period_quarterly(self, month): | ||||
|         # make sure we can make the round trip | ||||
|         freq = f"Q-{month}" | ||||
|         rng = period_range("1989Q3", "1991Q3", freq=freq) | ||||
|         stamps = rng.to_timestamp() | ||||
|         result = stamps.to_period(freq) | ||||
|         tm.assert_index_equal(rng, result) | ||||
|  | ||||
|     @pytest.mark.parametrize("off", ["BQE", "QS", "BQS"]) | ||||
|     def test_to_period_quarterlyish(self, off): | ||||
|         rng = date_range("01-Jan-2012", periods=8, freq=off) | ||||
|         prng = rng.to_period() | ||||
|         assert prng.freq == "QE-DEC" | ||||
|  | ||||
|     @pytest.mark.parametrize("off", ["BYE", "YS", "BYS"]) | ||||
|     def test_to_period_annualish(self, off): | ||||
|         rng = date_range("01-Jan-2012", periods=8, freq=off) | ||||
|         prng = rng.to_period() | ||||
|         assert prng.freq == "YE-DEC" | ||||
|  | ||||
|     def test_to_period_monthish(self): | ||||
|         offsets = ["MS", "BME"] | ||||
|         for off in offsets: | ||||
|             rng = date_range("01-Jan-2012", periods=8, freq=off) | ||||
|             prng = rng.to_period() | ||||
|             assert prng.freqstr == "M" | ||||
|  | ||||
|         rng = date_range("01-Jan-2012", periods=8, freq="ME") | ||||
|         prng = rng.to_period() | ||||
|         assert prng.freqstr == "M" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): | ||||
|             date_range("01-Jan-2012", periods=8, freq="EOM") | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "freq_offset, freq_period", | ||||
|         [ | ||||
|             ("2ME", "2M"), | ||||
|             (MonthEnd(2), MonthEnd(2)), | ||||
|         ], | ||||
|     ) | ||||
|     def test_dti_to_period_2monthish(self, freq_offset, freq_period): | ||||
|         dti = date_range("2020-01-01", periods=3, freq=freq_offset) | ||||
|         pi = dti.to_period() | ||||
|  | ||||
|         tm.assert_index_equal(pi, period_range("2020-01", "2020-05", freq=freq_period)) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "freq, freq_depr", | ||||
|         [ | ||||
|             ("2ME", "2M"), | ||||
|             ("2QE", "2Q"), | ||||
|             ("2QE-SEP", "2Q-SEP"), | ||||
|             ("1YE", "1Y"), | ||||
|             ("2YE-MAR", "2Y-MAR"), | ||||
|             ("1YE", "1A"), | ||||
|             ("2YE-MAR", "2A-MAR"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_to_period_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr): | ||||
|         # GH#9586 | ||||
|         msg = f"'{freq_depr[1:]}' is deprecated and will be removed " | ||||
|         f"in a future version, please use '{freq[1:]}' instead." | ||||
|  | ||||
|         rng = date_range("01-Jan-2012", periods=8, freq=freq) | ||||
|         prng = rng.to_period() | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             assert prng.freq == freq_depr | ||||
|  | ||||
|     def test_to_period_infer(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/33358 | ||||
|         rng = date_range( | ||||
|             start="2019-12-22 06:40:00+00:00", | ||||
|             end="2019-12-22 08:45:00+00:00", | ||||
|             freq="5min", | ||||
|         ) | ||||
|  | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             pi1 = rng.to_period("5min") | ||||
|  | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             pi2 = rng.to_period() | ||||
|  | ||||
|         tm.assert_index_equal(pi1, pi2) | ||||
|  | ||||
|     @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") | ||||
|     def test_period_dt64_round_trip(self): | ||||
|         dti = date_range("1/1/2000", "1/7/2002", freq="B") | ||||
|         pi = dti.to_period() | ||||
|         tm.assert_index_equal(pi.to_timestamp(), dti) | ||||
|  | ||||
|         dti = date_range("1/1/2000", "1/7/2002", freq="B") | ||||
|         pi = dti.to_period(freq="h") | ||||
|         tm.assert_index_equal(pi.to_timestamp(), dti) | ||||
|  | ||||
|     def test_to_period_millisecond(self): | ||||
|         index = DatetimeIndex( | ||||
|             [ | ||||
|                 Timestamp("2007-01-01 10:11:12.123456Z"), | ||||
|                 Timestamp("2007-01-01 10:11:13.789123Z"), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             # warning that timezone info will be lost | ||||
|             period = index.to_period(freq="ms") | ||||
|         assert 2 == len(period) | ||||
|         assert period[0] == Period("2007-01-01 10:11:12.123Z", "ms") | ||||
|         assert period[1] == Period("2007-01-01 10:11:13.789Z", "ms") | ||||
|  | ||||
|     def test_to_period_microsecond(self): | ||||
|         index = DatetimeIndex( | ||||
|             [ | ||||
|                 Timestamp("2007-01-01 10:11:12.123456Z"), | ||||
|                 Timestamp("2007-01-01 10:11:13.789123Z"), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             # warning that timezone info will be lost | ||||
|             period = index.to_period(freq="us") | ||||
|         assert 2 == len(period) | ||||
|         assert period[0] == Period("2007-01-01 10:11:12.123456Z", "us") | ||||
|         assert period[1] == Period("2007-01-01 10:11:13.789123Z", "us") | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "tz", | ||||
|         ["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()], | ||||
|     ) | ||||
|     def test_to_period_tz(self, tz): | ||||
|         ts = date_range("1/1/2000", "2/1/2000", tz=tz) | ||||
|  | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             # GH#21333 warning that timezone info will be lost | ||||
|             # filter warning about freq deprecation | ||||
|  | ||||
|             result = ts.to_period()[0] | ||||
|             expected = ts[0].to_period(ts.freq) | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|         expected = date_range("1/1/2000", "2/1/2000").to_period() | ||||
|  | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             # GH#21333 warning that timezone info will be lost | ||||
|             result = ts.to_period(ts.freq) | ||||
|  | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"]) | ||||
|     def test_to_period_tz_utc_offset_consistency(self, tz): | ||||
|         # GH#22905 | ||||
|         ts = date_range("1/1/2000", "2/1/2000", tz="Etc/GMT-1") | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             result = ts.to_period()[0] | ||||
|             expected = ts[0].to_period(ts.freq) | ||||
|             assert result == expected | ||||
|  | ||||
|     def test_to_period_nofreq(self): | ||||
|         idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"]) | ||||
|         msg = "You must pass a freq argument as current index has none." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             idx.to_period() | ||||
|  | ||||
|         idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer") | ||||
|         assert idx.freqstr == "D" | ||||
|         expected = PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D") | ||||
|         tm.assert_index_equal(idx.to_period(), expected) | ||||
|  | ||||
|         # GH#7606 | ||||
|         idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) | ||||
|         assert idx.freqstr is None | ||||
|         tm.assert_index_equal(idx.to_period(), expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("freq", ["2BMS", "1SME-15"]) | ||||
|     def test_to_period_offsets_not_supported(self, freq): | ||||
|         # GH#56243 | ||||
|         msg = f"{freq[1:]} is not supported as period frequency" | ||||
|         ts = date_range("1/1/2012", periods=4, freq=freq) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ts.to_period() | ||||
| @ -0,0 +1,51 @@ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timezone, | ||||
| ) | ||||
|  | ||||
| import dateutil.parser | ||||
| import dateutil.tz | ||||
| from dateutil.tz import tzlocal | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     date_range, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.indexes.datetimes.test_timezones import FixedOffset | ||||
|  | ||||
# Fixed UTC offset of -420 minutes (-07:00), reused by the tests below.
fixed_off = FixedOffset(-420, "-07:00")
|  | ||||
|  | ||||
class TestToPyDatetime:
    """Tests for DatetimeIndex.to_pydatetime and related conversions."""

    def test_dti_to_pydatetime(self):
        # Arrays of tz-aware datetime objects round-trip through
        # to_datetime(..., utc=True) with a UTC result tz.
        dt = dateutil.parser.parse("2012-06-13T01:39:00Z")
        dt = dt.replace(tzinfo=tzlocal())

        arr = np.array([dt], dtype=object)

        result = to_datetime(arr, utc=True)
        assert result.tz is timezone.utc

        # Same round-trip starting from DatetimeIndex.to_pydatetime output.
        rng = date_range("2012-11-03 03:00", "2012-11-05 03:00", tz=tzlocal())
        arr = rng.to_pydatetime()
        result = to_datetime(arr, utc=True)
        assert result.tz is timezone.utc

    def test_dti_to_pydatetime_fixedtz(self):
        # Fix: method name was previously misspelled "fizedtz".
        # Datetimes carrying a fixed offset survive the round trip through
        # DatetimeIndex.to_pydatetime (and _mpl_repr) unchanged.
        dates = np.array(
            [
                datetime(2000, 1, 1, tzinfo=fixed_off),
                datetime(2000, 1, 2, tzinfo=fixed_off),
                datetime(2000, 1, 3, tzinfo=fixed_off),
            ]
        )
        dti = DatetimeIndex(dates)

        result = dti.to_pydatetime()
        tm.assert_numpy_array_equal(dates, result)

        # _mpl_repr is the matplotlib-facing representation; same contents.
        result = dti._mpl_repr()
        tm.assert_numpy_array_equal(dates, result)
| @ -0,0 +1,18 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestToSeries:
    def test_to_series(self):
        # to_series with an explicit index keeps the tz-aware values and
        # the index's name on the resulting Series.
        naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B")
        localized = naive.tz_localize("US/Pacific")

        expected = Series(np.array(localized.tolist(), dtype="object"), name="B")
        result = localized.to_series(index=[0, 1])

        # Series inference restores the tz-aware dtype from the object array.
        assert expected.dtype == localized.dtype
        tm.assert_series_equal(result, expected)
| @ -0,0 +1,283 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import dateutil.tz | ||||
| from dateutil.tz import gettz | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| from pandas._libs.tslibs import timezones | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     NaT, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     offsets, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestTZConvert: | ||||
    def test_tz_convert_nat(self):
        # GH#5546
        # NaT must pass through tz_localize/tz_convert unchanged, both alone
        # and mixed with real timestamps, including after offset arithmetic.
        dates = [NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize("US/Pacific")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Eastern"))
        idx = idx.tz_convert("UTC")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="UTC"))

        # Mixed NaT and real timestamps: conversion shifts the real values
        # (Pacific -> Eastern is +3 hours) and leaves NaT alone.
        dates = ["2010-12-01 00:00", "2010-12-02 00:00", NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize("US/Pacific")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        expected = ["2010-12-01 03:00", "2010-12-02 03:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))

        # DateOffset arithmetic propagates, NaT stays NaT.
        idx = idx + offsets.Hour(5)
        expected = ["2010-12-01 08:00", "2010-12-02 08:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
        idx = idx.tz_convert("US/Pacific")
        expected = ["2010-12-01 05:00", "2010-12-02 05:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))

        # numpy timedelta64 addition behaves like the Hour offset above.
        idx = idx + np.timedelta64(3, "h")
        expected = ["2010-12-01 08:00", "2010-12-02 08:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))

        idx = idx.tz_convert("US/Eastern")
        expected = ["2010-12-01 11:00", "2010-12-02 11:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
|  | ||||
|     @pytest.mark.parametrize("prefix", ["", "dateutil/"]) | ||||
|     def test_dti_tz_convert_compat_timestamp(self, prefix): | ||||
|         strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] | ||||
|         idx = DatetimeIndex(strdates, tz=prefix + "US/Eastern") | ||||
|  | ||||
|         conv = idx[0].tz_convert(prefix + "US/Pacific") | ||||
|         expected = idx.tz_convert(prefix + "US/Pacific")[0] | ||||
|  | ||||
|         assert conv == expected | ||||
|  | ||||
|     def test_dti_tz_convert_hour_overflow_dst(self): | ||||
|         # Regression test for GH#13306 | ||||
|  | ||||
|         # sorted case US/Eastern -> UTC | ||||
|         ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"] | ||||
|         tt = DatetimeIndex(ts).tz_localize("US/Eastern") | ||||
|         ut = tt.tz_convert("UTC") | ||||
|         expected = Index([13, 14, 13], dtype=np.int32) | ||||
|         tm.assert_index_equal(ut.hour, expected) | ||||
|  | ||||
|         # sorted case UTC -> US/Eastern | ||||
|         ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"] | ||||
|         tt = DatetimeIndex(ts).tz_localize("UTC") | ||||
|         ut = tt.tz_convert("US/Eastern") | ||||
|         expected = Index([9, 9, 9], dtype=np.int32) | ||||
|         tm.assert_index_equal(ut.hour, expected) | ||||
|  | ||||
|         # unsorted case US/Eastern -> UTC | ||||
|         ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"] | ||||
|         tt = DatetimeIndex(ts).tz_localize("US/Eastern") | ||||
|         ut = tt.tz_convert("UTC") | ||||
|         expected = Index([13, 14, 13], dtype=np.int32) | ||||
|         tm.assert_index_equal(ut.hour, expected) | ||||
|  | ||||
|         # unsorted case UTC -> US/Eastern | ||||
|         ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"] | ||||
|         tt = DatetimeIndex(ts).tz_localize("UTC") | ||||
|         ut = tt.tz_convert("US/Eastern") | ||||
|         expected = Index([9, 9, 9], dtype=np.int32) | ||||
|         tm.assert_index_equal(ut.hour, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) | ||||
|     def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): | ||||
|         # Regression test for GH#13306 | ||||
|  | ||||
|         # sorted case US/Eastern -> UTC | ||||
|         ts = [ | ||||
|             Timestamp("2008-05-12 09:50:00", tz=tz), | ||||
|             Timestamp("2008-12-12 09:50:35", tz=tz), | ||||
|             Timestamp("2009-05-12 09:50:32", tz=tz), | ||||
|         ] | ||||
|         tt = DatetimeIndex(ts) | ||||
|         ut = tt.tz_convert("UTC") | ||||
|         expected = Index([13, 14, 13], dtype=np.int32) | ||||
|         tm.assert_index_equal(ut.hour, expected) | ||||
|  | ||||
|         # sorted case UTC -> US/Eastern | ||||
|         ts = [ | ||||
|             Timestamp("2008-05-12 13:50:00", tz="UTC"), | ||||
|             Timestamp("2008-12-12 14:50:35", tz="UTC"), | ||||
|             Timestamp("2009-05-12 13:50:32", tz="UTC"), | ||||
|         ] | ||||
|         tt = DatetimeIndex(ts) | ||||
|         ut = tt.tz_convert("US/Eastern") | ||||
|         expected = Index([9, 9, 9], dtype=np.int32) | ||||
|         tm.assert_index_equal(ut.hour, expected) | ||||
|  | ||||
|         # unsorted case US/Eastern -> UTC | ||||
|         ts = [ | ||||
|             Timestamp("2008-05-12 09:50:00", tz=tz), | ||||
|             Timestamp("2008-12-12 09:50:35", tz=tz), | ||||
|             Timestamp("2008-05-12 09:50:32", tz=tz), | ||||
|         ] | ||||
|         tt = DatetimeIndex(ts) | ||||
|         ut = tt.tz_convert("UTC") | ||||
|         expected = Index([13, 14, 13], dtype=np.int32) | ||||
|         tm.assert_index_equal(ut.hour, expected) | ||||
|  | ||||
|         # unsorted case UTC -> US/Eastern | ||||
|         ts = [ | ||||
|             Timestamp("2008-05-12 13:50:00", tz="UTC"), | ||||
|             Timestamp("2008-12-12 14:50:35", tz="UTC"), | ||||
|             Timestamp("2008-05-12 13:50:32", tz="UTC"), | ||||
|         ] | ||||
|         tt = DatetimeIndex(ts) | ||||
|         ut = tt.tz_convert("US/Eastern") | ||||
|         expected = Index([9, 9, 9], dtype=np.int32) | ||||
|         tm.assert_index_equal(ut.hour, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("freq, n", [("h", 1), ("min", 60), ("s", 3600)]) | ||||
|     def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n): | ||||
|         # Regression test for tslib.tz_convert(vals, tz1, tz2). | ||||
|         # See GH#4496 for details. | ||||
|         idx = date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq=freq) | ||||
|         idx = idx.tz_localize("UTC") | ||||
|         idx = idx.tz_convert("Europe/Moscow") | ||||
|  | ||||
|         expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) | ||||
|         tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) | ||||
|  | ||||
|     def test_dti_tz_convert_dst(self): | ||||
|         for freq, n in [("h", 1), ("min", 60), ("s", 3600)]: | ||||
|             # Start DST | ||||
|             idx = date_range( | ||||
|                 "2014-03-08 23:00", "2014-03-09 09:00", freq=freq, tz="UTC" | ||||
|             ) | ||||
|             idx = idx.tz_convert("US/Eastern") | ||||
|             expected = np.repeat( | ||||
|                 np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]), | ||||
|                 np.array([n, n, n, n, n, n, n, n, n, n, 1]), | ||||
|             ) | ||||
|             tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) | ||||
|  | ||||
|             idx = date_range( | ||||
|                 "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern" | ||||
|             ) | ||||
|             idx = idx.tz_convert("UTC") | ||||
|             expected = np.repeat( | ||||
|                 np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), | ||||
|                 np.array([n, n, n, n, n, n, n, n, n, n, 1]), | ||||
|             ) | ||||
|             tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) | ||||
|  | ||||
|             # End DST | ||||
|             idx = date_range( | ||||
|                 "2014-11-01 23:00", "2014-11-02 09:00", freq=freq, tz="UTC" | ||||
|             ) | ||||
|             idx = idx.tz_convert("US/Eastern") | ||||
|             expected = np.repeat( | ||||
|                 np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]), | ||||
|                 np.array([n, n, n, n, n, n, n, n, n, n, 1]), | ||||
|             ) | ||||
|             tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) | ||||
|  | ||||
|             idx = date_range( | ||||
|                 "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern" | ||||
|             ) | ||||
|             idx = idx.tz_convert("UTC") | ||||
|             expected = np.repeat( | ||||
|                 np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), | ||||
|                 np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]), | ||||
|             ) | ||||
|             tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) | ||||
|  | ||||
|         # daily | ||||
|         # Start DST | ||||
|         idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC") | ||||
|         idx = idx.tz_convert("US/Eastern") | ||||
|         tm.assert_index_equal(idx.hour, Index([19, 19], dtype=np.int32)) | ||||
|  | ||||
|         idx = date_range( | ||||
|             "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern" | ||||
|         ) | ||||
|         idx = idx.tz_convert("UTC") | ||||
|         tm.assert_index_equal(idx.hour, Index([5, 5], dtype=np.int32)) | ||||
|  | ||||
|         # End DST | ||||
|         idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC") | ||||
|         idx = idx.tz_convert("US/Eastern") | ||||
|         tm.assert_index_equal(idx.hour, Index([20, 20], dtype=np.int32)) | ||||
|  | ||||
|         idx = date_range( | ||||
|             "2014-11-01 00:00", "2014-11-02 000:00", freq="D", tz="US/Eastern" | ||||
|         ) | ||||
|         idx = idx.tz_convert("UTC") | ||||
|         tm.assert_index_equal(idx.hour, Index([4, 4], dtype=np.int32)) | ||||
|  | ||||
|     def test_tz_convert_roundtrip(self, tz_aware_fixture): | ||||
|         tz = tz_aware_fixture | ||||
|         idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME", tz="UTC") | ||||
|         exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME") | ||||
|  | ||||
|         idx2 = date_range(start="2014-01-01", end="2014-12-31", freq="D", tz="UTC") | ||||
|         exp2 = date_range(start="2014-01-01", end="2014-12-31", freq="D") | ||||
|  | ||||
|         idx3 = date_range(start="2014-01-01", end="2014-03-01", freq="h", tz="UTC") | ||||
|         exp3 = date_range(start="2014-01-01", end="2014-03-01", freq="h") | ||||
|  | ||||
|         idx4 = date_range(start="2014-08-01", end="2014-10-31", freq="min", tz="UTC") | ||||
|         exp4 = date_range(start="2014-08-01", end="2014-10-31", freq="min") | ||||
|  | ||||
|         for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), (idx4, exp4)]: | ||||
|             converted = idx.tz_convert(tz) | ||||
|             reset = converted.tz_convert(None) | ||||
|             tm.assert_index_equal(reset, expected) | ||||
|             assert reset.tzinfo is None | ||||
|             expected = converted.tz_convert("UTC").tz_localize(None) | ||||
|             expected = expected._with_freq("infer") | ||||
|             tm.assert_index_equal(reset, expected) | ||||
|  | ||||
|     def test_dti_tz_convert_tzlocal(self): | ||||
|         # GH#13583 | ||||
|         # tz_convert doesn't affect to internal | ||||
|         dti = date_range(start="2001-01-01", end="2001-03-01", tz="UTC") | ||||
|         dti2 = dti.tz_convert(dateutil.tz.tzlocal()) | ||||
|         tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) | ||||
|  | ||||
|         dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal()) | ||||
|         dti2 = dti.tz_convert(None) | ||||
|         tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "tz", | ||||
|         [ | ||||
|             "US/Eastern", | ||||
|             "dateutil/US/Eastern", | ||||
|             pytz.timezone("US/Eastern"), | ||||
|             gettz("US/Eastern"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_dti_tz_convert_utc_to_local_no_modify(self, tz): | ||||
|         rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc") | ||||
|         rng_eastern = rng.tz_convert(tz) | ||||
|  | ||||
|         # Values are unmodified | ||||
|         tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8) | ||||
|  | ||||
|         assert timezones.tz_compare(rng_eastern.tz, timezones.maybe_get_tz(tz)) | ||||
|  | ||||
|     @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) | ||||
|     def test_tz_convert_unsorted(self, tzstr): | ||||
|         dr = date_range("2012-03-09", freq="h", periods=100, tz="utc") | ||||
|         dr = dr.tz_convert(tzstr) | ||||
|  | ||||
|         result = dr[::-1].hour | ||||
|         exp = dr.hour[::-1] | ||||
|         tm.assert_almost_equal(result, exp) | ||||
| @ -0,0 +1,402 @@ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
|  | ||||
| import dateutil.tz | ||||
| from dateutil.tz import gettz | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Timestamp, | ||||
|     bdate_range, | ||||
|     date_range, | ||||
|     offsets, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
# zoneinfo is standard library from Python 3.9 on; treat it as optional.
try:
    from zoneinfo import ZoneInfo
except ImportError:
    # Cannot assign to a type  [misc]
    ZoneInfo = None  # type: ignore[misc, assignment]


# US/Eastern from each available tz implementation (pytz, dateutil, and
# zoneinfo when both the module and a tz database are present).
easts = [pytz.timezone("US/Eastern"), gettz("US/Eastern")]
if ZoneInfo is not None:
    try:
        tz = ZoneInfo("US/Eastern")
    except KeyError:
        # no tzdata
        pass
    else:
        easts.append(tz)
|  | ||||
|  | ||||
| class TestTZLocalize: | ||||
|     def test_tz_localize_invalidates_freq(self): | ||||
|         # we only preserve freq in unambiguous cases | ||||
|  | ||||
|         # if localized to US/Eastern, this crosses a DST transition | ||||
|         dti = date_range("2014-03-08 23:00", "2014-03-09 09:00", freq="h") | ||||
|         assert dti.freq == "h" | ||||
|  | ||||
|         result = dti.tz_localize(None)  # no-op | ||||
|         assert result.freq == "h" | ||||
|  | ||||
|         result = dti.tz_localize("UTC")  # unambiguous freq preservation | ||||
|         assert result.freq == "h" | ||||
|  | ||||
|         result = dti.tz_localize("US/Eastern", nonexistent="shift_forward") | ||||
|         assert result.freq is None | ||||
|         assert result.inferred_freq is None  # i.e. we are not _too_ strict here | ||||
|  | ||||
|         # Case where we _can_ keep freq because we're length==1 | ||||
|         dti2 = dti[:1] | ||||
|         result = dti2.tz_localize("US/Eastern") | ||||
|         assert result.freq == "h" | ||||
|  | ||||
|     def test_tz_localize_utc_copies(self, utc_fixture): | ||||
|         # GH#46460 | ||||
|         times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"] | ||||
|         index = DatetimeIndex(times) | ||||
|  | ||||
|         res = index.tz_localize(utc_fixture) | ||||
|         assert not tm.shares_memory(res, index) | ||||
|  | ||||
|         res2 = index._data.tz_localize(utc_fixture) | ||||
|         assert not tm.shares_memory(index._data, res2) | ||||
|  | ||||
    def test_dti_tz_localize_nonexistent_raise_coerce(self):
        # GH#13057
        # 02:00 on 2015-03-08 does not exist in US/Eastern (spring forward):
        # the default and nonexistent="raise" must raise, while
        # nonexistent="NaT" coerces the missing wall time to NaT.
        times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
        index = DatetimeIndex(times)
        tz = "US/Eastern"
        with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)):
            index.tz_localize(tz=tz)

        with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)):
            index.tz_localize(tz=tz, nonexistent="raise")

        result = index.tz_localize(tz=tz, nonexistent="NaT")
        test_times = ["2015-03-08 01:00-05:00", "NaT", "2015-03-08 03:00-04:00"]
        dti = to_datetime(test_times, utc=True)
        expected = dti.tz_convert("US/Eastern")
        tm.assert_index_equal(result, expected)
|  | ||||
|     @pytest.mark.parametrize("tz", easts) | ||||
|     def test_dti_tz_localize_ambiguous_infer(self, tz): | ||||
|         # November 6, 2011, fall back, repeat 2 AM hour | ||||
|         # With no repeated hours, we cannot infer the transition | ||||
|         dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour()) | ||||
|         with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"): | ||||
|             dr.tz_localize(tz) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", easts) | ||||
|     def test_dti_tz_localize_ambiguous_infer2(self, tz, unit): | ||||
|         # With repeated hours, we can infer the transition | ||||
|         dr = date_range( | ||||
|             datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour(), tz=tz, unit=unit | ||||
|         ) | ||||
|         times = [ | ||||
|             "11/06/2011 00:00", | ||||
|             "11/06/2011 01:00", | ||||
|             "11/06/2011 01:00", | ||||
|             "11/06/2011 02:00", | ||||
|             "11/06/2011 03:00", | ||||
|         ] | ||||
|         di = DatetimeIndex(times).as_unit(unit) | ||||
|         result = di.tz_localize(tz, ambiguous="infer") | ||||
|         expected = dr._with_freq(None) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         result2 = DatetimeIndex(times, tz=tz, ambiguous="infer").as_unit(unit) | ||||
|         tm.assert_index_equal(result2, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", easts) | ||||
|     def test_dti_tz_localize_ambiguous_infer3(self, tz): | ||||
|         # When there is no dst transition, nothing special happens | ||||
|         dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=offsets.Hour()) | ||||
|         localized = dr.tz_localize(tz) | ||||
|         localized_infer = dr.tz_localize(tz, ambiguous="infer") | ||||
|         tm.assert_index_equal(localized, localized_infer) | ||||
|  | ||||
    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_times(self, tz):
        # Nonexistent and ambiguous wall times must raise; ranges that avoid
        # the transitions (or use UTC) must construct without error.

        # March 13, 2011, spring forward, skip from 2 AM to 3 AM
        dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=offsets.Hour())
        with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:30:00"):
            dr.tz_localize(tz)

        # after dst transition, it works
        dr = date_range(
            datetime(2011, 3, 13, 3, 30), periods=3, freq=offsets.Hour(), tz=tz
        )

        # November 6, 2011, fall back, repeat 2 AM hour
        dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=offsets.Hour())
        with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
            dr.tz_localize(tz)

        # UTC is OK
        # (the constructed ranges are not inspected further; merely not
        # raising is the assertion here)
        dr = date_range(
            datetime(2011, 3, 13), periods=48, freq=offsets.Minute(30), tz=pytz.utc
        )
|  | ||||
|     @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) | ||||
|     def test_dti_tz_localize_pass_dates_to_utc(self, tzstr): | ||||
|         strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] | ||||
|  | ||||
|         idx = DatetimeIndex(strdates) | ||||
|         conv = idx.tz_localize(tzstr) | ||||
|  | ||||
|         fromdates = DatetimeIndex(strdates, tz=tzstr) | ||||
|  | ||||
|         assert conv.tz == fromdates.tz | ||||
|         tm.assert_numpy_array_equal(conv.values, fromdates.values) | ||||
|  | ||||
    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
    def test_dti_tz_localize(self, prefix):
        # Localizing a millisecond range matches the equivalent UTC range
        # (US/Eastern is UTC-5 on 2005-01-01), and ambiguous or nonexistent
        # wall times raise, for both pytz and dateutil zones.
        tzstr = prefix + "US/Eastern"
        dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms")
        dti2 = dti.tz_localize(tzstr)

        dti_utc = date_range(
            start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc"
        )

        tm.assert_numpy_array_equal(dti2.values, dti_utc.values)

        # Converting onward to another zone keeps the same UTC values.
        dti3 = dti2.tz_convert(prefix + "US/Pacific")
        tm.assert_numpy_array_equal(dti3.values, dti_utc.values)

        # Fall-back 2011-11-06: 1:59-2:00 wall times are ambiguous.
        dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms")
        with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
            dti.tz_localize(tzstr)

        # Spring-forward 2011-03-13: 2:00 does not exist.
        dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms")
        with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:00:00"):
            dti.tz_localize(tzstr)
|  | ||||
    @pytest.mark.parametrize(
        "tz",
        [
            "US/Eastern",
            "dateutil/US/Eastern",
            pytz.timezone("US/Eastern"),
            gettz("US/Eastern"),
        ],
    )
    def test_dti_tz_localize_utc_conversion(self, tz):
        # Localizing to time zone should:
        #  1) check for DST ambiguities
        #  2) convert to UTC

        rng = date_range("3/10/2012", "3/11/2012", freq="30min")

        converted = rng.tz_localize(tz)
        # US/Eastern is UTC-5 over this pre-transition range, so the stored
        # UTC values are the naive values shifted forward 5 hours.
        expected_naive = rng + offsets.Hour(5)
        tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8)

        # DST ambiguity, this should fail
        # (the range crosses the 2012-03-11 spring-forward gap, so the
        # nonexistent 02:00 wall time raises)
        rng = date_range("3/11/2012", "3/12/2012", freq="30min")
        # Is this really how it should fail??
        with pytest.raises(pytz.NonExistentTimeError, match="2012-03-11 02:00:00"):
            rng.tz_localize(tz)
|  | ||||
|     def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): | ||||
|         # note: this tz tests that a tz-naive index can be localized | ||||
|         # and de-localized successfully, when there are no DST transitions | ||||
|         # in the range. | ||||
|         idx = date_range(start="2014-06-01", end="2014-08-30", freq="15min") | ||||
|         tz = tz_aware_fixture | ||||
|         localized = idx.tz_localize(tz) | ||||
|         # can't localize a tz-aware object | ||||
|         with pytest.raises( | ||||
|             TypeError, match="Already tz-aware, use tz_convert to convert" | ||||
|         ): | ||||
|             localized.tz_localize(tz) | ||||
|         reset = localized.tz_localize(None) | ||||
|         assert reset.tzinfo is None | ||||
|         expected = idx._with_freq(None) | ||||
|         tm.assert_index_equal(reset, expected) | ||||
|  | ||||
|     def test_dti_tz_localize_naive(self): | ||||
|         rng = date_range("1/1/2011", periods=100, freq="h") | ||||
|  | ||||
|         conv = rng.tz_localize("US/Pacific") | ||||
|         exp = date_range("1/1/2011", periods=100, freq="h", tz="US/Pacific") | ||||
|  | ||||
|         tm.assert_index_equal(conv, exp._with_freq(None)) | ||||
|  | ||||
    def test_dti_tz_localize_tzlocal(self):
        # GH#13583
        # Localizing to tzlocal shifts asi8 by the local UTC offset; the
        # inverse (dropping the tz) shifts it back.
        # NOTE(review): the offset is sampled at 2011-01-01 but applied to a
        # 2001 Jan-Mar range — assumes the local offset is the same at both
        # points (holds when no DST is in effect over the range).
        offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1))
        # convert to integer nanoseconds, the unit of asi8
        offset = int(offset.total_seconds() * 1000000000)

        dti = date_range(start="2001-01-01", end="2001-03-01")
        dti2 = dti.tz_localize(dateutil.tz.tzlocal())
        tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8)

        dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal())
        dti2 = dti.tz_localize(None)
        tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8)
|  | ||||
|     @pytest.mark.parametrize("tz", easts) | ||||
|     def test_dti_tz_localize_ambiguous_nat(self, tz): | ||||
|         times = [ | ||||
|             "11/06/2011 00:00", | ||||
|             "11/06/2011 01:00", | ||||
|             "11/06/2011 01:00", | ||||
|             "11/06/2011 02:00", | ||||
|             "11/06/2011 03:00", | ||||
|         ] | ||||
|         di = DatetimeIndex(times) | ||||
|         localized = di.tz_localize(tz, ambiguous="NaT") | ||||
|  | ||||
|         times = [ | ||||
|             "11/06/2011 00:00", | ||||
|             np.nan, | ||||
|             np.nan, | ||||
|             "11/06/2011 02:00", | ||||
|             "11/06/2011 03:00", | ||||
|         ] | ||||
|         di_test = DatetimeIndex(times, tz="US/Eastern") | ||||
|  | ||||
|         # left dtype is datetime64[ns, US/Eastern] | ||||
|         # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] | ||||
|         tm.assert_numpy_array_equal(di_test.values, localized.values) | ||||
|  | ||||
    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_flags(self, tz, unit):
        # November 6, 2011, fall back, repeat 2 AM hour

        # Pass in flags to determine right dst transition
        dr = date_range(
            datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour(), tz=tz, unit=unit
        )
        times = [
            "11/06/2011 00:00",
            "11/06/2011 01:00",
            "11/06/2011 01:00",
            "11/06/2011 02:00",
            "11/06/2011 03:00",
        ]

        # Test tz_localize
        di = DatetimeIndex(times).as_unit(unit)
        # First 01:00 is pre-transition (DST), second is post-transition.
        is_dst = [1, 1, 0, 0, 0]
        localized = di.tz_localize(tz, ambiguous=is_dst)
        expected = dr._with_freq(None)
        tm.assert_index_equal(expected, localized)

        result = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit)
        tm.assert_index_equal(result, expected)

        # ambiguous= also accepts integer and boolean ndarrays.
        localized = di.tz_localize(tz, ambiguous=np.array(is_dst))
        tm.assert_index_equal(dr, localized)

        localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype("bool"))
        tm.assert_index_equal(dr, localized)

        # Test constructor
        # NOTE(review): duplicates the `result = DatetimeIndex(...)` check
        # above; kept for parity with the original test.
        localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit)
        tm.assert_index_equal(dr, localized)

        # Test duplicate times where inferring the dst fails
        times += times
        di = DatetimeIndex(times).as_unit(unit)

        # When the sizes are incompatible, make sure error is raised
        # NOTE(review): broad `Exception` — presumably ValueError; confirm
        # before narrowing.
        msg = "Length of ambiguous bool-array must be the same size as vals"
        with pytest.raises(Exception, match=msg):
            di.tz_localize(tz, ambiguous=is_dst)

        # When sizes are compatible and there are repeats ('infer' won't work)
        is_dst = np.hstack((is_dst, is_dst))
        localized = di.tz_localize(tz, ambiguous=is_dst)
        dr = dr.append(dr)
        tm.assert_index_equal(dr, localized)
|  | ||||
|     @pytest.mark.parametrize("tz", easts) | ||||
|     def test_dti_tz_localize_ambiguous_flags2(self, tz, unit): | ||||
|         # When there is no dst transition, nothing special happens | ||||
|         dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=offsets.Hour()) | ||||
|         is_dst = np.array([1] * 10) | ||||
|         localized = dr.tz_localize(tz) | ||||
|         localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst) | ||||
|         tm.assert_index_equal(localized, localized_is_dst) | ||||
|  | ||||
|     def test_dti_tz_localize_bdate_range(self): | ||||
|         dr = bdate_range("1/1/2009", "1/1/2010") | ||||
|         dr_utc = bdate_range("1/1/2009", "1/1/2010", tz=pytz.utc) | ||||
|         localized = dr.tz_localize(pytz.utc) | ||||
|         tm.assert_index_equal(dr_utc, localized) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start_ts, tz, end_ts, shift", | ||||
|         [ | ||||
|             ["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"], | ||||
|             [ | ||||
|                 "2015-03-29 02:20:00", | ||||
|                 "Europe/Warsaw", | ||||
|                 "2015-03-29 01:59:59.999999999", | ||||
|                 "backward", | ||||
|             ], | ||||
|             [ | ||||
|                 "2015-03-29 02:20:00", | ||||
|                 "Europe/Warsaw", | ||||
|                 "2015-03-29 03:20:00", | ||||
|                 timedelta(hours=1), | ||||
|             ], | ||||
|             [ | ||||
|                 "2015-03-29 02:20:00", | ||||
|                 "Europe/Warsaw", | ||||
|                 "2015-03-29 01:20:00", | ||||
|                 timedelta(hours=-1), | ||||
|             ], | ||||
|             ["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"], | ||||
|             [ | ||||
|                 "2018-03-11 02:33:00", | ||||
|                 "US/Pacific", | ||||
|                 "2018-03-11 01:59:59.999999999", | ||||
|                 "backward", | ||||
|             ], | ||||
|             [ | ||||
|                 "2018-03-11 02:33:00", | ||||
|                 "US/Pacific", | ||||
|                 "2018-03-11 03:33:00", | ||||
|                 timedelta(hours=1), | ||||
|             ], | ||||
|             [ | ||||
|                 "2018-03-11 02:33:00", | ||||
|                 "US/Pacific", | ||||
|                 "2018-03-11 01:33:00", | ||||
|                 timedelta(hours=-1), | ||||
|             ], | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) | ||||
|     def test_dti_tz_localize_nonexistent_shift( | ||||
|         self, start_ts, tz, end_ts, shift, tz_type, unit | ||||
|     ): | ||||
|         # GH#8917 | ||||
|         tz = tz_type + tz | ||||
|         if isinstance(shift, str): | ||||
|             shift = "shift_" + shift | ||||
|         dti = DatetimeIndex([Timestamp(start_ts)]).as_unit(unit) | ||||
|         result = dti.tz_localize(tz, nonexistent=shift) | ||||
|         expected = DatetimeIndex([Timestamp(end_ts)]).tz_localize(tz).as_unit(unit) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("offset", [-1, 1]) | ||||
|     def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, warsaw): | ||||
|         # GH#8917 | ||||
|         tz = warsaw | ||||
|         dti = DatetimeIndex([Timestamp("2015-03-29 02:20:00")]) | ||||
|         msg = "The provided timedelta will relocalize on a nonexistent time" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             dti.tz_localize(tz, nonexistent=timedelta(seconds=offset)) | ||||
| @ -0,0 +1,77 @@ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     NaT, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_unique(tz_naive_fixture): | ||||
|     idx = DatetimeIndex(["2017"] * 2, tz=tz_naive_fixture) | ||||
|     expected = idx[:1] | ||||
|  | ||||
|     result = idx.unique() | ||||
|     tm.assert_index_equal(result, expected) | ||||
|     # GH#21737 | ||||
|     # Ensure the underlying data is consistent | ||||
|     assert result[0] == expected[0] | ||||
|  | ||||
|  | ||||
| def test_index_unique(rand_series_with_duplicate_datetimeindex): | ||||
|     dups = rand_series_with_duplicate_datetimeindex | ||||
|     index = dups.index | ||||
|  | ||||
|     uniques = index.unique() | ||||
|     expected = DatetimeIndex( | ||||
|         [ | ||||
|             datetime(2000, 1, 2), | ||||
|             datetime(2000, 1, 3), | ||||
|             datetime(2000, 1, 4), | ||||
|             datetime(2000, 1, 5), | ||||
|         ], | ||||
|         dtype=index.dtype, | ||||
|     ) | ||||
|     assert uniques.dtype == index.dtype  # sanity | ||||
|     tm.assert_index_equal(uniques, expected) | ||||
|     assert index.nunique() == 4 | ||||
|  | ||||
|     # GH#2563 | ||||
|     assert isinstance(uniques, DatetimeIndex) | ||||
|  | ||||
|     dups_local = index.tz_localize("US/Eastern") | ||||
|     dups_local.name = "foo" | ||||
|     result = dups_local.unique() | ||||
|     expected = DatetimeIndex(expected, name="foo") | ||||
|     expected = expected.tz_localize("US/Eastern") | ||||
|     assert result.tz is not None | ||||
|     assert result.name == "foo" | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_index_unique2():
    # NaT is excluded from nunique() unless dropna=False.
    stamps = [1370745748 + t for t in range(20)] + [NaT._value]
    idx = DatetimeIndex(stamps * 3)

    tm.assert_index_equal(idx.unique(), DatetimeIndex(stamps))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21
|  | ||||
|  | ||||
def test_index_unique3():
    # Same as test_index_unique2, but built from Timestamp/NaT objects
    # instead of raw i8 values.
    base = Timestamp("2013-06-09 02:42:28")
    stamps = [base + timedelta(seconds=t) for t in range(20)] + [NaT]
    idx = DatetimeIndex(stamps * 3)

    tm.assert_index_equal(idx.unique(), DatetimeIndex(stamps))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21
|  | ||||
|  | ||||
| def test_is_unique_monotonic(rand_series_with_duplicate_datetimeindex): | ||||
|     index = rand_series_with_duplicate_datetimeindex.index | ||||
|     assert not index.is_unique | ||||
| @ -0,0 +1,56 @@ | ||||
| # Arithmetic tests specific to DatetimeIndex are generally about `freq` | ||||
#  retention or inference.  Other arithmetic tests belong in
| #  tests/arithmetic/test_datetime64.py | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Timedelta, | ||||
|     TimedeltaIndex, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexArithmetic:
    def test_add_timedelta_preserves_freq(self):
        # GH#37295: adding a Timedelta keeps the freq for a DTI with
        # freq=None or a Tick freq, even when the range spans DST.
        tz = "Canada/Eastern"
        dti = date_range(
            start=Timestamp("2019-03-26 00:00:00-0400", tz=tz),
            end=Timestamp("2020-10-17 00:00:00-0400", tz=tz),
            freq="D",
        )
        shifted = dti + Timedelta(days=1)
        assert shifted.freq == dti.freq

    def test_sub_datetime_preserves_freq(self, tz_naive_fixture):
        # GH#48818: subtracting a scalar Timestamp keeps the freq.
        dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture)

        result = dti - dti[0]

        expected = timedelta_range("0 Days", "11 Days")
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    @pytest.mark.xfail(
        reason="The inherited freq is incorrect bc dti.freq is incorrect "
        "https://github.com/pandas-dev/pandas/pull/48818/files#r982793461"
    )
    def test_sub_datetime_preserves_freq_across_dst(self):
        # GH#48818: across the spring-forward transition one calendar "day"
        # is only 23 hours, so the differences are not evenly spaced.
        anchor = Timestamp("2016-03-11", tz="US/Pacific")
        dti = date_range(anchor, periods=4)

        result = dti - dti[0]

        expected = TimedeltaIndex(
            [
                Timedelta(days=0),
                Timedelta(days=1),
                Timedelta(days=2),
                Timedelta(days=2, hours=23),
            ]
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,216 @@ | ||||
| import datetime as dt | ||||
| from datetime import date | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat.numpy import np_long | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     offsets, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDatetimeIndex:
    def test_is_(self):
        # Index.is_ is an identity check: True for views, False for copies.
        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
        assert dti.is_(dti)
        assert dti.is_(dti.view())
        assert not dti.is_(dti.copy())

    def test_time_overflow_for_32bit_machines(self):
        # GH8943.  On some machines NumPy defaults to np.int32 (for example,
        # 32-bit Linux machines).  In the function _generate_regular_range
        # found in tseries/index.py, `periods` gets multiplied by `strides`
        # (which has value 1e9) and since the max value for np.int32 is ~2e9,
        # and since those machines won't promote np.int32 to np.int64, we get
        # overflow.
        periods = np_long(1000)

        idx1 = date_range(start="2000", periods=periods, freq="s")
        assert len(idx1) == periods

        idx2 = date_range(end="2000", periods=periods, freq="s")
        assert len(idx2) == periods

    def test_nat(self):
        # np.nan in the constructor becomes NaT.
        assert DatetimeIndex([np.nan])[0] is pd.NaT

    def test_week_of_month_frequency(self):
        # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
        d1 = date(2002, 9, 1)
        d2 = date(2013, 10, 27)
        d3 = date(2012, 9, 30)
        idx1 = DatetimeIndex([d1, d2])
        idx2 = DatetimeIndex([d3])
        result_append = idx1.append(idx2)
        expected = DatetimeIndex([d1, d2, d3])
        tm.assert_index_equal(result_append, expected)
        # union sorts, append does not
        result_union = idx1.union(idx2)
        expected = DatetimeIndex([d1, d3, d2])
        tm.assert_index_equal(result_union, expected)

    def test_append_nondatetimeindex(self):
        # Appending a non-datetime Index yields object entries; the datetime
        # values survive as Timestamps.
        rng = date_range("1/1/2000", periods=10)
        idx = Index(["a", "b", "c", "d"])

        result = rng.append(idx)
        assert isinstance(result[0], Timestamp)

    def test_misc_coverage(self):
        # groupby on the index's day attribute returns Timestamp values.
        rng = date_range("1/1/2000", periods=5)
        result = rng.groupby(rng.day)
        assert isinstance(next(iter(result.values()))[0], Timestamp)

    # TODO: belongs in frame groupby tests?
    def test_groupby_function_tuple_1677(self):
        # GH#1677: grouping by a function returning tuples keeps tuple keys.
        df = DataFrame(
            np.random.default_rng(2).random(100),
            index=date_range("1/1/2000", periods=100),
        )
        monthly_group = df.groupby(lambda x: (x.year, x.month))

        result = monthly_group.mean()
        assert isinstance(result.index[0], tuple)

    def assert_index_parameters(self, index):
        # Helper: check the fixed nanosecond freq used by test_ns_index.
        assert index.freq == "40960ns"
        assert index.inferred_freq == "40960ns"

    def test_ns_index(self):
        # Nanosecond-resolution freq survives construction and round-trips
        # through date_range.
        nsamples = 400
        ns = int(1e9 / 24414)
        dtstart = np.datetime64("2012-09-20T00:00:00")

        # Renamed from `dt`, which shadowed the module-level
        # `import datetime as dt`.
        stamps = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
        freq = ns * offsets.Nano()
        index = DatetimeIndex(stamps, freq=freq, name="time")
        self.assert_index_parameters(index)

        new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
        self.assert_index_parameters(new_index)

    def test_asarray_tz_naive(self):
        # This shouldn't produce a warning.
        idx = date_range("2000", periods=2)
        # M8[ns] by default
        result = np.asarray(idx)

        expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
        tm.assert_numpy_array_equal(result, expected)

        # optionally, object
        result = np.asarray(idx, dtype=object)

        expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")])
        tm.assert_numpy_array_equal(result, expected)

    def test_asarray_tz_aware(self):
        # Aware index converts to UTC wall times for M8[ns], and to
        # tz-aware Timestamps for object dtype.
        tz = "US/Central"
        idx = date_range("2000", periods=2, tz=tz)
        expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
        result = np.asarray(idx, dtype="datetime64[ns]")

        tm.assert_numpy_array_equal(result, expected)

        # Old behavior with no warning
        result = np.asarray(idx, dtype="M8[ns]")

        tm.assert_numpy_array_equal(result, expected)

        # Future behavior with no warning
        expected = np.array(
            [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
        )
        result = np.asarray(idx, dtype=object)

        tm.assert_numpy_array_equal(result, expected)

    def test_CBH_deprecated(self):
        # "CBH" alias is deprecated in favor of "cbh".
        msg = "'CBH' is deprecated and will be removed in a future version."

        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(
                dt.datetime(2022, 12, 11), dt.datetime(2022, 12, 13), freq="CBH"
            )
        result = DatetimeIndex(
            [
                "2022-12-12 09:00:00",
                "2022-12-12 10:00:00",
                "2022-12-12 11:00:00",
                "2022-12-12 12:00:00",
                "2022-12-12 13:00:00",
                "2022-12-12 14:00:00",
                "2022-12-12 15:00:00",
                "2022-12-12 16:00:00",
            ],
            dtype="datetime64[ns]",
            freq="cbh",
        )

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq_depr, expected_values, expected_freq",
        [
            (
                "AS-AUG",
                ["2021-08-01", "2022-08-01", "2023-08-01"],
                "YS-AUG",
            ),
            (
                "1BAS-MAY",
                ["2021-05-03", "2022-05-02", "2023-05-01"],
                "1BYS-MAY",
            ),
        ],
    )
    def test_AS_BAS_deprecated(self, freq_depr, expected_values, expected_freq):
        # GH#55479
        # Strip any leading multiplier digits to get the bare alias.
        freq_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
        msg = f"'{freq_msg}' is deprecated and will be removed in a future version."

        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(
                dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1), freq=freq_depr
            )
        result = DatetimeIndex(
            expected_values,
            dtype="datetime64[ns]",
            freq=expected_freq,
        )

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq, expected_values, freq_depr",
        [
            ("2BYE-MAR", ["2016-03-31"], "2BA-MAR"),
            ("2BYE-JUN", ["2016-06-30"], "2BY-JUN"),
            ("2BME", ["2016-02-29", "2016-04-29", "2016-06-30"], "2BM"),
            ("2BQE", ["2016-03-31"], "2BQ"),
            ("1BQE-MAR", ["2016-03-31", "2016-06-30"], "1BQ-MAR"),
        ],
    )
    def test_BM_BQ_BY_deprecated(self, freq, expected_values, freq_depr):
        # GH#52064
        # BUG FIX: the second half of this message was previously a dangling
        # f-string statement (a no-op), so only the first half was matched.
        # Join the halves so the full deprecation message is asserted.
        msg = (
            f"'{freq_depr[1:]}' is deprecated and will be removed "
            f"in a future version, please use '{freq[1:]}' instead."
        )

        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(start="2016-02-21", end="2016-08-21", freq=freq_depr)
        result = DatetimeIndex(
            data=expected_values,
            dtype="datetime64[ns]",
            freq=freq,
        )

        tm.assert_index_equal(result, expected)
| @ -0,0 +1,356 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import dateutil.tz | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     NaT, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
    # datetime64 resolution used to parametrize the tests in this module.
    return request.param
|  | ||||
|  | ||||
| def test_get_values_for_csv(): | ||||
|     index = pd.date_range(freq="1D", periods=3, start="2017-01-01") | ||||
|  | ||||
|     # First, with no arguments. | ||||
|     expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object) | ||||
|  | ||||
|     result = index._get_values_for_csv() | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # No NaN values, so na_rep has no effect | ||||
|     result = index._get_values_for_csv(na_rep="pandas") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # Make sure date formatting works | ||||
|     expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object) | ||||
|  | ||||
|     result = index._get_values_for_csv(date_format="%m-%Y-%d") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # NULL object handling should work | ||||
|     index = DatetimeIndex(["2017-01-01", NaT, "2017-01-03"]) | ||||
|     expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object) | ||||
|  | ||||
|     result = index._get_values_for_csv(na_rep="NaT") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object) | ||||
|  | ||||
|     result = index._get_values_for_csv(na_rep="pandas") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = index._get_values_for_csv(na_rep="NaT", date_format="%Y-%m-%d %H:%M:%S.%f") | ||||
|     expected = np.array( | ||||
|         ["2017-01-01 00:00:00.000000", "NaT", "2017-01-03 00:00:00.000000"], | ||||
|         dtype=object, | ||||
|     ) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # invalid format | ||||
|     result = index._get_values_for_csv(na_rep="NaT", date_format="foo") | ||||
|     expected = np.array(["foo", "NaT", "foo"], dtype=object) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestDatetimeIndexRendering: | ||||
|     @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) | ||||
|     def test_dti_with_timezone_repr(self, tzstr): | ||||
|         rng = pd.date_range("4/13/2010", "5/6/2010") | ||||
|  | ||||
|         rng_eastern = rng.tz_localize(tzstr) | ||||
|  | ||||
|         rng_repr = repr(rng_eastern) | ||||
|         assert "2010-04-13 00:00:00" in rng_repr | ||||
|  | ||||
|     def test_dti_repr_dates(self): | ||||
|         text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)])) | ||||
|         assert "['2013-01-01'," in text | ||||
|         assert ", '2014-01-01']" in text | ||||
|  | ||||
|     def test_dti_repr_mixed(self): | ||||
|         text = str( | ||||
|             pd.to_datetime( | ||||
|                 [datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)] | ||||
|             ) | ||||
|         ) | ||||
|         assert "'2013-01-01 00:00:00'," in text | ||||
|         assert "'2014-01-01 00:00:00']" in text | ||||
|  | ||||
|     def test_dti_repr_short(self): | ||||
|         dr = pd.date_range(start="1/1/2012", periods=1) | ||||
|         repr(dr) | ||||
|  | ||||
|         dr = pd.date_range(start="1/1/2012", periods=2) | ||||
|         repr(dr) | ||||
|  | ||||
|         dr = pd.date_range(start="1/1/2012", periods=3) | ||||
|         repr(dr) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dates, freq, expected_repr", | ||||
|         [ | ||||
|             ( | ||||
|                 ["2012-01-01 00:00:00"], | ||||
|                 "60min", | ||||
|                 ( | ||||
|                     "DatetimeIndex(['2012-01-01 00:00:00'], " | ||||
|                     "dtype='datetime64[ns]', freq='60min')" | ||||
|                 ), | ||||
|             ), | ||||
|             ( | ||||
|                 ["2012-01-01 00:00:00", "2012-01-01 01:00:00"], | ||||
|                 "60min", | ||||
|                 "DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 01:00:00'], " | ||||
|                 "dtype='datetime64[ns]', freq='60min')", | ||||
|             ), | ||||
|             ( | ||||
|                 ["2012-01-01"], | ||||
|                 "24h", | ||||
|                 "DatetimeIndex(['2012-01-01'], dtype='datetime64[ns]', freq='24h')", | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_dti_repr_time_midnight(self, dates, freq, expected_repr, unit): | ||||
|         # GH53634 | ||||
|         dti = DatetimeIndex(dates, freq).as_unit(unit) | ||||
|         actual_repr = repr(dti) | ||||
|         assert actual_repr == expected_repr.replace("[ns]", f"[{unit}]") | ||||
|  | ||||
|     def test_dti_representation(self, unit): | ||||
|         idxs = [] | ||||
|         idxs.append(DatetimeIndex([], freq="D")) | ||||
|         idxs.append(DatetimeIndex(["2011-01-01"], freq="D")) | ||||
|         idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")) | ||||
|         idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")) | ||||
|         idxs.append( | ||||
|             DatetimeIndex( | ||||
|                 ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], | ||||
|                 freq="h", | ||||
|                 tz="Asia/Tokyo", | ||||
|             ) | ||||
|         ) | ||||
|         idxs.append( | ||||
|             DatetimeIndex( | ||||
|                 ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern" | ||||
|             ) | ||||
|         ) | ||||
|         idxs.append( | ||||
|             DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="UTC") | ||||
|         ) | ||||
|  | ||||
|         exp = [] | ||||
|         exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')") | ||||
|         exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')") | ||||
|         exp.append( | ||||
|             "DatetimeIndex(['2011-01-01', '2011-01-02'], " | ||||
|             "dtype='datetime64[ns]', freq='D')" | ||||
|         ) | ||||
|         exp.append( | ||||
|             "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " | ||||
|             "dtype='datetime64[ns]', freq='D')" | ||||
|         ) | ||||
|         exp.append( | ||||
|             "DatetimeIndex(['2011-01-01 09:00:00+09:00', " | ||||
|             "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']" | ||||
|             ", dtype='datetime64[ns, Asia/Tokyo]', freq='h')" | ||||
|         ) | ||||
|         exp.append( | ||||
|             "DatetimeIndex(['2011-01-01 09:00:00-05:00', " | ||||
|             "'2011-01-01 10:00:00-05:00', 'NaT'], " | ||||
|             "dtype='datetime64[ns, US/Eastern]', freq=None)" | ||||
|         ) | ||||
|         exp.append( | ||||
|             "DatetimeIndex(['2011-01-01 09:00:00+00:00', " | ||||
|             "'2011-01-01 10:00:00+00:00', 'NaT'], " | ||||
|             "dtype='datetime64[ns, UTC]', freq=None)" | ||||
|             "" | ||||
|         ) | ||||
|  | ||||
|         with pd.option_context("display.width", 300): | ||||
|             for index, expected in zip(idxs, exp): | ||||
|                 index = index.as_unit(unit) | ||||
|                 expected = expected.replace("[ns", f"[{unit}") | ||||
|                 result = repr(index) | ||||
|                 assert result == expected | ||||
|                 result = str(index) | ||||
|                 assert result == expected | ||||
|  | ||||
|     # TODO: this is a Series.__repr__ test | ||||
|     def test_dti_representation_to_series(self, unit): | ||||
|         idx1 = DatetimeIndex([], freq="D") | ||||
|         idx2 = DatetimeIndex(["2011-01-01"], freq="D") | ||||
|         idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D") | ||||
|         idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") | ||||
|         idx5 = DatetimeIndex( | ||||
|             ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], | ||||
|             freq="h", | ||||
|             tz="Asia/Tokyo", | ||||
|         ) | ||||
|         idx6 = DatetimeIndex( | ||||
|             ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern" | ||||
|         ) | ||||
|         idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"]) | ||||
|  | ||||
|         exp1 = """Series([], dtype: datetime64[ns])""" | ||||
|  | ||||
|         exp2 = "0   2011-01-01\ndtype: datetime64[ns]" | ||||
|  | ||||
|         exp3 = "0   2011-01-01\n1   2011-01-02\ndtype: datetime64[ns]" | ||||
|  | ||||
|         exp4 = ( | ||||
|             "0   2011-01-01\n" | ||||
|             "1   2011-01-02\n" | ||||
|             "2   2011-01-03\n" | ||||
|             "dtype: datetime64[ns]" | ||||
|         ) | ||||
|  | ||||
|         exp5 = ( | ||||
|             "0   2011-01-01 09:00:00+09:00\n" | ||||
|             "1   2011-01-01 10:00:00+09:00\n" | ||||
|             "2   2011-01-01 11:00:00+09:00\n" | ||||
|             "dtype: datetime64[ns, Asia/Tokyo]" | ||||
|         ) | ||||
|  | ||||
|         exp6 = ( | ||||
|             "0   2011-01-01 09:00:00-05:00\n" | ||||
|             "1   2011-01-01 10:00:00-05:00\n" | ||||
|             "2                         NaT\n" | ||||
|             "dtype: datetime64[ns, US/Eastern]" | ||||
|         ) | ||||
|  | ||||
|         exp7 = ( | ||||
|             "0   2011-01-01 09:00:00\n" | ||||
|             "1   2011-01-02 10:15:00\n" | ||||
|             "dtype: datetime64[ns]" | ||||
|         ) | ||||
|  | ||||
|         with pd.option_context("display.width", 300): | ||||
|             for idx, expected in zip( | ||||
|                 [idx1, idx2, idx3, idx4, idx5, idx6, idx7], | ||||
|                 [exp1, exp2, exp3, exp4, exp5, exp6, exp7], | ||||
|             ): | ||||
|                 ser = Series(idx.as_unit(unit)) | ||||
|                 result = repr(ser) | ||||
|                 assert result == expected.replace("[ns", f"[{unit}") | ||||
|  | ||||
|     def test_dti_summary(self): | ||||
|         # GH#9116 | ||||
|         idx1 = DatetimeIndex([], freq="D") | ||||
|         idx2 = DatetimeIndex(["2011-01-01"], freq="D") | ||||
|         idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D") | ||||
|         idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") | ||||
|         idx5 = DatetimeIndex( | ||||
|             ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], | ||||
|             freq="h", | ||||
|             tz="Asia/Tokyo", | ||||
|         ) | ||||
|         idx6 = DatetimeIndex( | ||||
|             ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern" | ||||
|         ) | ||||
|  | ||||
|         exp1 = "DatetimeIndex: 0 entries\nFreq: D" | ||||
|  | ||||
|         exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D" | ||||
|  | ||||
|         exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D" | ||||
|  | ||||
|         exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D" | ||||
|  | ||||
|         exp5 = ( | ||||
|             "DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 " | ||||
|             "to 2011-01-01 11:00:00+09:00\n" | ||||
|             "Freq: h" | ||||
|         ) | ||||
|  | ||||
|         exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT""" | ||||
|  | ||||
|         for idx, expected in zip( | ||||
|             [idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6] | ||||
|         ): | ||||
|             result = idx._summary() | ||||
|             assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", [None, pytz.utc, dateutil.tz.tzutc()]) | ||||
|     @pytest.mark.parametrize("freq", ["B", "C"]) | ||||
|     def test_dti_business_repr_etc_smoke(self, tz, freq): | ||||
|         # only really care that it works | ||||
|         dti = pd.bdate_range( | ||||
|             datetime(2009, 1, 1), datetime(2010, 1, 1), tz=tz, freq=freq | ||||
|         ) | ||||
|         repr(dti) | ||||
|         dti._summary() | ||||
|         dti[2:2]._summary() | ||||
|  | ||||
|  | ||||
| class TestFormat: | ||||
|     def test_format(self): | ||||
|         # GH#35439 | ||||
|         idx = pd.date_range("20130101", periods=5) | ||||
|         expected = [f"{x:%Y-%m-%d}" for x in idx] | ||||
|         msg = r"DatetimeIndex\.format is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             assert idx.format() == expected | ||||
|  | ||||
|     def test_format_with_name_time_info(self): | ||||
|         # bug I fixed 12/20/2011 | ||||
|         dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something") | ||||
|  | ||||
|         msg = "DatetimeIndex.format is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             formatted = dates.format(name=True) | ||||
|         assert formatted[0] == "something" | ||||
|  | ||||
|     def test_format_datetime_with_time(self): | ||||
|         dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) | ||||
|  | ||||
|         msg = "DatetimeIndex.format is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = dti.format() | ||||
|         expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"] | ||||
|         assert len(result) == 2 | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_format_datetime(self): | ||||
|         msg = "DatetimeIndex.format is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format() | ||||
|         assert formatted[0] == "2003-01-01 12:00:00" | ||||
|         assert formatted[1] == "NaT" | ||||
|  | ||||
|     def test_format_date(self): | ||||
|         msg = "DatetimeIndex.format is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format() | ||||
|         assert formatted[0] == "2003-01-01" | ||||
|         assert formatted[1] == "NaT" | ||||
|  | ||||
|     def test_format_date_tz(self): | ||||
|         dti = pd.to_datetime([datetime(2013, 1, 1)], utc=True) | ||||
|         msg = "DatetimeIndex.format is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             formatted = dti.format() | ||||
|         assert formatted[0] == "2013-01-01 00:00:00+00:00" | ||||
|  | ||||
|         dti = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             formatted = dti.format() | ||||
|         assert formatted[0] == "2013-01-01 00:00:00+00:00" | ||||
|  | ||||
|     def test_format_date_explicit_date_format(self): | ||||
|         dti = pd.to_datetime([datetime(2003, 2, 1), NaT]) | ||||
|         msg = "DatetimeIndex.format is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             formatted = dti.format(date_format="%m-%d-%Y", na_rep="UT") | ||||
|         assert formatted[0] == "02-01-2003" | ||||
|         assert formatted[1] == "UT" | ||||
| @ -0,0 +1,61 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
| from pandas.tseries.offsets import ( | ||||
|     BDay, | ||||
|     DateOffset, | ||||
|     Day, | ||||
|     Hour, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestFreq:
    """Tests for setting/clearing the ``freq`` of a DatetimeIndex's data."""

    def test_freq_setter_errors(self):
        # GH#20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # a frequency incompatible with the inferred one raises
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # a string that is not a frequency at all raises
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48h", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH#20678
        dti = DatetimeIndex(values, tz=tz)

        # the setter accepts offsets and strings (converted to offsets)
        dti._data.freq = freq
        assert dti.freq == freq
        assert isinstance(dti.freq, DateOffset)

        # and the freq can be cleared again
        dti._data.freq = None
        assert dti.freq is None

    def test_freq_view_safe(self):
        # Clearing the freq on one DatetimeIndex must not leak into another
        # index viewing the same underlying data
        orig = date_range("2016-01-01", periods=5)
        arr = orig._data

        view = DatetimeIndex(arr)._with_freq(None)
        assert view.freq is None

        # Original was not altered
        assert orig.freq == "D"
        assert arr.freq == "D"
| @ -0,0 +1,717 @@ | ||||
| from datetime import ( | ||||
|     date, | ||||
|     datetime, | ||||
|     time, | ||||
|     timedelta, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import index as libindex | ||||
| from pandas.compat.numpy import np_long | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Timestamp, | ||||
|     bdate_range, | ||||
|     date_range, | ||||
|     notna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.tseries.frequencies import to_offset | ||||
|  | ||||
| START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) | ||||
|  | ||||
|  | ||||
| class TestGetItem: | ||||
|     def test_getitem_slice_keeps_name(self): | ||||
|         # GH4226 | ||||
|         st = Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles") | ||||
|         et = Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles") | ||||
|         dr = date_range(st, et, freq="h", name="timebucket") | ||||
|         assert dr[1:].name == dr.name | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"]) | ||||
|     def test_getitem(self, tz): | ||||
|         idx = date_range("2011-01-01", "2011-01-31", freq="D", tz=tz, name="idx") | ||||
|  | ||||
|         result = idx[0] | ||||
|         assert result == Timestamp("2011-01-01", tz=idx.tz) | ||||
|  | ||||
|         result = idx[0:5] | ||||
|         expected = date_range( | ||||
|             "2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx" | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         assert result.freq == expected.freq | ||||
|  | ||||
|         result = idx[0:10:2] | ||||
|         expected = date_range( | ||||
|             "2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx" | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         assert result.freq == expected.freq | ||||
|  | ||||
|         result = idx[-20:-5:3] | ||||
|         expected = date_range( | ||||
|             "2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx" | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         assert result.freq == expected.freq | ||||
|  | ||||
|         result = idx[4::-1] | ||||
|         expected = DatetimeIndex( | ||||
|             ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"], | ||||
|             dtype=idx.dtype, | ||||
|             freq="-1D", | ||||
|             name="idx", | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         assert result.freq == expected.freq | ||||
|  | ||||
|     @pytest.mark.parametrize("freq", ["B", "C"]) | ||||
|     def test_dti_business_getitem(self, freq): | ||||
|         rng = bdate_range(START, END, freq=freq) | ||||
|         smaller = rng[:5] | ||||
|         exp = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq) | ||||
|         tm.assert_index_equal(smaller, exp) | ||||
|         assert smaller.freq == exp.freq | ||||
|         assert smaller.freq == rng.freq | ||||
|  | ||||
|         sliced = rng[::5] | ||||
|         assert sliced.freq == to_offset(freq) * 5 | ||||
|  | ||||
|         fancy_indexed = rng[[4, 3, 2, 1, 0]] | ||||
|         assert len(fancy_indexed) == 5 | ||||
|         assert isinstance(fancy_indexed, DatetimeIndex) | ||||
|         assert fancy_indexed.freq is None | ||||
|  | ||||
|         # 32-bit vs. 64-bit platforms | ||||
|         assert rng[4] == rng[np_long(4)] | ||||
|  | ||||
|     @pytest.mark.parametrize("freq", ["B", "C"]) | ||||
|     def test_dti_business_getitem_matplotlib_hackaround(self, freq): | ||||
|         rng = bdate_range(START, END, freq=freq) | ||||
|         with pytest.raises(ValueError, match="Multi-dimensional indexing"): | ||||
|             # GH#30588 multi-dimensional indexing deprecated | ||||
|             rng[:, None] | ||||
|  | ||||
|     def test_getitem_int_list(self): | ||||
|         dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME") | ||||
|         dti2 = dti[[1, 3, 5]] | ||||
|  | ||||
|         v1 = dti2[0] | ||||
|         v2 = dti2[1] | ||||
|         v3 = dti2[2] | ||||
|  | ||||
|         assert v1 == Timestamp("2/28/2005") | ||||
|         assert v2 == Timestamp("4/30/2005") | ||||
|         assert v3 == Timestamp("6/30/2005") | ||||
|  | ||||
|         # getitem with non-slice drops freq | ||||
|         assert dti2.freq is None | ||||
|  | ||||
|  | ||||
| class TestWhere: | ||||
|     def test_where_doesnt_retain_freq(self): | ||||
|         dti = date_range("20130101", periods=3, freq="D", name="idx") | ||||
|         cond = [True, True, False] | ||||
|         expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx") | ||||
|  | ||||
|         result = dti.where(cond, dti[::-1]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_where_other(self): | ||||
|         # other is ndarray or Index | ||||
|         i = date_range("20130101", periods=3, tz="US/Eastern") | ||||
|  | ||||
|         for arr in [np.nan, pd.NaT]: | ||||
|             result = i.where(notna(i), other=arr) | ||||
|             expected = i | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         i2 = i.copy() | ||||
|         i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) | ||||
|         result = i.where(notna(i2), i2) | ||||
|         tm.assert_index_equal(result, i2) | ||||
|  | ||||
|         i2 = i.copy() | ||||
|         i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) | ||||
|         result = i.where(notna(i2), i2._values) | ||||
|         tm.assert_index_equal(result, i2) | ||||
|  | ||||
|     def test_where_invalid_dtypes(self): | ||||
|         dti = date_range("20130101", periods=3, tz="US/Eastern") | ||||
|  | ||||
|         tail = dti[2:].tolist() | ||||
|         i2 = Index([pd.NaT, pd.NaT] + tail) | ||||
|  | ||||
|         mask = notna(i2) | ||||
|  | ||||
|         # passing tz-naive ndarray to tzaware DTI | ||||
|         result = dti.where(mask, i2.values) | ||||
|         expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # passing tz-aware DTI to tznaive DTI | ||||
|         naive = dti.tz_localize(None) | ||||
|         result = naive.where(mask, i2) | ||||
|         expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         pi = i2.tz_localize(None).to_period("D") | ||||
|         result = dti.where(mask, pi) | ||||
|         expected = Index([pi[0], pi[1]] + tail, dtype=object) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         tda = i2.asi8.view("timedelta64[ns]") | ||||
|         result = dti.where(mask, tda) | ||||
|         expected = Index([tda[0], tda[1]] + tail, dtype=object) | ||||
|         assert isinstance(expected[0], np.timedelta64) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = dti.where(mask, i2.asi8) | ||||
|         expected = Index([pd.NaT._value, pd.NaT._value] + tail, dtype=object) | ||||
|         assert isinstance(expected[0], int) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # non-matching scalar | ||||
|         td = pd.Timedelta(days=4) | ||||
|         result = dti.where(mask, td) | ||||
|         expected = Index([td, td] + tail, dtype=object) | ||||
|         assert expected[0] is td | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_where_mismatched_nat(self, tz_aware_fixture): | ||||
|         tz = tz_aware_fixture | ||||
|         dti = date_range("2013-01-01", periods=3, tz=tz) | ||||
|         cond = np.array([True, False, True]) | ||||
|  | ||||
|         tdnat = np.timedelta64("NaT", "ns") | ||||
|         expected = Index([dti[0], tdnat, dti[2]], dtype=object) | ||||
|         assert expected[1] is tdnat | ||||
|  | ||||
|         result = dti.where(cond, tdnat) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_where_tz(self): | ||||
|         i = date_range("20130101", periods=3, tz="US/Eastern") | ||||
|         result = i.where(notna(i)) | ||||
|         expected = i | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         i2 = i.copy() | ||||
|         i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) | ||||
|         result = i.where(notna(i2)) | ||||
|         expected = i2 | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
class TestTake:
    """Tests for DatetimeIndex.take: metadata retention, freq inference,
    fill_value handling, and invalid-argument errors."""

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_take_dont_lose_meta(self, tzstr):
        # take() must preserve both tz and freq of the source index
        rng = date_range("1/1/2000", periods=20, tz=tzstr)

        result = rng.take(range(5))
        assert result.tz == rng.tz
        assert result.freq == rng.freq

    def test_take_nan_first_datetime(self):
        # negative indices wrap around; NaT entries are carried through
        index = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")])
        result = index.take([-1, 0, 1])
        expected = DatetimeIndex([index[-1], index[0], index[1]])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_take(self, tz):
        # GH#10295
        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx", tz=tz)

        result = idx.take([0])
        assert result == Timestamp("2011-01-01", tz=idx.tz)

        # evenly spaced increasing positions infer an equivalent freq
        result = idx.take([0, 1, 2])
        expected = date_range(
            "2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        result = idx.take([0, 2, 4])
        expected = date_range(
            "2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        # evenly spaced decreasing positions infer a negative freq
        result = idx.take([7, 4, 1])
        expected = date_range(
            "2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        # unevenly spaced positions cannot infer a freq
        result = idx.take([3, 2, 5])
        expected = DatetimeIndex(
            ["2011-01-04", "2011-01-03", "2011-01-06"],
            dtype=idx.dtype,
            freq=None,
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq is None

        result = idx.take([-3, 2, 5])
        expected = DatetimeIndex(
            ["2011-01-29", "2011-01-03", "2011-01-06"],
            dtype=idx.dtype,
            freq=None,
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    def test_take_invalid_kwargs(self):
        # take() rejects numpy-only keyword arguments
        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        indices = [1, 6, 5, 9, 10, 13, 15, 3]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            idx.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, mode="clip")

    # TODO: This method came from test_datetime; de-dup with version above
    @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
    def test_take2(self, tz):
        dates = [
            datetime(2010, 1, 1, 14),
            datetime(2010, 1, 1, 15),
            datetime(2010, 1, 1, 17),
            datetime(2010, 1, 1, 21),
        ]

        idx = date_range(
            start="2010-01-01 09:00",
            end="2010-02-01 09:00",
            freq="h",
            tz=tz,
            name="idx",
        )
        expected = DatetimeIndex(dates, freq=None, name="idx", dtype=idx.dtype)

        # positional take and fancy __getitem__ must behave identically
        taken1 = idx.take([5, 6, 8, 12])
        taken2 = idx[[5, 6, 8, 12]]

        for taken in [taken1, taken2]:
            tm.assert_index_equal(taken, expected)
            assert isinstance(taken, DatetimeIndex)
            assert taken.freq is None
            assert taken.tz == expected.tz
            assert taken.name == expected.name

    def test_take_fill_value(self):
        # GH#12631
        idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        result = idx.take(np.array([1, 0, -1]))
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
        tm.assert_index_equal(result, expected)

        # fill_value: -1 now means "missing" rather than "last element"
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        tm.assert_index_equal(result, expected)

        # allow_fill=False: -1 is an ordinary positional index again
        result = idx.take(np.array([1, 0, -2]), allow_fill=False, fill_value=True) if False else idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "out of bounds"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_with_timezone(self):
        # same as test_take_fill_value, but on a tz-aware index
        idx = DatetimeIndex(
            ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        result = idx.take(np.array([1, 0, -1]))
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "out of bounds"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))
|  | ||||
|  | ||||
class TestGetLoc:
    """Tests for DatetimeIndex.get_loc and ``in`` membership."""

    def test_get_loc_key_unit_mismatch(self):
        # a key with a coarser unit still resolves when its value is
        # representable in the index's unit
        idx = date_range("2000-01-01", periods=3)
        key = idx[1].as_unit("ms")
        loc = idx.get_loc(key)
        assert loc == 1
        assert key in idx

    def test_get_loc_key_unit_mismatch_not_castable(self):
        # a key that cannot be cast losslessly to the index unit -> KeyError
        dta = date_range("2000-01-01", periods=3)._data.astype("M8[s]")
        dti = DatetimeIndex(dta)
        key = dta[0].as_unit("ns") + pd.Timedelta(1)

        with pytest.raises(
            KeyError, match=r"Timestamp\('2000-01-01 00:00:00.000000001'\)"
        ):
            dti.get_loc(key)

        assert key not in dti

    def test_get_loc_time_obj(self):
        # time indexing
        idx = date_range("2000-01-01", periods=24, freq="h")

        result = idx.get_loc(time(12))
        expected = np.array([12])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

        # a time not present yields an empty indexer rather than an error
        result = idx.get_loc(time(12, 30))
        expected = np.array([])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

    @pytest.mark.parametrize("offset", [-10, 10])
    def test_get_loc_time_obj2(self, monkeypatch, offset):
        # GH#8667
        # size the index just below / above the engine size cutoff so both
        # lookup code paths are exercised
        size_cutoff = 50
        n = size_cutoff + offset
        key = time(15, 11, 30)
        start = key.hour * 3600 + key.minute * 60 + key.second
        step = 24 * 3600

        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            idx = date_range("2014-11-26", periods=n, freq="s")
            ts = pd.Series(np.random.default_rng(2).standard_normal(n), index=idx)
            locs = np.arange(start, n, step, dtype=np.intp)

            result = ts.index.get_loc(key)
            tm.assert_numpy_array_equal(result, locs)
            tm.assert_series_equal(ts[key], ts.iloc[locs])

            # setitem through a time key hits the same locations
            left, right = ts.copy(), ts.copy()
            left[key] *= -10
            right.iloc[locs] *= -10
            tm.assert_series_equal(left, right)

    def test_get_loc_time_nat(self):
        # GH#35114
        # Case where key's total microseconds happens to match iNaT % 1e6 // 1000
        tic = time(minute=12, second=43, microsecond=145224)
        dti = DatetimeIndex([pd.NaT])

        loc = dti.get_loc(tic)
        expected = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(loc, expected)

    def test_get_loc_nat(self):
        # GH#20464: every NaT-like missing marker resolves to the NaT slot
        index = DatetimeIndex(["1/3/2000", "NaT"])
        assert index.get_loc(pd.NaT) == 1

        assert index.get_loc(None) == 1

        assert index.get_loc(np.nan) == 1

        assert index.get_loc(pd.NA) == 1

        assert index.get_loc(np.datetime64("NaT")) == 1

        # ... but a timedelta64 NaT is a dtype mismatch
        with pytest.raises(KeyError, match="NaT"):
            index.get_loc(np.timedelta64("NaT"))

    @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
    def test_get_loc_timedelta_invalid_key(self, key):
        # GH#20464: timedelta keys are rejected outright
        dti = date_range("1970-01-01", periods=10)
        msg = "Cannot index DatetimeIndex with [Tt]imedelta"
        with pytest.raises(TypeError, match=msg):
            dti.get_loc(key)

    def test_get_loc_reasonable_key_error(self):
        # GH#1062: the KeyError message should include the missing key
        index = DatetimeIndex(["1/3/2000"])
        with pytest.raises(KeyError, match="2000"):
            index.get_loc("1/1/2000")

    def test_get_loc_year_str(self):
        # partial string indexing with a bare year resolves to a slice
        rng = date_range("1/1/2000", "1/1/2010")

        result = rng.get_loc("2009")
        expected = slice(3288, 3653)
        assert result == expected
|  | ||||
|  | ||||
class TestContains:
    """Membership (``in``) checks on DatetimeIndex."""

    def test_dti_contains_with_duplicates(self):
        # membership works even when the index repeats the entry
        stamp = datetime(2011, 12, 5, 20, 30)
        dti = DatetimeIndex([stamp, stamp])
        assert stamp in dti

    @pytest.mark.parametrize(
        "vals",
        [
            [0, 1, 0],
            [0, 0, -1],
            [0, -1, -1],
            ["2015", "2015", "2016"],
            ["2015", "2015", "2014"],
        ],
    )
    def test_contains_nonunique(self, vals):
        # GH#9512
        dti = DatetimeIndex(vals)
        assert dti[0] in dti
|  | ||||
|  | ||||
class TestGetIndexer:
    """Tests for DatetimeIndex.get_indexer with various targets and fill methods."""

    def test_get_indexer_date_objs(self):
        # Targets given as datetime.date objects should resolve to the same
        #  positions as the midnight Timestamps they correspond to.
        rng = date_range("1/1/2000", periods=20)

        result = rng.get_indexer(rng.map(lambda x: x.date()))
        expected = rng.get_indexer(rng)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer(self):
        # Exercise exact matching plus the "pad", "backfill", and "nearest"
        #  fill methods, with scalar and list-like tolerances.
        idx = date_range("2000-01-01", periods=3)
        exp = np.array([0, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)

        # Targets offset from idx[0] so each fill method gives a distinct result.
        target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
        )
        # A 1-hour tolerance turns the 12-hours-away target into a miss (-1).
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
            np.array([0, -1, 1], dtype=np.intp),
        )
        # Tolerance may also be list-like, mixing Timedelta and timedelta64.
        tol_raw = [
            pd.Timedelta("1 hour"),
            pd.Timedelta("1 hour"),
            pd.Timedelta("1 hour").to_timedelta64(),
        ]
        tm.assert_numpy_array_equal(
            idx.get_indexer(
                target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
            ),
            np.array([0, -1, 1], dtype=np.intp),
        )
        # An unconvertible entry in a list-like tolerance raises, as does an
        #  unparseable scalar tolerance string.
        tol_bad = [
            pd.Timedelta("2 hour").to_timedelta64(),
            pd.Timedelta("1 hour").to_timedelta64(),
            "foo",
        ]
        msg = "Could not convert 'foo' to NumPy timedelta"
        with pytest.raises(ValueError, match=msg):
            idx.get_indexer(target, "nearest", tolerance=tol_bad)
        with pytest.raises(ValueError, match="abbreviation w/o a number"):
            idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")

    @pytest.mark.parametrize(
        "target",
        [
            [date(2020, 1, 1), Timestamp("2020-01-02")],
            [Timestamp("2020-01-01"), date(2020, 1, 2)],
        ],
    )
    def test_get_indexer_mixed_dtypes(self, target):
        # https://github.com/pandas-dev/pandas/issues/33741
        # A target mixing date and Timestamp objects should still match.
        values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
        result = values.get_indexer(target)
        expected = np.array([0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "target, positions",
        [
            ([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]),
            ([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]),
            ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
        ],
    )
    def test_get_indexer_out_of_bounds_date(self, target, positions):
        # Dates beyond the representable Timestamp range map to -1 (no match)
        #  rather than raising.
        values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])

        result = values.get_indexer(target)
        expected = np.array(positions, dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_pad_requires_monotonicity(self):
        # method="pad" is only defined for monotonic indexes.
        rng = date_range("1/1/2000", "3/1/2000", freq="B")

        # neither monotonic increasing or decreasing
        rng2 = rng[[1, 0, 2]]

        msg = "index must be monotonic increasing or decreasing"
        with pytest.raises(ValueError, match=msg):
            rng2.get_indexer(rng, method="pad")
|  | ||||
|  | ||||
class TestMaybeCastSliceBound:
    """Tests for DatetimeIndex._maybe_cast_slice_bound."""

    def test_maybe_cast_slice_bounds_empty(self):
        # GH#14354: casting slice bounds must also work on an empty index.
        no_points = date_range(freq="1h", periods=0, end="2015")

        upper = no_points._maybe_cast_slice_bound("2015-01-02", "right")
        assert upper == Timestamp("2015-01-02 23:59:59.999999999")

        lower = no_points._maybe_cast_slice_bound("2015-01-02", "left")
        assert lower == Timestamp("2015-01-02 00:00:00")

    def test_maybe_cast_slice_duplicate_monotonic(self):
        # https://github.com/pandas-dev/pandas/issues/16515
        dupes = DatetimeIndex(["2017", "2017"])
        bound = dupes._maybe_cast_slice_bound("2017-01-01", "left")
        assert bound == Timestamp("2017-01-01")
|  | ||||
|  | ||||
class TestGetSliceBounds:
    """Tests for get_slice_bound/slice_locs with date, datetime, and Timestamp keys."""

    @pytest.mark.parametrize("box", [date, datetime, Timestamp])
    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_datetime_within(
        self, box, side, expected, tz_aware_fixture
    ):
        # GH 35690
        tz = tz_aware_fixture
        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
        key = box(year=2000, month=1, day=7)

        if tz is not None:
            # A naive key against a tz-aware index must raise.
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.get_slice_bound(key, side=side)
        else:
            result = index.get_slice_bound(key, side=side)
            assert result == expected

    @pytest.mark.parametrize("box", [datetime, Timestamp])
    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)])
    def test_get_slice_bounds_datetime_outside(
        self, box, side, year, expected, tz_aware_fixture
    ):
        # GH 35690: keys entirely before/after the index yield the extreme
        #  bound positions (0 and 30 for this 30-element range).
        tz = tz_aware_fixture
        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
        key = box(year=year, month=1, day=7)

        if tz is not None:
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.get_slice_bound(key, side=side)
        else:
            result = index.get_slice_bound(key, side=side)
            assert result == expected

    @pytest.mark.parametrize("box", [datetime, Timestamp])
    def test_slice_datetime_locs(self, box, tz_aware_fixture):
        # GH 34077
        tz = tz_aware_fixture
        index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz)
        key = box(2010, 1, 1)

        if tz is not None:
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.slice_locs(key, box(2010, 1, 2))
        else:
            result = index.slice_locs(key, box(2010, 1, 2))
            expected = (0, 1)
            assert result == expected
|  | ||||
|  | ||||
class TestIndexerBetweenTime:
    """Tests for DatetimeIndex.indexer_between_time."""

    def test_indexer_between_time(self):
        # GH#11818: datetime arguments (rather than time objects) must raise.
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
        with pytest.raises(ValueError, match=msg):
            rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

    @pytest.mark.parametrize("unit", ["us", "ms", "s"])
    def test_indexer_between_time_non_nano(self, unit):
        # For simple cases like this, the non-nano indexer_between_time
        #  should match the nano result

        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        arr_nano = rng._data._ndarray

        # Same values, coarser datetime64 resolution.
        arr = arr_nano.astype(f"M8[{unit}]")

        dta = type(rng._data)._simple_new(arr, dtype=arr.dtype)
        dti = DatetimeIndex(dta)
        assert dti.dtype == arr.dtype

        tic = time(1, 25)
        toc = time(2, 29)

        result = dti.indexer_between_time(tic, toc)
        expected = rng.indexer_between_time(tic, toc)
        tm.assert_numpy_array_equal(result, expected)

        # case with non-zero micros in arguments
        tic = time(1, 25, 0, 45678)
        toc = time(2, 29, 0, 1234)

        result = dti.indexer_between_time(tic, toc)
        expected = rng.indexer_between_time(tic, toc)
        tm.assert_numpy_array_equal(result, expected)
| @ -0,0 +1,76 @@ | ||||
| import dateutil.tz | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     date_range, | ||||
|     to_datetime, | ||||
| ) | ||||
| from pandas.core.arrays import datetimes | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexIteration:
    """Tests that iterating a DatetimeIndex yields faithful Timestamps."""

    @pytest.mark.parametrize(
        "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)]
    )
    def test_iteration_preserves_nanoseconds(self, tz):
        # GH#19603: nanosecond precision must survive iteration.
        index = DatetimeIndex(
            ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz
        )
        for i, ts in enumerate(index):
            assert ts == index[i]  # pylint: disable=unnecessary-list-index-lookup

    def test_iter_readonly(self):
        # GH#28055 ints_to_pydatetime with readonly array
        arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
        arr.setflags(write=False)
        dti = to_datetime(arr)
        # iterating must not raise on the read-only backing array
        list(dti)

    def test_iteration_preserves_tz(self):
        # see GH#8890
        index = date_range("2012-01-01", periods=3, freq="h", tz="US/Eastern")

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result == expected

    def test_iteration_preserves_tz2(self):
        # Same as above but with a dateutil fixed-offset timezone.
        index = date_range(
            "2012-01-01", periods=3, freq="h", tz=dateutil.tz.tzoffset(None, -28800)
        )

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result._repr_base == expected._repr_base
            assert result == expected

    def test_iteration_preserves_tz3(self):
        # GH#9100
        index = DatetimeIndex(
            ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
        )
        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result._repr_base == expected._repr_base
            assert result == expected

    @pytest.mark.parametrize("offset", [-5, -1, 0, 1])
    def test_iteration_over_chunksize(self, offset, monkeypatch):
        # GH#21012: iteration converts in chunks; exercise index lengths
        #  around the chunk boundary to catch off-by-one errors.
        chunksize = 5
        index = date_range(
            "2000-01-01 00:00:00", periods=chunksize - offset, freq="min"
        )
        num = 0
        with monkeypatch.context() as m:
            m.setattr(datetimes, "_ITER_CHUNKSIZE", chunksize)
            for stamp in index:
                assert index[num] == stamp
                num += 1
        assert num == len(index)
| @ -0,0 +1,153 @@ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timezone, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.tseries.offsets import ( | ||||
|     BDay, | ||||
|     BMonthEnd, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestJoin:
    """Tests for DatetimeIndex.join across dtypes, timezones, and freqs."""

    def test_does_not_convert_mixed_integer(self):
        # Joining datetime columns with an integer index yields object dtype,
        #  not a lossy datetime conversion.
        df = DataFrame(np.ones((3, 2)), columns=date_range("2020-01-01", periods=2))
        cols = df.columns.join(df.index, how="outer")
        joined = cols.join(df.columns)
        assert cols.dtype == np.dtype("O")
        assert cols.dtype == joined.dtype
        tm.assert_numpy_array_equal(cols.values, joined.values)

    def test_join_self(self, join_type):
        # Joining an index with itself returns the same object, not a copy.
        index = date_range("1/1/2000", periods=10)
        joined = index.join(index, how=join_type)
        assert index is joined

    def test_join_with_period_index(self, join_type):
        df = DataFrame(
            np.ones((10, 2)),
            index=date_range("2020-01-01", periods=10),
            columns=period_range("2020-01-01", periods=2),
        )
        s = df.iloc[:5, 0]

        # Joining PeriodIndex columns with a DatetimeIndex behaves the same
        #  as joining the object-dtype version of those columns.
        expected = df.columns.astype("O").join(s.index, how=join_type)
        result = df.columns.join(s.index, how=join_type)
        tm.assert_index_equal(expected, result)

    def test_join_object_index(self):
        # An outer join with a string index keeps Timestamp elements intact.
        rng = date_range("1/1/2000", periods=10)
        idx = Index(["a", "b", "c", "d"])

        result = rng.join(idx, how="outer")
        assert isinstance(result[0], Timestamp)

    def test_join_utc_convert(self, join_type):
        rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")

        left = rng.tz_convert("US/Eastern")
        right = rng.tz_convert("Europe/Berlin")

        # Same tz on both sides: tz is preserved.
        result = left.join(left[:-5], how=join_type)
        assert isinstance(result, DatetimeIndex)
        assert result.tz == left.tz

        # Different tzs: the result is in UTC.
        result = left.join(right[:-5], how=join_type)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is timezone.utc

    def test_datetimeindex_union_join_empty(self, sort, using_infer_string):
        dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
        empty = Index([])

        result = dti.union(empty, sort=sort)
        if using_infer_string:
            assert isinstance(result, DatetimeIndex)
            tm.assert_index_equal(result, dti)
        else:
            expected = dti.astype("O")
            tm.assert_index_equal(result, expected)

        # join with an empty index returns the DatetimeIndex unchanged
        result = dti.join(empty)
        assert isinstance(result, DatetimeIndex)
        tm.assert_index_equal(result, dti)

    def test_join_nonunique(self):
        # Outer join of non-unique indexes still yields a monotonic result.
        idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
        idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
        rs = idx1.join(idx2, how="outer")
        assert rs.is_monotonic_increasing

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_outer_join(self, freq):
        # should just behave as union
        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
        rng = date_range(start=start, end=end, freq=freq)

        # overlapping
        left = rng[:10]
        right = rng[5:10]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)
        assert the_join.freq is None

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)

        # overlapping, but different offset
        other = date_range(start, end, freq=BMonthEnd())

        the_join = rng.join(other, how="outer")
        assert isinstance(the_join, DatetimeIndex)
        assert the_join.freq is None

    def test_naive_aware_conflicts(self):
        # Mixing tz-naive and tz-aware indexes in a join must raise.
        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
        naive = date_range(start, end, freq=BDay(), tz=None)
        aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")

        msg = "tz-naive.*tz-aware"
        with pytest.raises(TypeError, match=msg):
            naive.join(aware)

        with pytest.raises(TypeError, match=msg):
            aware.join(naive)

    @pytest.mark.parametrize("tz", [None, "US/Pacific"])
    def test_join_preserves_freq(self, tz):
        # GH#32157
        dti = date_range("2016-01-01", periods=10, tz=tz)
        # Contiguous halves: freq is preserved.
        result = dti[:5].join(dti[5:], how="outer")
        assert result.freq == dti.freq
        tm.assert_index_equal(result, dti)

        # A gap (position 5 missing) drops the freq.
        result = dti[:5].join(dti[6:], how="outer")
        assert result.freq is None
        expected = dti.delete(5)
        tm.assert_index_equal(result, expected)
| @ -0,0 +1,13 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import date_range | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestSplit:
    """Tests for np.split applied to a DatetimeIndex."""

    def test_split_non_utc(self):
        # GH#14042: splitting a tz-aware (non-UTC) index must not raise, and
        #  the resulting piece carries no freq.
        dti = date_range("2016-01-01 00:00:00+0200", freq="s", periods=10)
        (piece,) = np.split(dti, indices_or_sections=[])
        tm.assert_index_equal(piece, dti._with_freq(None))
| @ -0,0 +1,56 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     bdate_range, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestDatetimeIndexOps: | ||||
|     def test_infer_freq(self, freq_sample): | ||||
|         # GH 11018 | ||||
|         idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) | ||||
|         result = DatetimeIndex(idx.asi8, freq="infer") | ||||
|         tm.assert_index_equal(idx, result) | ||||
|         assert result.freq == freq_sample | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("freq", ["B", "C"])
class TestBusinessDatetimeIndex:
    """Sanity checks shared by business-day ("B") and custom ("C") freqs."""

    @pytest.fixture
    def rng(self, freq):
        # One year of business days starting 2009-01-01.
        return bdate_range(datetime(2009, 1, 1), datetime(2010, 1, 1), freq=freq)

    def test_comparison(self, rng):
        pivot = rng[10]

        mask = rng > pivot
        assert mask[11]
        assert not mask[9]

    def test_copy(self, rng):
        tm.assert_index_equal(rng.copy(), rng)

    def test_identical(self, rng):
        first = rng.copy()
        second = rng.copy()
        assert first.identical(second)

        # A differing name leaves the indexes equal but not identical.
        first = first.rename("foo")
        assert first.equals(second)
        assert not first.identical(second)
        second = second.rename("foo")
        assert first.identical(second)

        # Rewrapping the values in a plain Index drops the freq: equal only.
        rewrapped = Index(second.values)
        assert first.equals(rewrapped)
        assert not first.identical(rewrapped)
| @ -0,0 +1,466 @@ | ||||
| """ test partial slicing on Series/Frame """ | ||||
|  | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSlicing: | ||||
|     def test_string_index_series_name_converted(self): | ||||
|         # GH#1644 | ||||
|         df = DataFrame( | ||||
|             np.random.default_rng(2).standard_normal((10, 4)), | ||||
|             index=date_range("1/1/2000", periods=10), | ||||
|         ) | ||||
|  | ||||
|         result = df.loc["1/3/2000"] | ||||
|         assert result.name == df.index[2] | ||||
|  | ||||
|         result = df.T["1/3/2000"] | ||||
|         assert result.name == df.index[2] | ||||
|  | ||||
|     def test_stringified_slice_with_tz(self): | ||||
|         # GH#2658 | ||||
|         start = "2013-01-07" | ||||
|         idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern") | ||||
|         df = DataFrame(np.arange(10), index=idx) | ||||
|         df["2013-01-14 23:44:34.437768-05:00":]  # no exception here | ||||
|  | ||||
    def test_return_type_doesnt_depend_on_monotonicity(self):
        # GH#24892 we get Series back regardless of whether our DTI is monotonic
        dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
        ser = Series(range(3), index=dti)

        # non-monotonic index
        ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])

        # key with resolution strictly lower than "min", so the lookup is a
        #  partial-string slice rather than an exact match
        key = "2015-5-14 00"

        # monotonic increasing index
        result = ser.loc[key]
        expected = ser.iloc[1:]
        tm.assert_series_equal(result, expected)

        # monotonic decreasing index
        result = ser.iloc[::-1].loc[key]
        expected = ser.iloc[::-1][:-1]
        tm.assert_series_equal(result, expected)

        # non-monotonic index
        result2 = ser2.loc[key]
        expected2 = ser2.iloc[::2]
        tm.assert_series_equal(result2, expected2)
|  | ||||
    def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self):
        # GH#24892 we get Series back regardless of whether our DTI is monotonic
        dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
        ser = Series(range(3), index=dti)

        # non-monotonic index
        ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])

        # key with resolution strictly *higher* than "min", so the lookup is
        #  an exact match returning a scalar, not a slice
        key = "2015-5-14 00:00:00"

        # monotonic increasing index
        result = ser.loc[key]
        assert result == 1

        # monotonic decreasing index
        result = ser.iloc[::-1].loc[key]
        assert result == 1

        # non-monotonic index
        result2 = ser2.loc[key]
        assert result2 == 0
|  | ||||
    def test_monotone_DTI_indexing_bug(self):
        # GH 19362
        # Testing accessing the first element in a monotonic descending
        # partial string indexing.

        df = DataFrame(list(range(5)))
        date_list = [
            "2018-01-02",
            "2017-02-10",
            "2016-03-10",
            "2015-03-15",
            "2014-03-16",
        ]
        date_index = DatetimeIndex(date_list)
        df["date"] = date_index
        expected = DataFrame({0: list(range(5)), "date": date_index})
        tm.assert_frame_equal(df, expected)

        # We get a slice because df.index's resolution is hourly and we
        #  are slicing with a daily-resolution string.  If both were daily,
        #  we would get a single item back
        dti = date_range("20170101 01:00:00", periods=3)
        df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1])

        expected = DataFrame({"A": 1}, index=dti[-1:][::-1])
        result = df.loc["2017-01-03"]
        tm.assert_frame_equal(result, expected)

        # reversing to monotonic increasing order selects the same rows
        result2 = df.iloc[::-1].loc["2017-01-03"]
        expected2 = expected.iloc[::-1]
        tm.assert_frame_equal(result2, expected2)
|  | ||||
|     def test_slice_year(self): | ||||
|         dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500) | ||||
|  | ||||
|         s = Series(np.arange(len(dti)), index=dti) | ||||
|         result = s["2005"] | ||||
|         expected = s[s.index.year == 2005] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti) | ||||
|         result = df.loc["2005"] | ||||
|         expected = df[df.index.year == 2005] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
    @pytest.mark.parametrize(
        "partial_dtime",
        [
            "2019",
            "2019Q4",
            "Dec 2019",
            "2019-12-31",
            "2019-12-31 23",
            "2019-12-31 23:59",
        ],
    )
    def test_slice_end_of_period_resolution(self, partial_dtime):
        # GH#31064: every partial string above resolves to the same period
        #  end (the last instant of 2019), so each selects the five entries
        #  that fall before midnight.
        dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s")

        ser = Series(range(10), index=dti)
        result = ser[partial_dtime]
        expected = ser.iloc[:5]
        tm.assert_series_equal(result, expected)
|  | ||||
|     def test_slice_quarter(self): | ||||
|         dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500) | ||||
|  | ||||
|         s = Series(np.arange(len(dti)), index=dti) | ||||
|         assert len(s["2001Q1"]) == 90 | ||||
|  | ||||
|         df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti) | ||||
|         assert len(df.loc["1Q01"]) == 90 | ||||
|  | ||||
|     def test_slice_month(self): | ||||
|         dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500) | ||||
|         s = Series(np.arange(len(dti)), index=dti) | ||||
|         assert len(s["2005-11"]) == 30 | ||||
|  | ||||
|         df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti) | ||||
|         assert len(df.loc["2005-11"]) == 30 | ||||
|  | ||||
|         tm.assert_series_equal(s["2005-11"], s["11-2005"]) | ||||
|  | ||||
    def test_partial_slice(self):
        # Month-resolution bounds slice inclusively through month ends.
        rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
        s = Series(np.arange(len(rng)), index=rng)

        result = s["2005-05":"2006-02"]
        expected = s["20050501":"20060228"]
        tm.assert_series_equal(result, expected)

        result = s["2005-05":]
        expected = s["20050501":]
        tm.assert_series_equal(result, expected)

        result = s[:"2006-02"]
        expected = s[:"20060228"]
        tm.assert_series_equal(result, expected)

        # a day-resolution string on a daily index is an exact match ...
        result = s["2005-1-1"]
        assert result == s.iloc[0]

        # ... and an absent exact key raises KeyError
        with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
            s["2004-12-31"]
|  | ||||
    def test_partial_slice_daily(self):
        # A day-resolution string on an hourly index selects the whole day.
        rng = date_range(freq="h", start=datetime(2005, 1, 31), periods=500)
        s = Series(np.arange(len(rng)), index=rng)

        result = s["2005-1-31"]
        tm.assert_series_equal(result, s.iloc[:24])

        # a key before the range raises KeyError
        with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
            s["2004-12-31 00"]
|  | ||||
    def test_partial_slice_hourly(self):
        # Day- and hour-resolution strings on a minutely index act as slices;
        #  a minute-resolution string is an exact match.
        rng = date_range(freq="min", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
        s = Series(np.arange(len(rng)), index=rng)

        # 20:00-23:59 of Jan 1 -> 4 hours of minutes
        result = s["2005-1-1"]
        tm.assert_series_equal(result, s.iloc[: 60 * 4])

        result = s["2005-1-1 20"]
        tm.assert_series_equal(result, s.iloc[:60])

        assert s["2005-1-1 20:00"] == s.iloc[0]
        with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
            s["2004-12-31 00:15"]
|  | ||||
    def test_partial_slice_minutely(self):
        # Minute-resolution strings on a secondly index act as slices;
        #  a full timestamp is an exact match.
        rng = date_range(freq="s", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
        s = Series(np.arange(len(rng)), index=rng)

        result = s["2005-1-1 23:59"]
        tm.assert_series_equal(result, s.iloc[:60])

        # only the final minute of Jan 1 falls in the range
        result = s["2005-1-1"]
        tm.assert_series_equal(result, s.iloc[:60])

        assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
        with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
            s["2004-12-31 00:00:00"]
|  | ||||
    def test_partial_slice_second_precision(self):
        # Microsecond-frequency index straddling a second boundary.
        rng = date_range(
            start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
            periods=20,
            freq="us",
        )
        s = Series(np.arange(20), rng)

        # minute/second-resolution strings slice to either side of the boundary
        tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
        tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])

        tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
        tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])

        # exact Timestamp lookup works; a second-resolution string that
        #  covers no entries raises KeyError
        assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
        with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
            s["2005-1-1 00:00:00"]
|  | ||||
    def test_partial_slicing_dataframe(self):
        """Check slice-vs-exact-match semantics of partial string indexing.

        For each index resolution ('day' through 'second'), a string key
        that is *less* precise than the index acts as a slice on a Series,
        while a string that is equally or *more* precise acts as an exact
        match (scalar for a Series, KeyError for DataFrame column lookup).
        """
        # GH14856
        # Test various combinations of string slicing resolution vs.
        # index resolution
        # - If string resolution is less precise than index resolution,
        # string is considered a slice
        # - If string resolution is equal to or more precise than index
        # resolution, string is considered an exact match
        # formats[i] renders a timestamp at the precision of resolutions[i]
        formats = [
            "%Y",
            "%Y-%m",
            "%Y-%m-%d",
            "%Y-%m-%d %H",
            "%Y-%m-%d %H:%M",
            "%Y-%m-%d %H:%M:%S",
        ]
        resolutions = ["year", "month", "day", "hour", "minute", "second"]
        for rnum, resolution in enumerate(resolutions[2:], 2):
            # we check only 'day', 'hour', 'minute' and 'second'
            # three points spaced one `resolution` unit apart, centered on
            # 2012-01-01, so the index's inferred resolution is `resolution`
            unit = Timedelta("1 " + resolution)
            middate = datetime(2012, 1, 1, 0, 0, 0)
            index = DatetimeIndex([middate - unit, middate, middate + unit])
            values = [1, 2, 3]
            df = DataFrame({"a": values}, index, dtype=np.int64)
            assert df.index.resolution == resolution

            # Timestamp with the same resolution as index
            # Should be exact match for Series (return scalar)
            # and raise KeyError for Frame
            for timestamp, expected in zip(index, values):
                ts_string = timestamp.strftime(formats[rnum])
                # make ts_string as precise as index
                result = df["a"][ts_string]
                assert isinstance(result, np.int64)
                assert result == expected
                msg = rf"^'{ts_string}'$"
                with pytest.raises(KeyError, match=msg):
                    df[ts_string]

            # Timestamp with resolution less precise than index
            for fmt in formats[:rnum]:
                # element 0 sits in the earlier period, element 1 in the later
                for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
                    ts_string = index[element].strftime(fmt)

                    # Series should return slice
                    result = df["a"][ts_string]
                    expected = df["a"][theslice]
                    tm.assert_series_equal(result, expected)

                    # pre-2.0 df[ts_string] was overloaded to interpret this
                    #  as slicing along index
                    with pytest.raises(KeyError, match=ts_string):
                        df[ts_string]

            # Timestamp with resolution more precise than index
            # Compatible with existing key
            # Should return scalar for Series
            # and raise KeyError for Frame
            for fmt in formats[rnum + 1 :]:
                ts_string = index[1].strftime(fmt)
                result = df["a"][ts_string]
                assert isinstance(result, np.int64)
                assert result == 2
                msg = rf"^'{ts_string}'$"
                with pytest.raises(KeyError, match=msg):
                    df[ts_string]

            # Not compatible with existing key
            # Should raise KeyError
            for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
                # shift off-grid by one sub-resolution unit so no key matches
                ts = index[1] + Timedelta("1 " + res)
                ts_string = ts.strftime(fmt)
                msg = rf"^'{ts_string}'$"
                with pytest.raises(KeyError, match=msg):
                    df["a"][ts_string]
                with pytest.raises(KeyError, match=msg):
                    df[ts_string]
|  | ||||
|     def test_partial_slicing_with_multiindex(self): | ||||
|         # GH 4758 | ||||
|         # partial string indexing with a multi-index buggy | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], | ||||
|                 "TICKER": ["ABC", "MNP", "XYZ", "XYZ"], | ||||
|                 "val": [1, 2, 3, 4], | ||||
|             }, | ||||
|             index=date_range("2013-06-19 09:30:00", periods=4, freq="5min"), | ||||
|         ) | ||||
|         df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True) | ||||
|  | ||||
|         expected = DataFrame( | ||||
|             [[1]], index=Index(["ABC"], name="TICKER"), columns=["val"] | ||||
|         ) | ||||
|         result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = df_multi.loc[ | ||||
|             (Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC") | ||||
|         ] | ||||
|         result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # partial string indexing on first level, scalar indexing on the other two | ||||
|         result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")] | ||||
|         expected = df_multi.iloc[:1].droplevel([1, 2]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_partial_slicing_with_multiindex_series(self): | ||||
|         # GH 4294 | ||||
|         # partial slice on a series mi | ||||
|         ser = Series( | ||||
|             range(250), | ||||
|             index=MultiIndex.from_product( | ||||
|                 [date_range("2000-1-1", periods=50), range(5)] | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         s2 = ser[:-1].copy() | ||||
|         expected = s2["2000-1-4"] | ||||
|         result = s2[Timestamp("2000-1-4")] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser[Timestamp("2000-1-4")] | ||||
|         expected = ser["2000-1-4"] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         df2 = DataFrame(ser) | ||||
|         expected = df2.xs("2000-1-4") | ||||
|         result = df2.loc[Timestamp("2000-1-4")] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_partial_slice_requires_monotonicity(self): | ||||
|         # Disallowed since 2.0 (GH 37819) | ||||
|         ser = Series(np.arange(10), date_range("2014-01-01", periods=10)) | ||||
|  | ||||
|         nonmonotonic = ser.iloc[[3, 5, 4]] | ||||
|         timestamp = Timestamp("2014-01-10") | ||||
|         with pytest.raises( | ||||
|             KeyError, match="Value based partial slicing on non-monotonic" | ||||
|         ): | ||||
|             nonmonotonic["2014-01-10":] | ||||
|  | ||||
|         with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): | ||||
|             nonmonotonic[timestamp:] | ||||
|  | ||||
|         with pytest.raises( | ||||
|             KeyError, match="Value based partial slicing on non-monotonic" | ||||
|         ): | ||||
|             nonmonotonic.loc["2014-01-10":] | ||||
|  | ||||
|         with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): | ||||
|             nonmonotonic.loc[timestamp:] | ||||
|  | ||||
|     def test_loc_datetime_length_one(self): | ||||
|         # GH16071 | ||||
|         df = DataFrame( | ||||
|             columns=["1"], | ||||
|             index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"), | ||||
|         ) | ||||
|         result = df.loc[datetime(2016, 10, 1) :] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         result = df.loc["2016-10-01T00:00:00":] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
    @pytest.mark.parametrize(
        "start",
        [
            "2018-12-02 21:50:00+00:00",
            Timestamp("2018-12-02 21:50:00+00:00"),
            Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
        ],
    )
    @pytest.mark.parametrize(
        "end",
        [
            "2018-12-02 21:52:00+00:00",
            Timestamp("2018-12-02 21:52:00+00:00"),
            Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
        ],
    )
    def test_getitem_with_datestring_with_UTC_offset(self, start, end):
        """Slicing a tz-aware frame with UTC-offset bounds (GH 24076).

        ``start``/``end`` are the same instants as the index expressed in
        UTC (+00:00) rather than the index's -07:00 offset, so matching
        must be timezone-aware.
        """
        # GH 24076
        # NOTE(review): start == end below, so ``idx`` holds a single
        # timestamp and ``df.iloc[0:3, :]`` is just that one row.  The
        # iloc[0:3] expectation suggests ``end`` was meant to be
        # "2018-12-02 14:52:00-07:00" (three 1-minute points) -- confirm
        # against upstream before changing.
        idx = date_range(
            start="2018-12-02 14:50:00-07:00",
            end="2018-12-02 14:50:00-07:00",
            freq="1min",
        )
        df = DataFrame(1, index=idx, columns=["A"])
        result = df[start:end]
        expected = df.iloc[0:3, :]
        tm.assert_frame_equal(result, expected)

        # GH 16785
        # mixing a UTC-offset bound with a no-offset bound must raise
        start = str(start)
        end = str(end)
        with pytest.raises(ValueError, match="Both dates must"):
            df[start : end[:-4] + "1:00"]

        # slicing a tz-naive index with tz-aware strings must raise
        with pytest.raises(ValueError, match="The index must be timezone"):
            df = df.tz_localize(None)
            df[start:end]
|  | ||||
|     def test_slice_reduce_to_series(self): | ||||
|         # GH 27516 | ||||
|         df = DataFrame( | ||||
|             {"A": range(24)}, index=date_range("2000", periods=24, freq="ME") | ||||
|         ) | ||||
|         expected = Series( | ||||
|             range(12), index=date_range("2000", periods=12, freq="ME"), name="A" | ||||
|         ) | ||||
|         result = df.loc["2000", "A"] | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,45 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NaT, | ||||
|     date_range, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestPickle:
    """Pickle round-trips of DatetimeIndex preserve values, NaT, tz and freq."""

    def test_pickle(self):
        # GH#4606: NaT survives a pickle round-trip
        original = to_datetime(["2013-01-01", NaT, "2014-01-06"])
        restored = tm.round_trip_pickle(original)
        assert restored[0] == original[0]
        assert restored[1] is NaT
        assert restored[2] == original[2]

    def test_pickle_dont_infer_freq(self):
        # GH#11002: unpickling must not infer a frequency
        original = date_range("1750-1-1", "2050-1-1", freq="7D")
        restored = tm.round_trip_pickle(original)
        tm.assert_index_equal(original, restored)

    def test_pickle_after_set_freq(self):
        # round-trip an index whose freq was explicitly dropped
        original = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
        original = original._with_freq(None)
        restored = tm.round_trip_pickle(original)
        tm.assert_index_equal(restored, original)

    def test_roundtrip_pickle_with_tz(self):
        # GH#8367: the timezone survives the round trip
        original = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
        restored = tm.round_trip_pickle(original)
        tm.assert_index_equal(original, restored)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_pickle_unpickle(self, freq):
        # business-day frequencies are preserved across pickling
        original = date_range("2009-01-01", "2010-01-01", freq=freq)
        restored = tm.round_trip_pickle(original)
        assert restored.freq == freq
| @ -0,0 +1,56 @@ | ||||
| from datetime import timedelta | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexReindex:
    """Reindexing behavior specific to DatetimeIndex."""

    def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
        # GH#7774: reindexing against an empty target keeps the timezone
        idx = date_range("2013-01-01", periods=3, tz="US/Eastern")
        for empty_target in ([], np.array([])):
            reindexed, _ = idx.reindex(empty_target)
            assert str(reindexed.tz) == "US/Eastern"

    def test_reindex_with_same_tz_nearest(self):
        # GH#32740: nearest-reindex between two same-tz ranges with a
        # tolerance that only the shared endpoints satisfy
        source = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc")
        target = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc")
        new_index, indexer = source.reindex(
            target, method="nearest", tolerance=timedelta(seconds=20)
        )
        expected_timestamps = [
            "2010-01-01 00:00:00",
            "2010-01-01 01:05:27.272727272",
            "2010-01-01 02:10:54.545454545",
            "2010-01-01 03:16:21.818181818",
            "2010-01-01 04:21:49.090909090",
            "2010-01-01 05:27:16.363636363",
            "2010-01-01 06:32:43.636363636",
            "2010-01-01 07:38:10.909090909",
            "2010-01-01 08:43:38.181818181",
            "2010-01-01 09:49:05.454545454",
            "2010-01-01 10:54:32.727272727",
            "2010-01-01 12:00:00",
            "2010-01-01 13:05:27.272727272",
            "2010-01-01 14:10:54.545454545",
            "2010-01-01 15:16:21.818181818",
            "2010-01-01 16:21:49.090909090",
            "2010-01-01 17:27:16.363636363",
            "2010-01-01 18:32:43.636363636",
            "2010-01-01 19:38:10.909090909",
            "2010-01-01 20:43:38.181818181",
            "2010-01-01 21:49:05.454545454",
            "2010-01-01 22:54:32.727272727",
            "2010-01-02 00:00:00",
        ]
        expected_index = DatetimeIndex(
            expected_timestamps, dtype="datetime64[ns, UTC]", freq=None
        )
        # only the two endpoints are within tolerance of a source label
        expected_indexer = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp"))
        tm.assert_index_equal(new_index, expected_index)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
| @ -0,0 +1,329 @@ | ||||
| """ | ||||
| Tests for DatetimeIndex methods behaving like their Timestamp counterparts | ||||
| """ | ||||
|  | ||||
| import calendar | ||||
| from datetime import ( | ||||
|     date, | ||||
|     datetime, | ||||
|     time, | ||||
| ) | ||||
| import locale | ||||
| import unicodedata | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import timezones | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     NaT, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     offsets, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import DatetimeArray | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexOps:
    """DatetimeIndex field accessors vs. their Timestamp counterparts."""

    def test_dti_no_millisecond_field(self):
        # no 'millisecond' accessor, on the class or on instances
        msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex.millisecond

        msg = "'DatetimeIndex' object has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex([]).millisecond

    def test_dti_time(self):
        # .time matches element-wise Timestamp.time()
        rng = date_range("1/1/2000", freq="12min", periods=10)
        result = Index(rng).time
        expected = [t.time() for t in rng]
        assert (result == expected).all()

    def test_dti_date(self):
        # .date matches element-wise Timestamp.date()
        rng = date_range("1/1/2000", freq="12h", periods=10)
        result = Index(rng).date
        expected = [t.date() for t in rng]
        assert (result == expected).all()

    @pytest.mark.parametrize(
        "dtype",
        [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
    )
    def test_dti_date2(self, dtype):
        # Regression test for GH#21230: .date works with NaT, tz-naive or aware
        expected = np.array([date(2018, 6, 4), NaT])

        index = DatetimeIndex(["2018-06-04 10:00:00", NaT], dtype=dtype)
        result = index.date

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
    )
    def test_dti_time2(self, dtype):
        # Regression test for GH#21267: .time works with NaT, tz-naive or aware
        expected = np.array([time(10, 20, 30), NaT])

        index = DatetimeIndex(["2018-06-04 10:20:30", NaT], dtype=dtype)
        result = index.time

        tm.assert_numpy_array_equal(result, expected)

    def test_dti_timetz(self, tz_naive_fixture):
        # GH#21358: .timetz keeps the tzinfo on each time object
        tz = timezones.maybe_get_tz(tz_naive_fixture)

        expected = np.array([time(10, 20, 30, tzinfo=tz), NaT])

        index = DatetimeIndex(["2018-06-04 10:20:30", NaT], tz=tz)
        result = index.timetz

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "field",
        [
            "dayofweek",
            "day_of_week",
            "dayofyear",
            "day_of_year",
            "quarter",
            "days_in_month",
            "is_month_start",
            "is_month_end",
            "is_quarter_start",
            "is_quarter_end",
            "is_year_start",
            "is_year_end",
        ],
    )
    def test_dti_timestamp_fields(self, field):
        # extra fields from DatetimeIndex like quarter and week: the
        # index-level accessor agrees with the scalar Timestamp accessor
        idx = date_range("2020-01-01", periods=10)
        expected = getattr(idx, field)[-1]

        result = getattr(Timestamp(idx[-1]), field)
        assert result == expected

    def test_dti_nanosecond(self):
        # epoch nanoseconds 0..9 -> .nanosecond 0..9 (int32)
        dti = DatetimeIndex(np.arange(10))
        expected = Index(np.arange(10, dtype=np.int32))

        tm.assert_index_equal(dti.nanosecond, expected)

    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
    def test_dti_hour_tzaware(self, prefix):
        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
        rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern")
        assert (rng.hour == 0).all()

        # a more unusual time zone, GH#1946
        dr = date_range(
            "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan"
        )

        expected = Index(np.arange(10, dtype=np.int32))
        tm.assert_index_equal(dr.hour, expected)

    # GH#12806
    # error: Unsupported operand types for + ("List[None]" and "List[str]")
    @pytest.mark.parametrize(
        "time_locale", [None] + tm.get_locales()  # type: ignore[operator]
    )
    def test_day_name_month_name(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day-name and month_name should
            # return the english attributes
            expected_days = [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            # read the locale's own names from the calendar module
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        # GH#11128
        dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        # 1998-01-05 (index 4) is a Monday, so days 4..10 cover Mon..Sun
        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
            name = name.capitalize()
            assert dti.day_name(locale=time_locale)[day] == name
            assert dti.day_name(locale=None)[day] == eng_name
            ts = Timestamp(datetime(2016, 4, day))
            assert ts.day_name(locale=time_locale) == name
        # NaT maps to NaN for both the index and the scalar accessor
        dti = dti.append(DatetimeIndex([NaT]))
        assert np.isnan(dti.day_name(locale=time_locale)[-1])
        ts = Timestamp(NaT)
        assert np.isnan(ts.day_name(locale=time_locale))

        # GH#12805
        dti = date_range(freq="ME", start="2012", end="2013")
        result = dti.month_name(locale=time_locale)
        expected = Index([month.capitalize() for month in expected_months])

        # work around different normalization schemes GH#22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_index_equal(result, expected)

        for item, expected in zip(dti, expected_months):
            result = item.month_name(locale=time_locale)
            expected = expected.capitalize()

            result = unicodedata.normalize("NFD", result)
            # BUG FIX: previously normalized ``result`` twice
            # (``expected = unicodedata.normalize("NFD", result)``), which
            # made the assertion below compare result with itself and thus
            # never fail.  Normalize ``expected`` instead.
            expected = unicodedata.normalize("NFD", expected)

            assert result == expected
        dti = dti.append(DatetimeIndex([NaT]))
        assert np.isnan(dti.month_name(locale=time_locale)[-1])

    def test_dti_week(self):
        # GH#6538: Check that DatetimeIndex and its TimeStamp elements
        # return the same weekofyear accessor close to new year w/ tz
        dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
        dates = DatetimeIndex(dates, tz="Europe/Brussels")
        expected = [52, 1, 1]
        assert dates.isocalendar().week.tolist() == expected
        assert [d.weekofyear for d in dates] == expected

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_dti_fields(self, tz):
        # GH#13303: spot-check every field accessor over one year of days
        dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365, tz=tz)
        assert dti.year[0] == 1998
        assert dti.month[0] == 1
        assert dti.day[0] == 1
        assert dti.hour[0] == 0
        assert dti.minute[0] == 0
        assert dti.second[0] == 0
        assert dti.microsecond[0] == 0
        assert dti.dayofweek[0] == 3

        assert dti.dayofyear[0] == 1
        assert dti.dayofyear[120] == 121

        assert dti.isocalendar().week.iloc[0] == 1
        assert dti.isocalendar().week.iloc[120] == 18

        assert dti.quarter[0] == 1
        assert dti.quarter[120] == 2

        assert dti.days_in_month[0] == 31
        assert dti.days_in_month[90] == 30

        assert dti.is_month_start[0]
        assert not dti.is_month_start[1]
        assert dti.is_month_start[31]
        assert dti.is_quarter_start[0]
        assert dti.is_quarter_start[90]
        assert dti.is_year_start[0]
        assert not dti.is_year_start[364]
        assert not dti.is_month_end[0]
        assert dti.is_month_end[30]
        assert not dti.is_month_end[31]
        assert dti.is_month_end[364]
        assert not dti.is_quarter_end[0]
        assert not dti.is_quarter_end[30]
        assert dti.is_quarter_end[89]
        assert dti.is_quarter_end[364]
        assert not dti.is_year_end[0]
        assert dti.is_year_end[364]

        # every accessor preserves the index length
        assert len(dti.year) == 365
        assert len(dti.month) == 365
        assert len(dti.day) == 365
        assert len(dti.hour) == 365
        assert len(dti.minute) == 365
        assert len(dti.second) == 365
        assert len(dti.microsecond) == 365
        assert len(dti.dayofweek) == 365
        assert len(dti.dayofyear) == 365
        assert len(dti.isocalendar()) == 365
        assert len(dti.quarter) == 365
        assert len(dti.is_month_start) == 365
        assert len(dti.is_month_end) == 365
        assert len(dti.is_quarter_start) == 365
        assert len(dti.is_quarter_end) == 365
        assert len(dti.is_year_start) == 365
        assert len(dti.is_year_end) == 365

        dti.name = "name"

        # non boolean accessors -> return Index
        for accessor in DatetimeArray._field_ops:
            res = getattr(dti, accessor)
            assert len(res) == 365
            assert isinstance(res, Index)
            assert res.name == "name"

        # boolean accessors -> return array
        for accessor in DatetimeArray._bool_ops:
            res = getattr(dti, accessor)
            assert len(res) == 365
            assert isinstance(res, np.ndarray)

        # test boolean indexing
        res = dti[dti.is_quarter_start]
        exp = dti[[0, 90, 181, 273]]
        tm.assert_index_equal(res, exp)
        # 1998 is not a leap year, so this selection is empty
        res = dti[dti.is_leap_year]
        exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns")
        tm.assert_index_equal(res, exp)

    def test_dti_is_year_quarter_start(self):
        # business-quarter-end anchored on February: all points are
        # quarter ends, exactly one is a year end
        dti = date_range(freq="BQE-FEB", start=datetime(1998, 1, 1), periods=4)

        assert sum(dti.is_quarter_start) == 0
        assert sum(dti.is_quarter_end) == 4
        assert sum(dti.is_year_start) == 0
        assert sum(dti.is_year_end) == 1

    def test_dti_is_month_start(self):
        dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])

        assert dti.is_month_start[0] == 1

    def test_dti_is_month_start_custom(self):
        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
        bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
        dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
        msg = "Custom business days is not supported by is_month_start"
        with pytest.raises(ValueError, match=msg):
            dti.is_month_start
| @ -0,0 +1,666 @@ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timezone, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     bdate_range, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.tseries.offsets import ( | ||||
|     BMonthEnd, | ||||
|     Minute, | ||||
|     MonthEnd, | ||||
| ) | ||||
|  | ||||
| START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) | ||||
|  | ||||
|  | ||||
class TestDatetimeIndexSetOps:
    """Tests for DatetimeIndex set operations."""

    # timezones (pytz- and dateutil-style) used to parametrize the
    # tz-aware union tests below
    tz = [
        None,
        "UTC",
        "Asia/Tokyo",
        "US/Eastern",
        "dateutil/Asia/Singapore",
        "dateutil/US/Pacific",
    ]
|  | ||||
|     # TODO: moved from test_datetimelike; dedup with version below | ||||
|     def test_union2(self, sort): | ||||
|         everything = date_range("2020-01-01", periods=10) | ||||
|         first = everything[:5] | ||||
|         second = everything[5:] | ||||
|         union = first.union(second, sort=sort) | ||||
|         tm.assert_index_equal(union, everything) | ||||
|  | ||||
|     @pytest.mark.parametrize("box", [np.array, Series, list]) | ||||
|     def test_union3(self, sort, box): | ||||
|         everything = date_range("2020-01-01", periods=10) | ||||
|         first = everything[:5] | ||||
|         second = everything[5:] | ||||
|  | ||||
|         # GH 10149 support listlike inputs other than Index objects | ||||
|         expected = first.union(second, sort=sort) | ||||
|         case = box(second.values) | ||||
|         result = first.union(case, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
    @pytest.mark.parametrize("tz", tz)
    def test_union(self, tz, sort):
        """Union with adjacent, overlapping, and empty operands, both
        orders, with and without sorting."""
        # case 1: adjacent ranges -> one contiguous 10-day range
        rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
        expected1 = date_range("1/1/2000", freq="D", periods=10, tz=tz)
        expected1_notsorted = DatetimeIndex(list(other1) + list(rng1))

        # case 2: overlapping ranges -> duplicates collapse to 8 days
        rng2 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
        expected2 = date_range("1/1/2000", freq="D", periods=8, tz=tz)
        expected2_notsorted = DatetimeIndex(list(other2) + list(rng2[:3]))

        # case 3: union with an empty index is a no-op
        rng3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other3 = DatetimeIndex([], tz=tz).as_unit("ns")
        expected3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        expected3_notsorted = rng3

        for rng, other, exp, exp_notsorted in [
            (rng1, other1, expected1, expected1_notsorted),
            (rng2, other2, expected2, expected2_notsorted),
            (rng3, other3, expected3, expected3_notsorted),
        ]:
            result_union = rng.union(other, sort=sort)
            tm.assert_index_equal(result_union, exp)

            # reversed order: only sort=None re-sorts the result
            result_union = other.union(rng, sort=sort)
            if sort is None:
                tm.assert_index_equal(result_union, exp)
            else:
                tm.assert_index_equal(result_union, exp_notsorted)
|  | ||||
|     def test_union_coverage(self, sort): | ||||
|         idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"]) | ||||
|         ordered = DatetimeIndex(idx.sort_values(), freq="infer") | ||||
|         result = ordered.union(idx, sort=sort) | ||||
|         tm.assert_index_equal(result, ordered) | ||||
|  | ||||
|         result = ordered[:0].union(ordered, sort=sort) | ||||
|         tm.assert_index_equal(result, ordered) | ||||
|         assert result.freq == ordered.freq | ||||
|  | ||||
|     def test_union_bug_1730(self, sort): | ||||
|         rng_a = date_range("1/1/2012", periods=4, freq="3h") | ||||
|         rng_b = date_range("1/1/2012", periods=4, freq="4h") | ||||
|  | ||||
|         result = rng_a.union(rng_b, sort=sort) | ||||
|         exp = list(rng_a) + list(rng_b[1:]) | ||||
|         if sort is None: | ||||
|             exp = DatetimeIndex(sorted(exp)) | ||||
|         else: | ||||
|             exp = DatetimeIndex(exp) | ||||
|         tm.assert_index_equal(result, exp) | ||||
|  | ||||
|     def test_union_bug_1745(self, sort): | ||||
|         left = DatetimeIndex(["2012-05-11 15:19:49.695000"]) | ||||
|         right = DatetimeIndex( | ||||
|             [ | ||||
|                 "2012-05-29 13:04:21.322000", | ||||
|                 "2012-05-11 15:27:24.873000", | ||||
|                 "2012-05-11 15:31:05.350000", | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         result = left.union(right, sort=sort) | ||||
|         exp = DatetimeIndex( | ||||
|             [ | ||||
|                 "2012-05-11 15:19:49.695000", | ||||
|                 "2012-05-29 13:04:21.322000", | ||||
|                 "2012-05-11 15:27:24.873000", | ||||
|                 "2012-05-11 15:31:05.350000", | ||||
|             ] | ||||
|         ) | ||||
|         if sort is None: | ||||
|             exp = exp.sort_values() | ||||
|         tm.assert_index_equal(result, exp) | ||||
|  | ||||
|     def test_union_bug_4564(self, sort): | ||||
|         from pandas import DateOffset | ||||
|  | ||||
|         left = date_range("2013-01-01", "2013-02-01") | ||||
|         right = left + DateOffset(minutes=15) | ||||
|  | ||||
|         result = left.union(right, sort=sort) | ||||
|         exp = list(left) + list(right) | ||||
|         if sort is None: | ||||
|             exp = DatetimeIndex(sorted(exp)) | ||||
|         else: | ||||
|             exp = DatetimeIndex(exp) | ||||
|         tm.assert_index_equal(result, exp) | ||||
|  | ||||
|     def test_union_freq_both_none(self, sort): | ||||
|         # GH11086 | ||||
|         expected = bdate_range("20150101", periods=10) | ||||
|         expected._data.freq = None | ||||
|  | ||||
|         result = expected.union(expected, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         assert result.freq is None | ||||
|  | ||||
|     def test_union_freq_infer(self): | ||||
|         # When taking the union of two DatetimeIndexes, we infer | ||||
|         #  a freq even if the arguments don't have freq.  This matches | ||||
|         #  TimedeltaIndex behavior. | ||||
|         dti = date_range("2016-01-01", periods=5) | ||||
|         left = dti[[0, 1, 3, 4]] | ||||
|         right = dti[[2, 3, 1]] | ||||
|  | ||||
|         assert left.freq is None | ||||
|         assert right.freq is None | ||||
|  | ||||
|         result = left.union(right) | ||||
|         tm.assert_index_equal(result, dti) | ||||
|         assert result.freq == "D" | ||||
|  | ||||
|     def test_union_dataframe_index(self): | ||||
|         rng1 = date_range("1/1/1999", "1/1/2012", freq="MS") | ||||
|         s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1) | ||||
|  | ||||
|         rng2 = date_range("1/1/1980", "12/1/2001", freq="MS") | ||||
|         s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2) | ||||
|         df = DataFrame({"s1": s1, "s2": s2}) | ||||
|  | ||||
|         exp = date_range("1/1/1980", "1/1/2012", freq="MS") | ||||
|         tm.assert_index_equal(df.index, exp) | ||||
|  | ||||
|     def test_union_with_DatetimeIndex(self, sort): | ||||
|         i1 = Index(np.arange(0, 20, 2, dtype=np.int64)) | ||||
|         i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D") | ||||
|         # Works | ||||
|         i1.union(i2, sort=sort) | ||||
|         # Fails with "AttributeError: can't set attribute" | ||||
|         i2.union(i1, sort=sort) | ||||
|  | ||||
|     def test_union_same_timezone_different_units(self): | ||||
|         # GH 55238 | ||||
|         idx1 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("ms") | ||||
|         idx2 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us") | ||||
|         result = idx1.union(idx2) | ||||
|         expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us") | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     # TODO: moved from test_datetimelike; de-duplicate with version below | ||||
|     def test_intersection2(self): | ||||
|         first = date_range("2020-01-01", periods=10) | ||||
|         second = first[5:] | ||||
|         intersect = first.intersection(second) | ||||
|         tm.assert_index_equal(intersect, second) | ||||
|  | ||||
|         # GH 10149 | ||||
|         cases = [klass(second.values) for klass in [np.array, Series, list]] | ||||
|         for case in cases: | ||||
|             result = first.intersection(case) | ||||
|             tm.assert_index_equal(result, second) | ||||
|  | ||||
|         third = Index(["a", "b", "c"]) | ||||
|         result = first.intersection(third) | ||||
|         expected = Index([], dtype=object) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
    @pytest.mark.parametrize(
        "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
    )
    def test_intersection(self, tz, sort):
        # GH 4690 (with tz)
        base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")

        # if target has the same name, it is preserved
        rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx")
        expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx")

        # if target name is different, it will be reset
        rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other")
        expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None)

        # non-overlapping ranges intersect to an empty index; freq="D" is
        # kept on the expected value here
        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx")
        expected4 = DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]")

        # monotonic cases: no sort parameter; freq must round-trip
        for rng, expected in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng)
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

        # non-monotonic
        base = DatetimeIndex(
            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx"
        ).as_unit("ns")

        rng2 = DatetimeIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx"
        ).as_unit("ns")
        expected2 = DatetimeIndex(
            ["2011-01-04", "2011-01-02"], tz=tz, name="idx"
        ).as_unit("ns")

        rng3 = DatetimeIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
            tz=tz,
            name="other",
        ).as_unit("ns")
        expected3 = DatetimeIndex(
            ["2011-01-04", "2011-01-02"], tz=tz, name=None
        ).as_unit("ns")

        # GH 7880
        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
        expected4 = DatetimeIndex([], tz=tz, name="idx").as_unit("ns")
        assert expected4.freq is None

        # non-monotonic cases: result order depends on ``sort``
        for rng, expected in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq
|  | ||||
|     # parametrize over both anchored and non-anchored freqs, as they | ||||
|     #  have different code paths | ||||
|     @pytest.mark.parametrize("freq", ["min", "B"]) | ||||
|     def test_intersection_empty(self, tz_aware_fixture, freq): | ||||
|         # empty same freq GH2129 | ||||
|         tz = tz_aware_fixture | ||||
|         rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz) | ||||
|         result = rng[0:0].intersection(rng) | ||||
|         assert len(result) == 0 | ||||
|         assert result.freq == rng.freq | ||||
|  | ||||
|         result = rng.intersection(rng[0:0]) | ||||
|         assert len(result) == 0 | ||||
|         assert result.freq == rng.freq | ||||
|  | ||||
|         # no overlap GH#33604 | ||||
|         check_freq = freq != "min"  # We don't preserve freq on non-anchored offsets | ||||
|         result = rng[:3].intersection(rng[-3:]) | ||||
|         tm.assert_index_equal(result, rng[:0]) | ||||
|         if check_freq: | ||||
|             # We don't preserve freq on non-anchored offsets | ||||
|             assert result.freq == rng.freq | ||||
|  | ||||
|         # swapped left and right | ||||
|         result = rng[-3:].intersection(rng[:3]) | ||||
|         tm.assert_index_equal(result, rng[:0]) | ||||
|         if check_freq: | ||||
|             # We don't preserve freq on non-anchored offsets | ||||
|             assert result.freq == rng.freq | ||||
|  | ||||
|     def test_intersection_bug_1708(self): | ||||
|         from pandas import DateOffset | ||||
|  | ||||
|         index_1 = date_range("1/1/2012", periods=4, freq="12h") | ||||
|         index_2 = index_1 + DateOffset(hours=1) | ||||
|  | ||||
|         result = index_1.intersection(index_2) | ||||
|         assert len(result) == 0 | ||||
|  | ||||
    @pytest.mark.parametrize("tz", tz)
    def test_difference(self, tz, sort):
        # Dates are deliberately unsorted so that ``sort`` matters.
        rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]

        # no overlap -> difference returns the left side unchanged
        rng1 = DatetimeIndex(rng_dates, tz=tz)
        other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
        expected1 = DatetimeIndex(rng_dates, tz=tz)

        # partial overlap -> overlapping dates removed, first three remain
        rng2 = DatetimeIndex(rng_dates, tz=tz)
        other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
        expected2 = DatetimeIndex(rng_dates[:3], tz=tz)

        # empty other -> difference returns the left side unchanged
        rng3 = DatetimeIndex(rng_dates, tz=tz)
        other3 = DatetimeIndex([], tz=tz)
        expected3 = DatetimeIndex(rng_dates, tz=tz)

        for rng, other, expected in [
            (rng1, other1, expected1),
            (rng2, other2, expected2),
            (rng3, other3, expected3),
        ]:
            result_diff = rng.difference(other, sort)
            if sort is None and len(other):
                # We don't sort (yet?) when empty GH#24959
                expected = expected.sort_values()
            tm.assert_index_equal(result_diff, expected)
|  | ||||
|     def test_difference_freq(self, sort): | ||||
|         # GH14323: difference of DatetimeIndex should not preserve frequency | ||||
|  | ||||
|         index = date_range("20160920", "20160925", freq="D") | ||||
|         other = date_range("20160921", "20160924", freq="D") | ||||
|         expected = DatetimeIndex(["20160920", "20160925"], dtype="M8[ns]", freq=None) | ||||
|         idx_diff = index.difference(other, sort) | ||||
|         tm.assert_index_equal(idx_diff, expected) | ||||
|         tm.assert_attr_equal("freq", idx_diff, expected) | ||||
|  | ||||
|         # preserve frequency when the difference is a contiguous | ||||
|         # subset of the original range | ||||
|         other = date_range("20160922", "20160925", freq="D") | ||||
|         idx_diff = index.difference(other, sort) | ||||
|         expected = DatetimeIndex(["20160920", "20160921"], dtype="M8[ns]", freq="D") | ||||
|         tm.assert_index_equal(idx_diff, expected) | ||||
|         tm.assert_attr_equal("freq", idx_diff, expected) | ||||
|  | ||||
|     def test_datetimeindex_diff(self, sort): | ||||
|         dti1 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=100) | ||||
|         dti2 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=98) | ||||
|         assert len(dti1.difference(dti2, sort)) == 2 | ||||
|  | ||||
    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"])
    def test_setops_preserve_freq(self, tz):
        rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz)

        # adjacent slices -> union is again a regular range; freq survives
        result = rng[:50].union(rng[50:100])
        assert result.name == rng.name
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        # overlapping slices -> still contiguous; freq survives
        result = rng[:50].union(rng[30:100])
        assert result.name == rng.name
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        # gap between the slices -> result is irregular; freq is dropped
        result = rng[:50].union(rng[60:100])
        assert result.name == rng.name
        assert result.freq is None
        assert result.tz == rng.tz

        # intersection of overlapping slices keeps the daily freq
        result = rng[:50].intersection(rng[25:75])
        assert result.name == rng.name
        assert result.freqstr == "D"
        assert result.tz == rng.tz

        # other operand has no freq and a different name -> name is reset,
        # but freq can still be preserved from the regular left operand
        nofreq = DatetimeIndex(list(rng[25:75]), name="other")
        result = rng[:50].union(nofreq)
        assert result.name is None
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        result = rng[:50].intersection(nofreq)
        assert result.name is None
        assert result.freq == rng.freq
        assert result.tz == rng.tz
|  | ||||
|     def test_intersection_non_tick_no_fastpath(self): | ||||
|         # GH#42104 | ||||
|         dti = DatetimeIndex( | ||||
|             [ | ||||
|                 "2018-12-31", | ||||
|                 "2019-03-31", | ||||
|                 "2019-06-30", | ||||
|                 "2019-09-30", | ||||
|                 "2019-12-31", | ||||
|                 "2020-03-31", | ||||
|             ], | ||||
|             freq="QE-DEC", | ||||
|         ) | ||||
|         result = dti[::2].intersection(dti[1::2]) | ||||
|         expected = dti[:0] | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_dti_intersection(self): | ||||
|         rng = date_range("1/1/2011", periods=100, freq="h", tz="utc") | ||||
|  | ||||
|         left = rng[10:90][::-1] | ||||
|         right = rng[20:80][::-1] | ||||
|  | ||||
|         assert left.tz == rng.tz | ||||
|         result = left.intersection(right) | ||||
|         assert result.tz == left.tz | ||||
|  | ||||
|     # Note: not difference, as there is no symmetry requirement there | ||||
|     @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"]) | ||||
|     def test_dti_setop_aware(self, setop): | ||||
|         # non-overlapping | ||||
|         # GH#39328 as of 2.0 we cast these to UTC instead of object | ||||
|         rng = date_range("2012-11-15 00:00:00", periods=6, freq="h", tz="US/Central") | ||||
|  | ||||
|         rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="h", tz="US/Eastern") | ||||
|  | ||||
|         result = getattr(rng, setop)(rng2) | ||||
|  | ||||
|         left = rng.tz_convert("UTC") | ||||
|         right = rng2.tz_convert("UTC") | ||||
|         expected = getattr(left, setop)(right) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|         assert result.tz == left.tz | ||||
|         if len(result): | ||||
|             assert result[0].tz is timezone.utc | ||||
|             assert result[-1].tz is timezone.utc | ||||
|  | ||||
|     def test_dti_union_mixed(self): | ||||
|         # GH#21671 | ||||
|         rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT]) | ||||
|         rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo") | ||||
|         result = rng.union(rng2) | ||||
|         expected = Index( | ||||
|             [ | ||||
|                 Timestamp("2011-01-01"), | ||||
|                 pd.NaT, | ||||
|                 Timestamp("2012-01-01", tz="Asia/Tokyo"), | ||||
|                 Timestamp("2012-01-02", tz="Asia/Tokyo"), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
class TestBusinessDatetimeIndex:
    """Set operations on business-day (``bdate_range``) indexes."""

    def test_union(self, sort):
        rng = bdate_range(START, END)
        # overlapping
        left = rng[:10]
        right = rng[5:10]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]

        # a gap makes the result irregular, but it is still an Index
        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, Index)

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # order does not matter
        if sort is None:
            tm.assert_index_equal(right.union(left, sort=sort), the_union)
        else:
            # with sort=False the operands are simply concatenated
            expected = DatetimeIndex(list(right) + list(left))
            tm.assert_index_equal(right.union(left, sort=sort), expected)

        # overlapping, but different offset
        rng = date_range(START, END, freq=BMonthEnd())

        the_union = rng.union(rng, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

    def test_union_not_cacheable(self, sort):
        # unions of overlapping slices reconstruct the original range
        rng = date_range("1/1/2000", periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_union = rng1.union(rng2, sort=sort)
        if sort is None:
            tm.assert_index_equal(the_union, rng)
        else:
            expected = DatetimeIndex(list(rng[10:]) + list(rng[:10]))
            tm.assert_index_equal(the_union, expected)

        # rng2 fully contained in rng1 -> union is rng1 regardless of sort
        rng1 = rng[10:]
        rng2 = rng[15:35]
        the_union = rng1.union(rng2, sort=sort)
        expected = rng[10:]
        tm.assert_index_equal(the_union, expected)

    def test_intersection(self):
        rng = date_range("1/1/2000", periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_int = rng1.intersection(rng2)
        expected = rng[10:25]
        tm.assert_index_equal(the_int, expected)
        assert isinstance(the_int, DatetimeIndex)
        assert the_int.freq == rng.freq

        # repeating the operation gives the same result
        the_int = rng1.intersection(rng2)
        tm.assert_index_equal(the_int, expected)

        # non-overlapping
        the_int = rng[:10].intersection(rng[10:])
        expected = DatetimeIndex([]).as_unit("ns")
        tm.assert_index_equal(the_int, expected)

    def test_intersection_bug(self):
        # GH #771
        a = bdate_range("11/30/2011", "12/31/2011")
        b = bdate_range("12/10/2011", "12/20/2011")
        result = a.intersection(b)
        tm.assert_index_equal(result, b)
        assert result.freq == b.freq

    def test_intersection_list(self):
        # GH#35876
        # values is not an Index -> no name -> retain "a"
        values = [Timestamp("2020-01-01"), Timestamp("2020-02-01")]
        idx = DatetimeIndex(values, name="a")
        res = idx.intersection(values)
        tm.assert_index_equal(res, idx)

    def test_month_range_union_tz_pytz(self, sort):
        # just check that unioning tz-aware month-end ranges does not raise
        tz = pytz.timezone("US/Eastern")

        early_start = datetime(2011, 1, 1)
        early_end = datetime(2011, 3, 1)

        late_start = datetime(2011, 3, 1)
        late_end = datetime(2011, 5, 1)

        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())

        early_dr.union(late_dr, sort=sort)

    @td.skip_if_windows
    def test_month_range_union_tz_dateutil(self, sort):
        # same as above, but with a dateutil rather than a pytz timezone
        from pandas._libs.tslibs.timezones import dateutil_gettz

        tz = dateutil_gettz("US/Eastern")

        early_start = datetime(2011, 1, 1)
        early_end = datetime(2011, 3, 1)

        late_start = datetime(2011, 3, 1)
        late_end = datetime(2011, 5, 1)

        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())

        early_dr.union(late_dr, sort=sort)

    @pytest.mark.parametrize("sort", [False, None])
    def test_intersection_duplicates(self, sort):
        # GH#38196: duplicates are dropped from the intersection result
        idx1 = Index(
            [
                Timestamp("2019-12-13"),
                Timestamp("2019-12-12"),
                Timestamp("2019-12-12"),
            ]
        )
        result = idx1.intersection(idx1, sort=sort)
        expected = Index([Timestamp("2019-12-13"), Timestamp("2019-12-12")])
        tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
class TestCustomDatetimeIndex:
    """Set operations on custom-business-day (``freq="C"``) indexes."""

    def test_union(self, sort):
        # overlapping
        rng = bdate_range(START, END, freq="C")
        left = rng[:10]
        right = rng[5:10]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]

        # pass ``sort`` by keyword for consistency with every other call
        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, Index)

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # order does not matter
        if sort is None:
            tm.assert_index_equal(right.union(left, sort=sort), the_union)

        # overlapping, but different offset
        rng = date_range(START, END, freq=BMonthEnd())

        the_union = rng.union(rng, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

    def test_intersection_bug(self):
        # GH #771
        a = bdate_range("11/30/2011", "12/31/2011", freq="C")
        b = bdate_range("12/10/2011", "12/20/2011", freq="C")
        result = a.intersection(b)
        tm.assert_index_equal(result, b)
        assert result.freq == b.freq

    @pytest.mark.parametrize(
        "tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)]
    )
    def test_intersection_dst_transition(self, tz):
        # GH 46702: Europe/Berlin has DST transition
        idx1 = date_range("2020-03-27", periods=5, freq="D", tz=tz)
        idx2 = date_range("2020-03-30", periods=5, freq="D", tz=tz)
        result = idx1.intersection(idx2)
        expected = date_range("2020-03-30", periods=2, freq="D", tz=tz)
        tm.assert_index_equal(result, expected)

        # GH#45863 same problem for union
        index1 = date_range("2021-10-28", periods=3, freq="D", tz="Europe/London")
        index2 = date_range("2021-10-30", periods=4, freq="D", tz="Europe/London")
        result = index1.union(index2)
        expected = date_range("2021-10-28", periods=6, freq="D", tz="Europe/London")
        tm.assert_index_equal(result, expected)
| @ -0,0 +1,251 @@ | ||||
| """ | ||||
| Tests for DatetimeIndex timezone-related methods | ||||
| """ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
|     timezone, | ||||
|     tzinfo, | ||||
| ) | ||||
|  | ||||
| from dateutil.tz import gettz | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| from pandas._libs.tslibs import ( | ||||
|     conversion, | ||||
|     timezones, | ||||
| ) | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     Timestamp, | ||||
|     bdate_range, | ||||
|     date_range, | ||||
|     isna, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class FixedOffset(tzinfo):
    """Fixed offset in minutes east from UTC (never observes DST)."""

    def __init__(self, offset, name) -> None:
        # offset: minutes east of UTC; name: value returned by tzname()
        self.__delta = timedelta(minutes=offset)
        self.__label = name

    def tzname(self, dt):
        return self.__label

    def utcoffset(self, dt):
        return self.__delta

    def dst(self, dt):
        # fixed-offset zones have no daylight saving component
        return timedelta(0)
|  | ||||
|  | ||||
# Shared fixture value: a fixed UTC-05:30 zone whose tzname() is None.
fixed_off_no_name = FixedOffset(-330, None)
|  | ||||
|  | ||||
| class TestDatetimeIndexTimezones: | ||||
|     # ------------------------------------------------------------- | ||||
|     # Unsorted | ||||
|  | ||||
|     def test_dti_drop_dont_lose_tz(self): | ||||
|         # GH#2621 | ||||
|         ind = date_range("2012-12-01", periods=10, tz="utc") | ||||
|         ind = ind.drop(ind[-1]) | ||||
|  | ||||
|         assert ind.tz is not None | ||||
|  | ||||
|     def test_dti_tz_conversion_freq(self, tz_naive_fixture): | ||||
|         # GH25241 | ||||
|         t3 = DatetimeIndex(["2019-01-01 10:00"], freq="h") | ||||
|         assert t3.tz_localize(tz=tz_naive_fixture).freq == t3.freq | ||||
|         t4 = DatetimeIndex(["2019-01-02 12:00"], tz="UTC", freq="min") | ||||
|         assert t4.tz_convert(tz="UTC").freq == t4.freq | ||||
|  | ||||
    def test_drop_dst_boundary(self):
        # see gh-18031
        tz = "Europe/Brussels"
        freq = "15min"

        # 2017-10-29 is the fall-back transition in Brussels: wall times
        # 02:00-02:45 occur twice within this range.
        start = Timestamp("201710290100", tz=tz)
        end = Timestamp("201710290300", tz=tz)
        index = date_range(start=start, end=end, freq=freq)

        # ``ambiguous`` pins each repeated wall time to the correct instant:
        # True -> first occurrence (DST), False -> second occurrence.
        expected = DatetimeIndex(
            [
                "201710290115",
                "201710290130",
                "201710290145",
                "201710290200",
                "201710290215",
                "201710290230",
                "201710290245",
                "201710290200",
                "201710290215",
                "201710290230",
                "201710290245",
                "201710290300",
            ],
            dtype="M8[ns, Europe/Brussels]",
            freq=freq,
            ambiguous=[
                True,
                True,
                True,
                True,
                True,
                True,
                True,
                False,
                False,
                False,
                False,
                False,
            ],
        )
        # dropping the first element must not disturb the ambiguous stamps
        result = index.drop(index[0])
        tm.assert_index_equal(result, expected)
|  | ||||
|     def test_date_range_localize(self, unit): | ||||
|         rng = date_range( | ||||
|             "3/11/2012 03:00", periods=15, freq="h", tz="US/Eastern", unit=unit | ||||
|         ) | ||||
|         rng2 = DatetimeIndex( | ||||
|             ["3/11/2012 03:00", "3/11/2012 04:00"], dtype=f"M8[{unit}, US/Eastern]" | ||||
|         ) | ||||
|         rng3 = date_range("3/11/2012 03:00", periods=15, freq="h", unit=unit) | ||||
|         rng3 = rng3.tz_localize("US/Eastern") | ||||
|  | ||||
|         tm.assert_index_equal(rng._with_freq(None), rng3) | ||||
|  | ||||
|         # DST transition time | ||||
|         val = rng[0] | ||||
|         exp = Timestamp("3/11/2012 03:00", tz="US/Eastern") | ||||
|  | ||||
|         assert val.hour == 3 | ||||
|         assert exp.hour == 3 | ||||
|         assert val == exp  # same UTC value | ||||
|         tm.assert_index_equal(rng[:2], rng2) | ||||
|  | ||||
|     def test_date_range_localize2(self, unit): | ||||
|         # Right before the DST transition | ||||
|         rng = date_range( | ||||
|             "3/11/2012 00:00", periods=2, freq="h", tz="US/Eastern", unit=unit | ||||
|         ) | ||||
|         rng2 = DatetimeIndex( | ||||
|             ["3/11/2012 00:00", "3/11/2012 01:00"], | ||||
|             dtype=f"M8[{unit}, US/Eastern]", | ||||
|             freq="h", | ||||
|         ) | ||||
|         tm.assert_index_equal(rng, rng2) | ||||
|         exp = Timestamp("3/11/2012 00:00", tz="US/Eastern") | ||||
|         assert exp.hour == 0 | ||||
|         assert rng[0] == exp | ||||
|         exp = Timestamp("3/11/2012 01:00", tz="US/Eastern") | ||||
|         assert exp.hour == 1 | ||||
|         assert rng[1] == exp | ||||
|  | ||||
|         rng = date_range( | ||||
|             "3/11/2012 00:00", periods=10, freq="h", tz="US/Eastern", unit=unit | ||||
|         ) | ||||
|         assert rng[2].hour == 3 | ||||
|  | ||||
|     def test_timestamp_equality_different_timezones(self): | ||||
|         utc_range = date_range("1/1/2000", periods=20, tz="UTC") | ||||
|         eastern_range = utc_range.tz_convert("US/Eastern") | ||||
|         berlin_range = utc_range.tz_convert("Europe/Berlin") | ||||
|  | ||||
|         for a, b, c in zip(utc_range, eastern_range, berlin_range): | ||||
|             assert a == b | ||||
|             assert b == c | ||||
|             assert a == c | ||||
|  | ||||
|         assert (utc_range == eastern_range).all() | ||||
|         assert (utc_range == berlin_range).all() | ||||
|         assert (berlin_range == eastern_range).all() | ||||
|  | ||||
|     def test_dti_equals_with_tz(self): | ||||
|         left = date_range("1/1/2011", periods=100, freq="h", tz="utc") | ||||
|         right = date_range("1/1/2011", periods=100, freq="h", tz="US/Eastern") | ||||
|  | ||||
|         assert not left.equals(right) | ||||
|  | ||||
|     @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) | ||||
|     def test_dti_tz_nat(self, tzstr): | ||||
|         idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT]) | ||||
|  | ||||
|         assert isna(idx[1]) | ||||
|         assert idx[0].tzinfo is not None | ||||
|  | ||||
|     @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) | ||||
|     def test_utc_box_timestamp_and_localize(self, tzstr): | ||||
|         tz = timezones.maybe_get_tz(tzstr) | ||||
|  | ||||
|         rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc") | ||||
|         rng_eastern = rng.tz_convert(tzstr) | ||||
|  | ||||
|         expected = rng[-1].astimezone(tz) | ||||
|  | ||||
|         stamp = rng_eastern[-1] | ||||
|         assert stamp == expected | ||||
|         assert stamp.tzinfo == expected.tzinfo | ||||
|  | ||||
|         # right tzinfo | ||||
|         rng = date_range("3/13/2012", "3/14/2012", freq="h", tz="utc") | ||||
|         rng_eastern = rng.tz_convert(tzstr) | ||||
|         # test not valid for dateutil timezones. | ||||
|         # assert 'EDT' in repr(rng_eastern[0].tzinfo) | ||||
|         assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr( | ||||
|             rng_eastern[0].tzinfo | ||||
|         ) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", [pytz.timezone("US/Central"), gettz("US/Central")]) | ||||
    def test_with_tz(self, tz):
        """Smoke test: bdate_range works with aware starts and tz kwargs.

        ``tz`` is parametrized over a pytz and a dateutil US/Central zone.
        """
        # just want it to work
        start = datetime(2011, 3, 12, tzinfo=pytz.utc)
        dr = bdate_range(start, periods=50, freq=pd.offsets.Hour())
        assert dr.tz is pytz.utc

        # DateRange with naive datetimes; the first call is only a
        # construction smoke check — its result is immediately replaced
        dr = bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc)
        dr = bdate_range("1/1/2005", "1/1/2009", tz=tz)

        # normalized: converting within the same zone preserves tz identity
        central = dr.tz_convert(tz)
        assert central.tz is tz
        naive = central[0].to_pydatetime().replace(tzinfo=None)
        comp = conversion.localize_pydatetime(naive, tz).tzinfo
        assert central[0].tz is comp

        # compare vs a localized tz
        naive = dr[0].to_pydatetime().replace(tzinfo=None)
        comp = conversion.localize_pydatetime(naive, tz).tzinfo
        assert central[0].tz is comp

        # datetimes with tzinfo set
        dr = bdate_range(
            datetime(2005, 1, 1, tzinfo=pytz.utc), datetime(2009, 1, 1, tzinfo=pytz.utc)
        )
        # mixing an aware start with a conflicting tz kwarg must raise
        msg = "Start and end cannot both be tz-aware with different timezones"
        with pytest.raises(Exception, match=msg):
            bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), "1/1/2009", tz=tz)
|  | ||||
|     @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) | ||||
|     def test_dti_convert_tz_aware_datetime_datetime(self, tz): | ||||
|         # GH#1581 | ||||
|         dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)] | ||||
|  | ||||
|         dates_aware = [conversion.localize_pydatetime(x, tz) for x in dates] | ||||
|         result = DatetimeIndex(dates_aware).as_unit("ns") | ||||
|         assert timezones.tz_compare(result.tz, tz) | ||||
|  | ||||
|         converted = to_datetime(dates_aware, utc=True).as_unit("ns") | ||||
|         ex_vals = np.array([Timestamp(x).as_unit("ns")._value for x in dates_aware]) | ||||
|         tm.assert_numpy_array_equal(converted.asi8, ex_vals) | ||||
|         assert converted.tz is timezone.utc | ||||
| @ -0,0 +1,254 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import ( | ||||
|     CategoricalDtype, | ||||
|     IntervalDtype, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     NaT, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     interval_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class AstypeTests:
    """Tests common to IntervalIndex with any subtype.

    Subclasses provide an ``index`` fixture supplying the IntervalIndex
    instances under test.
    """

    def test_astype_idempotent(self, index):
        # casting to the generic "interval" alias or to the index's own
        # dtype must round-trip unchanged
        result = index.astype("interval")
        tm.assert_index_equal(result, index)

        result = index.astype(index.dtype)
        tm.assert_index_equal(result, index)

    def test_astype_object(self, index):
        # astype(object) yields an object-dtype Index of Interval scalars;
        # the result no longer compares equal to the IntervalIndex
        result = index.astype(object)
        expected = Index(index.values, dtype="object")
        tm.assert_index_equal(result, expected)
        assert not result.equals(index)

    def test_astype_category(self, index):
        # the "category" string alias and a bare CategoricalDtype agree
        result = index.astype("category")
        expected = CategoricalIndex(index.values)
        tm.assert_index_equal(result, expected)

        result = index.astype(CategoricalDtype())
        tm.assert_index_equal(result, expected)

        # non-default params
        categories = index.dropna().unique().values[:-1]
        dtype = CategoricalDtype(categories=categories, ordered=True)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.values, categories=categories, ordered=True)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        # direct casts to non-interval dtypes are rejected
        msg = "Cannot cast IntervalIndex to dtype"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        # an unparseable dtype string raises TypeError
        msg = "data type [\"']fake_dtype[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            index.astype("fake_dtype")
|  | ||||
|  | ||||
class TestIntSubtype(AstypeTests):
    """Tests specific to IntervalIndex with integer-like subtype"""

    # one int64 index spanning negative values and one uint64 index,
    # covering both signed and unsigned integer subtypes
    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        # integer breaks can be cast to float and datetime-like subtypes;
        # the expected result converts the endpoints side-by-side
        dtype = IntervalDtype(subtype, index.closed)
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        # signed <-> unsigned conversion works both ways for
        # non-negative values
        index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        dtype = IntervalDtype(subtype_end, index.closed)
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype_end),
            index.right.astype(subtype_end),
            closed=index.closed,
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        # int64 -> uint64 fails with negative values
        index = interval_range(-10, 10)
        dtype = IntervalDtype("uint64", "right")

        # Until we decide what the exception message _should_ be, we
        #  assert something that it should _not_ be.
        #  We should _not_ be getting a message suggesting that the -10
        #  has been wrapped around to a large-positive integer
        msg = "^(?!(left side of interval must be <= right side))"
        with pytest.raises(ValueError, match=msg):
            index.astype(dtype)
|  | ||||
|  | ||||
class TestFloatSubtype(AstypeTests):
    """Tests specific to IntervalIndex with float subtype"""

    # one plain float range and one index containing NaN intervals
    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        # integral float breaks convert cleanly to integer subtypes
        index = interval_range(0.0, 10.0)
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

        # raises with NA
        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=msg):
            index.insert(0, np.nan).astype(dtype)

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer_with_non_integer_borders(self, subtype):
        # non-integral borders: result matches converting the endpoints
        # with the same elementwise astype
        index = interval_range(0.0, 3.0, freq=0.25)
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    def test_subtype_integer_errors(self):
        # float64 -> uint64 fails with negative values
        index = interval_range(-10.0, 10.0)
        dtype = IntervalDtype("uint64", "right")
        msg = re.escape(
            "Cannot convert interval[float64, right] to interval[uint64, right]; "
            "subtypes are incompatible"
        )
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        # float subtypes cannot be cast to datetime-like subtypes
        dtype = IntervalDtype(subtype, "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_astype_category(self, index):
        # same as the base-class test; the NaN-containing fixture can emit
        # a RuntimeWarning during the cast, which is ignored here
        super().test_astype_category(index)
|  | ||||
|  | ||||
class TestDatetimelikeSubtype(AstypeTests):
    """Tests specific to IntervalIndex with datetime-like subtype"""

    # datetime64 (naive and tz-aware) and timedelta64 subtypes, with and
    # without NaT entries
    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        dtype = IntervalDtype(subtype, "right")

        # datetime-like -> uint64 is rejected; -> int64 converts to the
        # underlying integer representation of the endpoints
        if subtype != "int64":
            msg = (
                r"Cannot convert interval\[(timedelta64|datetime64)\[ns.*\], .*\] "
                r"to interval\[uint64, .*\]"
            )
            with pytest.raises(TypeError, match=msg):
                index.astype(dtype)
            return

        result = index.astype(dtype)
        new_left = index.left.astype(subtype)
        new_right = index.right.astype(subtype)

        expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed)
        tm.assert_index_equal(result, expected)

    def test_subtype_float(self, index):
        # datetime-like subtypes cannot be cast to float
        dtype = IntervalDtype("float64", "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    def test_subtype_datetimelike(self):
        # datetime -> timedelta raises
        dtype = IntervalDtype("timedelta64[ns]", "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"

        index = interval_range(Timestamp("2018-01-01"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

        # tz-aware datetime -> timedelta raises as well
        index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

        # timedelta -> datetime raises
        dtype = IntervalDtype("datetime64[ns]", "right")
        index = interval_range(Timedelta("0 days"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)
| @ -0,0 +1,535 @@ | ||||
| from functools import partial | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas.core.dtypes.common import is_unsigned_integer_dtype | ||||
| from pandas.core.dtypes.dtypes import IntervalDtype | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     date_range, | ||||
|     notna, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import IntervalArray | ||||
| import pandas.core.common as com | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    # ``name`` argument for the constructed index: exercises both the
    # unnamed and a named case
    return request.param
|  | ||||
|  | ||||
class ConstructorTests:
    """
    Common tests for all variations of IntervalIndex construction. Input data
    to be supplied in breaks format, then converted by the subclass method
    get_kwargs_from_breaks to the expected format.

    Subclasses also provide a ``constructor`` fixture naming the
    constructor under test.
    """

    # (breaks, expected dtype.subtype) pairs covering int64, uint64,
    # float64, datetime64 (naive and tz-aware) and timedelta64 inputs
    @pytest.fixture(
        params=[
            ([3, 14, 15, 92, 653], np.int64),
            (np.arange(10, dtype="int64"), np.int64),
            (Index(np.arange(-10, 11, dtype=np.int64)), np.int64),
            (Index(np.arange(10, 31, dtype=np.uint64)), np.uint64),
            (Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
            (date_range("20180101", periods=10), "<M8[ns]"),
            (
                date_range("20180101", periods=10, tz="US/Eastern"),
                "datetime64[ns, US/Eastern]",
            ),
            (timedelta_range("1 day", periods=10), "<m8[ns]"),
        ]
    )
    def breaks_and_expected_subtype(self, request):
        return request.param

    def test_constructor(self, constructor, breaks_and_expected_subtype, closed, name):
        # round-trip: breaks -> constructor kwargs -> index with the
        # expected closed/name/subtype and left/right endpoints
        breaks, expected_subtype = breaks_and_expected_subtype

        result_kwargs = self.get_kwargs_from_breaks(breaks, closed)

        result = constructor(closed=closed, name=name, **result_kwargs)

        assert result.closed == closed
        assert result.name == name
        assert result.dtype.subtype == expected_subtype
        tm.assert_index_equal(result.left, Index(breaks[:-1], dtype=expected_subtype))
        tm.assert_index_equal(result.right, Index(breaks[1:], dtype=expected_subtype))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "float64"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "datetime64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "timedelta64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.float64), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        # expected: build directly from pre-converted breaks; result: let
        # the constructor convert via dtype (object and its str alias)
        expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**expected_kwargs)

        result_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        for dtype in (iv_dtype, str(iv_dtype)):
            result = constructor(dtype=dtype, **result_kwargs)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            Index([0, 1, 2, 3, 4], dtype=np.int64),
            Index([0, 1, 2, 3, 4], dtype=np.uint64),
            Index([0, 1, 2, 3, 4], dtype=np.float64),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        iv_dtype = IntervalDtype(breaks.dtype)

        result_kwargs = self.get_kwargs_from_breaks(breaks)

        # explicit closed= must win without raising a warning
        for dtype in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(None):
                result = constructor(dtype=dtype, closed="left", **result_kwargs)
            assert result.dtype.closed == "left"

    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421: all-NaN breaks produce an all-missing float64 index
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_subtype = np.float64
        expected_values = np.array(breaks[:-1], dtype=object)

        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="uint64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421: empty breaks of each dtype build an empty index whose
        # subtype follows the input dtype (int64 for a plain list)
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_values = np.array([], dtype=object)
        expected_subtype = getattr(breaks, "dtype", np.int64)

        assert result.empty
        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016: string-like breaks are rejected in every container form
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253: categorical breaks with a castable category dtype
        # behave like the raw breaks

        breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(breaks)

        cat_breaks = cat_constructor(breaks)
        result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
        result = constructor(**result_kwargs)
        tm.assert_index_equal(result, expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        msg = "closed must be one of 'right', 'left', 'both', 'neither'"
        with pytest.raises(ValueError, match=msg):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        msg = "dtype must be an IntervalDtype, got int64"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="int64", **filler)

        # invalid dtype
        msg = "data type [\"']invalid[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        periods = period_range("2000-01-01", periods=10)
        periods_kwargs = self.get_kwargs_from_breaks(periods)
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=msg):
            constructor(**periods_kwargs)

        # decreasing values
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            constructor(**decreasing_kwargs)
|  | ||||
|  | ||||
class TestFromArrays(ConstructorTests):
    """Tests specific to IntervalIndex.from_arrays"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by IntervalIndex.from_arrays
        """
        return {"left": breaks[:-1], "right": breaks[1:]}

    def test_constructor_errors(self):
        # GH 19016: categorical data with a non-castable category dtype
        data = Categorical(list("01234abcde"), ordered=True)
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_arrays(data[:-1], data[1:])

        # unequal length
        left = [0, 1, 2]
        right = [2, 3]
        msg = "left and right must have the same length"
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays(left, right)

    @pytest.mark.parametrize(
        "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
    )
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """mixed int/float left/right results in float for both sides"""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)
        result = IntervalIndex.from_arrays(left, right)

        expected_left = Index(left, dtype=np.float64)
        expected_right = Index(right, dtype=np.float64)
        expected_subtype = np.float64

        tm.assert_index_equal(result.left, expected_left)
        tm.assert_index_equal(result.right, expected_right)
        assert result.dtype.subtype == expected_subtype

    @pytest.mark.parametrize("interval_cls", [IntervalArray, IntervalIndex])
    def test_from_arrays_mismatched_datetimelike_resos(self, interval_cls):
        # GH#55714: mismatched resolutions are reconciled to the finer unit
        left = date_range("2016-01-01", periods=3, unit="s")
        right = date_range("2017-01-01", periods=3, unit="ms")
        result = interval_cls.from_arrays(left, right)
        expected = interval_cls.from_arrays(left.as_unit("ms"), right)
        tm.assert_equal(result, expected)

        # td64
        left2 = left - left[0]
        right2 = right - left[0]
        result2 = interval_cls.from_arrays(left2, right2)
        expected2 = interval_cls.from_arrays(left2.as_unit("ms"), right2)
        tm.assert_equal(result2, expected2)

        # dt64tz
        left3 = left.tz_localize("UTC")
        right3 = right.tz_localize("UTC")
        result3 = interval_cls.from_arrays(left3, right3)
        expected3 = interval_cls.from_arrays(left3.as_unit("ms"), right3)
        tm.assert_equal(result3, expected3)
|  | ||||
|  | ||||
class TestFromBreaks(ConstructorTests):
    """Tests specific to IntervalIndex.from_breaks"""

    @pytest.fixture
    def constructor(self):
        # the classmethod under test
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Translate breaks-format input into the keyword arguments expected
        by IntervalIndex.from_breaks.
        """
        return dict(breaks=breaks)

    def test_constructor_errors(self):
        # GH 19016: categorical data is rejected as an interval subtype
        cat_data = Categorical(list("01234abcde"), ordered=True)
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_breaks(cat_data)

    def test_length_one(self):
        """breaks of length one produce an empty IntervalIndex"""
        result = IntervalIndex.from_breaks([0])
        expected = IntervalIndex.from_breaks([])
        tm.assert_index_equal(result, expected)

    def test_left_right_dont_share_data(self):
        # GH#36310: left and right must not alias the same underlying buffer
        arr = IntervalIndex.from_breaks(np.arange(5))._data
        left_base = arr._left.base
        assert left_base is None or left_base is not arr._right.base
|  | ||||
|  | ||||
class TestFromTuples(ConstructorTests):
    """Tests specific to IntervalIndex.from_tuples"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by IntervalIndex.from_tuples
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant IntervalIndex.from_tuples tests")

        if len(breaks) == 0:
            return {"data": breaks}

        # pair consecutive breaks, preserving the input container kind
        tuples = list(zip(breaks[:-1], breaks[1:]))
        if isinstance(breaks, (list, tuple)):
            return {"data": tuples}
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            return {"data": breaks._constructor(tuples)}
        return {"data": com.asarray_tuplesafe(tuples)}

    def test_constructor_errors(self):
        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        msg = "IntervalIndex.from_tuples received an invalid item, 2"
        # fixed: msg has no "{t}" placeholder, so the previous
        # msg.format(t=tuples) was a no-op; match on msg directly
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_tuples(tuples)

        # too few/many items
        tuples = [(0, 1), (2,), (3, 4)]
        msg = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
        with pytest.raises(ValueError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

        tuples = [(0, 1), (2, 3, 4), (5, 6)]
        with pytest.raises(ValueError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        # tuple (NA, NA) evaluates the same as NA as an element
        na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
        idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
        idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(idx_na_tuple, idx_na_element)
|  | ||||
|  | ||||
class TestClassConstructors(ConstructorTests):
    """Tests specific to the IntervalIndex/Index constructors"""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def klass(self, request):
        # We use a separate fixture here to include Index.__new__ with dtype kwarg
        return request.param

    @pytest.fixture
    def constructor(self):
        # Hook for the shared ConstructorTests base: the constructor under test.
        return IntervalIndex

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert intervals in breaks format to the kwargs dict expected by the
        IntervalIndex/Index constructors: a list/Categorical/object-ndarray of
        Interval objects under the "data" key, mirroring the container type of
        ``breaks``.
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant for class constructor tests")

        if len(breaks) == 0:
            return {"data": breaks}

        # Pair consecutive breaks into Interval objects; NA left endpoints are
        # passed through as-is so missing intervals stay missing.
        ivs = [
            Interval(left, right, closed) if notna(left) else left
            for left, right in zip(breaks[:-1], breaks[1:])
        ]

        if isinstance(breaks, list):
            return {"data": ivs}
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            # preserve the Categorical container type of the input
            return {"data": breaks._constructor(ivs)}
        return {"data": np.array(ivs, dtype=object)}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """

    def test_constructor_string(self):
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, klass):
        # mismatched closed within intervals with no constructor override
        ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        msg = "intervals must all be closed on the same side"
        with pytest.raises(ValueError, match=msg):
            klass(ivs)

        # scalar
        msg = (
            r"(IntervalIndex|Index)\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=msg):
            klass(5)

        # not an interval; dtype depends on 32bit/windows builds
        msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        with pytest.raises(TypeError, match=msg):
            klass([0, 1])

    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370: an explicit closed= keyword must override whatever closed
        # side would otherwise be inferred from the Interval/IntervalIndex data
        if isinstance(data, IntervalIndex):
            tuples = data.to_tuples()
        else:
            tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        result = constructor(data, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
        values = values_constructor(intervals)
        result = Index(values, dtype=object)

        assert type(result) is Index
        tm.assert_numpy_array_equal(result.values, np.array(values))

    def test_index_mixed_closed(self):
        # GH27172: mixed closed sides cannot form an IntervalIndex, so the
        # constructor falls back to an object-dtype Index
        intervals = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        result = Index(intervals)
        expected = Index(intervals, dtype=object)
        tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
| @pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"]) | ||||
| def test_interval_index_subtype(timezone, inclusive_endpoints_fixture): | ||||
|     # GH#46999 | ||||
|     dates = date_range("2022", periods=3, tz=timezone) | ||||
|     dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]" | ||||
|     result = IntervalIndex.from_arrays( | ||||
|         ["2022-01-01", "2022-01-02"], | ||||
|         ["2022-01-02", "2022-01-03"], | ||||
|         closed=inclusive_endpoints_fixture, | ||||
|         dtype=dtype, | ||||
|     ) | ||||
|     expected = IntervalIndex.from_arrays( | ||||
|         dates[:-1], dates[1:], closed=inclusive_endpoints_fixture | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_dtype_closed_mismatch():
    # GH#38394: passing closed both via dtype and via the constructor keyword
    # must raise when the two disagree, for the Index and the Array alike.
    dtype = IntervalDtype(np.int64, "left")
    msg = "closed keyword does not match dtype.closed"

    for klass in (IntervalIndex, IntervalArray):
        with pytest.raises(ValueError, match=msg):
            klass([], dtype=dtype, closed="neither")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype",
    ["Float64", pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow"))],
)
def test_ea_dtype(dtype):
    # GH#56765: an extension-array (masked / pyarrow) subtype survives
    # from_tuples with an explicit interval dtype, and matches the
    # astype-based construction.
    interval_dtype = IntervalDtype(subtype=dtype, closed="left")
    bins = [(0.0, 0.4), (0.4, 0.6)]

    result = IntervalIndex.from_tuples(bins, closed="left", dtype=interval_dtype)
    assert result.dtype == interval_dtype

    expected = IntervalIndex.from_tuples(bins, closed="left").astype(interval_dtype)
    tm.assert_index_equal(result, expected)
| @ -0,0 +1,36 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     IntervalIndex, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestEquals: | ||||
|     def test_equals(self, closed): | ||||
|         expected = IntervalIndex.from_breaks(np.arange(5), closed=closed) | ||||
|         assert expected.equals(expected) | ||||
|         assert expected.equals(expected.copy()) | ||||
|  | ||||
|         assert not expected.equals(expected.astype(object)) | ||||
|         assert not expected.equals(np.array(expected)) | ||||
|         assert not expected.equals(list(expected)) | ||||
|  | ||||
|         assert not expected.equals([1, 2]) | ||||
|         assert not expected.equals(np.array([1, 2])) | ||||
|         assert not expected.equals(date_range("20130101", periods=2)) | ||||
|  | ||||
|         expected_name1 = IntervalIndex.from_breaks( | ||||
|             np.arange(5), closed=closed, name="foo" | ||||
|         ) | ||||
|         expected_name2 = IntervalIndex.from_breaks( | ||||
|             np.arange(5), closed=closed, name="bar" | ||||
|         ) | ||||
|         assert expected.equals(expected_name1) | ||||
|         assert expected_name1.equals(expected_name2) | ||||
|  | ||||
|         for other_closed in {"left", "right", "both", "neither"} - {closed}: | ||||
|             expected_other_closed = IntervalIndex.from_breaks( | ||||
|                 np.arange(5), closed=other_closed | ||||
|             ) | ||||
|             assert not expected.equals(expected_other_closed) | ||||
| @ -0,0 +1,119 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestIntervalIndexRendering:
    # TODO: this is a test for DataFrame/Series, not IntervalIndex
    @pytest.mark.parametrize(
        "constructor,expected",
        [
            (
                Series,
                (
                    "(0.0, 1.0]    a\n"
                    "NaN           b\n"
                    "(2.0, 3.0]    c\n"
                    "dtype: object"
                ),
            ),
            (DataFrame, ("            0\n(0.0, 1.0]  a\nNaN         b\n(2.0, 3.0]  c")),
        ],
    )
    def test_repr_missing(self, constructor, expected, using_infer_string, request):
        # GH 25984: a missing interval renders as NaN in the row labels
        if using_infer_string and constructor is Series:
            request.applymarker(pytest.mark.xfail(reason="repr different"))
        index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        obj = constructor(list("abc"), index=index)
        result = repr(obj)
        assert result == expected

    def test_repr_floats(self):
        # GH 32553: float endpoints keep full precision in the Series repr

        markers = Series(
            [1, 2],
            index=IntervalIndex(
                [
                    Interval(left, right)
                    for left, right in zip(
                        Index([329.973, 345.137], dtype="float64"),
                        Index([345.137, 360.191], dtype="float64"),
                    )
                ]
            ),
        )
        result = str(markers)
        expected = "(329.973, 345.137]    1\n(345.137, 360.191]    2\ndtype: int64"
        assert result == expected

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    @pytest.mark.parametrize(
        "tuples, closed, expected_data",
        [
            ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
            (
                [(0.5, 1.0), np.nan, (2.0, 3.0)],
                "right",
                ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
            ),
            (
                [
                    (Timestamp("20180101"), Timestamp("20180102")),
                    np.nan,
                    ((Timestamp("20180102"), Timestamp("20180103"))),
                ],
                "both",
                [
                    "[2018-01-01 00:00:00, 2018-01-02 00:00:00]",
                    "NaN",
                    "[2018-01-02 00:00:00, 2018-01-03 00:00:00]",
                ],
            ),
            (
                [
                    (Timedelta("0 days"), Timedelta("1 days")),
                    (Timedelta("1 days"), Timedelta("2 days")),
                    np.nan,
                ],
                "neither",
                [
                    "(0 days 00:00:00, 1 days 00:00:00)",
                    "(1 days 00:00:00, 2 days 00:00:00)",
                    "NaN",
                ],
            ),
        ],
    )
    def test_get_values_for_csv(self, tuples, closed, expected_data):
        # GH 28210: missing intervals render via na_rep; others use str(interval)
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index._get_values_for_csv(na_rep="NaN")
        expected = np.array(expected_data)
        tm.assert_numpy_array_equal(result, expected)

    def test_timestamp_with_timezone(self, unit):
        # GH 55035: repr must carry the unit and timezone in the dtype string
        left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]")
        right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]")
        index = IntervalIndex.from_arrays(left, right)
        result = repr(index)
        expected = (
            "IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], "
            f"dtype='interval[datetime64[{unit}, UTC], right]')"
        )
        assert result == expected
| @ -0,0 +1,674 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import InvalidIndexError | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     CategoricalIndex, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     NaT, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     array, | ||||
|     date_range, | ||||
|     interval_range, | ||||
|     isna, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestGetItem: | ||||
|     def test_getitem(self, closed): | ||||
|         idx = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) | ||||
|         assert idx[0] == Interval(0.0, 1.0, closed=closed) | ||||
|         assert idx[1] == Interval(1.0, 2.0, closed=closed) | ||||
|         assert isna(idx[2]) | ||||
|  | ||||
|         result = idx[0:1] | ||||
|         expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = idx[0:2] | ||||
|         expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = idx[1:3] | ||||
|         expected = IntervalIndex.from_arrays( | ||||
|             (1.0, np.nan), (2.0, np.nan), closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_getitem_2d_deprecated(self): | ||||
|         # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable | ||||
|         idx = IntervalIndex.from_breaks(range(11), closed="right") | ||||
|         with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): | ||||
|             idx[:, None] | ||||
|         with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): | ||||
|             # GH#44051 | ||||
|             idx[True] | ||||
|         with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): | ||||
|             # GH#44051 | ||||
|             idx[False] | ||||
|  | ||||
|  | ||||
| class TestWhere: | ||||
|     def test_where(self, listlike_box): | ||||
|         klass = listlike_box | ||||
|  | ||||
|         idx = IntervalIndex.from_breaks(range(11), closed="right") | ||||
|         cond = [True] * len(idx) | ||||
|         expected = idx | ||||
|         result = expected.where(klass(cond)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         cond = [False] + [True] * len(idx[1:]) | ||||
|         expected = IntervalIndex([np.nan] + idx[1:].tolist()) | ||||
|         result = idx.where(klass(cond)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestTake: | ||||
|     def test_take(self, closed): | ||||
|         index = IntervalIndex.from_breaks(range(11), closed=closed) | ||||
|  | ||||
|         result = index.take(range(10)) | ||||
|         tm.assert_index_equal(result, index) | ||||
|  | ||||
|         result = index.take([0, 0, 1]) | ||||
|         expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
class TestGetLoc:
    """get_loc: Interval keys require exact matches (endpoints and closed
    side); scalar keys locate the interval(s) containing them."""
    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
            if closed == side:
                if bound == [0, 1]:
                    assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                elif bound == [2, 3]:
                    assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                else:
                    with pytest.raises(KeyError, match=msg):
                        idx.get_loc(Interval(*bound, closed=side))
            else:
                # mismatched closed side is never an exact match
                with pytest.raises(KeyError, match=msg):
                    idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):
        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            "neither": {0.5: 0, 2.5: 1},
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        if scalar in correct[closed].keys():
            assert idx.get_loc(scalar) == correct[closed][scalar]
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        if scalar in index[0]:
            result = index.get_loc(scalar)
            assert result == 0
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)

    @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        interval = Interval(left, right, closed=other_closed)
        if interval == index[0]:
            result = index.get_loc(interval)
            assert result == 0
        else:
            with pytest.raises(
                KeyError,
                match=re.escape(f"Interval({left}, {right}, closed='{other_closed}')"),
            ):
                index.get_loc(interval)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)

        # a scalar inside the first interval resolves to position 0
        value = index[0].mid
        result = index.get_loc(value)
        expected = 0
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101", periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        # GH 20636
        index = IntervalIndex.from_arrays(*arrays)

        # overlapping intervals: a scalar hit returns a slice of positions
        value = index[0].mid + Timedelta("12 hours")
        result = index.get_loc(value)
        expected = slice(0, 2, None)
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        # GH 25860: monotonically-decreasing intervals still support get_loc
        index = IntervalIndex.from_arrays(values[1:], values[:-1])
        result = index.get_loc(index[0])
        expected = 0
        assert result == expected

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_loc_non_scalar_errors(self, key):
        # GH 31117: list/tuple keys are not hashable lookups -> InvalidIndexError
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])

        msg = str(key)
        with pytest.raises(InvalidIndexError, match=msg):
            idx.get_loc(key)

    def test_get_indexer_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, Interval(1, 2), np.nan])

        # with duplicate nans, get_loc returns a boolean mask of the matches
        expected = np.array([True, False, True])
        for key in [None, np.nan, NA]:
            assert key in index
            result = index.get_loc(key)
            tm.assert_numpy_array_equal(result, expected)

        # datetimelike NaTs are not interchangeable with float nan here
        for key in [NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]:
            with pytest.raises(KeyError, match=str(key)):
                index.get_loc(key)
|  | ||||
|  | ||||
| class TestGetIndexer: | ||||
|     @pytest.mark.parametrize( | ||||
|         "query, expected", | ||||
|         [ | ||||
|             ([Interval(2, 4, closed="right")], [1]), | ||||
|             ([Interval(2, 4, closed="left")], [-1]), | ||||
|             ([Interval(2, 4, closed="both")], [-1]), | ||||
|             ([Interval(2, 4, closed="neither")], [-1]), | ||||
|             ([Interval(1, 4, closed="right")], [-1]), | ||||
|             ([Interval(0, 4, closed="right")], [-1]), | ||||
|             ([Interval(0.5, 1.5, closed="right")], [-1]), | ||||
|             ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]), | ||||
|             ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]), | ||||
|             ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]), | ||||
|             ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_with_interval(self, query, expected): | ||||
|         tuples = [(0, 2), (2, 4), (5, 7)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed="right") | ||||
|  | ||||
|         result = index.get_indexer(query) | ||||
|         expected = np.array(expected, dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "query, expected", | ||||
|         [ | ||||
|             ([-0.5], [-1]), | ||||
|             ([0], [-1]), | ||||
|             ([0.5], [0]), | ||||
|             ([1], [0]), | ||||
|             ([1.5], [1]), | ||||
|             ([2], [1]), | ||||
|             ([2.5], [-1]), | ||||
|             ([3], [-1]), | ||||
|             ([3.5], [2]), | ||||
|             ([4], [2]), | ||||
|             ([4.5], [-1]), | ||||
|             ([1, 2], [0, 1]), | ||||
|             ([1, 2, 3], [0, 1, -1]), | ||||
|             ([1, 2, 3, 4], [0, 1, -1, 2]), | ||||
|             ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_with_int_and_float(self, query, expected): | ||||
|         tuples = [(0, 1), (1, 2), (3, 4)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed="right") | ||||
|  | ||||
|         result = index.get_indexer(query) | ||||
|         expected = np.array(expected, dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)]) | ||||
|     def test_get_indexer_length_one(self, item, closed): | ||||
|         # GH 17284 | ||||
|         index = IntervalIndex.from_tuples([(0, 5)], closed=closed) | ||||
|         result = index.get_indexer(item) | ||||
|         expected = np.array([0] * len(item), dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("size", [1, 5]) | ||||
|     def test_get_indexer_length_one_interval(self, size, closed): | ||||
|         # GH 17284 | ||||
|         index = IntervalIndex.from_tuples([(0, 5)], closed=closed) | ||||
|         result = index.get_indexer([Interval(0, 5, closed)] * size) | ||||
|         expected = np.array([0] * size, dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "target", | ||||
|         [ | ||||
|             IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]), | ||||
|             IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]), | ||||
|             IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"), | ||||
|             [-1, 0, 0.5, 1, 2, 2.5, np.nan], | ||||
|             ["foo", "foo", "bar", "baz"], | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_categorical(self, target, ordered): | ||||
|         # GH 30063: categorical and non-categorical results should be consistent | ||||
|         index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)]) | ||||
|         categorical_target = CategoricalIndex(target, ordered=ordered) | ||||
|  | ||||
|         result = index.get_indexer(categorical_target) | ||||
|         expected = index.get_indexer(target) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings( | ||||
|         "ignore:invalid value encountered in cast:RuntimeWarning" | ||||
|     ) | ||||
|     def test_get_indexer_categorical_with_nans(self): | ||||
|         # GH#41934 nans in both index and in target | ||||
|         ii = IntervalIndex.from_breaks(range(5)) | ||||
|         ii2 = ii.append(IntervalIndex([np.nan])) | ||||
|         ci2 = CategoricalIndex(ii2) | ||||
|  | ||||
|         result = ii2.get_indexer(ci2) | ||||
|         expected = np.arange(5, dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # not-all-matches | ||||
|         result = ii2[1:].get_indexer(ci2[::-1]) | ||||
|         expected = np.array([3, 2, 1, 0, -1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # non-unique target, non-unique nans | ||||
|         result = ii2.get_indexer(ci2.append(ci2)) | ||||
|         expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_datetime(self): | ||||
|         ii = IntervalIndex.from_breaks(date_range("2018-01-01", periods=4)) | ||||
|         # TODO: with mismatched resolution get_indexer currently raises; | ||||
|         #  this should probably coerce? | ||||
|         target = DatetimeIndex(["2018-01-02"], dtype="M8[ns]") | ||||
|         result = ii.get_indexer(target) | ||||
|         expected = np.array([0], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = ii.get_indexer(target.astype(str)) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # https://github.com/pandas-dev/pandas/issues/47772 | ||||
|         result = ii.get_indexer(target.asi8) | ||||
|         expected = np.array([-1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "tuples, closed", | ||||
|         [ | ||||
|             ([(0, 2), (1, 3), (3, 4)], "neither"), | ||||
|             ([(0, 5), (1, 4), (6, 7)], "left"), | ||||
|             ([(0, 1), (0, 1), (1, 2)], "right"), | ||||
|             ([(0, 1), (2, 3), (3, 4)], "both"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_errors(self, tuples, closed): | ||||
|         # IntervalIndex needs non-overlapping for uniqueness when querying | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|  | ||||
|         msg = ( | ||||
|             "cannot handle overlapping indices; use " | ||||
|             "IntervalIndex.get_indexer_non_unique" | ||||
|         ) | ||||
|         with pytest.raises(InvalidIndexError, match=msg): | ||||
|             index.get_indexer([0, 2]) | ||||
|  | ||||
    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        """get_indexer_non_unique over overlapping left-closed intervals.

        ``expected`` is a pair: (positional matches for each query value in
        order, positions within ``query`` that matched nothing).
        """
        tuples = [(0, 2.5), (1, 3), (2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed="left")

        result_indexer, result_missing = index.get_indexer_non_unique(query)
        expected_indexer = np.array(expected[0], dtype="intp")
        expected_missing = np.array(expected[1], dtype="intp")

        tm.assert_numpy_array_equal(result_indexer, expected_indexer)
        tm.assert_numpy_array_equal(result_missing, expected_missing)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..
|  | ||||
|     def test_get_indexer_non_monotonic(self): | ||||
|         # GH 16410 | ||||
|         idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)]) | ||||
|         idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)]) | ||||
|         result = idx1.get_indexer(idx2) | ||||
|         expected = np.array([2, 0, -1, -1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = idx1.get_indexer(idx1[1:]) | ||||
|         expected = np.array([1, 2], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_with_nans(self): | ||||
|         # GH#41831 | ||||
|         index = IntervalIndex([np.nan, np.nan]) | ||||
|         other = IntervalIndex([np.nan]) | ||||
|  | ||||
|         assert not index._index_as_unique | ||||
|  | ||||
|         result = index.get_indexer_for(other) | ||||
|         expected = np.array([0, 1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_index_non_unique_non_monotonic(self): | ||||
|         # GH#44084 (root cause) | ||||
|         index = IntervalIndex.from_tuples( | ||||
|             [(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)] | ||||
|         ) | ||||
|  | ||||
|         result, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)]) | ||||
|         expected = np.array([1, 3], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_multiindex_with_intervals(self): | ||||
|         # GH#44084 (MultiIndex case as reported) | ||||
|         interval_index = IntervalIndex.from_tuples( | ||||
|             [(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval" | ||||
|         ) | ||||
|         foo_index = Index([1, 2, 3], name="foo") | ||||
|  | ||||
|         multi_index = MultiIndex.from_product([foo_index, interval_index]) | ||||
|  | ||||
|         result = multi_index.get_level_values("interval").get_indexer_for( | ||||
|             [Interval(0.0, 1.0)] | ||||
|         ) | ||||
|         expected = np.array([1, 4, 7], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("box", [IntervalIndex, array, list]) | ||||
|     def test_get_indexer_interval_index(self, box): | ||||
|         # GH#30178 | ||||
|         rng = period_range("2022-07-01", freq="D", periods=3) | ||||
|         idx = box(interval_range(Timestamp("2022-07-01"), freq="3D", periods=3)) | ||||
|  | ||||
|         actual = rng.get_indexer(idx) | ||||
|         expected = np.array([-1, -1, -1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|     def test_get_indexer_read_only(self): | ||||
|         idx = interval_range(start=0, end=5) | ||||
|         arr = np.array([1, 2]) | ||||
|         arr.flags.writeable = False | ||||
|         result = idx.get_indexer(arr) | ||||
|         expected = np.array([0, 1]) | ||||
|         tm.assert_numpy_array_equal(result, expected, check_dtype=False) | ||||
|  | ||||
|         result = idx.get_indexer_non_unique(arr)[0] | ||||
|         tm.assert_numpy_array_equal(result, expected, check_dtype=False) | ||||
|  | ||||
|  | ||||
class TestSliceLocs:
    """Tests for IntervalIndex.slice_locs with Interval and scalar bounds."""

    def test_slice_locs_with_interval(self):
        """slice_locs with Interval bounds: monotonic (both directions),
        sorted duplicates, and unsorted duplicates (which raise KeyError)."""
        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 1)

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (2, 1)
        assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (0, 3)

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

        # unsorted duplicates: non-adjacent repeats make slice bounds ambiguous
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get left slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get left slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(0, 2))

        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get right slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(end=Interval(0, 2))

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get right slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))

        # another unsorted duplicates: here the repeats ARE adjacent, so
        # slicing on the duplicated label succeeds
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        """slice_locs with scalar bounds on non-overlapping indexes."""
        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])

        assert index.slice_locs(0, 1) == (0, 1)
        assert index.slice_locs(0, 2) == (0, 2)
        assert index.slice_locs(0, 3) == (0, 2)
        assert index.slice_locs(3, 1) == (2, 1)
        assert index.slice_locs(3, 4) == (2, 3)
        assert index.slice_locs(0, 4) == (0, 3)

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        assert index.slice_locs(0, 1) == (3, 3)
        assert index.slice_locs(0, 2) == (3, 2)
        assert index.slice_locs(0, 3) == (3, 1)
        assert index.slice_locs(3, 1) == (1, 3)
        assert index.slice_locs(3, 4) == (1, 1)
        assert index.slice_locs(0, 4) == (3, 1)

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        """Scalar slice bounds on overlapping/unsorted indexes raise KeyError."""
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)
        with pytest.raises(
            KeyError,
            match=(
                "'can only get slices from an IntervalIndex if bounds are "
                "non-overlapping and all monotonic increasing or decreasing'"
            ),
        ):
            index.slice_locs(start, stop)
|  | ||||
|  | ||||
class TestPutmask:
    """putmask on datetime- and timedelta-backed IntervalIndexes (GH#37968)."""

    @pytest.mark.parametrize("tz", ["US/Pacific", None])
    def test_putmask_dt64(self, tz):
        # GH#37968
        timestamps = date_range("2016-01-01", periods=9, tz=tz)
        idx = IntervalIndex.from_breaks(timestamps)
        selector = np.zeros(idx.shape, dtype=bool)
        selector[0:3] = True

        # the first three intervals are replaced by the last one
        filled = idx.putmask(selector, idx[-1])
        expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:]))
        tm.assert_index_equal(filled, expected)

    def test_putmask_td64(self):
        # GH#37968
        timestamps = date_range("2016-01-01", periods=9)
        deltas = timestamps - timestamps[0]
        idx = IntervalIndex.from_breaks(deltas)
        selector = np.zeros(idx.shape, dtype=bool)
        selector[0:3] = True

        # the first three intervals are replaced by the last one
        filled = idx.putmask(selector, idx[-1])
        expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:]))
        tm.assert_index_equal(filled, expected)
|  | ||||
|  | ||||
class TestContains:
    """Membership via ``in`` (``__contains__``), not the ``.contains`` method."""

    def test_contains_dunder(self):
        idx = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # scalars are never members; __contains__ requires an exact Interval
        for scalar in (0, 1, 2):
            assert scalar not in idx

        # only a perfectly matching interval is contained
        assert Interval(0, 1, closed="right") in idx

        # anything with different endpoints or closed-ness is not
        non_members = [
            Interval(0, 2, closed="right"),
            Interval(0, 0.5, closed="right"),
            Interval(3, 5, closed="right"),
            Interval(-1, 0, closed="left"),
            Interval(0, 1, closed="left"),
            Interval(0, 1, closed="both"),
        ]
        for iv in non_members:
            assert iv not in idx
| @ -0,0 +1,918 @@ | ||||
| from itertools import permutations | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     interval_range, | ||||
|     isna, | ||||
|     notna, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| import pandas.core.common as com | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    # Index ``name`` value used by tests: exercises both unnamed and named cases.
    return request.param
|  | ||||
|  | ||||
| class TestIntervalIndex: | ||||
|     index = IntervalIndex.from_arrays([0, 1], [1, 2]) | ||||
|  | ||||
|     def create_index(self, closed="right"): | ||||
|         return IntervalIndex.from_breaks(range(11), closed=closed) | ||||
|  | ||||
|     def create_index_with_nan(self, closed="right"): | ||||
|         mask = [True, False] + [True] * 8 | ||||
|         return IntervalIndex.from_arrays( | ||||
|             np.where(mask, np.arange(10), np.nan), | ||||
|             np.where(mask, np.arange(1, 11), np.nan), | ||||
|             closed=closed, | ||||
|         ) | ||||
|  | ||||
    def test_properties(self, closed):
        """Basic container properties (len/size/shape, left/right/mid,
        closed, object-array conversion), with and without NaN intervals."""
        index = self.create_index(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10,)

        tm.assert_index_equal(index.left, Index(np.arange(10, dtype=np.int64)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11, dtype=np.int64)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5, dtype=np.float64)))

        assert index.closed == closed

        # np.asarray yields an object array of Interval scalars
        ivs = [
            Interval(left, right, closed)
            for left, right in zip(range(10), range(1, 11))
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans: the NaN entry does not shrink len/size/shape
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10,)

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        # NaN intervals convert to np.nan in the object array
        ivs = [
            Interval(left, right, closed) if notna(left) else np.nan
            for left, right in zip(expected_left, expected_right)
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)
|  | ||||
    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            date_range("2017-01-01", "2017-01-04"),
            pytest.param(
                date_range("2017-01-01", "2017-01-04", unit="s"),
                marks=pytest.mark.xfail(reason="mismatched result unit"),
            ),
            pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5h", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        """``.length`` equals the per-interval ``Interval.length`` values,
        with NA propagating through unchanged."""
        # GH 18789
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA: length of a missing interval is NA, not an error
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)
|  | ||||
|     def test_with_nans(self, closed): | ||||
|         index = self.create_index(closed=closed) | ||||
|         assert index.hasnans is False | ||||
|  | ||||
|         result = index.isna() | ||||
|         expected = np.zeros(len(index), dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = index.notna() | ||||
|         expected = np.ones(len(index), dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         index = self.create_index_with_nan(closed=closed) | ||||
|         assert index.hasnans is True | ||||
|  | ||||
|         result = index.isna() | ||||
|         expected = np.array([False, True] + [False] * (len(index) - 2)) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = index.notna() | ||||
|         expected = np.array([True, False] + [True] * (len(index) - 2)) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_copy(self, closed): | ||||
|         expected = self.create_index(closed=closed) | ||||
|  | ||||
|         result = expected.copy() | ||||
|         assert result.equals(expected) | ||||
|  | ||||
|         result = expected.copy(deep=True) | ||||
|         assert result.equals(expected) | ||||
|         assert result.left is not expected.left | ||||
|  | ||||
|     def test_ensure_copied_data(self, closed): | ||||
|         # exercise the copy flag in the constructor | ||||
|  | ||||
|         # not copying | ||||
|         index = self.create_index(closed=closed) | ||||
|         result = IntervalIndex(index, copy=False) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             index.left.values, result.left.values, check_same="same" | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             index.right.values, result.right.values, check_same="same" | ||||
|         ) | ||||
|  | ||||
|         # by-definition make a copy | ||||
|         result = IntervalIndex(np.array(index), copy=False) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             index.left.values, result.left.values, check_same="copy" | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             index.right.values, result.right.values, check_same="copy" | ||||
|         ) | ||||
|  | ||||
|     def test_delete(self, closed): | ||||
|         breaks = np.arange(1, 11, dtype=np.int64) | ||||
|         expected = IntervalIndex.from_breaks(breaks, closed=closed) | ||||
|         result = self.create_index(closed=closed).delete(0) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data", | ||||
|         [ | ||||
|             interval_range(0, periods=10, closed="neither"), | ||||
|             interval_range(1.7, periods=8, freq=2.5, closed="both"), | ||||
|             interval_range(Timestamp("20170101"), periods=12, closed="left"), | ||||
|             interval_range(Timedelta("1 day"), periods=6, closed="right"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_insert(self, data): | ||||
|         item = data[0] | ||||
|         idx_item = IntervalIndex([item]) | ||||
|  | ||||
|         # start | ||||
|         expected = idx_item.append(data) | ||||
|         result = data.insert(0, item) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # end | ||||
|         expected = data.append(idx_item) | ||||
|         result = data.insert(len(data), item) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # mid | ||||
|         expected = data[:3].append(idx_item).append(data[3:]) | ||||
|         result = data.insert(3, item) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # invalid type | ||||
|         res = data.insert(1, "foo") | ||||
|         expected = data.astype(object).insert(1, "foo") | ||||
|         tm.assert_index_equal(res, expected) | ||||
|  | ||||
|         msg = "can only insert Interval objects and NA into an IntervalArray" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             data._data.insert(1, "foo") | ||||
|  | ||||
|         # invalid closed | ||||
|         msg = "'value.closed' is 'left', expected 'right'." | ||||
|         for closed in {"left", "right", "both", "neither"} - {item.closed}: | ||||
|             msg = f"'value.closed' is '{closed}', expected '{item.closed}'." | ||||
|             bad_item = Interval(item.left, item.right, closed=closed) | ||||
|             res = data.insert(1, bad_item) | ||||
|             expected = data.astype(object).insert(1, bad_item) | ||||
|             tm.assert_index_equal(res, expected) | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 data._data.insert(1, bad_item) | ||||
|  | ||||
|         # GH 18295 (test missing) | ||||
|         na_idx = IntervalIndex([np.nan], closed=data.closed) | ||||
|         for na in [np.nan, None, pd.NA]: | ||||
|             expected = data[:1].append(na_idx).append(data[1:]) | ||||
|             result = data.insert(1, na) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         if data.left.dtype.kind not in ["m", "M"]: | ||||
|             # trying to insert pd.NaT into a numeric-dtyped Index should cast | ||||
|             expected = data.astype(object).insert(1, pd.NaT) | ||||
|  | ||||
|             msg = "can only insert Interval objects and NA into an IntervalArray" | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 data._data.insert(1, pd.NaT) | ||||
|  | ||||
|         result = data.insert(1, pd.NaT) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_is_unique_interval(self, closed): | ||||
|         """ | ||||
|         Interval specific tests for is_unique in addition to base class tests | ||||
|         """ | ||||
|         # unique overlapping - distinct endpoints | ||||
|         idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) | ||||
|         assert idx.is_unique is True | ||||
|  | ||||
|         # unique overlapping - shared endpoints | ||||
|         idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) | ||||
|         assert idx.is_unique is True | ||||
|  | ||||
|         # unique nested | ||||
|         idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) | ||||
|         assert idx.is_unique is True | ||||
|  | ||||
|         # unique NaN | ||||
|         idx = IntervalIndex.from_tuples([(np.nan, np.nan)], closed=closed) | ||||
|         assert idx.is_unique is True | ||||
|  | ||||
|         # non-unique NaN | ||||
|         idx = IntervalIndex.from_tuples( | ||||
|             [(np.nan, np.nan), (np.nan, np.nan)], closed=closed | ||||
|         ) | ||||
|         assert idx.is_unique is False | ||||
|  | ||||
    def test_monotonic(self, closed):
        """is_monotonic_increasing/decreasing and their strict variants
        across non-overlapping, overlapping, shared-endpoint, stationary,
        and empty interval layouts."""
        # increasing non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing non-overlapping
        idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping
        idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping
        idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered overlapping
        idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping shared endpoints
        idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping shared endpoints
        idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # stationary: monotonic both ways but strictly neither
        idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is False

        # empty: vacuously monotonic in every sense
        idx = IntervalIndex([], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True
|  | ||||
|     def test_is_monotonic_with_nans(self): | ||||
|         # GH#41831 | ||||
|         index = IntervalIndex([np.nan, np.nan]) | ||||
|  | ||||
|         assert not index.is_monotonic_increasing | ||||
|         assert not index._is_strictly_monotonic_increasing | ||||
|         assert not index.is_monotonic_increasing | ||||
|         assert not index._is_strictly_monotonic_decreasing | ||||
|         assert not index.is_monotonic_decreasing | ||||
|  | ||||
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_maybe_convert_i8(self, breaks):
        """_maybe_convert_i8 maps datetimelike inputs (index, interval,
        scalar, list) onto their i8 (nanosecond integer) representation."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        # intervalindex -> IntervalIndex over i8 breaks
        result = index._maybe_convert_i8(index)
        expected = IntervalIndex.from_breaks(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # interval -> Interval with i8 endpoints
        interval = Interval(breaks[0], breaks[1])
        result = index._maybe_convert_i8(interval)
        expected = Interval(breaks[0]._value, breaks[1]._value)
        assert result == expected

        # datetimelike index -> Index of i8 values
        result = index._maybe_convert_i8(breaks)
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # datetimelike scalar -> plain i8 integer
        result = index._maybe_convert_i8(breaks[0])
        expected = breaks[0]._value
        assert result == expected

        # list-like of datetimelike scalars -> Index of i8 values
        result = index._maybe_convert_i8(list(breaks))
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "breaks", | ||||
|         [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)], | ||||
|     ) | ||||
|     def test_maybe_convert_i8_nat(self, breaks): | ||||
|         # GH 20636 | ||||
|         index = IntervalIndex.from_breaks(breaks) | ||||
|  | ||||
|         to_convert = breaks._constructor([pd.NaT] * 3).as_unit("ns") | ||||
|         expected = Index([np.nan] * 3, dtype=np.float64) | ||||
|         result = index._maybe_convert_i8(to_convert) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         to_convert = to_convert.insert(0, breaks[0]) | ||||
|         expected = expected.insert(0, float(breaks[0]._value)) | ||||
|         result = index._maybe_convert_i8(to_convert) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "make_key", | ||||
|         [lambda breaks: breaks, list], | ||||
|         ids=["lambda", "list"], | ||||
|     ) | ||||
|     def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype): | ||||
|         # GH 20636 | ||||
|         breaks = np.arange(5, dtype=any_real_numpy_dtype) | ||||
|         index = IntervalIndex.from_breaks(breaks) | ||||
|         key = make_key(breaks) | ||||
|  | ||||
|         result = index._maybe_convert_i8(key) | ||||
|         kind = breaks.dtype.kind | ||||
|         expected_dtype = {"i": np.int64, "u": np.uint64, "f": np.float64}[kind] | ||||
|         expected = Index(key, dtype=expected_dtype) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "make_key", | ||||
|         [ | ||||
|             IntervalIndex.from_breaks, | ||||
|             lambda breaks: Interval(breaks[0], breaks[1]), | ||||
|             lambda breaks: breaks[0], | ||||
|         ], | ||||
|         ids=["IntervalIndex", "Interval", "scalar"], | ||||
|     ) | ||||
|     def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype): | ||||
|         # GH 20636 | ||||
|         breaks = np.arange(5, dtype=any_real_numpy_dtype) | ||||
|         index = IntervalIndex.from_breaks(breaks) | ||||
|         key = make_key(breaks) | ||||
|  | ||||
|         # test if _maybe_convert_i8 won't change key if an Interval or IntervalIndex | ||||
|         result = index._maybe_convert_i8(key) | ||||
|         assert result is key | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "breaks1, breaks2", | ||||
|         permutations( | ||||
|             [ | ||||
|                 date_range("20180101", periods=4), | ||||
|                 date_range("20180101", periods=4, tz="US/Eastern"), | ||||
|                 timedelta_range("0 days", periods=4), | ||||
|             ], | ||||
|             2, | ||||
|         ), | ||||
|         ids=lambda x: str(x.dtype), | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "make_key", | ||||
|         [ | ||||
|             IntervalIndex.from_breaks, | ||||
|             lambda breaks: Interval(breaks[0], breaks[1]), | ||||
|             lambda breaks: breaks, | ||||
|             lambda breaks: breaks[0], | ||||
|             list, | ||||
|         ], | ||||
|         ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], | ||||
|     ) | ||||
|     def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): | ||||
|         # GH 20636 | ||||
|         index = IntervalIndex.from_breaks(breaks1) | ||||
|         key = make_key(breaks2) | ||||
|  | ||||
|         msg = ( | ||||
|             f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with " | ||||
|             f"values of dtype {breaks2.dtype}" | ||||
|         ) | ||||
|         msg = re.escape(msg) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             index._maybe_convert_i8(key) | ||||
|  | ||||
|     def test_contains_method(self): | ||||
|         # can select values that are IN the range of a value | ||||
|         i = IntervalIndex.from_arrays([0, 1], [1, 2]) | ||||
|  | ||||
|         expected = np.array([False, False], dtype="bool") | ||||
|         actual = i.contains(0) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|         actual = i.contains(3) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|         expected = np.array([True, False], dtype="bool") | ||||
|         actual = i.contains(0.5) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|         actual = i.contains(1) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|         # __contains__ not implemented for "interval in interval", follow | ||||
|         # that for the contains method for now | ||||
|         with pytest.raises( | ||||
|             NotImplementedError, match="contains not implemented for two" | ||||
|         ): | ||||
|             i.contains(Interval(0, 1)) | ||||
|  | ||||
|     def test_dropna(self, closed): | ||||
|         expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) | ||||
|  | ||||
|         ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) | ||||
|         result = ii.dropna() | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) | ||||
|         result = ii.dropna() | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_non_contiguous(self, closed): | ||||
|         index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) | ||||
|         target = [0.5, 1.5, 2.5] | ||||
|         actual = index.get_indexer(target) | ||||
|         expected = np.array([0, -1, 1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|         assert 1.5 not in index | ||||
|  | ||||
|     def test_isin(self, closed): | ||||
|         index = self.create_index(closed=closed) | ||||
|  | ||||
|         expected = np.array([True] + [False] * (len(index) - 1)) | ||||
|         result = index.isin(index[:1]) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = index.isin([index[0]]) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) | ||||
|         expected = np.array([True] * (len(index) - 1) + [False]) | ||||
|         result = index.isin(other) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = index.isin(other.tolist()) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         for other_closed in ["right", "left", "both", "neither"]: | ||||
|             other = self.create_index(closed=other_closed) | ||||
|             expected = np.repeat(closed == other_closed, len(index)) | ||||
|             result = index.isin(other) | ||||
|             tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|             result = index.isin(other.tolist()) | ||||
|             tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
    def test_comparison(self):
        # Comparison semantics of an IntervalIndex against Interval scalars,
        # itself, another IntervalIndex, its IntervalArray, and invalid
        # operands.
        # NOTE(review): assumes self.index is a 2-interval index over breaks
        # [0, 1, 2] with int64 subtype, closed="right" -- confirm fixture.

        # scalar Interval operand: compared elementwise against each interval
        actual = Interval(0, 1) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)

        actual = Interval(0.5, 1.5) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)
        # reflected form of the same comparison gives the same result
        actual = self.index > Interval(0.5, 1.5)
        tm.assert_numpy_array_equal(actual, expected)

        # non-strict comparisons with itself are all-True
        actual = self.index == self.index
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index <= self.index
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index >= self.index
        tm.assert_numpy_array_equal(actual, expected)

        # strict comparisons with itself are all-False
        actual = self.index < self.index
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > self.index
        tm.assert_numpy_array_equal(actual, expected)

        # same breaks but different closed-ness: no element compares equal
        actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
        tm.assert_numpy_array_equal(actual, expected)

        # comparisons against the backing IntervalArray (.values) agree in
        # both operand orders
        actual = self.index == self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index.values == self.index
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index <= self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index != self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index > self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index.values > self.index
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # invalid comparisons
        # equality with non-interval operands is elementwise False, no error
        actual = self.index == 0
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index == self.index.left
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # ordered comparisons with non-interval operands raise TypeError
        msg = "|".join(
            [
                "not supported between instances of 'int' and '.*.Interval'",
                r"Invalid comparison between dtype=interval\[int64, right\] and ",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            self.index > 0
        with pytest.raises(TypeError, match=msg):
            self.index <= 0
        with pytest.raises(TypeError, match=msg):
            self.index > np.arange(2)

        # length mismatch is a ValueError regardless of operand contents
        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            self.index > np.arange(3)
|  | ||||
|     def test_missing_values(self, closed): | ||||
|         idx = Index( | ||||
|             [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)] | ||||
|         ) | ||||
|         idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) | ||||
|         assert idx.equals(idx2) | ||||
|  | ||||
|         msg = ( | ||||
|             "missing values must be missing in the same location both left " | ||||
|             "and right sides" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntervalIndex.from_arrays( | ||||
|                 [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed | ||||
|             ) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) | ||||
|  | ||||
|     def test_sort_values(self, closed): | ||||
|         index = self.create_index(closed=closed) | ||||
|  | ||||
|         result = index.sort_values() | ||||
|         tm.assert_index_equal(result, index) | ||||
|  | ||||
|         result = index.sort_values(ascending=False) | ||||
|         tm.assert_index_equal(result, index[::-1]) | ||||
|  | ||||
|         # with nan | ||||
|         index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) | ||||
|  | ||||
|         result = index.sort_values() | ||||
|         expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = index.sort_values(ascending=False, na_position="first") | ||||
|         expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_datetime(self, tz):
        # End-to-end checks for an IntervalIndex over daily timestamp breaks,
        # run both timezone-naive and tz-aware.
        start = Timestamp("2000-01-01", tz=tz)
        dates = date_range(start=start, periods=10)
        index = IntervalIndex.from_breaks(dates)

        # test mid: the midpoint of each daily interval is the following noon
        start = Timestamp("2000-01-01T12:00", tz=tz)
        expected = date_range(start=start, periods=9)
        tm.assert_index_equal(index.mid, expected)

        # __contains__ doesn't check individual points
        assert Timestamp("2000-01-01", tz=tz) not in index
        assert Timestamp("2000-01-01T12", tz=tz) not in index
        assert Timestamp("2000-01-02", tz=tz) not in index
        # membership holds only for intervals that are actual elements
        iv_true = Interval(
            Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)
        )
        iv_false = Interval(
            Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)
        )
        assert iv_true in index
        assert iv_false not in index

        # .contains does check individual points
        # (the first break is not contained -- presumably the intervals are
        # left-open / closed="right" by default; confirm against from_breaks)
        assert not index.contains(Timestamp("2000-01-01", tz=tz)).any()
        assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any()
        assert index.contains(Timestamp("2000-01-02", tz=tz)).any()

        # test get_indexer: targets before the first interval map to -1
        start = Timestamp("1999-12-31T12:00", tz=tz)
        target = date_range(start=start, periods=7, freq="12h")
        actual = index.get_indexer(target)
        expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        # ...and targets past the last interval map to -1 as well
        start = Timestamp("2000-01-08T18:00", tz=tz)
        target = date_range(start=start, periods=7, freq="6h")
        actual = index.get_indexer(target)
        expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)
|  | ||||
|     def test_append(self, closed): | ||||
|         index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) | ||||
|         index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) | ||||
|  | ||||
|         result = index1.append(index2) | ||||
|         expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = index1.append([index1, index2]) | ||||
|         expected = IntervalIndex.from_arrays( | ||||
|             [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         for other_closed in {"left", "right", "both", "neither"} - {closed}: | ||||
|             index_other_closed = IntervalIndex.from_arrays( | ||||
|                 [0, 1], [1, 2], closed=other_closed | ||||
|             ) | ||||
|             result = index1.append(index_other_closed) | ||||
|             expected = index1.astype(object).append(index_other_closed.astype(object)) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_is_non_overlapping_monotonic(self, closed): | ||||
|         # Should be True in all cases | ||||
|         tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] | ||||
|         idx = IntervalIndex.from_tuples(tpls, closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is True | ||||
|  | ||||
|         idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is True | ||||
|  | ||||
|         # Should be False in all cases (overlapping) | ||||
|         tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] | ||||
|         idx = IntervalIndex.from_tuples(tpls, closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is False | ||||
|  | ||||
|         idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is False | ||||
|  | ||||
|         # Should be False in all cases (non-monotonic) | ||||
|         tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] | ||||
|         idx = IntervalIndex.from_tuples(tpls, closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is False | ||||
|  | ||||
|         idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is False | ||||
|  | ||||
|         # Should be False for closed='both', otherwise True (GH16560) | ||||
|         if closed == "both": | ||||
|             idx = IntervalIndex.from_breaks(range(4), closed=closed) | ||||
|             assert idx.is_non_overlapping_monotonic is False | ||||
|         else: | ||||
|             idx = IntervalIndex.from_breaks(range(4), closed=closed) | ||||
|             assert idx.is_non_overlapping_monotonic is True | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start, shift, na_value", | ||||
|         [ | ||||
|             (0, 1, np.nan), | ||||
|             (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), | ||||
|             (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), | ||||
|         ], | ||||
|     ) | ||||
|     def test_is_overlapping(self, start, shift, na_value, closed): | ||||
|         # GH 23309 | ||||
|         # see test_interval_tree.py for extensive tests; interface tests here | ||||
|  | ||||
|         # non-overlapping | ||||
|         tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         assert index.is_overlapping is False | ||||
|  | ||||
|         # non-overlapping with NA | ||||
|         tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         assert index.is_overlapping is False | ||||
|  | ||||
|         # overlapping | ||||
|         tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         assert index.is_overlapping is True | ||||
|  | ||||
|         # overlapping with NA | ||||
|         tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         assert index.is_overlapping is True | ||||
|  | ||||
|         # common endpoints | ||||
|         tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         result = index.is_overlapping | ||||
|         expected = closed == "both" | ||||
|         assert result is expected | ||||
|  | ||||
|         # common endpoints with NA | ||||
|         tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         result = index.is_overlapping | ||||
|         assert result is expected | ||||
|  | ||||
|         # intervals with duplicate left values | ||||
|         a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85] | ||||
|         b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90] | ||||
|         index = IntervalIndex.from_arrays(a, b, closed="right") | ||||
|         result = index.is_overlapping | ||||
|         assert result is False | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "tuples", | ||||
|         [ | ||||
|             list(zip(range(10), range(1, 11))), | ||||
|             list( | ||||
|                 zip( | ||||
|                     date_range("20170101", periods=10), | ||||
|                     date_range("20170101", periods=10), | ||||
|                 ) | ||||
|             ), | ||||
|             list( | ||||
|                 zip( | ||||
|                     timedelta_range("0 days", periods=10), | ||||
|                     timedelta_range("1 day", periods=10), | ||||
|                 ) | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_to_tuples(self, tuples): | ||||
|         # GH 18756 | ||||
|         idx = IntervalIndex.from_tuples(tuples) | ||||
|         result = idx.to_tuples() | ||||
|         expected = Index(com.asarray_tuplesafe(tuples)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "tuples", | ||||
|         [ | ||||
|             list(zip(range(10), range(1, 11))) + [np.nan], | ||||
|             list( | ||||
|                 zip( | ||||
|                     date_range("20170101", periods=10), | ||||
|                     date_range("20170101", periods=10), | ||||
|                 ) | ||||
|             ) | ||||
|             + [np.nan], | ||||
|             list( | ||||
|                 zip( | ||||
|                     timedelta_range("0 days", periods=10), | ||||
|                     timedelta_range("1 day", periods=10), | ||||
|                 ) | ||||
|             ) | ||||
|             + [np.nan], | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("na_tuple", [True, False]) | ||||
|     def test_to_tuples_na(self, tuples, na_tuple): | ||||
|         # GH 18756 | ||||
|         idx = IntervalIndex.from_tuples(tuples) | ||||
|         result = idx.to_tuples(na_tuple=na_tuple) | ||||
|  | ||||
|         # check the non-NA portion | ||||
|         expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) | ||||
|         result_notna = result[:-1] | ||||
|         tm.assert_index_equal(result_notna, expected_notna) | ||||
|  | ||||
|         # check the NA portion | ||||
|         result_na = result[-1] | ||||
|         if na_tuple: | ||||
|             assert isinstance(result_na, tuple) | ||||
|             assert len(result_na) == 2 | ||||
|             assert all(isna(x) for x in result_na) | ||||
|         else: | ||||
|             assert isna(result_na) | ||||
|  | ||||
|     def test_nbytes(self): | ||||
|         # GH 19209 | ||||
|         left = np.arange(0, 4, dtype="i8") | ||||
|         right = np.arange(1, 5, dtype="i8") | ||||
|  | ||||
|         result = IntervalIndex.from_arrays(left, right).nbytes | ||||
|         expected = 64  # 4 * 8 * 2 | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) | ||||
|     def test_set_closed(self, name, closed, new_closed): | ||||
|         # GH 21670 | ||||
|         index = interval_range(0, 5, closed=closed, name=name) | ||||
|         result = index.set_closed(new_closed) | ||||
|         expected = interval_range(0, 5, closed=new_closed, name=name) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) | ||||
|     def test_set_closed_errors(self, bad_closed): | ||||
|         # GH 21670 | ||||
|         index = interval_range(0, 5) | ||||
|         msg = f"invalid option for 'closed': {bad_closed}" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             index.set_closed(bad_closed) | ||||
|  | ||||
|     def test_is_all_dates(self): | ||||
|         # GH 23576 | ||||
|         year_2017 = Interval( | ||||
|             Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00") | ||||
|         ) | ||||
|         year_2017_index = IntervalIndex([year_2017]) | ||||
|         assert not year_2017_index._is_all_dates | ||||
|  | ||||
|  | ||||
def test_dir():
    # GH#27571 dir(interval_index) should not raise, and should not expose
    # a string accessor
    index = IntervalIndex.from_arrays([0, 1], [1, 2])
    assert "str" not in dir(index)
|  | ||||
|  | ||||
| def test_searchsorted_different_argument_classes(listlike_box): | ||||
|     # https://github.com/pandas-dev/pandas/issues/32762 | ||||
|     values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) | ||||
|     result = values.searchsorted(listlike_box(values)) | ||||
|     expected = np.array([0, 1], dtype=result.dtype) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = values._data.searchsorted(listlike_box(values)) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    # non-Interval search values cannot be ordered against intervals
    intervals = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    with pytest.raises(TypeError, match=msg):
        intervals.searchsorted(arg)
| @ -0,0 +1,369 @@ | ||||
| from datetime import timedelta | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_integer | ||||
|  | ||||
| from pandas import ( | ||||
|     DateOffset, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     interval_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.tseries.offsets import Day | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Fixture cycling through index ``name`` values: unnamed and a string."""
    return request.param
|  | ||||
|  | ||||
| class TestIntervalRange: | ||||
|     @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)]) | ||||
|     def test_constructor_numeric(self, closed, name, freq, periods): | ||||
|         start, end = 0, 100 | ||||
|         breaks = np.arange(101, step=freq) | ||||
|         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) | ||||
|  | ||||
|         # defined from start/end/freq | ||||
|         result = interval_range( | ||||
|             start=start, end=end, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from start/periods/freq | ||||
|         result = interval_range( | ||||
|             start=start, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from end/periods/freq | ||||
|         result = interval_range( | ||||
|             end=end, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         result = interval_range( | ||||
|             start=start, end=end, periods=periods, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", [None, "US/Eastern"]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "freq, periods", [("D", 364), ("2D", 182), ("22D18h", 16), ("ME", 11)] | ||||
|     ) | ||||
|     def test_constructor_timestamp(self, closed, name, freq, periods, tz): | ||||
|         start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz) | ||||
|         breaks = date_range(start=start, end=end, freq=freq) | ||||
|         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) | ||||
|  | ||||
|         # defined from start/end/freq | ||||
|         result = interval_range( | ||||
|             start=start, end=end, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from start/periods/freq | ||||
|         result = interval_range( | ||||
|             start=start, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from end/periods/freq | ||||
|         result = interval_range( | ||||
|             end=end, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         if not breaks.freq.n == 1 and tz is None: | ||||
|             result = interval_range( | ||||
|                 start=start, end=end, periods=periods, name=name, closed=closed | ||||
|             ) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "freq, periods", [("D", 100), ("2D12h", 40), ("5D", 20), ("25D", 4)] | ||||
|     ) | ||||
|     def test_constructor_timedelta(self, closed, name, freq, periods): | ||||
|         start, end = Timedelta("0 days"), Timedelta("100 days") | ||||
|         breaks = timedelta_range(start=start, end=end, freq=freq) | ||||
|         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) | ||||
|  | ||||
|         # defined from start/end/freq | ||||
|         result = interval_range( | ||||
|             start=start, end=end, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from start/periods/freq | ||||
|         result = interval_range( | ||||
|             start=start, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from end/periods/freq | ||||
|         result = interval_range( | ||||
|             end=end, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         result = interval_range( | ||||
|             start=start, end=end, periods=periods, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start, end, freq, expected_endpoint", | ||||
|         [ | ||||
|             (0, 10, 3, 9), | ||||
|             (0, 10, 1.5, 9), | ||||
|             (0.5, 10, 3, 9.5), | ||||
|             (Timedelta("0D"), Timedelta("10D"), "2D4h", Timedelta("8D16h")), | ||||
|             ( | ||||
|                 Timestamp("2018-01-01"), | ||||
|                 Timestamp("2018-02-09"), | ||||
|                 "MS", | ||||
|                 Timestamp("2018-02-01"), | ||||
|             ), | ||||
|             ( | ||||
|                 Timestamp("2018-01-01", tz="US/Eastern"), | ||||
|                 Timestamp("2018-01-20", tz="US/Eastern"), | ||||
|                 "5D12h", | ||||
|                 Timestamp("2018-01-17 12:00:00", tz="US/Eastern"), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_early_truncation(self, start, end, freq, expected_endpoint): | ||||
|         # index truncates early if freq causes end to be skipped | ||||
|         result = interval_range(start=start, end=end, freq=freq) | ||||
|         result_endpoint = result.right[-1] | ||||
|         assert result_endpoint == expected_endpoint | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start, end, freq", | ||||
|         [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)], | ||||
|     ) | ||||
|     def test_no_invalid_float_truncation(self, start, end, freq): | ||||
|         # GH 21161 | ||||
|         if freq is None: | ||||
|             breaks = [0.5, 1.5, 2.5, 3.5, 4.5] | ||||
|         else: | ||||
|             breaks = [0.5, 2.0, 3.5, 5.0, 6.5] | ||||
|         expected = IntervalIndex.from_breaks(breaks) | ||||
|  | ||||
|         result = interval_range(start=start, end=end, periods=4, freq=freq) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start, mid, end", | ||||
|         [ | ||||
|             ( | ||||
|                 Timestamp("2018-03-10", tz="US/Eastern"), | ||||
|                 Timestamp("2018-03-10 23:30:00", tz="US/Eastern"), | ||||
|                 Timestamp("2018-03-12", tz="US/Eastern"), | ||||
|             ), | ||||
|             ( | ||||
|                 Timestamp("2018-11-03", tz="US/Eastern"), | ||||
|                 Timestamp("2018-11-04 00:30:00", tz="US/Eastern"), | ||||
|                 Timestamp("2018-11-05", tz="US/Eastern"), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_linspace_dst_transition(self, start, mid, end): | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         # accounts for the hour gained/lost during DST transition | ||||
|         start = start.as_unit("ns") | ||||
|         mid = mid.as_unit("ns") | ||||
|         end = end.as_unit("ns") | ||||
|         result = interval_range(start=start, end=end, periods=2) | ||||
|         expected = IntervalIndex.from_breaks([start, mid, end]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("freq", [2, 2.0]) | ||||
|     @pytest.mark.parametrize("end", [10, 10.0]) | ||||
|     @pytest.mark.parametrize("start", [0, 0.0]) | ||||
|     def test_float_subtype(self, start, end, freq): | ||||
|         # Has float subtype if any of start/end/freq are float, even if all | ||||
|         # resulting endpoints can safely be upcast to integers | ||||
|  | ||||
|         # defined from start/end/freq | ||||
|         index = interval_range(start=start, end=end, freq=freq) | ||||
|         result = index.dtype.subtype | ||||
|         expected = "int64" if is_integer(start + end + freq) else "float64" | ||||
|         assert result == expected | ||||
|  | ||||
|         # defined from start/periods/freq | ||||
|         index = interval_range(start=start, periods=5, freq=freq) | ||||
|         result = index.dtype.subtype | ||||
|         expected = "int64" if is_integer(start + freq) else "float64" | ||||
|         assert result == expected | ||||
|  | ||||
|         # defined from end/periods/freq | ||||
|         index = interval_range(end=end, periods=5, freq=freq) | ||||
|         result = index.dtype.subtype | ||||
|         expected = "int64" if is_integer(end + freq) else "float64" | ||||
|         assert result == expected | ||||
|  | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         index = interval_range(start=start, end=end, periods=5) | ||||
|         result = index.dtype.subtype | ||||
|         expected = "int64" if is_integer(start + end) else "float64" | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_interval_range_fractional_period(self): | ||||
|         # float value for periods | ||||
|         expected = interval_range(start=0, periods=10) | ||||
|         msg = "Non-integer 'periods' in pd.date_range, .* pd.interval_range" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = interval_range(start=0, periods=10.5) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_coverage(self): | ||||
|         # equivalent timestamp-like start/end | ||||
|         start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15") | ||||
|         expected = interval_range(start=start, end=end) | ||||
|  | ||||
|         result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime()) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = interval_range(start=start.asm8, end=end.asm8) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # equivalent freq with timestamp | ||||
|         equiv_freq = [ | ||||
|             "D", | ||||
|             Day(), | ||||
|             Timedelta(days=1), | ||||
|             timedelta(days=1), | ||||
|             DateOffset(days=1), | ||||
|         ] | ||||
|         for freq in equiv_freq: | ||||
|             result = interval_range(start=start, end=end, freq=freq) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # equivalent timedelta-like start/end | ||||
|         start, end = Timedelta(days=1), Timedelta(days=10) | ||||
|         expected = interval_range(start=start, end=end) | ||||
|  | ||||
|         result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta()) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = interval_range(start=start.asm8, end=end.asm8) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # equivalent freq with timedelta | ||||
|         equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)] | ||||
|         for freq in equiv_freq: | ||||
|             result = interval_range(start=start, end=end, freq=freq) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_errors(self): | ||||
|         # not enough params | ||||
|         msg = ( | ||||
|             "Of the four parameters: start, end, periods, and freq, " | ||||
|             "exactly three must be specified" | ||||
|         ) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start=0) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(end=5) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(periods=2) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range() | ||||
|  | ||||
|         # too many params | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start=0, end=5, periods=6, freq=1.5) | ||||
|  | ||||
|         # mixed units | ||||
|         msg = "start, end, freq need to be type compatible" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=0, end=Timestamp("20130101"), freq=2) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=0, end=Timedelta("1 day"), freq=2) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=0, end=10, freq="D") | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=Timestamp("20130101"), end=10, freq="D") | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range( | ||||
|                 start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D" | ||||
|             ) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range( | ||||
|                 start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2 | ||||
|             ) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=Timedelta("1 day"), end=10, freq="D") | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range( | ||||
|                 start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D" | ||||
|             ) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2) | ||||
|  | ||||
|         # invalid periods | ||||
|         msg = "periods must be a number, got foo" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=0, periods="foo") | ||||
|  | ||||
|         # invalid start | ||||
|         msg = "start must be numeric or datetime-like, got foo" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start="foo", periods=10) | ||||
|  | ||||
|         # invalid end | ||||
|         msg = r"end must be numeric or datetime-like, got \(0, 1\]" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(end=Interval(0, 1), periods=10) | ||||
|  | ||||
|         # invalid freq for datetime-like | ||||
|         msg = "freq must be numeric or convertible to DateOffset, got foo" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start=0, end=10, freq="foo") | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start=Timestamp("20130101"), periods=10, freq="foo") | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(end=Timedelta("1 day"), periods=10, freq="foo") | ||||
|  | ||||
|         # mixed tz | ||||
|         start = Timestamp("2017-01-01", tz="US/Eastern") | ||||
|         end = Timestamp("2017-01-07", tz="US/Pacific") | ||||
|         msg = "Start and end cannot both be tz-aware with different timezones" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=start, end=end) | ||||
|  | ||||
|     def test_float_freq(self): | ||||
|         # GH 54477 | ||||
|         result = interval_range(0, 1, freq=0.1) | ||||
|         expected = IntervalIndex.from_breaks([0 + 0.1 * n for n in range(11)]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = interval_range(0, 1, freq=0.6) | ||||
|         expected = IntervalIndex.from_breaks([0, 0.6]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,208 @@ | ||||
| from itertools import permutations | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.interval import IntervalTree | ||||
| from pandas.compat import IS64 | ||||
|  | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def skipif_32bit(param):
    """
    Wrap *param* with a skip marker for 32-bit systems.

    Specifically used here to skip leaf_size parameters related to GH 23440.
    """
    mark = pytest.mark.skipif(not IS64, reason="GH 23440: int type mismatch on 32bit")
    return pytest.param(param, marks=mark)
|  | ||||
|  | ||||
@pytest.fixture(params=["int64", "float64", "uint64"])
def dtype(request):
    """Fixture supplying the dtype for the IntervalTree left/right arrays."""
    return request.param
|  | ||||
|  | ||||
# small leaf sizes (1, 2) exercise deep trees but trip GH 23440 on 32-bit,
# hence the skipif wrapper
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Fixture to specify IntervalTree leaf_size parameter; to be used with the
    tree fixture.
    """
    return request.param
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        np.arange(5, dtype="int64"),
        np.arange(5, dtype="uint64"),
        np.arange(5, dtype="float64"),
        # trailing NaN exercises missing-value handling
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
    ]
)
def tree(request, leaf_size):
    """IntervalTree of width-2 intervals [x, x + 2], so neighbours overlap."""
    left = request.param
    return IntervalTree(left, left + 2, leaf_size=leaf_size)
|  | ||||
|  | ||||
class TestIntervalTree:
    def test_get_indexer(self, tree):
        # fixture intervals are [x, x + 2], so: 1.0 hits only interval 0,
        # 5.5 only interval 4, and 6.5 hits nothing (-1)
        result = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        expected = np.array([0, 4, -1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        # 3.0 lies inside more than one interval -> unique lookup must raise
        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        # target is outside the representable range of the tree's dtype;
        # lookup must report "not found" (-1) rather than overflow
        left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype)
        tree = IntervalTree(left, right)

        result = tree.get_indexer(np.array([target_value], dtype=target_dtype))
        expected = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_non_unique(self, tree):
        # non-unique lookup returns (indexer, missing) instead of raising
        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        # 1.0 matches exactly one interval
        result = indexer[:1]
        expected = np.array([0], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        # 2.0 matches two intervals; match order is unspecified, hence the sort
        result = np.sort(indexer[1:3])
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        # 6.5 matches nothing
        result = np.sort(indexer[3:])
        expected = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        # query position 2 (the 6.5 target) is reported as missing
        result = missing
        expected = np.array([2], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        # same overflow scenario as test_get_indexer_overflow, but through
        # the non-unique code path
        left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype)
        tree = IntervalTree(left, right)
        target = np.array([target_value], dtype=target_dtype)

        result_indexer, result_missing = tree.get_indexer_non_unique(target)
        expected_indexer = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result_indexer, expected_indexer)

        expected_missing = np.array([0], dtype="intp")
        tm.assert_numpy_array_equal(result_missing, expected_missing)

    def test_duplicates(self, dtype):
        # three identical intervals [0, 1]
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)

        # unique lookup cannot resolve duplicate intervals
        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([0.5]))

        # non-unique lookup returns all three matches and nothing missing
        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        result = np.sort(indexer)
        expected = np.array([0, 1, 2], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        # intervals [x, x + 0.5]; `closed` is the shared conftest fixture
        # cycling over left/right/both/neither
        x = np.arange(1000, dtype="float64")
        found = x.astype("intp")
        not_found = (-1 * np.ones(1000)).astype("intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        # midpoints are inside regardless of which side is closed
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        # left endpoints are found only when the left side is closed
        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        # right endpoints are found only when the right side is closed
        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping(self, closed, order, left, right, expected):
        # GH 23309: is_overlapping must be independent of interval ordering,
        # so every permutation of the inputs is checked
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        assert result is expected

    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309: intervals (0,1), (1,2), (2,3) touch only at endpoints
        left, right = np.arange(3, dtype="int64"), np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        # touching endpoints count as overlap only when both sides are closed
        expected = closed == "both"
        assert result is expected

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        # GH 23309: empty, single-interval and all-NaN trees never overlap
        tree = IntervalTree(left, right, closed=closed)
        assert tree.is_overlapping is False

    @pytest.mark.skipif(not IS64, reason="GH 23440")
    def test_construction_overflow(self):
        # GH 25485: pivot computation must not overflow int64 when the
        # endpoints sit near the dtype's max
        left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101
        tree = IntervalTree(left, right)

        # pivot should be average of left/right medians
        result = tree.root.pivot
        expected = (50 + np.iinfo(np.int64).max) / 2
        assert result == expected

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            ([-np.inf, 1.0], [1.0, 2.0], 0.0),
            ([-np.inf, -2.0], [-2.0, -1.0], -2.0),
            ([-2.0, -1.0], [-1.0, np.inf], 0.0),
            ([1.0, 2.0], [2.0, np.inf], 2.0),
        ],
    )
    def test_inf_bound_infinite_recursion(self, left, right, expected):
        # GH 46658: infinite endpoints used to recurse forever during pivot
        # selection; construction must terminate with a finite pivot

        tree = IntervalTree(left * 101, right * 101)

        result = tree.root.pivot
        assert result == expected
| @ -0,0 +1,44 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     RangeIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture
def range_index():
    """Named RangeIndex of length 3 used as the second MultiIndex level."""
    return RangeIndex(3, name="range_index")
|  | ||||
|  | ||||
@pytest.fixture
def interval_index():
    """Overlapping IntervalIndex: (1.0, 2.0) and (1.5, 2.5) intersect."""
    return IntervalIndex.from_tuples(
        [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)], name="interval_index"
    )
|  | ||||
|  | ||||
| def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index): | ||||
|     #  GH-45661 | ||||
|     multi_index = MultiIndex.from_product([interval_index, range_index]) | ||||
|     result = multi_index.join(interval_index) | ||||
|  | ||||
|     tm.assert_index_equal(result, multi_index) | ||||
|  | ||||
|  | ||||
| def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index): | ||||
|     #  GH-45661 | ||||
|     multi_index = MultiIndex.from_product([interval_index, range_index]) | ||||
|     result = interval_index.join(multi_index) | ||||
|  | ||||
|     tm.assert_index_equal(result, multi_index) | ||||
|  | ||||
|  | ||||
| def test_join_overlapping_interval_to_another_intervalindex(interval_index): | ||||
|     #  GH-45661 | ||||
|     flipped_interval_index = interval_index[::-1] | ||||
|     result = interval_index.join(flipped_interval_index) | ||||
|  | ||||
|     tm.assert_index_equal(result, interval_index) | ||||
| @ -0,0 +1,13 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import IntervalIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestPickle:
    @pytest.mark.parametrize("closed", ["left", "right", "both"])
    def test_pickle_round_trip_closed(self, closed):
        # GH 35658: the ``closed`` attribute must survive a pickle round trip
        idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        unpickled = tm.round_trip_pickle(idx)
        tm.assert_index_equal(unpickled, idx)
| @ -0,0 +1,208 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     Timestamp, | ||||
|     interval_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def monotonic_index(start, end, dtype="int64", closed="right"):
    """Monotonic IntervalIndex with unit-width breaks start .. end - 1."""
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, closed=closed)
|  | ||||
|  | ||||
def empty_index(dtype="int64", closed="right"):
    """Empty IntervalIndex whose underlying values have the given dtype."""
    values = np.array([], dtype=dtype)
    return IntervalIndex(values, closed=closed)
|  | ||||
|  | ||||
class TestIntervalIndex:
    def test_union(self, closed, sort):
        # overlapping monotonic inputs; the [::-1] reversal exercises the
        # sort/no-sort branches of union
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(0, 13, closed=closed)
        result = index[::-1].union(other, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        result = other[::-1].union(index, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # union with self (or a prefix of self) is a no-op
        tm.assert_index_equal(index.union(index, sort=sort), index)
        tm.assert_index_equal(index.union(index[:1], sort=sort), index)

    def test_union_empty_result(self, closed, sort):
        # GH 19101: empty result, same dtype
        index = empty_index(dtype="int64", closed=closed)
        result = index.union(index, sort=sort)
        tm.assert_index_equal(result, index)

        # GH 19101: empty result, different numeric dtypes -> common dtype is f8
        other = empty_index(dtype="float64", closed=closed)
        result = index.union(other, sort=sort)
        expected = other
        tm.assert_index_equal(result, expected)

        # NOTE(review): `result` is not recomputed below, so this re-asserts
        # the previous result; looks like it was meant to assert on `other`
        # (or reassign `result`) — confirm against upstream intent
        other = index.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

        other = empty_index(dtype="uint64", closed=closed)
        result = index.union(other, sort=sort)
        tm.assert_index_equal(result, expected)

        result = other.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection(self, closed, sort):
        # overlapping monotonic inputs; reversal exercises sort handling
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(5, 11, closed=closed)
        result = index[::-1].intersection(other, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        result = other[::-1].intersection(index, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # intersection with self is a no-op
        tm.assert_index_equal(index.intersection(index, sort=sort), index)

        # GH 26225: nested intervals
        index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)])
        other = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225
        index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
        other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(0, 2)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225: duplicate nan element
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])
        expected = IntervalIndex([np.nan])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_intersection_empty_result(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)

        # GH 19101: empty result, same dtype
        other = monotonic_index(300, 314, closed=closed)
        expected = empty_index(dtype="int64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different numeric dtypes -> common dtype is float64
        other = monotonic_index(300, 314, dtype="float64", closed=closed)
        result = index.intersection(other, sort=sort)
        expected = other[:0]
        tm.assert_index_equal(result, expected)

        other = monotonic_index(300, 314, dtype="uint64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection_duplicates(self):
        # GH#38743: duplicate entries collapse to one match each
        index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
        other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_difference(self, closed, sort):
        # non-monotonic input: left endpoints deliberately out of order
        index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
        result = index.difference(index[:1], sort=sort)
        expected = index[1:]
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, same dtype
        result = index.difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.difference(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)
        # only the first and last intervals differ between the two slices
        result = index[1:].symmetric_difference(index[:-1], sort=sort)
        expected = IntervalIndex([index[0], index[-1]])
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # GH 19101: empty result, same dtype
        result = index.symmetric_difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.symmetric_difference(other, sort=sort)
        expected = empty_index(dtype="float64", closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning")
    @pytest.mark.parametrize(
        "op_name", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_set_incompatible_types(self, closed, op_name, sort):
        index = monotonic_index(0, 11, closed=closed)
        set_op = getattr(index, op_name)

        # TODO: standardize return type of non-union setops type(self vs other)
        # non-IntervalIndex
        if op_name == "difference":
            expected = index
        else:
            expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
        result = set_op(Index([1, 2, 3]), sort=sort)
        tm.assert_index_equal(result, expected)

        # mixed closed -> cast to object
        for other_closed in {"right", "left", "both", "neither"} - {closed}:
            other = monotonic_index(0, 11, closed=other_closed)
            expected = getattr(index.astype(object), op_name)(other, sort=sort)
            if op_name == "difference":
                expected = index
            result = set_op(other, sort=sort)
            tm.assert_index_equal(result, expected)

        # GH 19016: incompatible dtypes -> cast to object
        other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
        expected = getattr(index.astype(object), op_name)(other, sort=sort)
        if op_name == "difference":
            expected = index
        result = set_op(other, sort=sort)
        tm.assert_index_equal(result, expected)
| @ -0,0 +1,27 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
|  | ||||
|  | ||||
# Note: identical to the "multi" entry in the top-level "index" fixture
@pytest.fixture
def idx():
    # a MultiIndex used to test the general functionality of this object:
    # 4 x 2 levels whose codes spell out six (major, minor) label pairs
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])
    index_names = ["first", "second"]
    # verify_integrity=False: the codes above are known-valid, so the
    # consistency check is skipped
    mi = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=index_names,
        verify_integrity=False,
    )
    return mi
| @ -0,0 +1,263 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     date_range, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_infer_objects(idx):
    # MultiIndex does not implement infer_objects; the error message points
    # users at to_frame as the workaround.
    err = "to_frame"
    with pytest.raises(NotImplementedError, match=err):
        idx.infer_objects()
|  | ||||
|  | ||||
def test_shift(idx):
    # GH8083 test the base class for shift
    expected_msg = (
        "This method is only implemented for DatetimeIndex, PeriodIndex and "
        "TimedeltaIndex; Got type MultiIndex"
    )
    # both the single-argument and two-argument forms must raise
    for args in [(1,), (1, 2)]:
        with pytest.raises(NotImplementedError, match=expected_msg):
            idx.shift(*args)
|  | ||||
|  | ||||
| def test_groupby(idx): | ||||
|     groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) | ||||
|     labels = idx.tolist() | ||||
|     exp = {1: labels[:3], 2: labels[3:]} | ||||
|     tm.assert_dict_equal(groups, exp) | ||||
|  | ||||
|     # GH5620 | ||||
|     groups = idx.groupby(idx) | ||||
|     exp = {key: [key] for key in idx} | ||||
|     tm.assert_dict_equal(groups, exp) | ||||
|  | ||||
|  | ||||
def test_truncate_multiindex():
    # GH 34564 for MultiIndex level names check
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=["L1", "L2"],
    )

    # truncate(before=...) slices level 0, dropping entries below the cutoff
    result = index.truncate(before=1)
    # BUG FIX: the original asserted '"foo" not in result.levels[0]', which is
    # vacuously true here because level 0 holds integers 0..3; assert that the
    # actually-dropped value 0 is gone instead.
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    # inverted bounds must raise
    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
|  | ||||
|  | ||||
| # TODO: reshape | ||||
|  | ||||
|  | ||||
def test_reorder_levels(idx):
    # Requesting more levels than exist must raise rather than reorder.
    with pytest.raises(IndexError, match="^Too many levels"):
        idx.reorder_levels([2, 1, 0])
|  | ||||
|  | ||||
def test_numpy_repeat():
    # np.repeat dispatches to MultiIndex.repeat and repeats rows element-wise
    n = 2
    nums = [1, 2, 3]
    labels = np.array(["foo", "bar"])

    mi = MultiIndex.from_product([nums, labels], names=labels)
    expected = MultiIndex.from_product([nums, labels.repeat(n)], names=labels)
    tm.assert_index_equal(np.repeat(mi, n), expected)

    # the numpy-level axis argument is not supported by Index.repeat
    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.repeat(mi, n, axis=1)
|  | ||||
|  | ||||
def test_append_mixed_dtypes():
    # GH 13660
    # Appending a MultiIndex to itself must preserve each level's dtype,
    # while appending an all-object MultiIndex coerces each level to object.
    dti = date_range("2011-01-01", freq="ME", periods=3)
    dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern")
    pi = period_range("2011-01", freq="M", periods=3)

    # six levels: int, float (with NaN), str, datetime, tz-aware datetime, period
    mi = MultiIndex.from_arrays(
        [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
    )
    assert mi.nlevels == 6

    # self-append: dtypes preserved level by level
    res = mi.append(mi)
    exp = MultiIndex.from_arrays(
        [
            [1, 2, 3, 1, 2, 3],
            [1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
            ["a", "b", "c", "a", "b", "c"],
            dti.append(dti),
            dti_tz.append(dti_tz),
            pi.append(pi),
        ]
    )
    tm.assert_index_equal(res, exp)

    # an all-string MultiIndex to append, forcing object coercion below
    other = MultiIndex.from_arrays(
        [
            ["x", "y", "z"],
            ["x", "y", "z"],
            ["x", "y", "z"],
            ["x", "y", "z"],
            ["x", "y", "z"],
            ["x", "y", "z"],
        ]
    )

    # mixed-dtype append: each level is the concatenation after coercion
    res = mi.append(other)
    exp = MultiIndex.from_arrays(
        [
            [1, 2, 3, "x", "y", "z"],
            [1.1, np.nan, 3.3, "x", "y", "z"],
            ["a", "b", "c", "x", "y", "z"],
            dti.append(Index(["x", "y", "z"])),
            dti_tz.append(Index(["x", "y", "z"])),
            pi.append(Index(["x", "y", "z"])),
        ]
    )
    tm.assert_index_equal(res, exp)
|  | ||||
|  | ||||
| def test_iter(idx): | ||||
|     result = list(idx) | ||||
|     expected = [ | ||||
|         ("foo", "one"), | ||||
|         ("foo", "two"), | ||||
|         ("bar", "one"), | ||||
|         ("baz", "two"), | ||||
|         ("qux", "one"), | ||||
|         ("qux", "two"), | ||||
|     ] | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
def test_sub(idx):
    # - now raises (previously was set op difference)
    sub_msg = "cannot perform __sub__ with this index type: MultiIndex"
    rsub_msg = "cannot perform __rsub__ with this index type: MultiIndex"

    tail = idx[-3:]
    with pytest.raises(TypeError, match=sub_msg):
        idx - tail
    with pytest.raises(TypeError, match=sub_msg):
        tail - idx
    with pytest.raises(TypeError, match=sub_msg):
        tail - idx.tolist()
    with pytest.raises(TypeError, match=rsub_msg):
        idx.tolist() - tail
|  | ||||
|  | ||||
| def test_map(idx): | ||||
|     # callable | ||||
|     index = idx | ||||
|  | ||||
|     result = index.map(lambda x: x) | ||||
|     tm.assert_index_equal(result, index) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, idx: {i: e for e, i in zip(values, idx)},
        lambda values, idx: pd.Series(values, idx),
    ],
)
def test_map_dictlike(idx, mapper):
    # Mapping through an identity dict/Series must round-trip the index.
    identity = mapper(idx.values, idx)

    # we don't infer to uint64 dtype for a dict
    if isinstance(identity, dict) and idx.dtype == np.uint64:
        expected = idx.astype("int64")
    else:
        expected = idx
    tm.assert_index_equal(idx.map(identity), expected)

    # a mapper carrying only NaN values produces an all-NaN result
    expected = Index([np.nan] * len(idx))
    tm.assert_index_equal(idx.map(mapper(expected, idx)), expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "func",
    [
        np.exp,
        np.exp2,
        np.expm1,
        np.log,
        np.log2,
        np.log10,
        np.log1p,
        np.sqrt,
        np.sin,
        np.cos,
        np.tan,
        np.arcsin,
        np.arccos,
        np.arctan,
        np.sinh,
        np.cosh,
        np.tanh,
        np.arcsinh,
        np.arccosh,
        np.arctanh,
        np.deg2rad,
        np.rad2deg,
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
    # test ufuncs of numpy. see:
    # https://numpy.org/doc/stable/reference/ufuncs.html
    # A MultiIndex holds tuples, which expose no numeric methods, so every
    # element-wise math ufunc must fail in the object-dtype loop.

    expected_exception = TypeError
    msg = (
        "loop of ufunc does not support argument 0 of type tuple which "
        f"has no callable {func.__name__} method"
    )
    with pytest.raises(expected_exception, match=msg):
        func(idx)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "func",
    [np.isfinite, np.isinf, np.isnan, np.signbit],
    ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
    # Boolean-returning numpy ufuncs cannot coerce tuple entries at all.
    expected_msg = (
        f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
        "could not be safely coerced to any supported types according to "
        "the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=expected_msg):
        func(idx)
| @ -0,0 +1,30 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_astype(idx):
    # astype("O") copies levels and codes and preserves the names.
    expected = idx.copy()
    converted = idx.astype("O")
    tm.assert_copy(converted.levels, expected.levels)
    tm.assert_copy(converted.codes, expected.codes)
    assert converted.names == list(expected.names)

    # casting to a non-object dtype is not allowed
    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(np.dtype(int))
|  | ||||
|  | ||||
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
    # GH 18630: a 2D Categorical is unsupported, so MultiIndex cannot be
    # cast to a categorical dtype.
    msg = "> 1 ndim Categorical are not supported at this time"
    with pytest.raises(NotImplementedError, match=msg):
        idx.astype(CategoricalDtype(ordered=ordered))

    if not ordered:
        # dtype='category' defaults to ordered=False, so only test once
        with pytest.raises(NotImplementedError, match=msg):
            idx.astype("category")
| @ -0,0 +1,122 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import MultiIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_numeric_compat(idx):
    # Arithmetic is disabled for MultiIndex; every op (and its reflected
    # counterpart) raises TypeError with an op-specific message.
    cases = [
        (lambda mi: mi * 1, "cannot perform __mul__"),
        (lambda mi: 1 * mi, "cannot perform __rmul__"),
        (lambda mi: mi / 1, "cannot perform __truediv__"),
        (lambda mi: 1 / mi, "cannot perform __rtruediv__"),
        (lambda mi: mi // 1, "cannot perform __floordiv__"),
        (lambda mi: 1 // mi, "cannot perform __rfloordiv__"),
    ]
    for op, err in cases:
        with pytest.raises(TypeError, match=err):
            op(idx)
|  | ||||
|  | ||||
@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
def test_logical_compat(idx, method):
    # Logical reductions and inversion are not defined for MultiIndex.
    with pytest.raises(TypeError, match=f"cannot perform {method}"):
        getattr(idx, method)()
|  | ||||
|  | ||||
def test_inplace_mutation_resets_values():
    # The tuple-array form of a MultiIndex is cached under _cache["_values"].
    # set_levels/set_codes return NEW objects; the original's cache must stay
    # intact and the new object's cache must start cold.
    levels = [["a", "b", "c"], [4]]
    levels2 = [[1, 2, 3], ["a"]]
    codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=levels, codes=codes)
    mi2 = MultiIndex(levels=levels2, codes=codes)

    # instantiating MultiIndex should not access/cache _.values
    assert "_values" not in mi1._cache
    assert "_values" not in mi2._cache

    # snapshot the materialized tuple arrays for later comparison
    vals = mi1.values.copy()
    vals2 = mi2.values.copy()

    # accessing .values should cache ._values
    assert mi1._values is mi1._cache["_values"]
    assert mi1.values is mi1._cache["_values"]
    assert isinstance(mi1._cache["_values"], np.ndarray)

    # Make sure level setting works
    new_vals = mi1.set_levels(levels2).values
    tm.assert_almost_equal(vals2, new_vals)

    #  Doesn't drop _values from _cache [implementation detail]
    tm.assert_almost_equal(mi1._cache["_values"], vals)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, vals)

    # Make sure label setting works too
    codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert exp_values.shape == (6,)

    # the derived index starts with a cold cache until .values is read
    new_mi = mi2.set_codes(codes2)
    assert "_values" not in new_mi._cache
    new_values = new_mi.values
    assert "_values" in new_mi._cache

    # Shouldn't change cache
    tm.assert_almost_equal(mi2._cache["_values"], vals2)

    # Should have correct values
    tm.assert_almost_equal(exp_values, new_values)
|  | ||||
|  | ||||
def test_boxable_categorical_values():
    # Categorical datetime values must come back boxed as Timestamps when
    # materialized through .values of a MultiIndex or a DataFrame.
    cat = pd.Categorical(pd.date_range("2012-01-01", periods=3, freq="h"))
    hours = [pd.Timestamp(f"2012-01-01 0{i}:00:00") for i in range(3)]

    result = MultiIndex.from_product([["a", "b", "c"], cat]).values
    expected = pd.Series(
        [(letter, ts) for letter in ["a", "b", "c"] for ts in hours]
    ).values
    tm.assert_numpy_array_equal(result, expected)

    result = pd.DataFrame({"a": ["a", "b", "c"], "b": cat, "c": np.array(cat)}).values
    expected = pd.DataFrame(
        {
            "a": ["a", "b", "c"],
            "b": hours,
            "c": hours,
        }
    ).values
    tm.assert_numpy_array_equal(result, expected)
| @ -0,0 +1,860 @@ | ||||
| from datetime import ( | ||||
|     date, | ||||
|     datetime, | ||||
| ) | ||||
| import itertools | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_constructor_single_level():
    # A MultiIndex may be built with just one level; it stays a MultiIndex.
    single = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
    )
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(
        single.levels[0], Index(["foo", "bar", "baz", "qux"], name="first")
    )
    assert single.names == ["first"]
|  | ||||
|  | ||||
def test_constructor_no_levels():
    # zero levels is invalid ...
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    # ... and levels/codes must always be supplied together
    both_msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=both_msg):
        MultiIndex(levels=[])
    with pytest.raises(TypeError, match=both_msg):
        MultiIndex(codes=[])
|  | ||||
|  | ||||
def test_constructor_nonhashable_names():
    # GH 20527: list-valued names are unhashable and must be rejected by the
    # constructor, by rename, and by set_names alike.
    msg = r"MultiIndex\.name must be a hashable type"
    with pytest.raises(TypeError, match=msg):
        MultiIndex(
            levels=[[1, 2], ["one", "two"]],
            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
            names=(["foo"], ["bar"]),
        )

    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    bad_names = [["fooo"], ["barr"]]
    with pytest.raises(TypeError, match=msg):
        mi.rename(names=bad_names)
    with pytest.raises(TypeError, match=msg):
        mi.set_names(names=bad_names)
|  | ||||
|  | ||||
def test_constructor_mismatched_codes_levels(idx):
    # Validation of levels vs codes: mismatched list lengths, codes pointing
    # past the end of a level, unequal code-array lengths, and code values
    # below the -1 NaN sentinel must all raise ValueError.
    codes = [np.array([1]), np.array([2]), np.array([3])]
    levels = ["a"]

    msg = "Length of levels and codes must be the same"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=levels, codes=codes)

    length_error = (
        r"On level 0, code max \(3\) >= length of level \(1\)\. "
        "NOTE: this index is in an inconsistent state"
    )
    label_error = r"Unequal code lengths: \[4, 2\]"
    code_value_error = r"On level 0, code value \(-2\) < -1"

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=length_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])

    with pytest.raises(ValueError, match=label_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])

    # external API: the same validation runs through set_levels/set_codes
    with pytest.raises(ValueError, match=length_error):
        idx.copy().set_levels([["a"], ["b"]])

    with pytest.raises(ValueError, match=label_error):
        idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])

    # test set_codes with verify_integrity=False
    # the setting should not raise any value error
    idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)

    # code value smaller than -1 (the NaN sentinel) is rejected
    with pytest.raises(ValueError, match=code_value_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
|  | ||||
|  | ||||
def test_na_levels():
    # GH26408: codes pointing at missing level values (NaN, NaT, None) are
    # re-assigned to -1 by the constructor and by set_levels/set_codes.
    result = MultiIndex(
        levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]]
    )
    expected = MultiIndex(
        levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]]
    )
    tm.assert_index_equal(result, expected)

    mixed_levels = [[np.nan, "s", pd.NaT, 128, None]]
    result = MultiIndex(levels=mixed_levels, codes=[[0, -1, 1, 2, 3, 4]])
    expected = MultiIndex(levels=mixed_levels, codes=[[-1, -1, 1, -1, 3, -1]])
    tm.assert_index_equal(result, expected)

    # the same masking happens when levels are replaced via set_levels ...
    result = MultiIndex(
        levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]
    ).set_levels(mixed_levels)
    tm.assert_index_equal(result, expected)

    # ... and when codes are replaced via set_codes
    result = MultiIndex(
        levels=mixed_levels, codes=[[1, 2, 2, 2, 2, 2]]
    ).set_codes([[0, -1, 1, 2, 3, 4]])
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_copy_in_constructor():
    # copy=True must snapshot levels and codes, so mutating the input arrays
    # afterwards does not leak into the index.
    level_vals = np.array(["a", "b", "c"])
    code_vals = np.array([1, 1, 2, 0, 0, 1, 1])

    orig_code = code_vals[0]
    mi = MultiIndex(
        levels=[level_vals, level_vals], codes=[code_vals, code_vals], copy=True
    )
    assert mi.codes[0][0] == orig_code
    code_vals[0] = 15
    assert mi.codes[0][0] == orig_code

    orig_level = level_vals[0]
    level_vals[0] = "PANDA"
    assert mi.levels[0][0] == orig_level
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # from_arrays | ||||
| # ---------------------------------------------------------------------------- | ||||
| def test_from_arrays(idx): | ||||
|     arrays = [ | ||||
|         np.asarray(lev).take(level_codes) | ||||
|         for lev, level_codes in zip(idx.levels, idx.codes) | ||||
|     ] | ||||
|  | ||||
|     # list of arrays as input | ||||
|     result = MultiIndex.from_arrays(arrays, names=idx.names) | ||||
|     tm.assert_index_equal(result, idx) | ||||
|  | ||||
|     # infer correctly | ||||
|     result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]]) | ||||
|     assert result.levels[0].equals(Index([Timestamp("20130101")])) | ||||
|     assert result.levels[1].equals(Index(["a", "b"])) | ||||
|  | ||||
|  | ||||
def test_from_arrays_iterator(idx):
    # GH 18434: an iterator of arrays is consumed like a sequence ...
    value_arrays = [
        np.asarray(level).take(level_codes)
        for level, level_codes in zip(idx.levels, idx.codes)
    ]
    tm.assert_index_equal(
        MultiIndex.from_arrays(iter(value_arrays), names=idx.names), idx
    )

    # ... but a non-iterable input raises
    with pytest.raises(TypeError, match="Input must be a list / sequence of array-likes."):
        MultiIndex.from_arrays(0)
|  | ||||
|  | ||||
| def test_from_arrays_tuples(idx): | ||||
|     arrays = tuple( | ||||
|         tuple(np.asarray(lev).take(level_codes)) | ||||
|         for lev, level_codes in zip(idx.levels, idx.codes) | ||||
|     ) | ||||
|  | ||||
|     # tuple of tuples as input | ||||
|     result = MultiIndex.from_arrays(arrays, names=idx.names) | ||||
|     tm.assert_index_equal(result, idx) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        # PeriodIndex levels with different frequencies
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="h", periods=3),
        ),
        # tz-aware DatetimeIndex levels with different time zones
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
        ),
        # TimedeltaIndex levels
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="h", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    # Extension-typed levels must round-trip through from_arrays unchanged,
    # whether given as Index objects or as Series wrapping them.
    result = MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)

    result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)

    tm.assert_index_equal(result, result2)
|  | ||||
|  | ||||
def test_from_arrays_index_datetimelike_mixed():
    # Mixed datetime-like levels keep their exact types, whether provided as
    # Index objects or as Series wrapping them.
    level_inputs = [
        date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
        date_range("2015-01-01 10:00", freq="h", periods=3),
        pd.timedelta_range("1 days", freq="D", periods=3),
        pd.period_range("2011-01-01", freq="D", periods=3),
    ]

    from_indexes = MultiIndex.from_arrays(level_inputs)
    from_series = MultiIndex.from_arrays([Series(lvl) for lvl in level_inputs])
    for pos, lvl in enumerate(level_inputs):
        tm.assert_index_equal(from_indexes.get_level_values(pos), lvl)
        tm.assert_index_equal(from_series.get_level_values(pos), lvl)

    tm.assert_index_equal(from_indexes, from_series)
|  | ||||
|  | ||||
def test_from_arrays_index_series_categorical():
    # GH13743: categorical inputs (Index, Series, or raw Categorical values)
    # all round-trip through from_arrays with ordering preserved.
    unordered = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False)
    ordered = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True)

    for inputs in (
        [unordered, ordered],
        [Series(unordered), Series(ordered)],
        [unordered.values, ordered.values],
    ):
        built = MultiIndex.from_arrays(inputs)
        tm.assert_index_equal(built.get_level_values(0), unordered)
        tm.assert_index_equal(built.get_level_values(1), ordered)
|  | ||||
|  | ||||
def test_from_arrays_empty():
    # zero arrays is invalid
    with pytest.raises(ValueError, match="Must pass non-zero number of levels/codes"):
        MultiIndex.from_arrays(arrays=[])

    # a single empty array still produces an index carrying its name
    result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(result, MultiIndex)
    tm.assert_index_equal(result.levels[0], Index([], name="A"))
    assert result.names == ["A"]

    # two and three empty arrays give empty MultiIndexes with N levels
    for n_levels in (2, 3):
        names = list("ABC")[:n_levels]
        result = MultiIndex.from_arrays(arrays=[[]] * n_levels, names=names)
        expected = MultiIndex(
            levels=[[]] * n_levels, codes=[[]] * n_levels, names=names
        )
        tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    # Scalars, bare strings, and sequences that mix array-likes with scalars
    # are all rejected with the same TypeError.
    # CLEANUP: the bare "a" case appeared twice in the original parameter
    # list; the redundant duplicate was removed.
    msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
    # see gh-13599: arrays of unequal length cannot form a MultiIndex
    with pytest.raises(ValueError, match="^all arrays must be same length$"):
        MultiIndex.from_arrays([idx1, idx2])
|  | ||||
|  | ||||
def test_from_arrays_respects_none_names():
    # GH27292: names=None must not fall back to the Series' own names.
    named_a = Series([1, 2, 3], name="foo")
    named_b = Series(["a", "b", "c"], name="bar")

    result = MultiIndex.from_arrays([named_a, named_b], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None
    )
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # from_tuples | ||||
| # ---------------------------------------------------------------------------- | ||||
def test_from_tuples():
    # an empty list gives no way to infer the number of levels
    with pytest.raises(TypeError, match="Cannot infer number of levels from empty list"):
        MultiIndex.from_tuples([])

    # a tuple of tuples works the same as a list of tuples
    result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_from_tuples_iterator():
    # GH 18434: an iterator of tuples is accepted ...
    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )
    tm.assert_index_equal(
        MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"]), expected
    )

    # ... but a non-iterable is not
    with pytest.raises(TypeError, match="Input must be a list / sequence of tuple-likes."):
        MultiIndex.from_tuples(0)
|  | ||||
|  | ||||
def test_from_tuples_empty():
    # GH 16777: empty input with explicit names matches from_arrays output
    expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(MultiIndex.from_tuples([], names=["a", "b"]), expected)
|  | ||||
|  | ||||
| def test_from_tuples_index_values(idx): | ||||
|     result = MultiIndex.from_tuples(idx) | ||||
|     assert (result.values == idx.values).all() | ||||
|  | ||||
|  | ||||
def test_tuples_with_name_string():
    # GH 15110 and GH 14848: tuple input produces a MultiIndex, for which a
    # scalar name is invalid.
    tuples = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    msg = "Names should be list-like for a MultiIndex"
    for bad_name in ("abc", "a"):
        with pytest.raises(ValueError, match=msg):
            Index(tuples, name=bad_name)
|  | ||||
|  | ||||
def test_from_tuples_with_tuple_label():
    # GH 15457: a tuple may itself serve as a label inside a MultiIndex level.
    expected = pd.DataFrame(
        [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
    ).set_index(["a", "b"])
    mi = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    result = pd.DataFrame([2, 3], columns=["c"], index=mi)
    tm.assert_frame_equal(expected, result)
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # from_product | ||||
| # ---------------------------------------------------------------------------- | ||||
def test_from_product_empty_zero_levels():
    # Zero iterables cannot define a MultiIndex.
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_product([])
|  | ||||
|  | ||||
def test_from_product_empty_one_level():
    # A single empty iterable yields an empty index with the given name.
    result = MultiIndex.from_product([[]], names=["A"])
    assert result.names == ["A"]
    tm.assert_index_equal(result.levels[0], Index([], name="A"))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
    # A product with any empty level is empty, but the levels are preserved.
    names = ["A", "B"]
    expected = MultiIndex(levels=[first, second], codes=[[], []], names=names)
    result = MultiIndex.from_product([first, second], names=names)
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    # GH12258: one empty iterable empties the whole product, whatever the
    # size of the other levels.
    names = ["A", "B", "C"]
    middle = list(range(N))
    expected = MultiIndex(levels=[[], middle, []], codes=[[], [], []], names=names)
    result = MultiIndex.from_product([[], middle, []], names=names)
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
    # Scalars and sequences containing non-iterables are rejected.
    msg = r"Input must be a list / sequence of iterables|Input must be list-like"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(iterables=invalid_input)
|  | ||||
|  | ||||
def test_from_product_datetimeindex():
    # Timestamps must round-trip intact through from_product and .values.
    dt_index = date_range("2000-01-01", periods=2)
    mi = MultiIndex.from_product([[1, 2], dt_index])
    etalon = construct_1d_object_array_from_listlike(
        [(num, stamp) for num in [1, 2] for stamp in dt_index]
    )
    tm.assert_numpy_array_equal(mi.values, etalon)
|  | ||||
|  | ||||
def test_from_product_rangeindex():
    # RangeIndex is preserved by factorize, so preserved in levels
    rng = Index(range(5))
    mi = MultiIndex.from_product([rng, ["a", "b"]])
    tm.assert_index_equal(mi._levels[0], rng, exact=True)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    # GH13743: categorical inputs keep categories and ordering in the
    # level values, whether passed as Index, Series or ndarray.
    first = ["foo", "bar"]
    cat_idx = pd.CategoricalIndex(
        list("abcaab"), categories=list("bac"), ordered=ordered
    )
    expected = pd.CategoricalIndex(
        list("abcaab") * 2, categories=list("bac"), ordered=ordered
    )

    result = MultiIndex.from_product([first, f(cat_idx)])
    tm.assert_index_equal(result.get_level_values(1), expected)
|  | ||||
|  | ||||
def test_from_product():
    # The cartesian product of two levels comes out in row-major order.
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]

    expected = MultiIndex.from_tuples(
        [(f, s) for f in first for s in second], names=names
    )
    result = MultiIndex.from_product([first, second], names=names)
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_from_product_iterator():
    # GH 18434: from_product should consume a lazy iterator of iterables.
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    expected = MultiIndex.from_tuples(
        [(f, s) for f in first for s in second], names=names
    )

    result = MultiIndex.from_product(iter([first, second]), names=names)
    tm.assert_index_equal(result, expected)

    # A non-iterable argument must be rejected.
    msg = "Input must be a list / sequence of iterables."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(0)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            Series([1, 2, 3], name="foo"),
            Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    # GH27292: names are taken from the inputs' .name attributes when present.
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=expected_names,
    )
    result = MultiIndex.from_product([a, b])
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_from_product_respects_none_names():
    # GH27292: passing names=None explicitly disables name inference.
    a = Series([1, 2, 3], name="foo")
    b = Series(["a", "b"], name="bar")

    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=None,
    )
    result = MultiIndex.from_product([a, b], names=None)
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_from_product_readonly():
    # GH#15286: a read-only ndarray input must work like a writable one.
    arr = np.array(range(3))
    letters = ["a", "b"]
    expected = MultiIndex.from_product([arr, letters])

    arr.setflags(write=False)
    result = MultiIndex.from_product([arr, letters])
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
| def test_create_index_existing_name(idx): | ||||
|     # GH11193, when an existing index is passed, and a new name is not | ||||
|     # specified, the new index should inherit the previous object name | ||||
|     index = idx | ||||
|     index.names = ["foo", "bar"] | ||||
|     result = Index(index) | ||||
|     expected = Index( | ||||
|         Index( | ||||
|             [ | ||||
|                 ("foo", "one"), | ||||
|                 ("foo", "two"), | ||||
|                 ("bar", "one"), | ||||
|                 ("baz", "two"), | ||||
|                 ("qux", "one"), | ||||
|                 ("qux", "two"), | ||||
|             ], | ||||
|             dtype="object", | ||||
|         ) | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     result = Index(index, name="A") | ||||
|     expected = Index( | ||||
|         Index( | ||||
|             [ | ||||
|                 ("foo", "one"), | ||||
|                 ("foo", "two"), | ||||
|                 ("bar", "one"), | ||||
|                 ("baz", "two"), | ||||
|                 ("qux", "one"), | ||||
|                 ("qux", "two"), | ||||
|             ], | ||||
|             dtype="object", | ||||
|         ), | ||||
|         name="A", | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # from_frame | ||||
| # ---------------------------------------------------------------------------- | ||||
def test_from_frame():
    # GH 22420: frame columns become levels; column labels become level names.
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"]
    )
    result = MultiIndex.from_frame(df)
    expected = MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
    )
    tm.assert_index_equal(expected, result)
|  | ||||
|  | ||||
def test_from_frame_missing_values_multiIndex():
    # GH 39984: NA values in extension-dtype columns must survive the
    # DataFrame -> MultiIndex round trip.
    pa = pytest.importorskip("pyarrow")

    int_col = Series([1, 2, None], dtype="Int64")
    float_col = pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None]))
    df = pd.DataFrame({"a": int_col, "b": float_col})

    multi_indexed = MultiIndex.from_frame(df)
    expected = MultiIndex.from_arrays(
        [
            Series([1, 2, None]).astype("Int64"),
            pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
        ],
        names=["a", "b"],
    )
    tm.assert_index_equal(multi_indexed, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "non_frame",
    [
        Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    # GH 22420: anything but a DataFrame is rejected.
    msg = "Input must be a DataFrame"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_frame(non_frame)
|  | ||||
|  | ||||
def test_from_frame_dtype_fidelity():
    # GH 22420: column dtypes must be preserved as the level dtypes.
    columns = {
        "dates": date_range("19910905", periods=6, tz="US/Eastern"),
        "a": [1, 1, 1, 2, 2, 2],
        "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
        "c": ["x", "x", "y", "z", "x", "y"],
    }
    df = pd.DataFrame(columns)
    original_dtypes = df.dtypes.to_dict()

    expected_mi = MultiIndex.from_arrays(list(columns.values()), names=list(columns))
    mi = MultiIndex.from_frame(df)
    mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}

    tm.assert_index_equal(expected_mi, mi)
    assert original_dtypes == mi_dtypes
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
)
def test_from_frame_valid_names(names_in, names_out):
    # GH 22420: without explicit names the (tuple) column labels are kept.
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
    )
    result = MultiIndex.from_frame(df, names=names_in)
    assert result.names == names_out
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    # GH 22420: names must be list-like and match the number of levels.
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
    )
    with pytest.raises(ValueError, match=expected_error_msg):
        MultiIndex.from_frame(df, names=names)
|  | ||||
|  | ||||
def test_index_equal_empty_iterable():
    # GH 16844: empty construction via the constructor and via from_arrays
    # must produce equal indexes.
    direct = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(direct, from_arrays)
|  | ||||
|  | ||||
def test_raise_invalid_sortorder():
    # GH#28518: the MultiIndex constructor raises when the declared
    # sortorder exceeds the actual lexsort depth of the codes.
    levels = [[0, 1], [0, 1, 2]]

    # Fully lexsorted codes: sortorder=2 is accepted.
    MultiIndex(
        levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
    )

    # Second level out of order: only depth 1 is actually sorted.
    with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
        MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2
        )

    # First level out of order: nothing is sorted.
    with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
        MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1
        )
|  | ||||
|  | ||||
| def test_datetimeindex(): | ||||
|     idx1 = pd.DatetimeIndex( | ||||
|         ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo" | ||||
|     ) | ||||
|     idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern") | ||||
|     idx = MultiIndex.from_arrays([idx1, idx2]) | ||||
|  | ||||
|     expected1 = pd.DatetimeIndex( | ||||
|         ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" | ||||
|     ) | ||||
|  | ||||
|     tm.assert_index_equal(idx.levels[0], expected1) | ||||
|     tm.assert_index_equal(idx.levels[1], idx2) | ||||
|  | ||||
|     # from datetime combos | ||||
|     # GH 7888 | ||||
|     date1 = np.datetime64("today") | ||||
|     date2 = datetime.today() | ||||
|     date3 = Timestamp.today() | ||||
|  | ||||
|     for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): | ||||
|         index = MultiIndex.from_product([[d1], [d2]]) | ||||
|         assert isinstance(index.levels[0], pd.DatetimeIndex) | ||||
|         assert isinstance(index.levels[1], pd.DatetimeIndex) | ||||
|  | ||||
|     # but NOT date objects, matching Index behavior | ||||
|     date4 = date.today() | ||||
|     index = MultiIndex.from_product([[date4], [date2]]) | ||||
|     assert not isinstance(index.levels[0], pd.DatetimeIndex) | ||||
|     assert isinstance(index.levels[1], pd.DatetimeIndex) | ||||
|  | ||||
|  | ||||
def test_constructor_with_tz():
    # tz-aware inputs keep both their timezone and name in each level.
    index = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    columns = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    # Raw DatetimeIndex and Series-wrapped inputs must behave identically.
    for arrays in ([index, columns], [Series(index), Series(columns)]):
        result = MultiIndex.from_arrays(arrays)
        assert result.names == ["dt1", "dt2"]
        tm.assert_index_equal(result.levels[0], index)
        tm.assert_index_equal(result.levels[1], columns)
|  | ||||
|  | ||||
def test_multiindex_inference_consistency():
    # datetime.date values stay object dtype in a plain Index, so every
    # MultiIndex constructor should infer object dtype for its level too.
    v = date.today()
    arr = [v, v]

    assert Index(arr).dtype == object

    for mi in [
        MultiIndex.from_arrays([arr]),
        MultiIndex.from_product([arr]),
        MultiIndex.from_tuples([(x,) for x in arr]),
    ]:
        assert mi.levels[0].dtype == object
|  | ||||
|  | ||||
def test_dtype_representation(using_infer_string):
    # GH#46900: the .dtypes Series is indexed by the (tuple) level names.
    pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
    result = pmidx.dtypes
    str_dtype = pd.StringDtype(na_value=np.nan) if using_infer_string else "object"
    expected = Series(
        ["int64", str_dtype],
        index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
| @ -0,0 +1,201 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat.numpy import np_version_gt2 | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_to_numpy(idx): | ||||
|     result = idx.to_numpy() | ||||
|     exp = idx.values | ||||
|     tm.assert_numpy_array_equal(result, exp) | ||||
|  | ||||
|  | ||||
def test_array_interface(idx):
    # https://github.com/pandas-dev/pandas/pull/60046
    # Converting a MultiIndex to numpy yields an object array of tuples.
    result = np.asarray(idx)
    expected = np.empty((6,), dtype=object)
    expected[:] = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]
    tm.assert_numpy_array_equal(result, expected)

    # it always gives a copy by default, but the values are cached, so results
    # are still sharing memory
    result_copy1 = np.asarray(idx)
    result_copy2 = np.asarray(idx)
    assert np.may_share_memory(result_copy1, result_copy2)

    # with explicit copy=True, then it is an actual copy
    result_copy1 = np.array(idx, copy=True)
    result_copy2 = np.array(idx, copy=True)
    assert not np.may_share_memory(result_copy1, result_copy2)

    if not np_version_gt2:
        # copy=False semantics are only supported in NumPy>=2.
        return

    # for MultiIndex, copy=False is never allowed, so NumPy 2 emits its
    # FutureWarning about the changed 'copy' keyword behavior
    msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        np.array(idx, copy=False)
|  | ||||
|  | ||||
def test_to_frame():
    # Unnamed levels become integer columns 0..n-1.
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]

    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(result, expected)

    # With index=True (the default) the frame is indexed by the MultiIndex.
    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # Named levels become the column labels.
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
    index = MultiIndex.from_tuples(tuples, names=["first", "second"])
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580: the `name` argument overrides the level names.
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    # `name` must be list-like and match the number of levels.
    msg = "'name' must be a list / sequence of column names."
    with pytest.raises(TypeError, match=msg):
        index.to_frame(name="first")

    msg = "'name' should have same length as number of levels on index."
    with pytest.raises(ValueError, match=msg):
        index.to_frame(name=["first"])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    result = index.to_frame(index=False)
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580: `name` override with a datetime level.
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_to_frame_dtype_fidelity():
    # GH 22420: level dtypes must be preserved as the column dtypes.
    arrays = {
        "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
        "a": [1, 1, 1, 2, 2, 2],
        "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
        "c": ["x", "x", "y", "z", "x", "y"],
    }
    mi = MultiIndex.from_arrays(list(arrays.values()), names=list(arrays))
    original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}

    expected_df = DataFrame(arrays)
    df = mi.to_frame(index=False)
    df_dtypes = df.dtypes.to_dict()

    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df_dtypes
|  | ||||
|  | ||||
def test_to_frame_resulting_column_order():
    # GH 22420: columns come out in level order, whatever the name types.
    names = ["z", 0, "a"]
    mi = MultiIndex.from_arrays(
        [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=names
    )
    assert mi.to_frame().columns.tolist() == names
|  | ||||
|  | ||||
def test_to_frame_duplicate_labels():
    # GH 45245: duplicate level names require allow_duplicates=True, also
    # when the duplication only arises after None is replaced by a position.
    data = [(1, 2), (3, 4)]
    for names, columns in [(["a", "a"], ["a", "a"]), ([None, 0], [0, 0])]:
        index = MultiIndex.from_tuples(data, names=names)
        with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
            index.to_frame()

        result = index.to_frame(allow_duplicates=True)
        expected = DataFrame(data, index=index, columns=columns)
        tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
| def test_to_flat_index(idx): | ||||
|     expected = pd.Index( | ||||
|         ( | ||||
|             ("foo", "one"), | ||||
|             ("foo", "two"), | ||||
|             ("bar", "one"), | ||||
|             ("baz", "two"), | ||||
|             ("qux", "one"), | ||||
|             ("qux", "two"), | ||||
|         ), | ||||
|         tupleize_cols=False, | ||||
|     ) | ||||
|     result = idx.to_flat_index() | ||||
|     tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,96 @@ | ||||
| from copy import ( | ||||
|     copy, | ||||
|     deepcopy, | ||||
| ) | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from pandas import MultiIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` satisfies the copy invariants w.r.t. ``original``.

    Levels must be (at least shallow-)copied; codes and names must compare
    equal but not be the same objects; the sort order must carry over.
    """
    # Levels should be (at least, shallow copied)
    tm.assert_copy(copy.levels, original.levels)

    # Codes are equal but never the same object, no matter which way copied.
    # (The original duplicated this assert_almost_equal call; once suffices.)
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names are equal but never the same object.
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
|  | ||||
|  | ||||
def test_copy(idx):
    # .copy() must satisfy all the copy invariants.
    assert_multiindex_copied(idx.copy(), idx)
|  | ||||
|  | ||||
def test_shallow_copy(idx):
    # _view() shares data but must still satisfy the copy invariants.
    assert_multiindex_copied(idx._view(), idx)
|  | ||||
|  | ||||
def test_view(idx):
    # .view() behaves like a copy with respect to these invariants.
    assert_multiindex_copied(idx.view(), idx)
|  | ||||
|  | ||||
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    # copy.copy / copy.deepcopy must return a distinct but equal index.
    original = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    duplicate = func(original)
    assert duplicate is not original
    assert duplicate.equals(original)
|  | ||||
|  | ||||
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    # Both deep and shallow copies must compare equal to the source.
    original = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    assert original.copy(deep=deep).equals(original)
|  | ||||
|  | ||||
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    # gh-12309: Check that the "name" argument as well other kwargs are honored
    original = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    duplicate = original.copy(**{kwarg: value, "deep": deep})
    assert getattr(duplicate, kwarg) == value
|  | ||||
|  | ||||
def test_copy_deep_false_retains_id():
    # GH#47878: a shallow copy must share the cache-identity object.
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    shallow = idx.copy(deep=False)
    assert shallow._id is idx._id
| @ -0,0 +1,190 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import PerformanceWarning | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_drop(idx):
    # Dropping full tuples removes the exact entries, whether passed as a
    # list of tuples or as a MultiIndex.
    dropped = idx.drop([("foo", "two"), ("qux", "one")])

    index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
    dropped2 = idx.drop(index)

    expected = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
    tm.assert_index_equal(dropped2, expected)

    # A plain label drops everything under that first-level key.
    dropped = idx.drop(["bar"])
    expected = idx[[0, 1, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop("foo")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # Labels absent from the index raise KeyError by default.
    index = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(index)
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(["foo", "two"])

    # partially correct argument
    mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(mixed_index)

    # error='ignore'
    dropped = idx.drop(index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(["foo", "two"], errors="ignore")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop
    dropped = idx.drop(["foo", ("qux", "one")])
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop / error='ignore'
    mixed_index = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(mixed_index)
    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
|  | ||||
|  | ||||
| def test_droplevel_with_names(idx): | ||||
|     index = idx[idx.get_loc("foo")] | ||||
|     dropped = index.droplevel(0) | ||||
|     assert dropped.name == "second" | ||||
|  | ||||
|     index = MultiIndex( | ||||
|         levels=[Index(range(4)), Index(range(4)), Index(range(4))], | ||||
|         codes=[ | ||||
|             np.array([0, 0, 1, 2, 2, 2, 3, 3]), | ||||
|             np.array([0, 1, 0, 0, 0, 1, 0, 1]), | ||||
|             np.array([1, 0, 1, 1, 0, 0, 1, 0]), | ||||
|         ], | ||||
|         names=["one", "two", "three"], | ||||
|     ) | ||||
|     dropped = index.droplevel(0) | ||||
|     assert dropped.names == ("two", "three") | ||||
|  | ||||
|     dropped = index.droplevel("two") | ||||
|     expected = index.droplevel(1) | ||||
|     assert dropped.equals(expected) | ||||
|  | ||||
|  | ||||
def test_droplevel_list():
    # droplevel accepts a list of level names/positions, in any order.
    mi = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )

    # list form matches dropping the same levels one at a time
    assert mi[:2].droplevel(["three", "one"]).equals(mi[:2].droplevel(2).droplevel(0))

    # an empty list is a no-op
    assert mi[:2].droplevel([]).equals(mi[:2])

    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        mi[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        mi[:2].droplevel(["one", "four"])
|  | ||||
|  | ||||
def test_drop_not_lexsorted():
    # GH 12078: drop gives the same result whether or not the MultiIndex
    # is lexsorted (modulo a PerformanceWarning on the unsorted one).
    lexsorted_mi = MultiIndex.from_tuples(
        [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
    )
    assert lexsorted_mi._is_lexsorted()

    # build an equivalent, but not lexsorted, index via pivot_table
    frame = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    frame = frame.pivot_table(index="a", columns=["b", "c"], values="d")
    frame = frame.reset_index()
    not_lexsorted_mi = frame.columns
    assert not not_lexsorted_mi._is_lexsorted()

    # both flavors must agree before and after dropping a label
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
|  | ||||
|  | ||||
def test_drop_with_nan_in_index(nulls_fixture):
    # GH#18853
    # Dropping a timestamp absent from a level that contains a null must
    # raise KeyError naming the missing label, not silently succeed.
    mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
    msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(pd.Timestamp("2001"), level="date")
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_drop_with_non_monotonic_duplicates():
    # GH#33494
    # drop() must remove *all* occurrences of a duplicated key even when
    # the index is not monotonic.
    mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
    result = mi.drop((1, 2))
    expected = MultiIndex.from_tuples([(2, 3)])
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_single_level_drop_partially_missing_elements():
    # GH 37820: drop with level= raises if any requested label is missing.
    mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])

    msg = r"labels \[4\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(4, level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([1, 4], level=0)

    # NaN counts as missing when the level holds no nulls
    msg = r"labels \[nan\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan], level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, 2, 3], level=0)

    # with NaN present in the level, only the truly absent label is reported
    mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
    msg = r"labels \['a'\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, "a"], level=0)
|  | ||||
|  | ||||
def test_droplevel_multiindex_one_level():
    # GH#37208: droplevel([]) on a one-level MultiIndex flattens it to a
    # plain Index, keeping the name.
    mi = MultiIndex.from_tuples([(2,)], names=("b",))
    tm.assert_index_equal(mi.droplevel([]), Index([2], name="b"))
| @ -0,0 +1,363 @@ | ||||
| from itertools import product | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import ( | ||||
|     hashtable, | ||||
|     index as libindex, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture
def idx_dup():
    # compare tests/indexes/multi/conftest.py — same levels/names, but the
    # codes repeat rows so the index contains duplicates
    first_level = Index(["foo", "bar", "baz", "qux"])
    second_level = Index(["one", "two"])
    return MultiIndex(
        levels=[first_level, second_level],
        codes=[np.array([0, 0, 1, 0, 1, 1]), np.array([0, 1, 0, 1, 0, 1])],
        names=["first", "second"],
        verify_integrity=False,
    )
|  | ||||
|  | ||||
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    # unique() keeps first occurrences and preserves names.
    mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names)
    tm.assert_index_equal(
        mi.unique(), MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
    )

    mi = MultiIndex.from_arrays([list("aaaa"), list("abab")], names=names)
    tm.assert_index_equal(
        mi.unique(), MultiIndex.from_arrays([list("aa"), list("ab")], names=mi.names)
    )

    mi = MultiIndex.from_arrays([list("aaaa"), list("aaaa")], names=names)
    tm.assert_index_equal(
        mi.unique(), MultiIndex.from_arrays([["a"], ["a"]], names=mi.names)
    )

    # GH #20568 - an empty MultiIndex is returned unchanged
    mi = MultiIndex.from_arrays([[], []], names=names)
    tm.assert_index_equal(mi, mi.unique())
|  | ||||
|  | ||||
def test_unique_datetimelike():
    # unique() over tz-naive and tz-aware datetime levels, NaT included.
    naive = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    aware = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    result = MultiIndex.from_arrays([naive, aware]).unique()

    expected = MultiIndex.from_arrays(
        [
            DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"]),
            DatetimeIndex(
                ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
            ),
        ]
    )
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
    # GH #17896: unique(level=...) matches get_level_values(...).unique()
    tm.assert_index_equal(
        idx.unique(level=level), idx.get_level_values(level).unique()
    )

    # level already unique: unique() is effectively a no-op
    mi = MultiIndex.from_arrays(
        [[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"]
    )
    tm.assert_index_equal(mi.unique(level=level), mi.get_level_values(level))

    # empty MultiIndex
    mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
    tm.assert_index_equal(mi.unique(level=level), mi.get_level_values(level))
|  | ||||
|  | ||||
def test_duplicate_multiindex_codes():
    # GH 17464: duplicate values within a single level are rejected,
    # both at construction time and via set_levels.
    msg = r"Level values must be unique: \[[A', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])

    mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]])
|  | ||||
|  | ||||
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    # GH18872, GH19029: duplicate level names are allowed everywhere.
    mi = MultiIndex.from_product([[0, 1]] * 3, names=names)
    assert mi.names == names

    # via rename()
    renamed = MultiIndex.from_product([[0, 1]] * 3).rename(names)
    assert renamed.names == names

    # via rename(..., level=...), mixing inplace and returning variants
    renamed.rename(names[1], level=1, inplace=True)
    renamed = renamed.rename([names[0], names[2]], level=[0, 2])
    assert renamed.names == names
|  | ||||
|  | ||||
def test_duplicate_meta_data():
    # GH 10115: drop_duplicates must preserve the names metadata.
    mi = MultiIndex(
        levels=[[0, 1], [0, 1, 2]],
        codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]],
    )

    variants = (
        mi,
        mi.set_names([None, None]),
        mi.set_names([None, "Num"]),
        mi.set_names(["Upper", "Num"]),
    )
    for variant in variants:
        assert variant.has_duplicates
        assert variant.drop_duplicates().names == variant.names
|  | ||||
|  | ||||
| def test_has_duplicates(idx, idx_dup): | ||||
|     # see fixtures | ||||
|     assert idx.is_unique is True | ||||
|     assert idx.has_duplicates is False | ||||
|     assert idx_dup.is_unique is False | ||||
|     assert idx_dup.has_duplicates is True | ||||
|  | ||||
|     mi = MultiIndex( | ||||
|         levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]] | ||||
|     ) | ||||
|     assert mi.is_unique is False | ||||
|     assert mi.has_duplicates is True | ||||
|  | ||||
|     # single instance of NaN | ||||
|     mi_nan = MultiIndex( | ||||
|         levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]] | ||||
|     ) | ||||
|     assert mi_nan.is_unique is True | ||||
|     assert mi_nan.has_duplicates is False | ||||
|  | ||||
|     # multiple instances of NaN | ||||
|     mi_nan_dup = MultiIndex( | ||||
|         levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]] | ||||
|     ) | ||||
|     assert mi_nan_dup.is_unique is False | ||||
|     assert mi_nan_dup.has_duplicates is True | ||||
|  | ||||
|  | ||||
def test_has_duplicates_from_tuples():
    # GH 9075: these wide tuples are all distinct, so no duplicates.
    tuples = [
        ("x", "out", "z", 5, "y", "in", "z", 169),
        ("x", "out", "z", 7, "y", "in", "z", 119),
        ("x", "out", "z", 9, "y", "in", "z", 135),
        ("x", "out", "z", 13, "y", "in", "z", 145),
        ("x", "out", "z", 14, "y", "in", "z", 158),
        ("x", "out", "z", 16, "y", "in", "z", 122),
        ("x", "out", "z", 17, "y", "in", "z", 160),
        ("x", "out", "z", 18, "y", "in", "z", 180),
        ("x", "out", "z", 20, "y", "in", "z", 143),
        ("x", "out", "z", 21, "y", "in", "z", 128),
        ("x", "out", "z", 22, "y", "in", "z", 129),
        ("x", "out", "z", 25, "y", "in", "z", 111),
        ("x", "out", "z", 28, "y", "in", "z", 114),
        ("x", "out", "z", 29, "y", "in", "z", 121),
        ("x", "out", "z", 31, "y", "in", "z", 126),
        ("x", "out", "z", 32, "y", "in", "z", 155),
        ("x", "out", "z", 33, "y", "in", "z", 123),
        ("x", "out", "z", 12, "y", "in", "z", 144),
    ]
    assert not MultiIndex.from_tuples(tuples).has_duplicates
|  | ||||
|  | ||||
@pytest.mark.parametrize("nlevels", [4, 8])
@pytest.mark.parametrize("with_nulls", [True, False])
def test_has_duplicates_overflow(nlevels, with_nulls):
    # Duplicate detection must stay correct when the combined level-code
    # space can overflow int64.
    # handle int64 overflow if possible
    # no overflow with 4
    # overflow possible with 8
    codes = np.tile(np.arange(500), 2)
    level = np.arange(500)

    if with_nulls:  # inject some null values
        codes[500] = -1  # common nan value
        codes = [codes.copy() for i in range(nlevels)]
        for i in range(nlevels):
            # stagger one extra -1 per level so the nulls differ per level
            codes[i][500 + i - nlevels // 2] = -1

        codes += [np.array([-1, 1]).repeat(500)]
    else:
        codes = [codes] * nlevels + [np.arange(2).repeat(500)]

    levels = [level] * nlevels + [[0, 1]]

    # no dups
    mi = MultiIndex(levels=levels, codes=codes)
    assert not mi.has_duplicates

    # with a dup
    if with_nulls:

        def f(a):
            # re-insert the first row's code to manufacture one duplicate row
            return np.insert(a, 1000, a[0])

        codes = list(map(f, codes))
        mi = MultiIndex(levels=levels, codes=codes)
    else:
        values = mi.values.tolist()
        mi = MultiIndex.from_tuples(values + [values[0]])

    assert mi.has_duplicates
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", np.array([False, False, False, True, True, False])),
        ("last", np.array([False, True, True, False, False, False])),
        (False, np.array([False, True, True, True, True, False])),
    ],
)
def test_duplicated(idx_dup, keep, expected):
    # duplicated() honors all three values of the ``keep`` argument.
    result = idx_dup.duplicated(keep=keep)
    tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.arm_slow
def test_duplicated_hashtable_impl(keep, monkeypatch):
    # GH 9125
    # Shrink _SIZE_CUTOFF to force the hashtable-based duplicated() code
    # path, and check it agrees with hashtable.duplicated applied to the
    # materialized tuple values.
    n, k = 6, 10
    levels = [np.arange(n), [str(i) for i in range(n)], 1000 + np.arange(n)]
    codes = [np.random.default_rng(2).choice(n, k * n) for _ in levels]
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", 50)
        mi = MultiIndex(levels=levels, codes=codes)

        result = mi.duplicated(keep=keep)
        expected = hashtable.duplicated(mi.values, keep=keep)
    tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("val", [101, 102])
def test_duplicated_with_nan(val):
    # GH5873
    # Rows that differ in at least one level (including a NaN entry) are
    # not duplicates of each other.
    mi = MultiIndex.from_arrays([[101, val], [3.5, np.nan]])
    assert not mi.has_duplicates

    tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))
|  | ||||
|  | ||||
@pytest.mark.parametrize("n", range(1, 6))
@pytest.mark.parametrize("m", range(1, 5))
def test_duplicated_with_nan_multi_shape(n, m):
    # GH5873: every code combination, including -1 (NaN), appears exactly
    # once, so nothing is duplicated regardless of shuffle order.
    combos = product(range(-1, n), range(-1, m))
    mi = MultiIndex(
        levels=[list("abcde")[:n], list("WXYZ")[:m]],
        codes=np.random.default_rng(2).permutation(list(combos)).T,
    )
    assert len(mi) == (n + 1) * (m + 1)
    assert not mi.has_duplicates

    tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool"))
|  | ||||
|  | ||||
def test_duplicated_drop_duplicates():
    # GH#4060: duplicated()/drop_duplicates() honor the keep argument.
    mi = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    # keep="first" (default): later repeats are flagged
    mask = mi.duplicated()
    tm.assert_numpy_array_equal(
        mask, np.array([False, False, False, True, False, False], dtype=bool)
    )
    assert mask.dtype == bool
    tm.assert_index_equal(
        mi.drop_duplicates(),
        MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])),
    )

    # keep="last": earlier repeats are flagged
    mask = mi.duplicated(keep="last")
    tm.assert_numpy_array_equal(
        mask, np.array([True, False, False, False, False, False])
    )
    assert mask.dtype == bool
    tm.assert_index_equal(
        mi.drop_duplicates(keep="last"),
        MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])),
    )

    # keep=False: every member of a duplicate group is flagged
    mask = mi.duplicated(keep=False)
    tm.assert_numpy_array_equal(
        mask, np.array([True, False, False, True, False, False])
    )
    assert mask.dtype == bool
    tm.assert_index_equal(
        mi.drop_duplicates(keep=False),
        MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])),
    )
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype",
    [
        np.complex64,
        np.complex128,
    ],
)
def test_duplicated_series_complex_numbers(dtype):
    # GH 17927
    # Complex values (including repeated nan+nanj entries) must hash
    # consistently so that duplicated() flags repeats correctly.
    expected = Series(
        [False, False, False, True, False, False, False, True, False, True],
        dtype=bool,
    )
    result = Series(
        [
            np.nan + np.nan * 1j,
            0,
            1j,
            1j,
            1,
            1 + 1j,
            1 + 2j,
            1 + 1j,
            np.nan,
            np.nan + np.nan * 1j,
        ],
        dtype=dtype,
    ).duplicated()
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_midx_unique_ea_dtype():
    # GH#48335: unique() preserves an extension (masked Int64) level dtype.
    level_a = Series([1, 2, NA, NA], dtype="Int64")
    level_b = np.array([1, 2, 3, 3])
    midx = MultiIndex.from_arrays([level_a, level_b], names=["a", "b"])

    expected = MultiIndex.from_arrays(
        [Series([1, 2, NA], dtype="Int64"), np.array([1, 2, 3])], names=["a", "b"]
    )
    tm.assert_index_equal(midx.unique(), expected)
| @ -0,0 +1,284 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_any_real_numeric_dtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_equals(idx): | ||||
|     assert idx.equals(idx) | ||||
|     assert idx.equals(idx.copy()) | ||||
|     assert idx.equals(idx.astype(object)) | ||||
|     assert idx.equals(idx.to_flat_index()) | ||||
|     assert idx.equals(idx.to_flat_index().astype("category")) | ||||
|  | ||||
|     assert not idx.equals(list(idx)) | ||||
|     assert not idx.equals(np.array(idx)) | ||||
|  | ||||
|     same_values = Index(idx, dtype=object) | ||||
|     assert idx.equals(same_values) | ||||
|     assert same_values.equals(idx) | ||||
|  | ||||
|     if idx.nlevels == 1: | ||||
|         # do not test MultiIndex | ||||
|         assert not idx.equals(Series(idx)) | ||||
|  | ||||
|  | ||||
def test_equals_op(idx):
    # GH9947, GH10637
    # Elementwise ``==`` must broadcast against Index / ndarray / Series of
    # equal length, and raise "Lengths must match" for mismatched lengths
    # (including length-1 operands — no scalar-style broadcasting).
    index_a = idx

    n = len(index_a)
    index_b = index_a[0:-1]
    index_c = index_a[0:-1].append(index_a[-2:-1])
    index_d = index_a[0:1]
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_b
    expected1 = np.array([True] * n)
    expected2 = np.array([True] * (n - 1) + [False])
    tm.assert_numpy_array_equal(index_a == index_a, expected1)
    tm.assert_numpy_array_equal(index_a == index_c, expected2)

    # test comparisons with numpy arrays
    array_a = np.array(index_a)
    array_b = np.array(index_a[0:-1])
    array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
    array_d = np.array(index_a[0:1])
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_b
    tm.assert_numpy_array_equal(index_a == array_a, expected1)
    tm.assert_numpy_array_equal(index_a == array_c, expected2)

    # test comparisons with Series
    series_a = Series(array_a)
    series_b = Series(array_b)
    series_c = Series(array_c)
    series_d = Series(array_d)
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_b

    tm.assert_numpy_array_equal(index_a == series_a, expected1)
    tm.assert_numpy_array_equal(index_a == series_c, expected2)

    # cases where length is 1 for one of them
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_d
    msg = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=msg):
        series_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        series_a == array_d

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if not isinstance(index_a, MultiIndex):
        expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
        # assuming the 2nd to last item is unique in the data
        item = index_a[-2]
        tm.assert_numpy_array_equal(index_a == item, expected3)
        tm.assert_series_equal(series_a == item, Series(expected3))
|  | ||||
|  | ||||
def test_compare_tuple():
    # GH#21517: comparing a MultiIndex against one of its own tuples
    # broadcasts elementwise over the rows.
    mi = MultiIndex.from_product([[1, 2]] * 2)

    first_only = np.array([True, False, False, False])
    none_true = np.array([False, False, False, False])

    tm.assert_numpy_array_equal(mi == mi[0], first_only)
    tm.assert_numpy_array_equal(mi != mi[0], ~first_only)
    tm.assert_numpy_array_equal(mi < mi[0], none_true)
    tm.assert_numpy_array_equal(mi <= mi[0], first_only)
    tm.assert_numpy_array_equal(mi > mi[0], ~first_only)
    tm.assert_numpy_array_equal(mi >= mi[0], ~none_true)
|  | ||||
|  | ||||
def test_compare_tuple_strs():
    # GH#34180: equality against full and partial string tuples.
    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    tm.assert_numpy_array_equal(mi == ("c", "a"), np.array([False, False, True]))

    # a shorter tuple never matches a 2-level entry
    tm.assert_numpy_array_equal(mi == ("c",), np.array([False, False, False]))
|  | ||||
|  | ||||
| def test_equals_multi(idx): | ||||
|     assert idx.equals(idx) | ||||
|     assert not idx.equals(idx.values) | ||||
|     assert idx.equals(Index(idx.values)) | ||||
|  | ||||
|     assert idx.equal_levels(idx) | ||||
|     assert not idx.equals(idx[:-1]) | ||||
|     assert not idx.equals(idx[-1]) | ||||
|  | ||||
|     # different number of levels | ||||
|     index = MultiIndex( | ||||
|         levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))], | ||||
|         codes=[ | ||||
|             np.array([0, 0, 1, 2, 2, 2, 3, 3]), | ||||
|             np.array([0, 1, 0, 0, 0, 1, 0, 1]), | ||||
|             np.array([1, 0, 1, 1, 0, 0, 1, 0]), | ||||
|         ], | ||||
|     ) | ||||
|  | ||||
|     index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1]) | ||||
|     assert not index.equals(index2) | ||||
|     assert not index.equal_levels(index2) | ||||
|  | ||||
|     # levels are different | ||||
|     major_axis = Index(list(range(4))) | ||||
|     minor_axis = Index(list(range(2))) | ||||
|  | ||||
|     major_codes = np.array([0, 0, 1, 2, 2, 3]) | ||||
|     minor_codes = np.array([0, 1, 0, 0, 1, 0]) | ||||
|  | ||||
|     index = MultiIndex( | ||||
|         levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] | ||||
|     ) | ||||
|     assert not idx.equals(index) | ||||
|     assert not idx.equal_levels(index) | ||||
|  | ||||
|     # some of the labels are different | ||||
|     major_axis = Index(["foo", "bar", "baz", "qux"]) | ||||
|     minor_axis = Index(["one", "two"]) | ||||
|  | ||||
|     major_codes = np.array([0, 0, 2, 2, 3, 3]) | ||||
|     minor_codes = np.array([0, 1, 0, 1, 0, 1]) | ||||
|  | ||||
|     index = MultiIndex( | ||||
|         levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] | ||||
|     ) | ||||
|     assert not idx.equals(index) | ||||
|  | ||||
|  | ||||
| def test_identical(idx): | ||||
|     mi = idx.copy() | ||||
|     mi2 = idx.copy() | ||||
|     assert mi.identical(mi2) | ||||
|  | ||||
|     mi = mi.set_names(["new1", "new2"]) | ||||
|     assert mi.equals(mi2) | ||||
|     assert not mi.identical(mi2) | ||||
|  | ||||
|     mi2 = mi2.set_names(["new1", "new2"]) | ||||
|     assert mi.identical(mi2) | ||||
|  | ||||
|     mi4 = Index(mi.tolist(), tupleize_cols=False) | ||||
|     assert not mi.identical(mi4) | ||||
|     assert mi.equals(mi4) | ||||
|  | ||||
|  | ||||
| def test_equals_operator(idx): | ||||
|     # GH9785 | ||||
|     assert (idx == idx).all() | ||||
|  | ||||
|  | ||||
def test_equals_missing_values():
    # make sure take is not using -1 for missing values
    mi = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])

    # a length-1 slice is never equal to the scalar tuple at that position
    assert not mi[0:1].equals(mi[0])
    assert not mi[1:2].equals(mi[1])
|  | ||||
|  | ||||
def test_equals_missing_values_differently_sorted():
    # GH#38439: NaN rows in different positions are not equal...
    mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not mi1.equals(mi2)

    # ...but the same ordering (NaNs included) compares equal
    same_order = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert mi1.equals(same_order)
|  | ||||
|  | ||||
def test_is_():
    # is_() tracks shared identity of the underlying data: views share it,
    # metadata (names) changes keep it, level changes break it.
    mi = MultiIndex.from_tuples(zip(range(10), range(10)))
    assert mi.is_(mi)
    assert mi.is_(mi.view())
    assert mi.is_(mi.view().view().view().view())
    mi2 = mi.view()
    # names are metadata, they don't change id
    mi2.names = ["A", "B"]
    assert mi2.is_(mi)
    assert mi.is_(mi2)

    assert not mi.is_(mi.set_names(["C", "D"]))
    # levels are inherent properties, they change identity
    mi3 = mi2.set_levels([list(range(10)), list(range(10))])
    assert not mi3.is_(mi2)
    # shouldn't change
    assert mi2.is_(mi)
    mi4 = mi3.view()

    # GH 17464 - Remove duplicate MultiIndex levels
    mi4 = mi4.set_levels([list(range(10)), list(range(10))])
    assert not mi4.is_(mi3)
    mi5 = mi.view()
    # even setting the levels to themselves produces a new identity
    mi5 = mi5.set_levels(mi5.levels)
    assert not mi5.is_(mi)
|  | ||||
|  | ||||
def test_is_all_dates(idx):
    # a MultiIndex is never considered an all-dates index
    assert not idx._is_all_dates
|  | ||||
|  | ||||
| def test_is_numeric(idx): | ||||
|     # MultiIndex is never numeric | ||||
|     assert not is_any_real_numeric_dtype(idx) | ||||
|  | ||||
|  | ||||
def test_multiindex_compare():
    # GH 21149
    # Comparison operators for a MultiIndex with nlevels == 1 must behave
    # like those for nlevels > 1.
    midx = MultiIndex.from_product([[0, 1]])

    # self-equality is elementwise all-True
    tm.assert_series_equal(Series(midx == midx), Series([True, True]))

    # strict self-comparison is elementwise all-False
    tm.assert_series_equal(Series(midx > midx), Series([False, False]))
|  | ||||
|  | ||||
def test_equals_ea_int_regular_int():
    # GH#46026: a masked Int64 level is not equal to a plain int64 level,
    # in either comparison direction.
    masked = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
    plain = MultiIndex.from_arrays([[1, 2], [3, 4]])
    assert not masked.equals(plain)
    assert not plain.equals(masked)
| @ -0,0 +1,249 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_format(idx):
    # MultiIndex.format is deprecated; calling it — including on an empty
    # slice — must emit a FutureWarning.
    msg = "MultiIndex.format is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        idx.format()
        idx[:0].format()
|  | ||||
|  | ||||
def test_format_integer_names():
    # format(names=True) must handle non-string (integer) level names;
    # the deprecation warning is still emitted.
    index = MultiIndex(
        levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
    )
    msg = "MultiIndex.format is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        index.format(names=True)
|  | ||||
|  | ||||
def test_format_sparse_config(idx):
    # GH1538
    # With display.multi_sparse disabled, repeated outer labels are shown
    # on every row instead of being blanked out.
    msg = "MultiIndex.format is deprecated"
    with pd.option_context("display.multi_sparse", False):
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = idx.format()
    assert result[1] == "foo  two"
|  | ||||
|  | ||||
| def test_format_sparse_display(): | ||||
|     index = MultiIndex( | ||||
|         levels=[[0, 1], [0, 1], [0, 1], [0]], | ||||
|         codes=[ | ||||
|             [0, 0, 0, 1, 1, 1], | ||||
|             [0, 0, 1, 0, 0, 1], | ||||
|             [0, 1, 0, 0, 1, 0], | ||||
|             [0, 0, 0, 0, 0, 0], | ||||
|         ], | ||||
|     ) | ||||
|     msg = "MultiIndex.format is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = index.format() | ||||
|     assert result[3] == "1  0  0  0" | ||||
|  | ||||
|  | ||||
def test_repr_with_unicode_data():
    # the repr must contain the raw unicode characters, not escapes
    with pd.option_context("display.encoding", "UTF-8"):
        frame = pd.DataFrame({"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
        index = frame.set_index(["a", "b"]).index
        assert "\\" not in repr(index)  # we don't want unicode-escaped
|  | ||||
|  | ||||
def test_repr_roundtrip_raises():
    # eval()-ing the repr is not a supported round-trip: the repr shows
    # tuples, but the constructor wants explicit levels and codes.
    mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
    with pytest.raises(TypeError, match="Must pass both levels and codes"):
        eval(repr(mi))
|  | ||||
|  | ||||
def test_unicode_string_with_unicode():
    # smoke test: str() on an index holding unicode data must not raise
    data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    mi = pd.DataFrame(data).set_index(["a", "b"]).index
    str(mi)
|  | ||||
|  | ||||
def test_repr_max_seq_item_setting(idx):
    # GH10182: with max_seq_items unset, the repr is never truncated
    repeated = idx.repeat(50)
    with pd.option_context("display.max_seq_items", None):
        repr(repeated)
        assert "..." not in str(repeated)
|  | ||||
|  | ||||
class TestRepr:
    """Exact-output tests for MultiIndex repr: truncation, column alignment,
    tuple-width capping, and unicode handling."""

    def test_unicode_repr_issues(self):
        # smoke test: repr of levels holding non-ascii text must not raise
        levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
        codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
        index = MultiIndex(levels=levels, codes=codes)

        repr(index.levels)
        repr(index.get_level_values(1))

    def test_repr_max_seq_items_equal_to_n(self, idx):
        # display.max_seq_items == n
        # when the limit exactly equals the length, nothing is elided
        with pd.option_context("display.max_seq_items", 6):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ('bar', 'one'),
            ('baz', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])"""
            assert result == expected

    def test_repr(self, idx):
        # single-element slice: one tuple, no truncation, no length suffix
        result = idx[:1].__repr__()
        expected = """\
MultiIndex([('foo', 'one')],
           names=['first', 'second'])"""
        assert result == expected

        # full index under the default display limit: all rows shown
        result = idx.__repr__()
        expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ('bar', 'one'),
            ('baz', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])"""
        assert result == expected

        # limit below the length: middle elided with "..." and length shown
        with pd.option_context("display.max_seq_items", 5):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ...
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'], length=6)"""
            assert result == expected

        # display.max_seq_items == 1
        # extreme truncation also elides all but the last name
        with pd.option_context("display.max_seq_items", 1):
            result = idx.__repr__()
            expected = """\
MultiIndex([...
            ('qux', 'two')],
           names=['first', ...], length=6)"""
            assert result == expected

    def test_rjust(self):
        # entries of differing widths are right-justified per column
        n = 1000
        ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
        dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
        mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"])
        result = mi[:1].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
           names=['a', 'b', 'dti'])"""
        assert result == expected

        # mixed widths ('a' vs 'abc', 9 vs 10): columns padded to align
        result = mi[::500].__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00'),
            (  'a',  9, '2000-01-01 00:08:20'),
            ('abc', 10, '2000-01-01 00:16:40'),
            ('abc', 10, '2000-01-01 00:25:00')],
           names=['a', 'b', 'dti'])"""
        assert result == expected

        # long index: truncated display keeps alignment on both halves
        result = mi.__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00'),
            (  'a',  9, '2000-01-01 00:00:01'),
            (  'a',  9, '2000-01-01 00:00:02'),
            (  'a',  9, '2000-01-01 00:00:03'),
            (  'a',  9, '2000-01-01 00:00:04'),
            (  'a',  9, '2000-01-01 00:00:05'),
            (  'a',  9, '2000-01-01 00:00:06'),
            (  'a',  9, '2000-01-01 00:00:07'),
            (  'a',  9, '2000-01-01 00:00:08'),
            (  'a',  9, '2000-01-01 00:00:09'),
            ...
            ('abc', 10, '2000-01-01 00:33:10'),
            ('abc', 10, '2000-01-01 00:33:11'),
            ('abc', 10, '2000-01-01 00:33:12'),
            ('abc', 10, '2000-01-01 00:33:13'),
            ('abc', 10, '2000-01-01 00:33:14'),
            ('abc', 10, '2000-01-01 00:33:15'),
            ('abc', 10, '2000-01-01 00:33:16'),
            ('abc', 10, '2000-01-01 00:33:17'),
            ('abc', 10, '2000-01-01 00:33:18'),
            ('abc', 10, '2000-01-01 00:33:19')],
           names=['a', 'b', 'dti'], length=2000)"""
        assert result == expected

    def test_tuple_width(self):
        # with many levels, over-wide tuples are capped and end in "...)"
        n = 1000
        ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
        dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
        levels = [ci, ci.codes + 9, dti, dti, dti]
        names = ["a", "b", "dti_1", "dti_2", "dti_3"]
        mi = MultiIndex.from_arrays(levels, names=names)
        result = mi[:1].__repr__()
        expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""  # noqa: E501
        assert result == expected

        result = mi[:10].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
            ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
            ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
            ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
            ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
            ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
            ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
            ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
            ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
            ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
        assert result == expected

        # both row truncation and per-tuple width capping at once
        result = mi.__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
            (  'a',  9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
            (  'a',  9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
            (  'a',  9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
            (  'a',  9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
            (  'a',  9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
            (  'a',  9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
            (  'a',  9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
            (  'a',  9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
            (  'a',  9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
            ...
            ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
            ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
            ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
            ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
            ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
            ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
            ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
            ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
            ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
            ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
        assert result == expected

    def test_multiindex_long_element(self):
        # Non-regression test towards GH#52960
        # a single very long element must not be truncated or wrapped badly
        data = MultiIndex.from_tuples([("c" * 62,)])

        expected = (
            "MultiIndex([('cccccccccccccccccccccccccccccccccccccccc"
            "cccccccccccccccccccccc',)],\n           )"
        )
        assert str(data) == expected
| @ -0,0 +1,124 @@ | ||||
| import numpy as np | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestGetLevelValues:
    def test_get_level_values_box_datetime64(self):
        # values taken from a datetime64 level come back boxed as Timestamp
        dti = date_range("1/1/2000", periods=4)
        mi = MultiIndex(
            levels=[dti, [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
        )
        assert isinstance(mi.get_level_values(0)[0], Timestamp)
|  | ||||
|  | ||||
def test_get_level_values(idx):
    # lookup by position carries the level's name
    level0 = idx.get_level_values(0)
    tm.assert_index_equal(
        level0, Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
    )
    assert level0.name == "first"

    # lookup by name matches lookup by position
    tm.assert_index_equal(idx.get_level_values("first"), level0)

    # GH 10460: categorical levels round-trip through get_level_values
    cat_mi = MultiIndex(
        levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
        codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
    )
    tm.assert_index_equal(
        cat_mi.get_level_values(0), CategoricalIndex(["A", "A", "A", "B", "B", "B"])
    )
    tm.assert_index_equal(
        cat_mi.get_level_values(1), CategoricalIndex([1, 2, 3, 1, 2, 3])
    )
|  | ||||
|  | ||||
def test_get_level_values_all_na():
    # GH#17924 when level entirely consists of nan
    mi = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])

    # all-NaN level comes back as float64
    tm.assert_index_equal(
        mi.get_level_values(0), Index([np.nan, np.nan, np.nan], dtype=np.float64)
    )
    # mixed level stays object
    tm.assert_index_equal(
        mi.get_level_values(1), Index(["a", np.nan, 1], dtype=object)
    )
|  | ||||
|  | ||||
def test_get_level_values_int_with_na():
    # GH#17924: an int level containing NaN round-trips through
    # get_level_values with the NaN restored in place
    for level_vals in ([1, np.nan, 2], [np.nan, np.nan, 2]):
        mi = MultiIndex.from_arrays([["a", "b", "b"], level_vals])
        tm.assert_index_equal(mi.get_level_values(1), Index(level_vals))
|  | ||||
|  | ||||
def test_get_level_values_na():
    # all-NaN level and mixed level with a NaN
    mi = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])
    tm.assert_index_equal(mi.get_level_values(0), Index([np.nan, np.nan, np.nan]))
    tm.assert_index_equal(mi.get_level_values(1), Index(["a", np.nan, 1]))

    # datetime level with NaT keeps its DatetimeIndex dtype
    mi = MultiIndex.from_arrays([["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])])
    tm.assert_index_equal(mi.get_level_values(1), pd.DatetimeIndex([0, 1, pd.NaT]))

    # empty level -> empty object-dtype Index
    mi = MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(mi.get_level_values(0), Index([], dtype=object))
|  | ||||
|  | ||||
def test_get_level_values_when_periods():
    # GH33131. See also discussion in GH32669.
    # This test can probably be removed when PeriodIndex._engine is removed.
    from pandas import (
        Period,
        PeriodIndex,
    )

    periods = PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")
    mi = MultiIndex.from_arrays([periods])
    rebuilt = MultiIndex.from_arrays(
        [mi._get_level_values(lvl) for lvl in range(mi.nlevels)]
    )
    assert all(level.is_monotonic_increasing for level in rebuilt.levels)
|  | ||||
|  | ||||
| def test_values_loses_freq_of_underlying_index(): | ||||
|     # GH#49054 | ||||
|     idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BME")) | ||||
|     expected = idx.copy(deep=True) | ||||
|     idx2 = Index([1, 2, 3]) | ||||
|     midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]]) | ||||
|     midx.values | ||||
|     assert idx.freq is not None | ||||
|     tm.assert_index_equal(idx, expected) | ||||
| @ -0,0 +1,384 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import PY311 | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     MultiIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def assert_matching(actual, expected, check_dtype=False):
    # avoid specifying internal representation
    # as much as possible
    assert len(actual) == len(expected)
    for got, want in zip(actual, expected):
        tm.assert_numpy_array_equal(
            np.asarray(got), np.asarray(want), check_dtype=check_dtype
        )
|  | ||||
|  | ||||
def test_get_level_number_integer(idx):
    # integer names resolve to level positions, not to the integers themselves
    idx.names = [1, 0]
    assert idx._get_level_number(1) == 0
    assert idx._get_level_number(0) == 1
    with pytest.raises(
        IndexError, match="Too many levels: Index has only 2 levels, not 3"
    ):
        idx._get_level_number(2)
    with pytest.raises(KeyError, match="Level fourth not found"):
        idx._get_level_number("fourth")
|  | ||||
|  | ||||
def test_get_dtypes(using_infer_string):
    # Test MultiIndex.dtypes (# Gh37062)
    mi = MultiIndex.from_product(
        [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
        names=["int", "string", "dt"],
    )

    # string level dtype depends on the infer_string option
    string_dtype = pd.StringDtype(na_value=np.nan) if using_infer_string else "object"
    expected = pd.Series(
        {
            "int": np.dtype("int64"),
            "string": string_dtype,
            "dt": DatetimeTZDtype(tz="utc"),
        }
    )
    tm.assert_series_equal(expected, mi.dtypes)
|  | ||||
|  | ||||
def test_get_dtypes_no_level_name(using_infer_string):
    # Test MultiIndex.dtypes (# GH38580 )
    mi = MultiIndex.from_product(
        [
            [1, 2, 3],
            ["a", "b", "c"],
            pd.date_range("20200101", periods=2, tz="UTC"),
        ],
    )
    string_dtype = pd.StringDtype(na_value=np.nan) if using_infer_string else "object"
    # unnamed levels get positional "level_<i>" labels in the dtypes Series
    expected = pd.Series(
        {
            "level_0": np.dtype("int64"),
            "level_1": string_dtype,
            "level_2": DatetimeTZDtype(tz="utc"),
        }
    )
    tm.assert_series_equal(expected, mi.dtypes)
|  | ||||
|  | ||||
def test_get_dtypes_duplicate_level_names(using_infer_string):
    # Test MultiIndex.dtypes with non-unique level names (# GH45174)
    result = MultiIndex.from_product(
        [
            [1, 2, 3],
            ["a", "b", "c"],
            pd.date_range("20200101", periods=2, tz="UTC"),
        ],
        names=["A", "A", "A"],
    ).dtypes
    string_dtype = pd.StringDtype(na_value=np.nan) if using_infer_string else "object"
    # duplicate names are preserved as-is in the result's index
    expected = pd.Series(
        [np.dtype("int64"), string_dtype, DatetimeTZDtype(tz="utc")],
        index=["A", "A", "A"],
    )
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
    # too-large and too-negative level numbers both raise IndexError
    index = multiindex_dataframe_random_data.index

    with pytest.raises(IndexError, match="Too many levels"):
        index._get_level_number(2)
    with pytest.raises(IndexError, match="not a valid level number"):
        index._get_level_number(-3)
|  | ||||
|  | ||||
def test_set_name_methods(idx):
    """set_names/rename: out-of-place returns a new object, inplace=True
    mutates and returns None; the original fixture is never modified."""
    # so long as these are synonyms, we don't need to test set_names
    index_names = ["first", "second"]
    assert idx.rename == idx.set_names
    new_names = [name + "SUFFIX" for name in index_names]
    ind = idx.set_names(new_names)
    # out-of-place: original untouched, copy renamed
    assert idx.names == index_names
    assert ind.names == new_names
    msg = "Length of names must match number of levels in MultiIndex"
    with pytest.raises(ValueError, match=msg):
        ind.set_names(new_names + new_names)
    new_names2 = [name + "SUFFIX2" for name in new_names]
    # inplace=True mutates ind and returns None
    res = ind.set_names(new_names2, inplace=True)
    assert res is None
    assert ind.names == new_names2

    # set names for specific level (# GH7792)
    ind = idx.set_names(new_names[0], level=0)
    assert idx.names == index_names
    assert ind.names == [new_names[0], index_names[1]]

    res = ind.set_names(new_names2[0], level=0, inplace=True)
    assert res is None
    assert ind.names == [new_names2[0], index_names[1]]

    # set names for multiple levels
    ind = idx.set_names(new_names, level=[0, 1])
    assert idx.names == index_names
    assert ind.names == new_names

    res = ind.set_names(new_names2, level=[0, 1], inplace=True)
    assert res is None
    assert ind.names == new_names2
|  | ||||
|  | ||||
def test_set_levels_codes_directly(idx):
    # setting levels/codes directly raises AttributeError

    new_levels = [[lev + "a" for lev in level] for level in idx.levels]

    major_codes, minor_codes = idx.codes
    new_codes = [
        [(code + 1) % 3 for code in major_codes],
        [(code + 1) % 1 for code in minor_codes],
    ]

    with pytest.raises(AttributeError, match="Can't set attribute"):
        idx.levels = new_levels

    # the AttributeError wording changed in Python 3.11
    codes_msg = (
        "property 'codes' of 'MultiIndex' object has no setter"
        if PY311
        else "can't set attribute"
    )
    with pytest.raises(AttributeError, match=codes_msg):
        idx.codes = new_codes
|  | ||||
|  | ||||
def test_set_levels(idx):
    """set_levels/set_codes are out-of-place, and a failed call must leave
    the original index completely unchanged (GH 13754)."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    new_levels = [[lev + "a" for lev in level] for level in levels]

    # level changing [w/o mutation]
    ind2 = idx.set_levels(new_levels)
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing specific level [w/o mutation]
    ind2 = idx.set_levels(new_levels[0], level=0)
    assert_matching(ind2.levels, [new_levels[0], levels[1]])
    assert_matching(idx.levels, levels)

    ind2 = idx.set_levels(new_levels[1], level=1)
    assert_matching(ind2.levels, [levels[0], new_levels[1]])
    assert_matching(idx.levels, levels)

    # level changing multiple levels [w/o mutation]
    ind2 = idx.set_levels(new_levels, level=[0, 1])
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # illegal level changing should not change levels
    # GH 13754
    original_index = idx.copy()
    # too-short replacement level -> ValueError, levels untouched
    with pytest.raises(ValueError, match="^On"):
        idx.set_levels(["c"], level=0)
    assert_matching(idx.levels, original_index.levels, check_dtype=True)

    # wrong-length codes -> ValueError, codes untouched
    with pytest.raises(ValueError, match="^On"):
        idx.set_codes([0, 1, 2, 3, 4, 5], level=0)
    assert_matching(idx.codes, original_index.codes, check_dtype=True)

    # scalar instead of list-like -> TypeError, levels untouched
    with pytest.raises(TypeError, match="^Levels"):
        idx.set_levels("c", level=0)
    assert_matching(idx.levels, original_index.levels, check_dtype=True)

    # scalar instead of list-like -> TypeError, codes untouched
    with pytest.raises(TypeError, match="^Codes"):
        idx.set_codes(1, level=0)
    assert_matching(idx.codes, original_index.codes, check_dtype=True)
|  | ||||
|  | ||||
def test_set_codes(idx):
    """set_codes is out-of-place for whole-index, single-level, and
    multi-level replacement; the original index is never mutated."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    major_codes, minor_codes = codes
    major_codes = [(x + 1) % 3 for x in major_codes]
    minor_codes = [(x + 1) % 1 for x in minor_codes]
    new_codes = [major_codes, minor_codes]

    # changing codes w/o mutation
    ind2 = idx.set_codes(new_codes)
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # codes changing specific level w/o mutation
    ind2 = idx.set_codes(new_codes[0], level=0)
    assert_matching(ind2.codes, [new_codes[0], codes[1]])
    assert_matching(idx.codes, codes)

    ind2 = idx.set_codes(new_codes[1], level=1)
    assert_matching(ind2.codes, [codes[0], new_codes[1]])
    assert_matching(idx.codes, codes)

    # codes changing multiple levels w/o mutation
    ind2 = idx.set_codes(new_codes, level=[0, 1])
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing for levels of different magnitude of categories
    ind = MultiIndex.from_tuples([(0, i) for i in range(130)])
    new_codes = range(129, -1, -1)
    expected = MultiIndex.from_tuples([(0, i) for i in new_codes])

    # [w/o mutation]
    result = ind.set_codes(codes=new_codes, level=1)
    assert result.equals(expected)
|  | ||||
|  | ||||
def test_set_levels_codes_names_bad_input(idx):
    """Invalid arguments to set_levels/set_codes/set_names raise with
    specific messages: length mismatches -> ValueError, scalar-where-
    list-like-expected (and vice versa) -> TypeError."""
    levels, codes = idx.levels, idx.codes
    names = idx.names

    with pytest.raises(ValueError, match="Length of levels"):
        idx.set_levels([levels[0]])

    with pytest.raises(ValueError, match="Length of codes"):
        idx.set_codes([codes[0]])

    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names([names[0]])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list-like"):
        idx.set_names(names[0])

    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_levels(levels, level=0)

    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_codes(codes, level=0)

    # should have equal lengths
    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names(names[0], level=[0, 1])

    with pytest.raises(TypeError, match="Names must be a"):
        idx.set_names(names, level=0)
|  | ||||
|  | ||||
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    # GH 21149
    # Ensure that .set_names for MultiIndex with
    # nlevels == 1 does not raise any errors
    expected = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
    mi = MultiIndex.from_product([[0, 1]])
    out = mi.set_names("first", level=0, inplace=inplace)

    # inplace=True mutates mi and returns None; check the mutated object
    tm.assert_index_equal(mi if inplace else out, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    # GH13854: a CategoricalIndex passed to set_levels is kept as the level
    mi = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])

    cidx = CategoricalIndex(list("bac"), ordered=ordered)
    result = mi.set_levels(cidx, level=0)
    tm.assert_index_equal(
        result, MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=mi.codes)
    )

    # get_level_values re-expands the categorical with the same categories
    tm.assert_index_equal(
        result.get_level_values(0),
        CategoricalIndex(
            list("bacb"), categories=cidx.categories, ordered=cidx.ordered
        ),
    )
|  | ||||
|  | ||||
def test_set_value_keeps_names():
    # motivating example from #3742
    persons = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
    numbers = ["1", "2", "3"] * 2
    mi = MultiIndex.from_arrays([persons, numbers], names=["Name", "Number"])
    df = pd.DataFrame(
        np.random.default_rng(2).standard_normal((6, 4)),
        columns=["one", "two", "three", "four"],
        index=mi,
    ).sort_index()
    assert df._is_copy is None
    assert df.index.names == ("Name", "Number")

    # enlarging via .at must keep the index names intact
    df.at[("grethe", "4"), "one"] = 99.34
    assert df._is_copy is None
    assert df.index.names == ("Name", "Number")
|  | ||||
|  | ||||
def test_set_levels_with_iterable():
    # GH23273: set_levels must accept a plain (non-materialized) iterable
    colors = ["black"] * 3
    mi = MultiIndex.from_arrays([[1, 2, 3], colors], names=["size", "color"])

    result = mi.set_levels(map(int, ["3", "2", "1"]), level="size")

    expected = MultiIndex.from_arrays([[3, 2, 1], colors], names=["size", "color"])
    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
def test_set_empty_level():
    # GH#48636: replacing the level of an empty MultiIndex keeps the
    # replacement's dtype (datetime64 here) instead of object
    empty_dti = pd.DatetimeIndex([])
    result = MultiIndex.from_arrays([[]], names=["A"]).set_levels(empty_dti, level=0)
    tm.assert_index_equal(result, MultiIndex.from_arrays([empty_dti], names=["A"]))
|  | ||||
|  | ||||
def test_set_levels_pos_args_removal():
    # https://github.com/pandas-dev/pandas/issues/41485
    # `level` may no longer be passed positionally to set_levels/set_codes
    mi = MultiIndex.from_tuples(
        [
            (1, "one"),
            (3, "one"),
        ],
        names=["foo", "bar"],
    )

    with pytest.raises(TypeError, match="positional arguments"):
        mi.set_levels(["a", "b", "c"], 0)

    with pytest.raises(TypeError, match="positional arguments"):
        mi.set_codes([[0, 1], [1, 0]], 0)
|  | ||||
|  | ||||
| def test_set_levels_categorical_keep_dtype(): | ||||
|     # GH#52125 | ||||
|     midx = MultiIndex.from_arrays([[5, 6]]) | ||||
|     result = midx.set_levels(levels=pd.Categorical([1, 2]), level=0) | ||||
|     expected = MultiIndex.from_arrays([pd.Categorical([1, 2])]) | ||||
|     tm.assert_index_equal(result, expected) | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,289 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import index as libindex | ||||
|  | ||||
| from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     RangeIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_labels_dtypes():
    # GH 8456: codes are stored in the narrowest signed integer dtype
    # that fits the level size, and are always non-negative here.
    mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert mi.codes[0].dtype == "int8"
    assert mi.codes[1].dtype == "int8"

    for size, expected_dtype in ((40, "int8"), (400, "int16"), (40000, "int32")):
        mi = MultiIndex.from_product([["a"], range(size)])
        assert mi.codes[1].dtype == expected_dtype

    mi = MultiIndex.from_product([["a"], range(1000)])
    assert (mi.codes[0] >= 0).all()
    assert (mi.codes[1] >= 0).all()
|     assert (i.codes[1] >= 0).all() | ||||
|  | ||||
|  | ||||
def test_values_boxed():
    # .values must round-trip boxed (Timestamp/NaT) tuples exactly.
    pairs = [
        (1, pd.Timestamp("2000-01-01")),
        (2, pd.NaT),
        (3, pd.Timestamp("2000-01-03")),
        (1, pd.Timestamp("2000-01-04")),
        (2, pd.Timestamp("2000-01-02")),
        (3, pd.Timestamp("2000-01-03")),
    ]
    mi = MultiIndex.from_tuples(pairs)
    tm.assert_numpy_array_equal(
        mi.values, construct_1d_object_array_from_listlike(pairs)
    )
    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values)
|  | ||||
|  | ||||
def test_values_multiindex_datetimeindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    epoch_ns = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(epoch_ns)
    aware = pd.DatetimeIndex(epoch_ns, tz="US/Central")
    mi = MultiIndex.from_arrays([naive, aware])

    # full index, then a slice so that n_lev > n_lab
    for subset, stop in ((mi, None), (mi[:2], 2)):
        tuples = subset.values
        outer = pd.DatetimeIndex([pair[0] for pair in tuples])
        tm.assert_index_equal(outer, naive[:stop])
        inner = pd.DatetimeIndex([pair[1] for pair in tuples])
        tm.assert_index_equal(inner, aware[:stop])
|  | ||||
|  | ||||
def test_values_multiindex_periodindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    years = np.arange(2007, 2012)
    periods = pd.PeriodIndex(years, freq="D")
    mi = MultiIndex.from_arrays([years, periods])

    # full index, then a slice so that n_lev > n_lab
    for subset, stop in ((mi, None), (mi[:2], 2)):
        tuples = subset.values
        outer = Index([pair[0] for pair in tuples])
        tm.assert_index_equal(outer, Index(years[:stop], dtype=np.int64))
        inner = pd.PeriodIndex([pair[1] for pair in tuples])
        tm.assert_index_equal(inner, periods[:stop])
|  | ||||
|  | ||||
def test_consistency():
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))

    # consistent codes: constructing without raising is the assertion
    MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[np.arange(70000), np.repeat(range(10), 7000)],
    )

    # inconsistent: duplicated (major, minor) pairs -> not unique
    duplicated = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[
            np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]),
            np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]),
        ],
    )
    assert duplicated.is_unique is False
|  | ||||
|  | ||||
@pytest.mark.slow
def test_hash_collisions(monkeypatch):
    # non-smoke test that we don't get hash collisions once the engine
    # switches to the hashtable path (forced via a tiny _SIZE_CUTOFF)
    with monkeypatch.context() as ctx:
        ctx.setattr(libindex, "_SIZE_CUTOFF", 50)
        mi = MultiIndex.from_product(
            [np.arange(8), np.arange(8)], names=["one", "two"]
        )
        tm.assert_numpy_array_equal(
            mi.get_indexer(mi.values), np.arange(len(mi), dtype="intp")
        )

        for pos in (0, 1, len(mi) - 2, len(mi) - 1):
            assert mi.get_loc(mi[pos]) == pos
|  | ||||
|  | ||||
def test_dims():
    # Intentional no-op placeholder; kept so the module layout mirrors
    # the other index test modules.
    pass
|  | ||||
|  | ||||
def test_take_invalid_kwargs():
    # take() must reject numpy-compat kwargs it cannot honour.
    level_values = [
        ["A", "B"],
        [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")],
    ]
    mi = MultiIndex.from_product(level_values, names=["str", "dt"])
    positions = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        mi.take(positions, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        mi.take(positions, out=positions)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        mi.take(positions, mode="clip")
|  | ||||
|  | ||||
| def test_isna_behavior(idx): | ||||
|     # should not segfault GH5123 | ||||
|     # NOTE: if MI representation changes, may make sense to allow | ||||
|     # isna(MI) | ||||
|     msg = "isna is not defined for MultiIndex" | ||||
|     with pytest.raises(NotImplementedError, match=msg): | ||||
|         pd.isna(idx) | ||||
|  | ||||
|  | ||||
def test_large_multiindex_error(monkeypatch):
    # GH12527: missing-key lookups must raise KeyError on both sides of
    # the engine's hashtable size cutoff.
    size_cutoff = 50
    with monkeypatch.context() as ctx:
        ctx.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        for n_rows in (size_cutoff - 1, size_cutoff + 1):
            frame = pd.DataFrame(
                1,
                index=MultiIndex.from_product([[1, 2], range(n_rows)]),
                columns=["dest"],
            )
            with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
                frame.loc[(-1, 0), "dest"]
            with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
                frame.loc[(3, 0), "dest"]
|  | ||||
|  | ||||
def test_mi_hashtable_populated_attribute_error(monkeypatch):
    # GH 18165: attribute errors must surface normally even once the
    # MultiIndex engine's hashtable has been populated.
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
    rng = range(50)
    frame = pd.DataFrame(
        {"a": rng, "b": rng}, index=MultiIndex.from_arrays([rng, rng])
    )

    expected_msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=expected_msg):
        frame["a"].foo()
|  | ||||
|  | ||||
def test_can_hold_identifiers(idx):
    # The fixture index's first element is expected to be usable as an
    # attribute-style identifier.
    first = idx[0]
    assert idx._can_hold_identifiers_and_holds_name(first) is True
|  | ||||
|  | ||||
| def test_metadata_immutable(idx): | ||||
|     levels, codes = idx.levels, idx.codes | ||||
|     # shouldn't be able to set at either the top level or base level | ||||
|     mutable_regex = re.compile("does not support mutable operations") | ||||
|     with pytest.raises(TypeError, match=mutable_regex): | ||||
|         levels[0] = levels[0] | ||||
|     with pytest.raises(TypeError, match=mutable_regex): | ||||
|         levels[0][0] = levels[0][0] | ||||
|     # ditto for labels | ||||
|     with pytest.raises(TypeError, match=mutable_regex): | ||||
|         codes[0] = codes[0] | ||||
|     with pytest.raises(ValueError, match="assignment destination is read-only"): | ||||
|         codes[0][0] = codes[0][0] | ||||
|     # and for names | ||||
|     names = idx.names | ||||
|     with pytest.raises(TypeError, match=mutable_regex): | ||||
|         names[0] = names[0] | ||||
|  | ||||
|  | ||||
def test_level_setting_resets_attributes():
    mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert mi.is_monotonic_increasing

    shuffled = mi.set_levels([["A", "B"], [1, 3, 2]])
    # if this fails, probably didn't reset the cache correctly.
    assert not shuffled.is_monotonic_increasing
|  | ||||
|  | ||||
| def test_rangeindex_fallback_coercion_bug(): | ||||
|     # GH 12893 | ||||
|     df1 = pd.DataFrame(np.arange(100).reshape((10, 10))) | ||||
|     df2 = pd.DataFrame(np.arange(100).reshape((10, 10))) | ||||
|     df = pd.concat( | ||||
|         {"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)}, | ||||
|         axis=1, | ||||
|     ) | ||||
|     df.index.names = ["fizz", "buzz"] | ||||
|  | ||||
|     expected = pd.DataFrame( | ||||
|         {"df2": np.arange(100), "df1": np.arange(100)}, | ||||
|         index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(df, expected, check_like=True) | ||||
|  | ||||
|     result = df.index.get_level_values("fizz") | ||||
|     expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     result = df.index.get_level_values("buzz") | ||||
|     expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz") | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_memory_usage(idx): | ||||
|     result = idx.memory_usage() | ||||
|     if len(idx): | ||||
|         idx.get_loc(idx[0]) | ||||
|         result2 = idx.memory_usage() | ||||
|         result3 = idx.memory_usage(deep=True) | ||||
|  | ||||
|         # RangeIndex, IntervalIndex | ||||
|         # don't have engines | ||||
|         if not isinstance(idx, (RangeIndex, IntervalIndex)): | ||||
|             assert result2 > result | ||||
|  | ||||
|         if idx.inferred_type == "object": | ||||
|             assert result3 > result2 | ||||
|  | ||||
|     else: | ||||
|         # we report 0 for no-length | ||||
|         assert result == 0 | ||||
|  | ||||
|  | ||||
| def test_nlevels(idx): | ||||
|     assert idx.nlevels == 2 | ||||
| @ -0,0 +1,103 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import MultiIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_isin_nan():
    # NaN inside a tuple should match regardless of how the missing
    # value is spelled (np.nan vs float("nan")).
    mi = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    expected = np.array([False, True])
    for missing in (np.nan, float("nan")):
        tm.assert_numpy_array_equal(mi.isin([("bar", missing)]), expected)
|  | ||||
|  | ||||
| def test_isin_missing(nulls_fixture): | ||||
|     # GH48905 | ||||
|     mi1 = MultiIndex.from_tuples([(1, nulls_fixture)]) | ||||
|     mi2 = MultiIndex.from_tuples([(1, 1), (1, 2)]) | ||||
|     result = mi2.isin(mi1) | ||||
|     expected = np.array([False, False]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_isin():
    targets = [("foo", 2), ("bar", 3), ("quux", 4)]

    mi = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    tm.assert_numpy_array_equal(
        mi.isin(targets), np.array([False, False, True, True])
    )

    # empty, return dtype bool
    empty_mi = MultiIndex.from_arrays([[], []])
    empty_result = empty_mi.isin(targets)
    assert len(empty_result) == 0
    assert empty_result.dtype == np.bool_
|  | ||||
|  | ||||
def test_isin_level_kwarg():
    mi = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])

    str_vals = ["foo", "bar", "quux"]
    int_vals = [2, 3, 10]
    expected = np.array([False, False, True, True])

    # positive and negative level numbers address the same level
    for level_no in (0, -2):
        tm.assert_numpy_array_equal(expected, mi.isin(str_vals, level=level_no))
    for level_no in (1, -1):
        tm.assert_numpy_array_equal(expected, mi.isin(int_vals, level=level_no))

    msg = "Too many levels: Index has only 2 levels, not 6"
    with pytest.raises(IndexError, match=msg):
        mi.isin(str_vals, level=5)
    msg = "Too many levels: Index has only 2 levels, -5 is not a valid level number"
    with pytest.raises(IndexError, match=msg):
        mi.isin(str_vals, level=-5)

    # non-integer / unknown level labels raise KeyError
    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        mi.isin(str_vals, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        mi.isin(int_vals, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        mi.isin(int_vals, level="A")

    mi.names = ["A", "B"]
    tm.assert_numpy_array_equal(expected, mi.isin(str_vals, level="A"))
    tm.assert_numpy_array_equal(expected, mi.isin(int_vals, level="B"))

    with pytest.raises(KeyError, match="'Level C not found'"):
        mi.isin(int_vals, level="C")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "labels,expected,level",
    [
        ([("b", np.nan)], np.array([False, False, True]), None),
        ([np.nan, "a"], np.array([True, True, False]), 0),
        (["d", np.nan], np.array([False, True, True]), 1),
    ],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
    # GH 19132: missing values participate in matching, both for whole
    # tuples (level=None) and for single-level lookups.
    mi = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]])
    tm.assert_numpy_array_equal(mi.isin(labels, level=level), expected)
|  | ||||
|  | ||||
def test_isin_empty():
    # GH#51599: isin with an empty iterable is all-False, not an error
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]])
    tm.assert_numpy_array_equal(mi.isin([]), np.array([False, False]))
|  | ||||
|  | ||||
def test_isin_generator():
    # GH#52568: isin must accept a one-shot generator of tuples
    mi = MultiIndex.from_tuples([(1, 2)])
    matches = mi.isin(pair for pair in [(1, 2)])
    tm.assert_numpy_array_equal(matches, np.array([True]))
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user