done
This commit is contained in:
		| @ -0,0 +1,254 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import ( | ||||
|     CategoricalDtype, | ||||
|     IntervalDtype, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     NaT, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     interval_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class AstypeTests:
    """
    Shared ``astype`` checks, inherited by one test class per subtype family.

    Subclasses provide an ``index`` fixture yielding the IntervalIndex to cast.
    """

    def test_astype_idempotent(self, index):
        # Casting to the generic "interval" dtype, or to the index's own
        # dtype, must give back an equal index.
        for target in ("interval", index.dtype):
            tm.assert_index_equal(index.astype(target), index)

    def test_astype_object(self, index):
        as_object = index.astype(object)
        tm.assert_index_equal(as_object, Index(index.values, dtype="object"))
        # the object-dtype result must not compare equal to the original
        assert not as_object.equals(index)

    def test_astype_category(self, index):
        # default categorical params, spelled as a string and as a dtype
        default_expected = CategoricalIndex(index.values)
        for target in ("category", CategoricalDtype()):
            tm.assert_index_equal(index.astype(target), default_expected)

        # non-default params: ordered, and with the last unique value left
        # out of the categories
        cats = index.dropna().unique().values[:-1]
        ordered_dtype = CategoricalDtype(categories=cats, ordered=True)
        ordered_expected = CategoricalIndex(
            index.values, categories=cats, ordered=True
        )
        tm.assert_index_equal(index.astype(ordered_dtype), ordered_expected)

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        # none of these non-interval dtypes is a valid cast target
        with pytest.raises(TypeError, match="Cannot cast IntervalIndex to dtype"):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        # the quote style around the dtype name is allowed to vary
        pattern = "data type [\"']fake_dtype[\"'] not understood"
        with pytest.raises(TypeError, match=pattern):
            index.astype("fake_dtype")
|  | ||||
|  | ||||
class TestIntSubtype(AstypeTests):
    """``astype`` tests for IntervalIndex built on int64 / uint64 endpoints"""

    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        # casting the interval dtype should match casting each side separately
        target = IntervalDtype(subtype, index.closed)
        converted = index.astype(target)

        new_left = index.left.astype(subtype)
        new_right = index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed)
        tm.assert_index_equal(converted, expected)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        # signed <-> unsigned, using non-negative breaks only
        index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        target = IntervalDtype(subtype_end, index.closed)
        converted = index.astype(target)

        new_left = index.left.astype(subtype_end)
        new_right = index.right.astype(subtype_end)
        expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed)
        tm.assert_index_equal(converted, expected)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        # int64 -> uint64 fails with negative values
        negative_index = interval_range(-10, 10)
        unsigned = IntervalDtype("uint64", "right")

        # The desired exception message is undecided, so the pattern is a
        #  negative lookahead: it only rules out the message that would
        #  indicate -10 wrapped around to a large positive integer.
        not_wraparound = "^(?!(left side of interval must be <= right side))"
        with pytest.raises(ValueError, match=not_wraparound):
            negative_index.astype(unsigned)
|  | ||||
|  | ||||
class TestFloatSubtype(AstypeTests):
    """``astype`` tests for IntervalIndex built on float64 endpoints"""

    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        float_index = interval_range(0.0, 10.0)
        target = IntervalDtype(subtype, "right")
        converted = float_index.astype(target)

        new_left = float_index.left.astype(subtype)
        new_right = float_index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(
            new_left, new_right, closed=float_index.closed
        )
        tm.assert_index_equal(converted, expected)

        # raises with NA
        na_msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with_na = float_index.insert(0, np.nan)
        with pytest.raises(ValueError, match=na_msg):
            with_na.astype(target)

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer_with_non_integer_borders(self, subtype):
        # quarter-step endpoints: expected result is each side cast on its own
        float_index = interval_range(0.0, 3.0, freq=0.25)
        target = IntervalDtype(subtype, "right")
        converted = float_index.astype(target)

        new_left = float_index.left.astype(subtype)
        new_right = float_index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(
            new_left, new_right, closed=float_index.closed
        )
        tm.assert_index_equal(converted, expected)

    def test_subtype_integer_errors(self):
        # float64 -> uint64 fails with negative values
        negative_index = interval_range(-10.0, 10.0)
        unsigned = IntervalDtype("uint64", "right")
        expected_msg = re.escape(
            "Cannot convert interval[float64, right] to interval[uint64, right]; "
            "subtypes are incompatible"
        )
        with pytest.raises(TypeError, match=expected_msg):
            negative_index.astype(unsigned)

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        # float -> datetime-like subtypes is rejected
        target = IntervalDtype(subtype, "right")
        pattern = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=pattern):
            index.astype(target)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_astype_category(self, index):
        # same check as the base class, with the cast RuntimeWarning silenced
        super().test_astype_category(index)
|  | ||||
|  | ||||
class TestDatetimelikeSubtype(AstypeTests):
    """``astype`` tests for IntervalIndex built on Timestamp / Timedelta endpoints"""

    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        """Each entry of ``indexes`` in turn."""
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        target = IntervalDtype(subtype, "right")

        if subtype == "int64":
            # int64 is allowed and should match casting each side separately
            converted = index.astype(target)
            left_ints = index.left.astype(subtype)
            right_ints = index.right.astype(subtype)

            expected = IntervalIndex.from_arrays(
                left_ints, right_ints, closed=index.closed
            )
            tm.assert_index_equal(converted, expected)
        else:
            # uint64 is rejected
            pattern = (
                r"Cannot convert interval\[(timedelta64|datetime64)\[ns.*\], .*\] "
                r"to interval\[uint64, .*\]"
            )
            with pytest.raises(TypeError, match=pattern):
                index.astype(target)

    def test_subtype_float(self, index):
        # datetime-like -> float64 is rejected
        target = IntervalDtype("float64", "right")
        pattern = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=pattern):
            index.astype(target)

    def test_subtype_datetimelike(self):
        pattern = "Cannot convert .* to .*; subtypes are incompatible"
        cases = [
            # datetime -> timedelta raises (naive, then tz-aware)
            (interval_range(Timestamp("2018-01-01"), periods=10), "timedelta64[ns]"),
            (
                interval_range(Timestamp("2018-01-01", tz="CET"), periods=10),
                "timedelta64[ns]",
            ),
            # timedelta -> datetime raises
            (interval_range(Timedelta("0 days"), periods=10), "datetime64[ns]"),
        ]
        for source_index, target_subtype in cases:
            target = IntervalDtype(target_subtype, "right")
            with pytest.raises(TypeError, match=pattern):
                source_index.astype(target)
| @ -0,0 +1,535 @@ | ||||
| from functools import partial | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas.core.dtypes.common import is_unsigned_integer_dtype | ||||
| from pandas.core.dtypes.dtypes import IntervalDtype | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     date_range, | ||||
|     notna, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import IntervalArray | ||||
| import pandas.core.common as com | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Index name to construct with: ``None`` and ``"foo"`` in turn."""
    return request.param
|  | ||||
|  | ||||
class ConstructorTests:
    """
    Construction checks shared by every IntervalIndex constructor variant.

    Test data starts out in "breaks" form; each subclass implements
    ``get_kwargs_from_breaks`` to turn breaks into the keyword arguments its
    own constructor expects, and supplies the ``constructor`` fixture.
    The ``closed`` fixture is not defined in this module (presumably it comes
    from a shared conftest).
    """

    @pytest.fixture(
        params=[
            ([3, 14, 15, 92, 653], np.int64),
            (np.arange(10, dtype="int64"), np.int64),
            (Index(np.arange(-10, 11, dtype=np.int64)), np.int64),
            (Index(np.arange(10, 31, dtype=np.uint64)), np.uint64),
            (Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
            (date_range("20180101", periods=10), "<M8[ns]"),
            (
                date_range("20180101", periods=10, tz="US/Eastern"),
                "datetime64[ns, US/Eastern]",
            ),
            (timedelta_range("1 day", periods=10), "<m8[ns]"),
        ]
    )
    def breaks_and_expected_subtype(self, request):
        """A ``(breaks, expected interval subtype)`` pair."""
        return request.param

    def test_constructor(self, constructor, breaks_and_expected_subtype, closed, name):
        breaks, expected_subtype = breaks_and_expected_subtype

        kwargs = self.get_kwargs_from_breaks(breaks, closed)
        built = constructor(closed=closed, name=name, **kwargs)

        assert built.closed == closed
        assert built.name == name
        assert built.dtype.subtype == expected_subtype

        # consecutive breaks become the left / right endpoints
        expected_left = Index(breaks[:-1], dtype=expected_subtype)
        expected_right = Index(breaks[1:], dtype=expected_subtype)
        tm.assert_index_equal(built.left, expected_left)
        tm.assert_index_equal(built.right, expected_right)

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "float64"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "datetime64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "timedelta64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.float64), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        # expected: convert the breaks first, then construct without a dtype
        converted_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**converted_kwargs)

        raw_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        # the dtype may be given as an object or as its string form
        for dtype in (iv_dtype, str(iv_dtype)):
            tm.assert_index_equal(constructor(dtype=dtype, **raw_kwargs), expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            Index([0, 1, 2, 3, 4], dtype=np.int64),
            Index([0, 1, 2, 3, 4], dtype=np.uint64),
            Index([0, 1, 2, 3, 4], dtype=np.float64),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        iv_dtype = IntervalDtype(breaks.dtype)
        kwargs = self.get_kwargs_from_breaks(breaks)

        for dtype in (iv_dtype, str(iv_dtype)):
            # no warning may be emitted, and the explicit ``closed`` is used
            with tm.assert_produces_warning(None):
                built = constructor(dtype=dtype, closed="left", **kwargs)
            assert built.dtype.closed == "left"

    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        built = constructor(closed=closed, **kwargs)

        assert built.closed == closed
        assert built.dtype.subtype == np.float64

        # n all-NaN breaks give n - 1 NaN entries
        expected_values = np.array(breaks[:-1], dtype=object)
        tm.assert_numpy_array_equal(np.array(built), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="uint64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        kwargs = self.get_kwargs_from_breaks(breaks)
        built = constructor(closed=closed, **kwargs)

        # a plain empty list has no dtype; int64 is expected in that case
        expected_subtype = getattr(breaks, "dtype", np.int64)

        assert built.empty
        assert built.closed == closed
        assert built.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(built), np.array([], dtype=object))

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        kwargs = self.get_kwargs_from_breaks(breaks)
        with pytest.raises(TypeError, match=unsupported):
            constructor(**kwargs)

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253
        int_breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(int_breaks)

        # the same breaks wrapped in a categorical container
        kwargs = self.get_kwargs_from_breaks(cat_constructor(int_breaks))
        tm.assert_index_equal(constructor(**kwargs), expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))
        periods_kwargs = self.get_kwargs_from_breaks(
            period_range("2000-01-01", periods=10)
        )
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))

        # (expected exception, message pattern, constructor kwargs)
        cases = [
            # invalid closed
            (
                ValueError,
                "closed must be one of 'right', 'left', 'both', 'neither'",
                {"closed": "invalid", **filler},
            ),
            # unsupported dtype
            (
                TypeError,
                "dtype must be an IntervalDtype, got int64",
                {"dtype": "int64", **filler},
            ),
            # invalid dtype
            (
                TypeError,
                "data type [\"']invalid[\"'] not understood",
                {"dtype": "invalid", **filler},
            ),
            # no point in nesting periods in an IntervalIndex
            (
                ValueError,
                "Period dtypes are not supported, use a PeriodIndex instead",
                periods_kwargs,
            ),
            # decreasing values
            (
                ValueError,
                "left side of interval must be <= right side",
                decreasing_kwargs,
            ),
        ]
        for exc_type, pattern, kwargs in cases:
            with pytest.raises(exc_type, match=pattern):
                constructor(**kwargs)
|  | ||||
|  | ||||
class TestFromArrays(ConstructorTests):
    """Runs the shared constructor tests through IntervalIndex.from_arrays"""

    @pytest.fixture
    def constructor(self):
        """The constructor under test."""
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert breaks into the ``left``/``right`` kwargs expected by
        IntervalIndex.from_arrays; ``closed`` is accepted but not used.
        """
        left, right = breaks[:-1], breaks[1:]
        return {"left": left, "right": right}

    def test_constructor_errors(self):
        # GH 19016: categorical data
        cat = Categorical(list("01234abcde"), ordered=True)
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            IntervalIndex.from_arrays(cat[:-1], cat[1:])

        # unequal length
        three_left, two_right = [0, 1, 2], [2, 3]
        with pytest.raises(ValueError, match="left and right must have the same length"):
            IntervalIndex.from_arrays(three_left, two_right)

    @pytest.mark.parametrize(
        "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
    )
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """One int side and one float side should give float64 on both sides"""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)
        built = IntervalIndex.from_arrays(left, right)

        tm.assert_index_equal(built.left, Index(left, dtype=np.float64))
        tm.assert_index_equal(built.right, Index(right, dtype=np.float64))
        assert built.dtype.subtype == np.float64

    @pytest.mark.parametrize("interval_cls", [IntervalArray, IntervalIndex])
    def test_from_arrays_mismatched_datetimelike_resos(self, interval_cls):
        # GH#55714
        left = date_range("2016-01-01", periods=3, unit="s")
        right = date_range("2017-01-01", periods=3, unit="ms")

        # second-resolution left with millisecond-resolution right, for each
        # datetime-like flavour
        endpoint_pairs = [
            # dt64
            (left, right),
            # td64
            (left - left[0], right - left[0]),
            # dt64tz
            (left.tz_localize("UTC"), right.tz_localize("UTC")),
        ]
        for lhs, rhs in endpoint_pairs:
            result = interval_cls.from_arrays(lhs, rhs)
            # expected: same as converting the left side to "ms" up front
            expected = interval_cls.from_arrays(lhs.as_unit("ms"), rhs)
            tm.assert_equal(result, expected)
|  | ||||
|  | ||||
class TestFromBreaks(ConstructorTests):
    """Runs the shared constructor tests through IntervalIndex.from_breaks"""

    @pytest.fixture
    def constructor(self):
        """The constructor under test."""
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Wrap breaks in the ``breaks`` kwarg expected by
        IntervalIndex.from_breaks; ``closed`` is accepted but not used.
        """
        return dict(breaks=breaks)

    def test_constructor_errors(self):
        # GH 19016: categorical data
        cat = Categorical(list("01234abcde"), ordered=True)
        unsupported = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=unsupported):
            IntervalIndex.from_breaks(cat)

    def test_length_one(self):
        """A single break should give the same result as no breaks at all"""
        from_single = IntervalIndex.from_breaks([0])
        from_none = IntervalIndex.from_breaks([])
        tm.assert_index_equal(from_single, from_none)

    def test_left_right_dont_share_data(self):
        # GH#36310
        arr = IntervalIndex.from_breaks(np.arange(5))._data
        left_base = arr._left.base
        # either the left side has no base, or its base differs from the right's
        assert left_base is None or left_base is not arr._right.base
|  | ||||
|  | ||||
class TestFromTuples(ConstructorTests):
    """Tests specific to IntervalIndex.from_tuples"""

    @pytest.fixture
    def constructor(self):
        """The constructor under test."""
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert breaks into the ``data`` kwarg (a collection of
        ``(left, right)`` tuples) expected by IntervalIndex.from_tuples.

        Skips the calling test for unsigned-integer breaks. ``closed`` is
        accepted but not used.
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant IntervalIndex.from_tuples tests")

        # empty input is passed through unchanged
        if len(breaks) == 0:
            return {"data": breaks}

        tuples = list(zip(breaks[:-1], breaks[1:]))
        if isinstance(breaks, (list, tuple)):
            return {"data": tuples}
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            # rebuild with the same categorical container type as the input
            return {"data": breaks._constructor(tuples)}
        return {"data": com.asarray_tuplesafe(tuples)}

    def test_constructor_errors(self):
        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        msg = "IntervalIndex.from_tuples received an invalid item, 2"
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_tuples(tuples)

        # too few/many items. The message is expected to end with the
        # offending tuple; its parentheses are escaped because ``match`` is a
        # regex. Interpolating the whole input list unescaped would create a
        # character class that matches nearly any text after "got ".
        prefix = "IntervalIndex.from_tuples requires tuples of length 2, got "

        tuples = [(0, 1), (2,), (3, 4)]
        with pytest.raises(ValueError, match=prefix + r"\(2,\)"):
            IntervalIndex.from_tuples(tuples)

        tuples = [(0, 1), (2, 3, 4), (5, 6)]
        with pytest.raises(ValueError, match=prefix + r"\(2, 3, 4\)"):
            IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        # tuple (NA, NA) evaluates the same as NA as an element
        na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
        idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
        idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(idx_na_tuple, idx_na_element)
|  | ||||
|  | ||||
class TestClassConstructors(ConstructorTests):
    """Constructor tests for IntervalIndex(...) and Index(..., dtype="interval")"""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def klass(self, request):
        # A separate fixture, so that Index.__new__ with the dtype kwarg is
        # covered as well
        return request.param

    @pytest.fixture
    def constructor(self):
        """The constructor used by the inherited tests."""
        return IntervalIndex

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        Convert breaks into the ``data`` kwarg (a collection of Interval
        objects closed on ``closed``) expected by the class constructors.

        Skips the calling test for unsigned-integer breaks.
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant for class constructor tests")

        # empty input is passed through unchanged
        if len(breaks) == 0:
            return {"data": breaks}

        ivs = []
        for left, right in zip(breaks[:-1], breaks[1:]):
            # a missing left endpoint is kept as-is instead of being wrapped
            ivs.append(Interval(left, right, closed) if notna(left) else left)

        if isinstance(breaks, list):
            data = ivs
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            # rebuild with the same categorical container type as the input
            data = breaks._constructor(ivs)
        else:
            data = np.array(ivs, dtype=object)
        return {"data": data}

    def test_generic_errors(self, constructor):
        """
        Deliberately empty override of the base-class test: errors are
        handled differently here, and the checks are unnecessary because
        they are caught at the Interval level.
        """

    def test_constructor_string(self):
        # GH23013
        # Deliberately empty override: when forming the intervals from
        # breaks, an interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, klass):
        # mismatched closed within intervals with no constructor override
        mixed = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        with pytest.raises(
            ValueError, match="intervals must all be closed on the same side"
        ):
            klass(mixed)

        # scalar
        scalar_msg = (
            r"(IntervalIndex|Index)\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=scalar_msg):
            klass(5)

        # not an interval; dtype depends on 32bit/windows builds
        not_interval_msg = (
            "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        )
        with pytest.raises(TypeError, match=not_interval_msg):
            klass([0, 1])

    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370
        # Reduce the input to endpoint tuples, then rebuild with the
        # requested ``closed`` to get the expected result.
        if isinstance(data, IntervalIndex):
            tuples = data.to_tuples()
        else:
            tuples = []
            for iv in data:
                # missing entries are carried over unchanged
                tuples.append((iv.left, iv.right) if notna(iv) else iv)

        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        tm.assert_index_equal(constructor(data, closed=closed), expected)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        values = values_constructor([Interval(0, 1), Interval(1, 2), Interval(2, 3)])
        as_object = Index(values, dtype=object)

        assert type(as_object) is Index
        tm.assert_numpy_array_equal(as_object.values, np.array(values))

    def test_index_mixed_closed(self):
        # GH27172
        mixed = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        # the inferred result should equal an explicit object-dtype Index
        tm.assert_index_equal(Index(mixed), Index(mixed, dtype=object))
|  | ||||
|  | ||||
| @pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"]) | ||||
| def test_interval_index_subtype(timezone, inclusive_endpoints_fixture): | ||||
|     # GH#46999 | ||||
|     dates = date_range("2022", periods=3, tz=timezone) | ||||
|     dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]" | ||||
|     result = IntervalIndex.from_arrays( | ||||
|         ["2022-01-01", "2022-01-02"], | ||||
|         ["2022-01-02", "2022-01-03"], | ||||
|         closed=inclusive_endpoints_fixture, | ||||
|         dtype=dtype, | ||||
|     ) | ||||
|     expected = IntervalIndex.from_arrays( | ||||
|         dates[:-1], dates[1:], closed=inclusive_endpoints_fixture | ||||
|     ) | ||||
|     tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_dtype_closed_mismatch(): | ||||
|     # GH#38394 closed specified in both dtype and IntervalIndex constructor | ||||
|  | ||||
|     dtype = IntervalDtype(np.int64, "left") | ||||
|  | ||||
|     msg = "closed keyword does not match dtype.closed" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         IntervalIndex([], dtype=dtype, closed="neither") | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         IntervalArray([], dtype=dtype, closed="neither") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype", | ||||
|     ["Float64", pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow"))], | ||||
| ) | ||||
| def test_ea_dtype(dtype): | ||||
|     # GH#56765 | ||||
|     bins = [(0.0, 0.4), (0.4, 0.6)] | ||||
|     interval_dtype = IntervalDtype(subtype=dtype, closed="left") | ||||
|     result = IntervalIndex.from_tuples(bins, closed="left", dtype=interval_dtype) | ||||
|     assert result.dtype == interval_dtype | ||||
|     expected = IntervalIndex.from_tuples(bins, closed="left").astype(interval_dtype) | ||||
|     tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,36 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     IntervalIndex, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestEquals: | ||||
|     def test_equals(self, closed): | ||||
|         expected = IntervalIndex.from_breaks(np.arange(5), closed=closed) | ||||
|         assert expected.equals(expected) | ||||
|         assert expected.equals(expected.copy()) | ||||
|  | ||||
|         assert not expected.equals(expected.astype(object)) | ||||
|         assert not expected.equals(np.array(expected)) | ||||
|         assert not expected.equals(list(expected)) | ||||
|  | ||||
|         assert not expected.equals([1, 2]) | ||||
|         assert not expected.equals(np.array([1, 2])) | ||||
|         assert not expected.equals(date_range("20130101", periods=2)) | ||||
|  | ||||
|         expected_name1 = IntervalIndex.from_breaks( | ||||
|             np.arange(5), closed=closed, name="foo" | ||||
|         ) | ||||
|         expected_name2 = IntervalIndex.from_breaks( | ||||
|             np.arange(5), closed=closed, name="bar" | ||||
|         ) | ||||
|         assert expected.equals(expected_name1) | ||||
|         assert expected_name1.equals(expected_name2) | ||||
|  | ||||
|         for other_closed in {"left", "right", "both", "neither"} - {closed}: | ||||
|             expected_other_closed = IntervalIndex.from_breaks( | ||||
|                 np.arange(5), closed=other_closed | ||||
|             ) | ||||
|             assert not expected.equals(expected_other_closed) | ||||
| @ -0,0 +1,119 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestIntervalIndexRendering: | ||||
|     """Tests for the string rendering of IntervalIndex values and labels.""" | ||||
|  | ||||
|     # TODO: this is a test for DataFrame/Series, not IntervalIndex | ||||
|     @pytest.mark.parametrize( | ||||
|         "constructor,expected", | ||||
|         [ | ||||
|             ( | ||||
|                 Series, | ||||
|                 ( | ||||
|                     "(0.0, 1.0]    a\n" | ||||
|                     "NaN           b\n" | ||||
|                     "(2.0, 3.0]    c\n" | ||||
|                     "dtype: object" | ||||
|                 ), | ||||
|             ), | ||||
|             (DataFrame, ("            0\n(0.0, 1.0]  a\nNaN         b\n(2.0, 3.0]  c")), | ||||
|         ], | ||||
|     ) | ||||
|     def test_repr_missing(self, constructor, expected, using_infer_string, request): | ||||
|         """repr of an object indexed by intervals shows a missing interval as NaN.""" | ||||
|         # GH 25984 | ||||
|         if using_infer_string and constructor is Series: | ||||
|             # the expected Series repr ends in "dtype: object", which differs here | ||||
|             request.applymarker(pytest.mark.xfail(reason="repr different")) | ||||
|         index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) | ||||
|         obj = constructor(list("abc"), index=index) | ||||
|         result = repr(obj) | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_repr_floats(self): | ||||
|         """str of a Series with float interval labels shows the endpoints as given.""" | ||||
|         # GH 32553 | ||||
|  | ||||
|         markers = Series( | ||||
|             [1, 2], | ||||
|             index=IntervalIndex( | ||||
|                 [ | ||||
|                     Interval(left, right) | ||||
|                     for left, right in zip( | ||||
|                         Index([329.973, 345.137], dtype="float64"), | ||||
|                         Index([345.137, 360.191], dtype="float64"), | ||||
|                     ) | ||||
|                 ] | ||||
|             ), | ||||
|         ) | ||||
|         result = str(markers) | ||||
|         expected = "(329.973, 345.137]    1\n(345.137, 360.191]    2\ndtype: int64" | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.filterwarnings( | ||||
|         "ignore:invalid value encountered in cast:RuntimeWarning" | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "tuples, closed, expected_data", | ||||
|         [ | ||||
|             ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]), | ||||
|             ( | ||||
|                 [(0.5, 1.0), np.nan, (2.0, 3.0)], | ||||
|                 "right", | ||||
|                 ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"], | ||||
|             ), | ||||
|             ( | ||||
|                 [ | ||||
|                     (Timestamp("20180101"), Timestamp("20180102")), | ||||
|                     np.nan, | ||||
|                     ((Timestamp("20180102"), Timestamp("20180103"))), | ||||
|                 ], | ||||
|                 "both", | ||||
|                 [ | ||||
|                     "[2018-01-01 00:00:00, 2018-01-02 00:00:00]", | ||||
|                     "NaN", | ||||
|                     "[2018-01-02 00:00:00, 2018-01-03 00:00:00]", | ||||
|                 ], | ||||
|             ), | ||||
|             ( | ||||
|                 [ | ||||
|                     (Timedelta("0 days"), Timedelta("1 days")), | ||||
|                     (Timedelta("1 days"), Timedelta("2 days")), | ||||
|                     np.nan, | ||||
|                 ], | ||||
|                 "neither", | ||||
|                 [ | ||||
|                     "(0 days 00:00:00, 1 days 00:00:00)", | ||||
|                     "(1 days 00:00:00, 2 days 00:00:00)", | ||||
|                     "NaN", | ||||
|                 ], | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_values_for_csv(self, tuples, closed, expected_data): | ||||
|         """_get_values_for_csv gives interval strings, with na_rep for missing.""" | ||||
|         # GH 28210 | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         result = index._get_values_for_csv(na_rep="NaN") | ||||
|         expected = np.array(expected_data) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_timestamp_with_timezone(self, unit): | ||||
|         """repr of a UTC datetime IntervalIndex shows the offset and the unit.""" | ||||
|         # GH 55035 | ||||
|         left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]") | ||||
|         right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]") | ||||
|         index = IntervalIndex.from_arrays(left, right) | ||||
|         result = repr(index) | ||||
|         expected = ( | ||||
|             "IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], " | ||||
|             f"dtype='interval[datetime64[{unit}, UTC], right]')" | ||||
|         ) | ||||
|         assert result == expected | ||||
| @ -0,0 +1,674 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import InvalidIndexError | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     CategoricalIndex, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     NaT, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     array, | ||||
|     date_range, | ||||
|     interval_range, | ||||
|     isna, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestGetItem: | ||||
|     def test_getitem(self, closed): | ||||
|         idx = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) | ||||
|         assert idx[0] == Interval(0.0, 1.0, closed=closed) | ||||
|         assert idx[1] == Interval(1.0, 2.0, closed=closed) | ||||
|         assert isna(idx[2]) | ||||
|  | ||||
|         result = idx[0:1] | ||||
|         expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = idx[0:2] | ||||
|         expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = idx[1:3] | ||||
|         expected = IntervalIndex.from_arrays( | ||||
|             (1.0, np.nan), (2.0, np.nan), closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_getitem_2d_deprecated(self): | ||||
|         # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable | ||||
|         idx = IntervalIndex.from_breaks(range(11), closed="right") | ||||
|         with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): | ||||
|             idx[:, None] | ||||
|         with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): | ||||
|             # GH#44051 | ||||
|             idx[True] | ||||
|         with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): | ||||
|             # GH#44051 | ||||
|             idx[False] | ||||
|  | ||||
|  | ||||
| class TestWhere: | ||||
|     def test_where(self, listlike_box): | ||||
|         klass = listlike_box | ||||
|  | ||||
|         idx = IntervalIndex.from_breaks(range(11), closed="right") | ||||
|         cond = [True] * len(idx) | ||||
|         expected = idx | ||||
|         result = expected.where(klass(cond)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         cond = [False] + [True] * len(idx[1:]) | ||||
|         expected = IntervalIndex([np.nan] + idx[1:].tolist()) | ||||
|         result = idx.where(klass(cond)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestTake: | ||||
|     def test_take(self, closed): | ||||
|         index = IntervalIndex.from_breaks(range(11), closed=closed) | ||||
|  | ||||
|         result = index.take(range(10)) | ||||
|         tm.assert_index_equal(result, index) | ||||
|  | ||||
|         result = index.take([0, 0, 1]) | ||||
|         expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestGetLoc: | ||||
|     @pytest.mark.parametrize("side", ["right", "left", "both", "neither"]) | ||||
|     def test_get_loc_interval(self, closed, side): | ||||
|         idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) | ||||
|  | ||||
|         for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]: | ||||
|             # if get_loc is supplied an interval, it should only search | ||||
|             # for exact matches, not overlaps or covers, else KeyError. | ||||
|             msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')") | ||||
|             if closed == side: | ||||
|                 if bound == [0, 1]: | ||||
|                     assert idx.get_loc(Interval(0, 1, closed=side)) == 0 | ||||
|                 elif bound == [2, 3]: | ||||
|                     assert idx.get_loc(Interval(2, 3, closed=side)) == 1 | ||||
|                 else: | ||||
|                     with pytest.raises(KeyError, match=msg): | ||||
|                         idx.get_loc(Interval(*bound, closed=side)) | ||||
|             else: | ||||
|                 with pytest.raises(KeyError, match=msg): | ||||
|                     idx.get_loc(Interval(*bound, closed=side)) | ||||
|  | ||||
|     @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5]) | ||||
|     def test_get_loc_scalar(self, closed, scalar): | ||||
|         # correct = {side: {query: answer}}. | ||||
|         # If query is not in the dict, that query should raise a KeyError | ||||
|         correct = { | ||||
|             "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1}, | ||||
|             "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1}, | ||||
|             "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1}, | ||||
|             "neither": {0.5: 0, 2.5: 1}, | ||||
|         } | ||||
|  | ||||
|         idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) | ||||
|  | ||||
|         # if get_loc is supplied a scalar, it should return the index of | ||||
|         # the interval which contains the scalar, or KeyError. | ||||
|         if scalar in correct[closed].keys(): | ||||
|             assert idx.get_loc(scalar) == correct[closed][scalar] | ||||
|         else: | ||||
|             with pytest.raises(KeyError, match=str(scalar)): | ||||
|                 idx.get_loc(scalar) | ||||
|  | ||||
|     @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6]) | ||||
|     def test_get_loc_length_one_scalar(self, scalar, closed): | ||||
|         # GH 20921 | ||||
|         index = IntervalIndex.from_tuples([(0, 5)], closed=closed) | ||||
|         if scalar in index[0]: | ||||
|             result = index.get_loc(scalar) | ||||
|             assert result == 0 | ||||
|         else: | ||||
|             with pytest.raises(KeyError, match=str(scalar)): | ||||
|                 index.get_loc(scalar) | ||||
|  | ||||
|     @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"]) | ||||
|     @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)]) | ||||
|     def test_get_loc_length_one_interval(self, left, right, closed, other_closed): | ||||
|         # GH 20921 | ||||
|         index = IntervalIndex.from_tuples([(0, 5)], closed=closed) | ||||
|         interval = Interval(left, right, closed=other_closed) | ||||
|         if interval == index[0]: | ||||
|             result = index.get_loc(interval) | ||||
|             assert result == 0 | ||||
|         else: | ||||
|             with pytest.raises( | ||||
|                 KeyError, | ||||
|                 match=re.escape(f"Interval({left}, {right}, closed='{other_closed}')"), | ||||
|             ): | ||||
|                 index.get_loc(interval) | ||||
|  | ||||
|     # Make consistent with test_interval_new.py (see #16316, #16386) | ||||
|     @pytest.mark.parametrize( | ||||
|         "breaks", | ||||
|         [ | ||||
|             date_range("20180101", periods=4), | ||||
|             date_range("20180101", periods=4, tz="US/Eastern"), | ||||
|             timedelta_range("0 days", periods=4), | ||||
|         ], | ||||
|         ids=lambda x: str(x.dtype), | ||||
|     ) | ||||
|     def test_get_loc_datetimelike_nonoverlapping(self, breaks): | ||||
|         # GH 20636 | ||||
|         # nonoverlapping = IntervalIndex method and no i8 conversion | ||||
|         index = IntervalIndex.from_breaks(breaks) | ||||
|  | ||||
|         value = index[0].mid | ||||
|         result = index.get_loc(value) | ||||
|         expected = 0 | ||||
|         assert result == expected | ||||
|  | ||||
|         interval = Interval(index[0].left, index[0].right) | ||||
|         result = index.get_loc(interval) | ||||
|         expected = 0 | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "arrays", | ||||
|         [ | ||||
|             (date_range("20180101", periods=4), date_range("20180103", periods=4)), | ||||
|             ( | ||||
|                 date_range("20180101", periods=4, tz="US/Eastern"), | ||||
|                 date_range("20180103", periods=4, tz="US/Eastern"), | ||||
|             ), | ||||
|             ( | ||||
|                 timedelta_range("0 days", periods=4), | ||||
|                 timedelta_range("2 days", periods=4), | ||||
|             ), | ||||
|         ], | ||||
|         ids=lambda x: str(x[0].dtype), | ||||
|     ) | ||||
|     def test_get_loc_datetimelike_overlapping(self, arrays): | ||||
|         # GH 20636 | ||||
|         index = IntervalIndex.from_arrays(*arrays) | ||||
|  | ||||
|         value = index[0].mid + Timedelta("12 hours") | ||||
|         result = index.get_loc(value) | ||||
|         expected = slice(0, 2, None) | ||||
|         assert result == expected | ||||
|  | ||||
|         interval = Interval(index[0].left, index[0].right) | ||||
|         result = index.get_loc(interval) | ||||
|         expected = 0 | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values", | ||||
|         [ | ||||
|             date_range("2018-01-04", periods=4, freq="-1D"), | ||||
|             date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"), | ||||
|             timedelta_range("3 days", periods=4, freq="-1D"), | ||||
|             np.arange(3.0, -1.0, -1.0), | ||||
|             np.arange(3, -1, -1), | ||||
|         ], | ||||
|         ids=lambda x: str(x.dtype), | ||||
|     ) | ||||
|     def test_get_loc_decreasing(self, values): | ||||
|         # GH 25860 | ||||
|         index = IntervalIndex.from_arrays(values[1:], values[:-1]) | ||||
|         result = index.get_loc(index[0]) | ||||
|         expected = 0 | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize("key", [[5], (2, 3)]) | ||||
|     def test_get_loc_non_scalar_errors(self, key): | ||||
|         # GH 31117 | ||||
|         idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)]) | ||||
|  | ||||
|         msg = str(key) | ||||
|         with pytest.raises(InvalidIndexError, match=msg): | ||||
|             idx.get_loc(key) | ||||
|  | ||||
|     def test_get_indexer_with_nans(self): | ||||
|         # GH#41831 | ||||
|         index = IntervalIndex([np.nan, Interval(1, 2), np.nan]) | ||||
|  | ||||
|         expected = np.array([True, False, True]) | ||||
|         for key in [None, np.nan, NA]: | ||||
|             assert key in index | ||||
|             result = index.get_loc(key) | ||||
|             tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         for key in [NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]: | ||||
|             with pytest.raises(KeyError, match=str(key)): | ||||
|                 index.get_loc(key) | ||||
|  | ||||
|  | ||||
| class TestGetIndexer: | ||||
|     @pytest.mark.parametrize( | ||||
|         "query, expected", | ||||
|         [ | ||||
|             ([Interval(2, 4, closed="right")], [1]), | ||||
|             ([Interval(2, 4, closed="left")], [-1]), | ||||
|             ([Interval(2, 4, closed="both")], [-1]), | ||||
|             ([Interval(2, 4, closed="neither")], [-1]), | ||||
|             ([Interval(1, 4, closed="right")], [-1]), | ||||
|             ([Interval(0, 4, closed="right")], [-1]), | ||||
|             ([Interval(0.5, 1.5, closed="right")], [-1]), | ||||
|             ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]), | ||||
|             ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]), | ||||
|             ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]), | ||||
|             ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_with_interval(self, query, expected): | ||||
|         tuples = [(0, 2), (2, 4), (5, 7)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed="right") | ||||
|  | ||||
|         result = index.get_indexer(query) | ||||
|         expected = np.array(expected, dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "query, expected", | ||||
|         [ | ||||
|             ([-0.5], [-1]), | ||||
|             ([0], [-1]), | ||||
|             ([0.5], [0]), | ||||
|             ([1], [0]), | ||||
|             ([1.5], [1]), | ||||
|             ([2], [1]), | ||||
|             ([2.5], [-1]), | ||||
|             ([3], [-1]), | ||||
|             ([3.5], [2]), | ||||
|             ([4], [2]), | ||||
|             ([4.5], [-1]), | ||||
|             ([1, 2], [0, 1]), | ||||
|             ([1, 2, 3], [0, 1, -1]), | ||||
|             ([1, 2, 3, 4], [0, 1, -1, 2]), | ||||
|             ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_with_int_and_float(self, query, expected): | ||||
|         tuples = [(0, 1), (1, 2), (3, 4)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed="right") | ||||
|  | ||||
|         result = index.get_indexer(query) | ||||
|         expected = np.array(expected, dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)]) | ||||
|     def test_get_indexer_length_one(self, item, closed): | ||||
|         # GH 17284 | ||||
|         index = IntervalIndex.from_tuples([(0, 5)], closed=closed) | ||||
|         result = index.get_indexer(item) | ||||
|         expected = np.array([0] * len(item), dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("size", [1, 5]) | ||||
|     def test_get_indexer_length_one_interval(self, size, closed): | ||||
|         # GH 17284 | ||||
|         index = IntervalIndex.from_tuples([(0, 5)], closed=closed) | ||||
|         result = index.get_indexer([Interval(0, 5, closed)] * size) | ||||
|         expected = np.array([0] * size, dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "target", | ||||
|         [ | ||||
|             IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]), | ||||
|             IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]), | ||||
|             IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"), | ||||
|             [-1, 0, 0.5, 1, 2, 2.5, np.nan], | ||||
|             ["foo", "foo", "bar", "baz"], | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_categorical(self, target, ordered): | ||||
|         # GH 30063: categorical and non-categorical results should be consistent | ||||
|         index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)]) | ||||
|         categorical_target = CategoricalIndex(target, ordered=ordered) | ||||
|  | ||||
|         result = index.get_indexer(categorical_target) | ||||
|         expected = index.get_indexer(target) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings( | ||||
|         "ignore:invalid value encountered in cast:RuntimeWarning" | ||||
|     ) | ||||
|     def test_get_indexer_categorical_with_nans(self): | ||||
|         # GH#41934 nans in both index and in target | ||||
|         ii = IntervalIndex.from_breaks(range(5)) | ||||
|         ii2 = ii.append(IntervalIndex([np.nan])) | ||||
|         ci2 = CategoricalIndex(ii2) | ||||
|  | ||||
|         result = ii2.get_indexer(ci2) | ||||
|         expected = np.arange(5, dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # not-all-matches | ||||
|         result = ii2[1:].get_indexer(ci2[::-1]) | ||||
|         expected = np.array([3, 2, 1, 0, -1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # non-unique target, non-unique nans | ||||
|         result = ii2.get_indexer(ci2.append(ci2)) | ||||
|         expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_datetime(self): | ||||
|         ii = IntervalIndex.from_breaks(date_range("2018-01-01", periods=4)) | ||||
|         # TODO: with mismatched resolution get_indexer currently raises; | ||||
|         #  this should probably coerce? | ||||
|         target = DatetimeIndex(["2018-01-02"], dtype="M8[ns]") | ||||
|         result = ii.get_indexer(target) | ||||
|         expected = np.array([0], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = ii.get_indexer(target.astype(str)) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # https://github.com/pandas-dev/pandas/issues/47772 | ||||
|         result = ii.get_indexer(target.asi8) | ||||
|         expected = np.array([-1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "tuples, closed", | ||||
|         [ | ||||
|             ([(0, 2), (1, 3), (3, 4)], "neither"), | ||||
|             ([(0, 5), (1, 4), (6, 7)], "left"), | ||||
|             ([(0, 1), (0, 1), (1, 2)], "right"), | ||||
|             ([(0, 1), (2, 3), (3, 4)], "both"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_errors(self, tuples, closed): | ||||
|         # IntervalIndex needs non-overlapping for uniqueness when querying | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|  | ||||
|         msg = ( | ||||
|             "cannot handle overlapping indices; use " | ||||
|             "IntervalIndex.get_indexer_non_unique" | ||||
|         ) | ||||
|         with pytest.raises(InvalidIndexError, match=msg): | ||||
|             index.get_indexer([0, 2]) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "query, expected", | ||||
|         [ | ||||
|             ([-0.5], ([-1], [0])), | ||||
|             ([0], ([0], [])), | ||||
|             ([0.5], ([0], [])), | ||||
|             ([1], ([0, 1], [])), | ||||
|             ([1.5], ([0, 1], [])), | ||||
|             ([2], ([0, 1, 2], [])), | ||||
|             ([2.5], ([1, 2], [])), | ||||
|             ([3], ([2], [])), | ||||
|             ([3.5], ([2], [])), | ||||
|             ([4], ([-1], [0])), | ||||
|             ([4.5], ([-1], [0])), | ||||
|             ([1, 2], ([0, 1, 0, 1, 2], [])), | ||||
|             ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])), | ||||
|             ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])), | ||||
|             ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])), | ||||
|         ], | ||||
|     ) | ||||
|     def test_get_indexer_non_unique_with_int_and_float(self, query, expected): | ||||
|         tuples = [(0, 2.5), (1, 3), (2, 4)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed="left") | ||||
|  | ||||
|         result_indexer, result_missing = index.get_indexer_non_unique(query) | ||||
|         expected_indexer = np.array(expected[0], dtype="intp") | ||||
|         expected_missing = np.array(expected[1], dtype="intp") | ||||
|  | ||||
|         tm.assert_numpy_array_equal(result_indexer, expected_indexer) | ||||
|         tm.assert_numpy_array_equal(result_missing, expected_missing) | ||||
|  | ||||
|         # TODO we may also want to test get_indexer for the case when | ||||
|         # the intervals are duplicated, decreasing, non-monotonic, etc.. | ||||
|  | ||||
|     def test_get_indexer_non_monotonic(self): | ||||
|         # GH 16410 | ||||
|         idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)]) | ||||
|         idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)]) | ||||
|         result = idx1.get_indexer(idx2) | ||||
|         expected = np.array([2, 0, -1, -1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = idx1.get_indexer(idx1[1:]) | ||||
|         expected = np.array([1, 2], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_with_nans(self): | ||||
|         # GH#41831 | ||||
|         index = IntervalIndex([np.nan, np.nan]) | ||||
|         other = IntervalIndex([np.nan]) | ||||
|  | ||||
|         assert not index._index_as_unique | ||||
|  | ||||
|         result = index.get_indexer_for(other) | ||||
|         expected = np.array([0, 1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_index_non_unique_non_monotonic(self): | ||||
|         # GH#44084 (root cause) | ||||
|         index = IntervalIndex.from_tuples( | ||||
|             [(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)] | ||||
|         ) | ||||
|  | ||||
|         result, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)]) | ||||
|         expected = np.array([1, 3], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_multiindex_with_intervals(self): | ||||
|         # GH#44084 (MultiIndex case as reported) | ||||
|         interval_index = IntervalIndex.from_tuples( | ||||
|             [(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval" | ||||
|         ) | ||||
|         foo_index = Index([1, 2, 3], name="foo") | ||||
|  | ||||
|         multi_index = MultiIndex.from_product([foo_index, interval_index]) | ||||
|  | ||||
|         result = multi_index.get_level_values("interval").get_indexer_for( | ||||
|             [Interval(0.0, 1.0)] | ||||
|         ) | ||||
|         expected = np.array([1, 4, 7], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("box", [IntervalIndex, array, list]) | ||||
|     def test_get_indexer_interval_index(self, box): | ||||
|         # GH#30178 | ||||
|         rng = period_range("2022-07-01", freq="D", periods=3) | ||||
|         idx = box(interval_range(Timestamp("2022-07-01"), freq="3D", periods=3)) | ||||
|  | ||||
|         actual = rng.get_indexer(idx) | ||||
|         expected = np.array([-1, -1, -1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|     def test_get_indexer_read_only(self): | ||||
|         idx = interval_range(start=0, end=5) | ||||
|         arr = np.array([1, 2]) | ||||
|         arr.flags.writeable = False | ||||
|         result = idx.get_indexer(arr) | ||||
|         expected = np.array([0, 1]) | ||||
|         tm.assert_numpy_array_equal(result, expected, check_dtype=False) | ||||
|  | ||||
|         result = idx.get_indexer_non_unique(arr)[0] | ||||
|         tm.assert_numpy_array_equal(result, expected, check_dtype=False) | ||||
|  | ||||
|  | ||||
| class TestSliceLocs: | ||||
|     def test_slice_locs_with_interval(self): | ||||
|         # increasing monotonically | ||||
|         index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)]) | ||||
|  | ||||
|         assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) | ||||
|         assert index.slice_locs(start=Interval(0, 2)) == (0, 3) | ||||
|         assert index.slice_locs(end=Interval(2, 4)) == (0, 3) | ||||
|         assert index.slice_locs(end=Interval(0, 2)) == (0, 1) | ||||
|         assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 1) | ||||
|  | ||||
|         # decreasing monotonically | ||||
|         index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)]) | ||||
|  | ||||
|         assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (2, 1) | ||||
|         assert index.slice_locs(start=Interval(0, 2)) == (2, 3) | ||||
|         assert index.slice_locs(end=Interval(2, 4)) == (0, 1) | ||||
|         assert index.slice_locs(end=Interval(0, 2)) == (0, 3) | ||||
|         assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (0, 3) | ||||
|  | ||||
|         # sorted duplicates | ||||
|         index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)]) | ||||
|  | ||||
|         assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) | ||||
|         assert index.slice_locs(start=Interval(0, 2)) == (0, 3) | ||||
|         assert index.slice_locs(end=Interval(2, 4)) == (0, 3) | ||||
|         assert index.slice_locs(end=Interval(0, 2)) == (0, 2) | ||||
|         assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2) | ||||
|  | ||||
|         # unsorted duplicates | ||||
|         index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)]) | ||||
|  | ||||
|         with pytest.raises( | ||||
|             KeyError, | ||||
|             match=re.escape( | ||||
|                 '"Cannot get left slice bound for non-unique label: ' | ||||
|                 "Interval(0, 2, closed='right')\"" | ||||
|             ), | ||||
|         ): | ||||
|             index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) | ||||
|  | ||||
|         with pytest.raises( | ||||
|             KeyError, | ||||
|             match=re.escape( | ||||
|                 '"Cannot get left slice bound for non-unique label: ' | ||||
|                 "Interval(0, 2, closed='right')\"" | ||||
|             ), | ||||
|         ): | ||||
|             index.slice_locs(start=Interval(0, 2)) | ||||
|  | ||||
|         assert index.slice_locs(end=Interval(2, 4)) == (0, 2) | ||||
|  | ||||
|         with pytest.raises( | ||||
|             KeyError, | ||||
|             match=re.escape( | ||||
|                 '"Cannot get right slice bound for non-unique label: ' | ||||
|                 "Interval(0, 2, closed='right')\"" | ||||
|             ), | ||||
|         ): | ||||
|             index.slice_locs(end=Interval(0, 2)) | ||||
|  | ||||
|         with pytest.raises( | ||||
|             KeyError, | ||||
|             match=re.escape( | ||||
|                 '"Cannot get right slice bound for non-unique label: ' | ||||
|                 "Interval(0, 2, closed='right')\"" | ||||
|             ), | ||||
|         ): | ||||
|             index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) | ||||
|  | ||||
|         # another unsorted duplicates | ||||
|         index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)]) | ||||
|  | ||||
|         assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) | ||||
|         assert index.slice_locs(start=Interval(0, 2)) == (0, 4) | ||||
|         assert index.slice_locs(end=Interval(2, 4)) == (0, 3) | ||||
|         assert index.slice_locs(end=Interval(0, 2)) == (0, 2) | ||||
|         assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2) | ||||
|  | ||||
|     def test_slice_locs_with_ints_and_floats_succeeds(self): | ||||
|         # increasing non-overlapping | ||||
|         index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)]) | ||||
|  | ||||
|         assert index.slice_locs(0, 1) == (0, 1) | ||||
|         assert index.slice_locs(0, 2) == (0, 2) | ||||
|         assert index.slice_locs(0, 3) == (0, 2) | ||||
|         assert index.slice_locs(3, 1) == (2, 1) | ||||
|         assert index.slice_locs(3, 4) == (2, 3) | ||||
|         assert index.slice_locs(0, 4) == (0, 3) | ||||
|  | ||||
|         # decreasing non-overlapping | ||||
|         index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)]) | ||||
|         assert index.slice_locs(0, 1) == (3, 3) | ||||
|         assert index.slice_locs(0, 2) == (3, 2) | ||||
|         assert index.slice_locs(0, 3) == (3, 1) | ||||
|         assert index.slice_locs(3, 1) == (1, 3) | ||||
|         assert index.slice_locs(3, 4) == (1, 1) | ||||
|         assert index.slice_locs(0, 4) == (3, 1) | ||||
|  | ||||
|     @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "tuples", | ||||
|         [ | ||||
|             [(0, 2), (1, 3), (2, 4)], | ||||
|             [(2, 4), (1, 3), (0, 2)], | ||||
|             [(0, 2), (0, 2), (2, 4)], | ||||
|             [(0, 2), (2, 4), (0, 2)], | ||||
|             [(0, 2), (0, 2), (2, 4), (1, 3)], | ||||
|         ], | ||||
|     ) | ||||
|     def test_slice_locs_with_ints_and_floats_errors(self, tuples, query): | ||||
|         start, stop = query | ||||
|         index = IntervalIndex.from_tuples(tuples) | ||||
|         with pytest.raises( | ||||
|             KeyError, | ||||
|             match=( | ||||
|                 "'can only get slices from an IntervalIndex if bounds are " | ||||
|                 "non-overlapping and all monotonic increasing or decreasing'" | ||||
|             ), | ||||
|         ): | ||||
|             index.slice_locs(start, stop) | ||||
|  | ||||
|  | ||||
| class TestPutmask: | ||||
|     @pytest.mark.parametrize("tz", ["US/Pacific", None]) | ||||
|     def test_putmask_dt64(self, tz): | ||||
|         # GH#37968 | ||||
|         dti = date_range("2016-01-01", periods=9, tz=tz) | ||||
|         idx = IntervalIndex.from_breaks(dti) | ||||
|         mask = np.zeros(idx.shape, dtype=bool) | ||||
|         mask[0:3] = True | ||||
|  | ||||
|         result = idx.putmask(mask, idx[-1]) | ||||
|         expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:])) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_putmask_td64(self): | ||||
|         # GH#37968 | ||||
|         dti = date_range("2016-01-01", periods=9) | ||||
|         tdi = dti - dti[0] | ||||
|         idx = IntervalIndex.from_breaks(tdi) | ||||
|         mask = np.zeros(idx.shape, dtype=bool) | ||||
|         mask[0:3] = True | ||||
|  | ||||
|         result = idx.putmask(mask, idx[-1]) | ||||
|         expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:])) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestContains: | ||||
|     # .__contains__, not .contains | ||||
|  | ||||
|     def test_contains_dunder(self): | ||||
|         index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right") | ||||
|  | ||||
|         # __contains__ requires perfect matches to intervals. | ||||
|         assert 0 not in index | ||||
|         assert 1 not in index | ||||
|         assert 2 not in index | ||||
|  | ||||
|         assert Interval(0, 1, closed="right") in index | ||||
|         assert Interval(0, 2, closed="right") not in index | ||||
|         assert Interval(0, 0.5, closed="right") not in index | ||||
|         assert Interval(3, 5, closed="right") not in index | ||||
|         assert Interval(-1, 0, closed="left") not in index | ||||
|         assert Interval(0, 1, closed="left") not in index | ||||
|         assert Interval(0, 1, closed="both") not in index | ||||
| @ -0,0 +1,918 @@ | ||||
| from itertools import permutations | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     interval_range, | ||||
|     isna, | ||||
|     notna, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| import pandas.core.common as com | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=[None, "foo"]) | ||||
| def name(request): | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| class TestIntervalIndex: | ||||
|     index = IntervalIndex.from_arrays([0, 1], [1, 2]) | ||||
|  | ||||
|     def create_index(self, closed="right"): | ||||
|         return IntervalIndex.from_breaks(range(11), closed=closed) | ||||
|  | ||||
|     def create_index_with_nan(self, closed="right"): | ||||
|         mask = [True, False] + [True] * 8 | ||||
|         return IntervalIndex.from_arrays( | ||||
|             np.where(mask, np.arange(10), np.nan), | ||||
|             np.where(mask, np.arange(1, 11), np.nan), | ||||
|             closed=closed, | ||||
|         ) | ||||
|  | ||||
|     def test_properties(self, closed): | ||||
|         index = self.create_index(closed=closed) | ||||
|         assert len(index) == 10 | ||||
|         assert index.size == 10 | ||||
|         assert index.shape == (10,) | ||||
|  | ||||
|         tm.assert_index_equal(index.left, Index(np.arange(10, dtype=np.int64))) | ||||
|         tm.assert_index_equal(index.right, Index(np.arange(1, 11, dtype=np.int64))) | ||||
|         tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5, dtype=np.float64))) | ||||
|  | ||||
|         assert index.closed == closed | ||||
|  | ||||
|         ivs = [ | ||||
|             Interval(left, right, closed) | ||||
|             for left, right in zip(range(10), range(1, 11)) | ||||
|         ] | ||||
|         expected = np.array(ivs, dtype=object) | ||||
|         tm.assert_numpy_array_equal(np.asarray(index), expected) | ||||
|  | ||||
|         # with nans | ||||
|         index = self.create_index_with_nan(closed=closed) | ||||
|         assert len(index) == 10 | ||||
|         assert index.size == 10 | ||||
|         assert index.shape == (10,) | ||||
|  | ||||
|         expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) | ||||
|         expected_right = expected_left + 1 | ||||
|         expected_mid = expected_left + 0.5 | ||||
|         tm.assert_index_equal(index.left, expected_left) | ||||
|         tm.assert_index_equal(index.right, expected_right) | ||||
|         tm.assert_index_equal(index.mid, expected_mid) | ||||
|  | ||||
|         assert index.closed == closed | ||||
|  | ||||
|         ivs = [ | ||||
|             Interval(left, right, closed) if notna(left) else np.nan | ||||
|             for left, right in zip(expected_left, expected_right) | ||||
|         ] | ||||
|         expected = np.array(ivs, dtype=object) | ||||
|         tm.assert_numpy_array_equal(np.asarray(index), expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "breaks", | ||||
|         [ | ||||
|             [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], | ||||
|             [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], | ||||
|             date_range("2017-01-01", "2017-01-04"), | ||||
|             pytest.param( | ||||
|                 date_range("2017-01-01", "2017-01-04", unit="s"), | ||||
|                 marks=pytest.mark.xfail(reason="mismatched result unit"), | ||||
|             ), | ||||
|             pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5h", "6D"]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_length(self, closed, breaks): | ||||
|         # GH 18789 | ||||
|         index = IntervalIndex.from_breaks(breaks, closed=closed) | ||||
|         result = index.length | ||||
|         expected = Index(iv.length for iv in index) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # with NA | ||||
|         index = index.insert(1, np.nan) | ||||
|         result = index.length | ||||
|         expected = Index(iv.length if notna(iv) else iv for iv in index) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_with_nans(self, closed): | ||||
|         index = self.create_index(closed=closed) | ||||
|         assert index.hasnans is False | ||||
|  | ||||
|         result = index.isna() | ||||
|         expected = np.zeros(len(index), dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = index.notna() | ||||
|         expected = np.ones(len(index), dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         index = self.create_index_with_nan(closed=closed) | ||||
|         assert index.hasnans is True | ||||
|  | ||||
|         result = index.isna() | ||||
|         expected = np.array([False, True] + [False] * (len(index) - 2)) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = index.notna() | ||||
|         expected = np.array([True, False] + [True] * (len(index) - 2)) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_copy(self, closed): | ||||
|         expected = self.create_index(closed=closed) | ||||
|  | ||||
|         result = expected.copy() | ||||
|         assert result.equals(expected) | ||||
|  | ||||
|         result = expected.copy(deep=True) | ||||
|         assert result.equals(expected) | ||||
|         assert result.left is not expected.left | ||||
|  | ||||
|     def test_ensure_copied_data(self, closed): | ||||
|         # exercise the copy flag in the constructor | ||||
|  | ||||
|         # not copying | ||||
|         index = self.create_index(closed=closed) | ||||
|         result = IntervalIndex(index, copy=False) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             index.left.values, result.left.values, check_same="same" | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             index.right.values, result.right.values, check_same="same" | ||||
|         ) | ||||
|  | ||||
|         # by-definition make a copy | ||||
|         result = IntervalIndex(np.array(index), copy=False) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             index.left.values, result.left.values, check_same="copy" | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             index.right.values, result.right.values, check_same="copy" | ||||
|         ) | ||||
|  | ||||
|     def test_delete(self, closed): | ||||
|         breaks = np.arange(1, 11, dtype=np.int64) | ||||
|         expected = IntervalIndex.from_breaks(breaks, closed=closed) | ||||
|         result = self.create_index(closed=closed).delete(0) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data", | ||||
|         [ | ||||
|             interval_range(0, periods=10, closed="neither"), | ||||
|             interval_range(1.7, periods=8, freq=2.5, closed="both"), | ||||
|             interval_range(Timestamp("20170101"), periods=12, closed="left"), | ||||
|             interval_range(Timedelta("1 day"), periods=6, closed="right"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_insert(self, data): | ||||
|         item = data[0] | ||||
|         idx_item = IntervalIndex([item]) | ||||
|  | ||||
|         # start | ||||
|         expected = idx_item.append(data) | ||||
|         result = data.insert(0, item) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # end | ||||
|         expected = data.append(idx_item) | ||||
|         result = data.insert(len(data), item) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # mid | ||||
|         expected = data[:3].append(idx_item).append(data[3:]) | ||||
|         result = data.insert(3, item) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # invalid type | ||||
|         res = data.insert(1, "foo") | ||||
|         expected = data.astype(object).insert(1, "foo") | ||||
|         tm.assert_index_equal(res, expected) | ||||
|  | ||||
|         msg = "can only insert Interval objects and NA into an IntervalArray" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             data._data.insert(1, "foo") | ||||
|  | ||||
|         # invalid closed | ||||
|         msg = "'value.closed' is 'left', expected 'right'." | ||||
|         for closed in {"left", "right", "both", "neither"} - {item.closed}: | ||||
|             msg = f"'value.closed' is '{closed}', expected '{item.closed}'." | ||||
|             bad_item = Interval(item.left, item.right, closed=closed) | ||||
|             res = data.insert(1, bad_item) | ||||
|             expected = data.astype(object).insert(1, bad_item) | ||||
|             tm.assert_index_equal(res, expected) | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 data._data.insert(1, bad_item) | ||||
|  | ||||
|         # GH 18295 (test missing) | ||||
|         na_idx = IntervalIndex([np.nan], closed=data.closed) | ||||
|         for na in [np.nan, None, pd.NA]: | ||||
|             expected = data[:1].append(na_idx).append(data[1:]) | ||||
|             result = data.insert(1, na) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         if data.left.dtype.kind not in ["m", "M"]: | ||||
|             # trying to insert pd.NaT into a numeric-dtyped Index should cast | ||||
|             expected = data.astype(object).insert(1, pd.NaT) | ||||
|  | ||||
|             msg = "can only insert Interval objects and NA into an IntervalArray" | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 data._data.insert(1, pd.NaT) | ||||
|  | ||||
|         result = data.insert(1, pd.NaT) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_is_unique_interval(self, closed): | ||||
|         """ | ||||
|         Interval specific tests for is_unique in addition to base class tests | ||||
|         """ | ||||
|         # unique overlapping - distinct endpoints | ||||
|         idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) | ||||
|         assert idx.is_unique is True | ||||
|  | ||||
|         # unique overlapping - shared endpoints | ||||
|         idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) | ||||
|         assert idx.is_unique is True | ||||
|  | ||||
|         # unique nested | ||||
|         idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) | ||||
|         assert idx.is_unique is True | ||||
|  | ||||
|         # unique NaN | ||||
|         idx = IntervalIndex.from_tuples([(np.nan, np.nan)], closed=closed) | ||||
|         assert idx.is_unique is True | ||||
|  | ||||
|         # non-unique NaN | ||||
|         idx = IntervalIndex.from_tuples( | ||||
|             [(np.nan, np.nan), (np.nan, np.nan)], closed=closed | ||||
|         ) | ||||
|         assert idx.is_unique is False | ||||
|  | ||||
|     def test_monotonic(self, closed): | ||||
|         # increasing non-overlapping | ||||
|         idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is True | ||||
|         assert idx._is_strictly_monotonic_increasing is True | ||||
|         assert idx.is_monotonic_decreasing is False | ||||
|         assert idx._is_strictly_monotonic_decreasing is False | ||||
|  | ||||
|         # decreasing non-overlapping | ||||
|         idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is False | ||||
|         assert idx._is_strictly_monotonic_increasing is False | ||||
|         assert idx.is_monotonic_decreasing is True | ||||
|         assert idx._is_strictly_monotonic_decreasing is True | ||||
|  | ||||
|         # unordered non-overlapping | ||||
|         idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is False | ||||
|         assert idx._is_strictly_monotonic_increasing is False | ||||
|         assert idx.is_monotonic_decreasing is False | ||||
|         assert idx._is_strictly_monotonic_decreasing is False | ||||
|  | ||||
|         # increasing overlapping | ||||
|         idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is True | ||||
|         assert idx._is_strictly_monotonic_increasing is True | ||||
|         assert idx.is_monotonic_decreasing is False | ||||
|         assert idx._is_strictly_monotonic_decreasing is False | ||||
|  | ||||
|         # decreasing overlapping | ||||
|         idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is False | ||||
|         assert idx._is_strictly_monotonic_increasing is False | ||||
|         assert idx.is_monotonic_decreasing is True | ||||
|         assert idx._is_strictly_monotonic_decreasing is True | ||||
|  | ||||
|         # unordered overlapping | ||||
|         idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is False | ||||
|         assert idx._is_strictly_monotonic_increasing is False | ||||
|         assert idx.is_monotonic_decreasing is False | ||||
|         assert idx._is_strictly_monotonic_decreasing is False | ||||
|  | ||||
|         # increasing overlapping shared endpoints | ||||
|         idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is True | ||||
|         assert idx._is_strictly_monotonic_increasing is True | ||||
|         assert idx.is_monotonic_decreasing is False | ||||
|         assert idx._is_strictly_monotonic_decreasing is False | ||||
|  | ||||
|         # decreasing overlapping shared endpoints | ||||
|         idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is False | ||||
|         assert idx._is_strictly_monotonic_increasing is False | ||||
|         assert idx.is_monotonic_decreasing is True | ||||
|         assert idx._is_strictly_monotonic_decreasing is True | ||||
|  | ||||
|         # stationary | ||||
|         idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is True | ||||
|         assert idx._is_strictly_monotonic_increasing is False | ||||
|         assert idx.is_monotonic_decreasing is True | ||||
|         assert idx._is_strictly_monotonic_decreasing is False | ||||
|  | ||||
|         # empty | ||||
|         idx = IntervalIndex([], closed=closed) | ||||
|         assert idx.is_monotonic_increasing is True | ||||
|         assert idx._is_strictly_monotonic_increasing is True | ||||
|         assert idx.is_monotonic_decreasing is True | ||||
|         assert idx._is_strictly_monotonic_decreasing is True | ||||
|  | ||||
|     def test_is_monotonic_with_nans(self): | ||||
|         # GH#41831 | ||||
|         index = IntervalIndex([np.nan, np.nan]) | ||||
|  | ||||
|         assert not index.is_monotonic_increasing | ||||
|         assert not index._is_strictly_monotonic_increasing | ||||
|         assert not index.is_monotonic_increasing | ||||
|         assert not index._is_strictly_monotonic_decreasing | ||||
|         assert not index.is_monotonic_decreasing | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "breaks", | ||||
|         [ | ||||
|             date_range("20180101", periods=4), | ||||
|             date_range("20180101", periods=4, tz="US/Eastern"), | ||||
|             timedelta_range("0 days", periods=4), | ||||
|         ], | ||||
|         ids=lambda x: str(x.dtype), | ||||
|     ) | ||||
|     def test_maybe_convert_i8(self, breaks): | ||||
|         # GH 20636 | ||||
|         index = IntervalIndex.from_breaks(breaks) | ||||
|  | ||||
|         # intervalindex | ||||
|         result = index._maybe_convert_i8(index) | ||||
|         expected = IntervalIndex.from_breaks(breaks.asi8) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # interval | ||||
|         interval = Interval(breaks[0], breaks[1]) | ||||
|         result = index._maybe_convert_i8(interval) | ||||
|         expected = Interval(breaks[0]._value, breaks[1]._value) | ||||
|         assert result == expected | ||||
|  | ||||
|         # datetimelike index | ||||
|         result = index._maybe_convert_i8(breaks) | ||||
|         expected = Index(breaks.asi8) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # datetimelike scalar | ||||
|         result = index._maybe_convert_i8(breaks[0]) | ||||
|         expected = breaks[0]._value | ||||
|         assert result == expected | ||||
|  | ||||
|         # list-like of datetimelike scalars | ||||
|         result = index._maybe_convert_i8(list(breaks)) | ||||
|         expected = Index(breaks.asi8) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "breaks", | ||||
|         [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)], | ||||
|     ) | ||||
|     def test_maybe_convert_i8_nat(self, breaks): | ||||
|         # GH 20636 | ||||
|         index = IntervalIndex.from_breaks(breaks) | ||||
|  | ||||
|         to_convert = breaks._constructor([pd.NaT] * 3).as_unit("ns") | ||||
|         expected = Index([np.nan] * 3, dtype=np.float64) | ||||
|         result = index._maybe_convert_i8(to_convert) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         to_convert = to_convert.insert(0, breaks[0]) | ||||
|         expected = expected.insert(0, float(breaks[0]._value)) | ||||
|         result = index._maybe_convert_i8(to_convert) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "make_key", | ||||
|         [lambda breaks: breaks, list], | ||||
|         ids=["lambda", "list"], | ||||
|     ) | ||||
|     def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype): | ||||
|         # GH 20636 | ||||
|         breaks = np.arange(5, dtype=any_real_numpy_dtype) | ||||
|         index = IntervalIndex.from_breaks(breaks) | ||||
|         key = make_key(breaks) | ||||
|  | ||||
|         result = index._maybe_convert_i8(key) | ||||
|         kind = breaks.dtype.kind | ||||
|         expected_dtype = {"i": np.int64, "u": np.uint64, "f": np.float64}[kind] | ||||
|         expected = Index(key, dtype=expected_dtype) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "make_key", | ||||
|         [ | ||||
|             IntervalIndex.from_breaks, | ||||
|             lambda breaks: Interval(breaks[0], breaks[1]), | ||||
|             lambda breaks: breaks[0], | ||||
|         ], | ||||
|         ids=["IntervalIndex", "Interval", "scalar"], | ||||
|     ) | ||||
|     def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype): | ||||
|         # GH 20636 | ||||
|         breaks = np.arange(5, dtype=any_real_numpy_dtype) | ||||
|         index = IntervalIndex.from_breaks(breaks) | ||||
|         key = make_key(breaks) | ||||
|  | ||||
|         # test if _maybe_convert_i8 won't change key if an Interval or IntervalIndex | ||||
|         result = index._maybe_convert_i8(key) | ||||
|         assert result is key | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "breaks1, breaks2", | ||||
|         permutations( | ||||
|             [ | ||||
|                 date_range("20180101", periods=4), | ||||
|                 date_range("20180101", periods=4, tz="US/Eastern"), | ||||
|                 timedelta_range("0 days", periods=4), | ||||
|             ], | ||||
|             2, | ||||
|         ), | ||||
|         ids=lambda x: str(x.dtype), | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "make_key", | ||||
|         [ | ||||
|             IntervalIndex.from_breaks, | ||||
|             lambda breaks: Interval(breaks[0], breaks[1]), | ||||
|             lambda breaks: breaks, | ||||
|             lambda breaks: breaks[0], | ||||
|             list, | ||||
|         ], | ||||
|         ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], | ||||
|     ) | ||||
|     def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): | ||||
|         # GH 20636 | ||||
|         index = IntervalIndex.from_breaks(breaks1) | ||||
|         key = make_key(breaks2) | ||||
|  | ||||
|         msg = ( | ||||
|             f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with " | ||||
|             f"values of dtype {breaks2.dtype}" | ||||
|         ) | ||||
|         msg = re.escape(msg) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             index._maybe_convert_i8(key) | ||||
|  | ||||
|     def test_contains_method(self): | ||||
|         # can select values that are IN the range of a value | ||||
|         i = IntervalIndex.from_arrays([0, 1], [1, 2]) | ||||
|  | ||||
|         expected = np.array([False, False], dtype="bool") | ||||
|         actual = i.contains(0) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|         actual = i.contains(3) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|         expected = np.array([True, False], dtype="bool") | ||||
|         actual = i.contains(0.5) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|         actual = i.contains(1) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|         # __contains__ not implemented for "interval in interval", follow | ||||
|         # that for the contains method for now | ||||
|         with pytest.raises( | ||||
|             NotImplementedError, match="contains not implemented for two" | ||||
|         ): | ||||
|             i.contains(Interval(0, 1)) | ||||
|  | ||||
|     def test_dropna(self, closed): | ||||
|         expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) | ||||
|  | ||||
|         ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) | ||||
|         result = ii.dropna() | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) | ||||
|         result = ii.dropna() | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_non_contiguous(self, closed): | ||||
|         index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) | ||||
|         target = [0.5, 1.5, 2.5] | ||||
|         actual = index.get_indexer(target) | ||||
|         expected = np.array([0, -1, 1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|         assert 1.5 not in index | ||||
|  | ||||
|     def test_isin(self, closed): | ||||
|         index = self.create_index(closed=closed) | ||||
|  | ||||
|         expected = np.array([True] + [False] * (len(index) - 1)) | ||||
|         result = index.isin(index[:1]) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = index.isin([index[0]]) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) | ||||
|         expected = np.array([True] * (len(index) - 1) + [False]) | ||||
|         result = index.isin(other) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = index.isin(other.tolist()) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         for other_closed in ["right", "left", "both", "neither"]: | ||||
|             other = self.create_index(closed=other_closed) | ||||
|             expected = np.repeat(closed == other_closed, len(index)) | ||||
|             result = index.isin(other) | ||||
|             tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|             result = index.isin(other.tolist()) | ||||
|             tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
    def test_comparison(self):
        """Exercise comparison operators on ``self.index``.

        Covers comparisons against Interval scalars, the index itself, another
        IntervalIndex, the index's ``values``, and invalid operands (which
        either compare unequal element-wise or raise).
        """
        # Interval scalar on either side of an ordering comparison
        actual = Interval(0, 1) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)

        actual = Interval(0.5, 1.5) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > Interval(0.5, 1.5)
        tm.assert_numpy_array_equal(actual, expected)

        # index against itself: ==, <= and >= are all-True
        actual = self.index == self.index
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index <= self.index
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index >= self.index
        tm.assert_numpy_array_equal(actual, expected)

        # index against itself: strict comparisons are all-False
        actual = self.index < self.index
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > self.index
        tm.assert_numpy_array_equal(actual, expected)

        # ``expected`` is still the all-False array from the block above
        actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
        tm.assert_numpy_array_equal(actual, expected)

        # index against its own ``values``, with the index on either side
        actual = self.index == self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index.values == self.index
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index <= self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index != self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index > self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index.values > self.index
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # invalid comparisons
        # equality with a non-interval operand yields all-False rather than raising
        actual = self.index == 0
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index == self.index.left
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # ordering comparisons with a non-interval operand raise TypeError;
        # either of the two message forms below is accepted
        msg = "|".join(
            [
                "not supported between instances of 'int' and '.*.Interval'",
                r"Invalid comparison between dtype=interval\[int64, right\] and ",
            ]
        )
        # the bare comparison expressions are intentional: evaluating them
        # is what triggers the exception
        with pytest.raises(TypeError, match=msg):
            self.index > 0
        with pytest.raises(TypeError, match=msg):
            self.index <= 0
        with pytest.raises(TypeError, match=msg):
            self.index > np.arange(2)

        # an array operand of a different length raises ValueError instead
        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            self.index > np.arange(3)
|  | ||||
|     def test_missing_values(self, closed): | ||||
|         idx = Index( | ||||
|             [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)] | ||||
|         ) | ||||
|         idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) | ||||
|         assert idx.equals(idx2) | ||||
|  | ||||
|         msg = ( | ||||
|             "missing values must be missing in the same location both left " | ||||
|             "and right sides" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntervalIndex.from_arrays( | ||||
|                 [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed | ||||
|             ) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) | ||||
|  | ||||
|     def test_sort_values(self, closed): | ||||
|         index = self.create_index(closed=closed) | ||||
|  | ||||
|         result = index.sort_values() | ||||
|         tm.assert_index_equal(result, index) | ||||
|  | ||||
|         result = index.sort_values(ascending=False) | ||||
|         tm.assert_index_equal(result, index[::-1]) | ||||
|  | ||||
|         # with nan | ||||
|         index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) | ||||
|  | ||||
|         result = index.sort_values() | ||||
|         expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = index.sort_values(ascending=False, na_position="first") | ||||
|         expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", [None, "US/Eastern"]) | ||||
|     def test_datetime(self, tz): | ||||
|         start = Timestamp("2000-01-01", tz=tz) | ||||
|         dates = date_range(start=start, periods=10) | ||||
|         index = IntervalIndex.from_breaks(dates) | ||||
|  | ||||
|         # test mid | ||||
|         start = Timestamp("2000-01-01T12:00", tz=tz) | ||||
|         expected = date_range(start=start, periods=9) | ||||
|         tm.assert_index_equal(index.mid, expected) | ||||
|  | ||||
|         # __contains__ doesn't check individual points | ||||
|         assert Timestamp("2000-01-01", tz=tz) not in index | ||||
|         assert Timestamp("2000-01-01T12", tz=tz) not in index | ||||
|         assert Timestamp("2000-01-02", tz=tz) not in index | ||||
|         iv_true = Interval( | ||||
|             Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz) | ||||
|         ) | ||||
|         iv_false = Interval( | ||||
|             Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz) | ||||
|         ) | ||||
|         assert iv_true in index | ||||
|         assert iv_false not in index | ||||
|  | ||||
|         # .contains does check individual points | ||||
|         assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() | ||||
|         assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() | ||||
|         assert index.contains(Timestamp("2000-01-02", tz=tz)).any() | ||||
|  | ||||
|         # test get_indexer | ||||
|         start = Timestamp("1999-12-31T12:00", tz=tz) | ||||
|         target = date_range(start=start, periods=7, freq="12h") | ||||
|         actual = index.get_indexer(target) | ||||
|         expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|         start = Timestamp("2000-01-08T18:00", tz=tz) | ||||
|         target = date_range(start=start, periods=7, freq="6h") | ||||
|         actual = index.get_indexer(target) | ||||
|         expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|     def test_append(self, closed): | ||||
|         index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) | ||||
|         index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) | ||||
|  | ||||
|         result = index1.append(index2) | ||||
|         expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = index1.append([index1, index2]) | ||||
|         expected = IntervalIndex.from_arrays( | ||||
|             [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         for other_closed in {"left", "right", "both", "neither"} - {closed}: | ||||
|             index_other_closed = IntervalIndex.from_arrays( | ||||
|                 [0, 1], [1, 2], closed=other_closed | ||||
|             ) | ||||
|             result = index1.append(index_other_closed) | ||||
|             expected = index1.astype(object).append(index_other_closed.astype(object)) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_is_non_overlapping_monotonic(self, closed): | ||||
|         # Should be True in all cases | ||||
|         tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] | ||||
|         idx = IntervalIndex.from_tuples(tpls, closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is True | ||||
|  | ||||
|         idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is True | ||||
|  | ||||
|         # Should be False in all cases (overlapping) | ||||
|         tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] | ||||
|         idx = IntervalIndex.from_tuples(tpls, closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is False | ||||
|  | ||||
|         idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is False | ||||
|  | ||||
|         # Should be False in all cases (non-monotonic) | ||||
|         tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] | ||||
|         idx = IntervalIndex.from_tuples(tpls, closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is False | ||||
|  | ||||
|         idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) | ||||
|         assert idx.is_non_overlapping_monotonic is False | ||||
|  | ||||
|         # Should be False for closed='both', otherwise True (GH16560) | ||||
|         if closed == "both": | ||||
|             idx = IntervalIndex.from_breaks(range(4), closed=closed) | ||||
|             assert idx.is_non_overlapping_monotonic is False | ||||
|         else: | ||||
|             idx = IntervalIndex.from_breaks(range(4), closed=closed) | ||||
|             assert idx.is_non_overlapping_monotonic is True | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start, shift, na_value", | ||||
|         [ | ||||
|             (0, 1, np.nan), | ||||
|             (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), | ||||
|             (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), | ||||
|         ], | ||||
|     ) | ||||
|     def test_is_overlapping(self, start, shift, na_value, closed): | ||||
|         # GH 23309 | ||||
|         # see test_interval_tree.py for extensive tests; interface tests here | ||||
|  | ||||
|         # non-overlapping | ||||
|         tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         assert index.is_overlapping is False | ||||
|  | ||||
|         # non-overlapping with NA | ||||
|         tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         assert index.is_overlapping is False | ||||
|  | ||||
|         # overlapping | ||||
|         tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         assert index.is_overlapping is True | ||||
|  | ||||
|         # overlapping with NA | ||||
|         tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         assert index.is_overlapping is True | ||||
|  | ||||
|         # common endpoints | ||||
|         tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         result = index.is_overlapping | ||||
|         expected = closed == "both" | ||||
|         assert result is expected | ||||
|  | ||||
|         # common endpoints with NA | ||||
|         tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] | ||||
|         index = IntervalIndex.from_tuples(tuples, closed=closed) | ||||
|         result = index.is_overlapping | ||||
|         assert result is expected | ||||
|  | ||||
|         # intervals with duplicate left values | ||||
|         a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85] | ||||
|         b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90] | ||||
|         index = IntervalIndex.from_arrays(a, b, closed="right") | ||||
|         result = index.is_overlapping | ||||
|         assert result is False | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "tuples", | ||||
|         [ | ||||
|             list(zip(range(10), range(1, 11))), | ||||
|             list( | ||||
|                 zip( | ||||
|                     date_range("20170101", periods=10), | ||||
|                     date_range("20170101", periods=10), | ||||
|                 ) | ||||
|             ), | ||||
|             list( | ||||
|                 zip( | ||||
|                     timedelta_range("0 days", periods=10), | ||||
|                     timedelta_range("1 day", periods=10), | ||||
|                 ) | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_to_tuples(self, tuples): | ||||
|         # GH 18756 | ||||
|         idx = IntervalIndex.from_tuples(tuples) | ||||
|         result = idx.to_tuples() | ||||
|         expected = Index(com.asarray_tuplesafe(tuples)) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "tuples", | ||||
|         [ | ||||
|             list(zip(range(10), range(1, 11))) + [np.nan], | ||||
|             list( | ||||
|                 zip( | ||||
|                     date_range("20170101", periods=10), | ||||
|                     date_range("20170101", periods=10), | ||||
|                 ) | ||||
|             ) | ||||
|             + [np.nan], | ||||
|             list( | ||||
|                 zip( | ||||
|                     timedelta_range("0 days", periods=10), | ||||
|                     timedelta_range("1 day", periods=10), | ||||
|                 ) | ||||
|             ) | ||||
|             + [np.nan], | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("na_tuple", [True, False]) | ||||
|     def test_to_tuples_na(self, tuples, na_tuple): | ||||
|         # GH 18756 | ||||
|         idx = IntervalIndex.from_tuples(tuples) | ||||
|         result = idx.to_tuples(na_tuple=na_tuple) | ||||
|  | ||||
|         # check the non-NA portion | ||||
|         expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) | ||||
|         result_notna = result[:-1] | ||||
|         tm.assert_index_equal(result_notna, expected_notna) | ||||
|  | ||||
|         # check the NA portion | ||||
|         result_na = result[-1] | ||||
|         if na_tuple: | ||||
|             assert isinstance(result_na, tuple) | ||||
|             assert len(result_na) == 2 | ||||
|             assert all(isna(x) for x in result_na) | ||||
|         else: | ||||
|             assert isna(result_na) | ||||
|  | ||||
|     def test_nbytes(self): | ||||
|         # GH 19209 | ||||
|         left = np.arange(0, 4, dtype="i8") | ||||
|         right = np.arange(1, 5, dtype="i8") | ||||
|  | ||||
|         result = IntervalIndex.from_arrays(left, right).nbytes | ||||
|         expected = 64  # 4 * 8 * 2 | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) | ||||
|     def test_set_closed(self, name, closed, new_closed): | ||||
|         # GH 21670 | ||||
|         index = interval_range(0, 5, closed=closed, name=name) | ||||
|         result = index.set_closed(new_closed) | ||||
|         expected = interval_range(0, 5, closed=new_closed, name=name) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) | ||||
|     def test_set_closed_errors(self, bad_closed): | ||||
|         # GH 21670 | ||||
|         index = interval_range(0, 5) | ||||
|         msg = f"invalid option for 'closed': {bad_closed}" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             index.set_closed(bad_closed) | ||||
|  | ||||
|     def test_is_all_dates(self): | ||||
|         # GH 23576 | ||||
|         year_2017 = Interval( | ||||
|             Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00") | ||||
|         ) | ||||
|         year_2017_index = IntervalIndex([year_2017]) | ||||
|         assert not year_2017_index._is_all_dates | ||||
|  | ||||
|  | ||||
def test_dir():
    """``dir`` works on an IntervalIndex and does not list ``str``."""
    # GH#27571 dir(interval_index) should not raise
    listed_names = dir(IntervalIndex.from_arrays([0, 1], [1, 2]))
    assert "str" not in listed_names
|  | ||||
|  | ||||
| def test_searchsorted_different_argument_classes(listlike_box): | ||||
|     # https://github.com/pandas-dev/pandas/issues/32762 | ||||
|     values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) | ||||
|     result = values.searchsorted(listlike_box(values)) | ||||
|     expected = np.array([0, 1], dtype=result.dtype) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = values._data.searchsorted(listlike_box(values)) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    """``searchsorted`` with non-interval values raises TypeError."""
    values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    # ``match`` is a regular expression: escape the text so the dots in the
    # qualified class name are matched literally rather than as wildcards
    msg = re.escape(
        "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    )
    with pytest.raises(TypeError, match=msg):
        values.searchsorted(arg)
| @ -0,0 +1,369 @@ | ||||
| from datetime import timedelta | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_integer | ||||
|  | ||||
| from pandas import ( | ||||
|     DateOffset, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     interval_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.tseries.offsets import Day | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[None, "foo"])
def name(request):
    """Parametrized fixture returning each value in ``params`` (None, then "foo")."""
    return request.param
|  | ||||
|  | ||||
| class TestIntervalRange: | ||||
|     @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)]) | ||||
|     def test_constructor_numeric(self, closed, name, freq, periods): | ||||
|         start, end = 0, 100 | ||||
|         breaks = np.arange(101, step=freq) | ||||
|         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) | ||||
|  | ||||
|         # defined from start/end/freq | ||||
|         result = interval_range( | ||||
|             start=start, end=end, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from start/periods/freq | ||||
|         result = interval_range( | ||||
|             start=start, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from end/periods/freq | ||||
|         result = interval_range( | ||||
|             end=end, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         result = interval_range( | ||||
|             start=start, end=end, periods=periods, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", [None, "US/Eastern"]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "freq, periods", [("D", 364), ("2D", 182), ("22D18h", 16), ("ME", 11)] | ||||
|     ) | ||||
|     def test_constructor_timestamp(self, closed, name, freq, periods, tz): | ||||
|         start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz) | ||||
|         breaks = date_range(start=start, end=end, freq=freq) | ||||
|         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) | ||||
|  | ||||
|         # defined from start/end/freq | ||||
|         result = interval_range( | ||||
|             start=start, end=end, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from start/periods/freq | ||||
|         result = interval_range( | ||||
|             start=start, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from end/periods/freq | ||||
|         result = interval_range( | ||||
|             end=end, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         if not breaks.freq.n == 1 and tz is None: | ||||
|             result = interval_range( | ||||
|                 start=start, end=end, periods=periods, name=name, closed=closed | ||||
|             ) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "freq, periods", [("D", 100), ("2D12h", 40), ("5D", 20), ("25D", 4)] | ||||
|     ) | ||||
|     def test_constructor_timedelta(self, closed, name, freq, periods): | ||||
|         start, end = Timedelta("0 days"), Timedelta("100 days") | ||||
|         breaks = timedelta_range(start=start, end=end, freq=freq) | ||||
|         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) | ||||
|  | ||||
|         # defined from start/end/freq | ||||
|         result = interval_range( | ||||
|             start=start, end=end, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from start/periods/freq | ||||
|         result = interval_range( | ||||
|             start=start, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # defined from end/periods/freq | ||||
|         result = interval_range( | ||||
|             end=end, periods=periods, freq=freq, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         result = interval_range( | ||||
|             start=start, end=end, periods=periods, name=name, closed=closed | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start, end, freq, expected_endpoint", | ||||
|         [ | ||||
|             (0, 10, 3, 9), | ||||
|             (0, 10, 1.5, 9), | ||||
|             (0.5, 10, 3, 9.5), | ||||
|             (Timedelta("0D"), Timedelta("10D"), "2D4h", Timedelta("8D16h")), | ||||
|             ( | ||||
|                 Timestamp("2018-01-01"), | ||||
|                 Timestamp("2018-02-09"), | ||||
|                 "MS", | ||||
|                 Timestamp("2018-02-01"), | ||||
|             ), | ||||
|             ( | ||||
|                 Timestamp("2018-01-01", tz="US/Eastern"), | ||||
|                 Timestamp("2018-01-20", tz="US/Eastern"), | ||||
|                 "5D12h", | ||||
|                 Timestamp("2018-01-17 12:00:00", tz="US/Eastern"), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_early_truncation(self, start, end, freq, expected_endpoint): | ||||
|         # index truncates early if freq causes end to be skipped | ||||
|         result = interval_range(start=start, end=end, freq=freq) | ||||
|         result_endpoint = result.right[-1] | ||||
|         assert result_endpoint == expected_endpoint | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start, end, freq", | ||||
|         [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)], | ||||
|     ) | ||||
|     def test_no_invalid_float_truncation(self, start, end, freq): | ||||
|         # GH 21161 | ||||
|         if freq is None: | ||||
|             breaks = [0.5, 1.5, 2.5, 3.5, 4.5] | ||||
|         else: | ||||
|             breaks = [0.5, 2.0, 3.5, 5.0, 6.5] | ||||
|         expected = IntervalIndex.from_breaks(breaks) | ||||
|  | ||||
|         result = interval_range(start=start, end=end, periods=4, freq=freq) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "start, mid, end", | ||||
|         [ | ||||
|             ( | ||||
|                 Timestamp("2018-03-10", tz="US/Eastern"), | ||||
|                 Timestamp("2018-03-10 23:30:00", tz="US/Eastern"), | ||||
|                 Timestamp("2018-03-12", tz="US/Eastern"), | ||||
|             ), | ||||
|             ( | ||||
|                 Timestamp("2018-11-03", tz="US/Eastern"), | ||||
|                 Timestamp("2018-11-04 00:30:00", tz="US/Eastern"), | ||||
|                 Timestamp("2018-11-05", tz="US/Eastern"), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_linspace_dst_transition(self, start, mid, end): | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         # accounts for the hour gained/lost during DST transition | ||||
|         start = start.as_unit("ns") | ||||
|         mid = mid.as_unit("ns") | ||||
|         end = end.as_unit("ns") | ||||
|         result = interval_range(start=start, end=end, periods=2) | ||||
|         expected = IntervalIndex.from_breaks([start, mid, end]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("freq", [2, 2.0]) | ||||
|     @pytest.mark.parametrize("end", [10, 10.0]) | ||||
|     @pytest.mark.parametrize("start", [0, 0.0]) | ||||
|     def test_float_subtype(self, start, end, freq): | ||||
|         # Has float subtype if any of start/end/freq are float, even if all | ||||
|         # resulting endpoints can safely be upcast to integers | ||||
|  | ||||
|         # defined from start/end/freq | ||||
|         index = interval_range(start=start, end=end, freq=freq) | ||||
|         result = index.dtype.subtype | ||||
|         expected = "int64" if is_integer(start + end + freq) else "float64" | ||||
|         assert result == expected | ||||
|  | ||||
|         # defined from start/periods/freq | ||||
|         index = interval_range(start=start, periods=5, freq=freq) | ||||
|         result = index.dtype.subtype | ||||
|         expected = "int64" if is_integer(start + freq) else "float64" | ||||
|         assert result == expected | ||||
|  | ||||
|         # defined from end/periods/freq | ||||
|         index = interval_range(end=end, periods=5, freq=freq) | ||||
|         result = index.dtype.subtype | ||||
|         expected = "int64" if is_integer(end + freq) else "float64" | ||||
|         assert result == expected | ||||
|  | ||||
|         # GH 20976: linspace behavior defined from start/end/periods | ||||
|         index = interval_range(start=start, end=end, periods=5) | ||||
|         result = index.dtype.subtype | ||||
|         expected = "int64" if is_integer(start + end) else "float64" | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_interval_range_fractional_period(self): | ||||
|         # float value for periods | ||||
|         expected = interval_range(start=0, periods=10) | ||||
|         msg = "Non-integer 'periods' in pd.date_range, .* pd.interval_range" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = interval_range(start=0, periods=10.5) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_coverage(self): | ||||
|         # equivalent timestamp-like start/end | ||||
|         start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15") | ||||
|         expected = interval_range(start=start, end=end) | ||||
|  | ||||
|         result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime()) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = interval_range(start=start.asm8, end=end.asm8) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # equivalent freq with timestamp | ||||
|         equiv_freq = [ | ||||
|             "D", | ||||
|             Day(), | ||||
|             Timedelta(days=1), | ||||
|             timedelta(days=1), | ||||
|             DateOffset(days=1), | ||||
|         ] | ||||
|         for freq in equiv_freq: | ||||
|             result = interval_range(start=start, end=end, freq=freq) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # equivalent timedelta-like start/end | ||||
|         start, end = Timedelta(days=1), Timedelta(days=10) | ||||
|         expected = interval_range(start=start, end=end) | ||||
|  | ||||
|         result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta()) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = interval_range(start=start.asm8, end=end.asm8) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # equivalent freq with timedelta | ||||
|         equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)] | ||||
|         for freq in equiv_freq: | ||||
|             result = interval_range(start=start, end=end, freq=freq) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_errors(self): | ||||
|         # not enough params | ||||
|         msg = ( | ||||
|             "Of the four parameters: start, end, periods, and freq, " | ||||
|             "exactly three must be specified" | ||||
|         ) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start=0) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(end=5) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(periods=2) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range() | ||||
|  | ||||
|         # too many params | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start=0, end=5, periods=6, freq=1.5) | ||||
|  | ||||
|         # mixed units | ||||
|         msg = "start, end, freq need to be type compatible" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=0, end=Timestamp("20130101"), freq=2) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=0, end=Timedelta("1 day"), freq=2) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=0, end=10, freq="D") | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=Timestamp("20130101"), end=10, freq="D") | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range( | ||||
|                 start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D" | ||||
|             ) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range( | ||||
|                 start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2 | ||||
|             ) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=Timedelta("1 day"), end=10, freq="D") | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range( | ||||
|                 start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D" | ||||
|             ) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2) | ||||
|  | ||||
|         # invalid periods | ||||
|         msg = "periods must be a number, got foo" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=0, periods="foo") | ||||
|  | ||||
|         # invalid start | ||||
|         msg = "start must be numeric or datetime-like, got foo" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start="foo", periods=10) | ||||
|  | ||||
|         # invalid end | ||||
|         msg = r"end must be numeric or datetime-like, got \(0, 1\]" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(end=Interval(0, 1), periods=10) | ||||
|  | ||||
|         # invalid freq for datetime-like | ||||
|         msg = "freq must be numeric or convertible to DateOffset, got foo" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start=0, end=10, freq="foo") | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(start=Timestamp("20130101"), periods=10, freq="foo") | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             interval_range(end=Timedelta("1 day"), periods=10, freq="foo") | ||||
|  | ||||
|         # mixed tz | ||||
|         start = Timestamp("2017-01-01", tz="US/Eastern") | ||||
|         end = Timestamp("2017-01-07", tz="US/Pacific") | ||||
|         msg = "Start and end cannot both be tz-aware with different timezones" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             interval_range(start=start, end=end) | ||||
|  | ||||
|     def test_float_freq(self): | ||||
|         # GH 54477 | ||||
|         result = interval_range(0, 1, freq=0.1) | ||||
|         expected = IntervalIndex.from_breaks([0 + 0.1 * n for n in range(11)]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = interval_range(0, 1, freq=0.6) | ||||
|         expected = IntervalIndex.from_breaks([0, 0.6]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,208 @@ | ||||
| from itertools import permutations | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.interval import IntervalTree | ||||
| from pandas.compat import IS64 | ||||
|  | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def skipif_32bit(param): | ||||
|     """ | ||||
|     Skip parameters in a parametrize on 32bit systems. Specifically used | ||||
|     here to skip leaf_size parameters related to GH 23440. | ||||
|     """ | ||||
|     marks = pytest.mark.skipif(not IS64, reason="GH 23440: int type mismatch on 32bit") | ||||
|     return pytest.param(param, marks=marks) | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=["int64", "float64", "uint64"]) | ||||
| def dtype(request): | ||||
|     """Fixture yielding each of the dtype names listed in ``params``.""" | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10]) | ||||
| def leaf_size(request): | ||||
|     """ | ||||
|     Fixture to specify IntervalTree leaf_size parameter; to be used with the | ||||
|     tree fixture. | ||||
|  | ||||
|     The values 1 and 2 are wrapped in skipif_32bit; 10 is passed unmarked. | ||||
|     """ | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| @pytest.fixture( | ||||
|     params=[ | ||||
|         np.arange(5, dtype="int64"), | ||||
|         np.arange(5, dtype="uint64"), | ||||
|         np.arange(5, dtype="float64"), | ||||
|         np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"), | ||||
|     ] | ||||
| ) | ||||
| def tree(request, leaf_size): | ||||
|     left = request.param | ||||
|     return IntervalTree(left, left + 2, leaf_size=leaf_size) | ||||
|  | ||||
|  | ||||
| class TestIntervalTree: | ||||
|     def test_get_indexer(self, tree): | ||||
|         result = tree.get_indexer(np.array([1.0, 5.5, 6.5])) | ||||
|         expected = np.array([0, 4, -1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         with pytest.raises( | ||||
|             KeyError, match="'indexer does not intersect a unique set of intervals'" | ||||
|         ): | ||||
|             tree.get_indexer(np.array([3.0])) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype, target_value, target_dtype", | ||||
|         [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")], | ||||
|     ) | ||||
|     def test_get_indexer_overflow(self, dtype, target_value, target_dtype): | ||||
|         left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype) | ||||
|         tree = IntervalTree(left, right) | ||||
|  | ||||
|         result = tree.get_indexer(np.array([target_value], dtype=target_dtype)) | ||||
|         expected = np.array([-1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_get_indexer_non_unique(self, tree): | ||||
|         indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5])) | ||||
|  | ||||
|         result = indexer[:1] | ||||
|         expected = np.array([0], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = np.sort(indexer[1:3]) | ||||
|         expected = np.array([0, 1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = np.sort(indexer[3:]) | ||||
|         expected = np.array([-1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = missing | ||||
|         expected = np.array([2], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype, target_value, target_dtype", | ||||
|         [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")], | ||||
|     ) | ||||
|     def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype): | ||||
|         left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype) | ||||
|         tree = IntervalTree(left, right) | ||||
|         target = np.array([target_value], dtype=target_dtype) | ||||
|  | ||||
|         result_indexer, result_missing = tree.get_indexer_non_unique(target) | ||||
|         expected_indexer = np.array([-1], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result_indexer, expected_indexer) | ||||
|  | ||||
|         expected_missing = np.array([0], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result_missing, expected_missing) | ||||
|  | ||||
|     def test_duplicates(self, dtype): | ||||
|         left = np.array([0, 0, 0], dtype=dtype) | ||||
|         tree = IntervalTree(left, left + 1) | ||||
|  | ||||
|         with pytest.raises( | ||||
|             KeyError, match="'indexer does not intersect a unique set of intervals'" | ||||
|         ): | ||||
|             tree.get_indexer(np.array([0.5])) | ||||
|  | ||||
|         indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) | ||||
|         result = np.sort(indexer) | ||||
|         expected = np.array([0, 1, 2], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = missing | ||||
|         expected = np.array([], dtype="intp") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000] | ||||
|     ) | ||||
|     def test_get_indexer_closed(self, closed, leaf_size): | ||||
|         x = np.arange(1000, dtype="float64") | ||||
|         found = x.astype("intp") | ||||
|         not_found = (-1 * np.ones(1000)).astype("intp") | ||||
|  | ||||
|         tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size) | ||||
|         tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25)) | ||||
|  | ||||
|         expected = found if tree.closed_left else not_found | ||||
|         tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0)) | ||||
|  | ||||
|         expected = found if tree.closed_right else not_found | ||||
|         tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5)) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "left, right, expected", | ||||
|         [ | ||||
|             (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True), | ||||
|             (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True), | ||||
|             (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True), | ||||
|             (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False), | ||||
|             (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) | ||||
|     def test_is_overlapping(self, closed, order, left, right, expected): | ||||
|         # GH 23309 | ||||
|         tree = IntervalTree(left[order], right[order], closed=closed) | ||||
|         result = tree.is_overlapping | ||||
|         assert result is expected | ||||
|  | ||||
|     @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) | ||||
|     def test_is_overlapping_endpoints(self, closed, order): | ||||
|         """shared endpoints are marked as overlapping""" | ||||
|         # GH 23309 | ||||
|         left, right = np.arange(3, dtype="int64"), np.arange(1, 4) | ||||
|         tree = IntervalTree(left[order], right[order], closed=closed) | ||||
|         result = tree.is_overlapping | ||||
|         expected = closed == "both" | ||||
|         assert result is expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "left, right", | ||||
|         [ | ||||
|             (np.array([], dtype="int64"), np.array([], dtype="int64")), | ||||
|             (np.array([0], dtype="int64"), np.array([1], dtype="int64")), | ||||
|             (np.array([np.nan]), np.array([np.nan])), | ||||
|             (np.array([np.nan] * 3), np.array([np.nan] * 3)), | ||||
|         ], | ||||
|     ) | ||||
|     def test_is_overlapping_trivial(self, closed, left, right): | ||||
|         # GH 23309 | ||||
|         tree = IntervalTree(left, right, closed=closed) | ||||
|         assert tree.is_overlapping is False | ||||
|  | ||||
|     @pytest.mark.skipif(not IS64, reason="GH 23440") | ||||
|     def test_construction_overflow(self): | ||||
|         # GH 25485 | ||||
|         left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101 | ||||
|         tree = IntervalTree(left, right) | ||||
|  | ||||
|         # pivot should be average of left/right medians | ||||
|         result = tree.root.pivot | ||||
|         expected = (50 + np.iinfo(np.int64).max) / 2 | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "left, right, expected", | ||||
|         [ | ||||
|             ([-np.inf, 1.0], [1.0, 2.0], 0.0), | ||||
|             ([-np.inf, -2.0], [-2.0, -1.0], -2.0), | ||||
|             ([-2.0, -1.0], [-1.0, np.inf], 0.0), | ||||
|             ([1.0, 2.0], [2.0, np.inf], 2.0), | ||||
|         ], | ||||
|     ) | ||||
|     def test_inf_bound_infinite_recursion(self, left, right, expected): | ||||
|         # GH 46658 | ||||
|  | ||||
|         tree = IntervalTree(left * 101, right * 101) | ||||
|  | ||||
|         result = tree.root.pivot | ||||
|         assert result == expected | ||||
| @ -0,0 +1,44 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     RangeIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def range_index(): | ||||
|     """RangeIndex of length 3 named "range_index".""" | ||||
|     return RangeIndex(3, name="range_index") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def interval_index(): | ||||
|     """IntervalIndex named "interval_index" whose last two intervals overlap.""" | ||||
|     return IntervalIndex.from_tuples( | ||||
|         [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)], name="interval_index" | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index): | ||||
|     #  GH-45661 | ||||
|     multi_index = MultiIndex.from_product([interval_index, range_index]) | ||||
|     result = multi_index.join(interval_index) | ||||
|  | ||||
|     tm.assert_index_equal(result, multi_index) | ||||
|  | ||||
|  | ||||
| def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index): | ||||
|     #  GH-45661 | ||||
|     multi_index = MultiIndex.from_product([interval_index, range_index]) | ||||
|     result = interval_index.join(multi_index) | ||||
|  | ||||
|     tm.assert_index_equal(result, multi_index) | ||||
|  | ||||
|  | ||||
| def test_join_overlapping_interval_to_another_intervalindex(interval_index): | ||||
|     #  GH-45661 | ||||
|     flipped_interval_index = interval_index[::-1] | ||||
|     result = interval_index.join(flipped_interval_index) | ||||
|  | ||||
|     tm.assert_index_equal(result, interval_index) | ||||
| @ -0,0 +1,13 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import IntervalIndex | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestPickle: | ||||
|     @pytest.mark.parametrize("closed", ["left", "right", "both"]) | ||||
|     def test_pickle_round_trip_closed(self, closed): | ||||
|         # https://github.com/pandas-dev/pandas/issues/35658 | ||||
|         idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed) | ||||
|         result = tm.round_trip_pickle(idx) | ||||
|         tm.assert_index_equal(result, idx) | ||||
| @ -0,0 +1,208 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     Timestamp, | ||||
|     interval_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def monotonic_index(start, end, dtype="int64", closed="right"): | ||||
|     return IntervalIndex.from_breaks(np.arange(start, end, dtype=dtype), closed=closed) | ||||
|  | ||||
|  | ||||
| def empty_index(dtype="int64", closed="right"): | ||||
|     return IntervalIndex(np.array([], dtype=dtype), closed=closed) | ||||
|  | ||||
|  | ||||
| class TestIntervalIndex: | ||||
|     def test_union(self, closed, sort): | ||||
|         index = monotonic_index(0, 11, closed=closed) | ||||
|         other = monotonic_index(5, 13, closed=closed) | ||||
|  | ||||
|         expected = monotonic_index(0, 13, closed=closed) | ||||
|         result = index[::-1].union(other, sort=sort) | ||||
|         if sort in (None, True): | ||||
|             tm.assert_index_equal(result, expected) | ||||
|         else: | ||||
|             tm.assert_index_equal(result.sort_values(), expected) | ||||
|  | ||||
|         result = other[::-1].union(index, sort=sort) | ||||
|         if sort in (None, True): | ||||
|             tm.assert_index_equal(result, expected) | ||||
|         else: | ||||
|             tm.assert_index_equal(result.sort_values(), expected) | ||||
|  | ||||
|         tm.assert_index_equal(index.union(index, sort=sort), index) | ||||
|         tm.assert_index_equal(index.union(index[:1], sort=sort), index) | ||||
|  | ||||
|     def test_union_empty_result(self, closed, sort): | ||||
|         # GH 19101: empty result, same dtype | ||||
|         index = empty_index(dtype="int64", closed=closed) | ||||
|         result = index.union(index, sort=sort) | ||||
|         tm.assert_index_equal(result, index) | ||||
|  | ||||
|         # GH 19101: empty result, different numeric dtypes -> common dtype is f8 | ||||
|         other = empty_index(dtype="float64", closed=closed) | ||||
|         result = index.union(other, sort=sort) | ||||
|         expected = other | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         other = index.union(index, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         other = empty_index(dtype="uint64", closed=closed) | ||||
|         result = index.union(other, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         result = other.union(index, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_intersection(self, closed, sort): | ||||
|         index = monotonic_index(0, 11, closed=closed) | ||||
|         other = monotonic_index(5, 13, closed=closed) | ||||
|  | ||||
|         expected = monotonic_index(5, 11, closed=closed) | ||||
|         result = index[::-1].intersection(other, sort=sort) | ||||
|         if sort in (None, True): | ||||
|             tm.assert_index_equal(result, expected) | ||||
|         else: | ||||
|             tm.assert_index_equal(result.sort_values(), expected) | ||||
|  | ||||
|         result = other[::-1].intersection(index, sort=sort) | ||||
|         if sort in (None, True): | ||||
|             tm.assert_index_equal(result, expected) | ||||
|         else: | ||||
|             tm.assert_index_equal(result.sort_values(), expected) | ||||
|  | ||||
|         tm.assert_index_equal(index.intersection(index, sort=sort), index) | ||||
|  | ||||
|         # GH 26225: nested intervals | ||||
|         index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)]) | ||||
|         other = IntervalIndex.from_tuples([(1, 2), (1, 3)]) | ||||
|         expected = IntervalIndex.from_tuples([(1, 2), (1, 3)]) | ||||
|         result = index.intersection(other) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 26225 | ||||
|         index = IntervalIndex.from_tuples([(0, 3), (0, 2)]) | ||||
|         other = IntervalIndex.from_tuples([(0, 2), (1, 3)]) | ||||
|         expected = IntervalIndex.from_tuples([(0, 2)]) | ||||
|         result = index.intersection(other) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 26225: duplicate nan element | ||||
|         index = IntervalIndex([np.nan, np.nan]) | ||||
|         other = IntervalIndex([np.nan]) | ||||
|         expected = IntervalIndex([np.nan]) | ||||
|         result = index.intersection(other) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_intersection_empty_result(self, closed, sort): | ||||
|         index = monotonic_index(0, 11, closed=closed) | ||||
|  | ||||
|         # GH 19101: empty result, same dtype | ||||
|         other = monotonic_index(300, 314, closed=closed) | ||||
|         expected = empty_index(dtype="int64", closed=closed) | ||||
|         result = index.intersection(other, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 19101: empty result, different numeric dtypes -> common dtype is float64 | ||||
|         other = monotonic_index(300, 314, dtype="float64", closed=closed) | ||||
|         result = index.intersection(other, sort=sort) | ||||
|         expected = other[:0] | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         other = monotonic_index(300, 314, dtype="uint64", closed=closed) | ||||
|         result = index.intersection(other, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_intersection_duplicates(self): | ||||
|         # GH#38743 | ||||
|         index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)]) | ||||
|         other = IntervalIndex.from_tuples([(1, 2), (2, 3)]) | ||||
|         expected = IntervalIndex.from_tuples([(1, 2), (2, 3)]) | ||||
|         result = index.intersection(other) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_difference(self, closed, sort): | ||||
|         index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed) | ||||
|         result = index.difference(index[:1], sort=sort) | ||||
|         expected = index[1:] | ||||
|         if sort is None: | ||||
|             expected = expected.sort_values() | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 19101: empty result, same dtype | ||||
|         result = index.difference(index, sort=sort) | ||||
|         expected = empty_index(dtype="int64", closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 19101: empty result, different dtypes | ||||
|         other = IntervalIndex.from_arrays( | ||||
|             index.left.astype("float64"), index.right, closed=closed | ||||
|         ) | ||||
|         result = index.difference(other, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_symmetric_difference(self, closed, sort): | ||||
|         index = monotonic_index(0, 11, closed=closed) | ||||
|         result = index[1:].symmetric_difference(index[:-1], sort=sort) | ||||
|         expected = IntervalIndex([index[0], index[-1]]) | ||||
|         if sort in (None, True): | ||||
|             tm.assert_index_equal(result, expected) | ||||
|         else: | ||||
|             tm.assert_index_equal(result.sort_values(), expected) | ||||
|  | ||||
|         # GH 19101: empty result, same dtype | ||||
|         result = index.symmetric_difference(index, sort=sort) | ||||
|         expected = empty_index(dtype="int64", closed=closed) | ||||
|         if sort in (None, True): | ||||
|             tm.assert_index_equal(result, expected) | ||||
|         else: | ||||
|             tm.assert_index_equal(result.sort_values(), expected) | ||||
|  | ||||
|         # GH 19101: empty result, different dtypes | ||||
|         other = IntervalIndex.from_arrays( | ||||
|             index.left.astype("float64"), index.right, closed=closed | ||||
|         ) | ||||
|         result = index.symmetric_difference(other, sort=sort) | ||||
|         expected = empty_index(dtype="float64", closed=closed) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning") | ||||
|     @pytest.mark.parametrize( | ||||
|         "op_name", ["union", "intersection", "difference", "symmetric_difference"] | ||||
|     ) | ||||
|     def test_set_incompatible_types(self, closed, op_name, sort): | ||||
|         index = monotonic_index(0, 11, closed=closed) | ||||
|         set_op = getattr(index, op_name) | ||||
|  | ||||
|         # TODO: standardize return type of non-union setops type(self vs other) | ||||
|         # non-IntervalIndex | ||||
|         if op_name == "difference": | ||||
|             expected = index | ||||
|         else: | ||||
|             expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3])) | ||||
|         result = set_op(Index([1, 2, 3]), sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # mixed closed -> cast to object | ||||
|         for other_closed in {"right", "left", "both", "neither"} - {closed}: | ||||
|             other = monotonic_index(0, 11, closed=other_closed) | ||||
|             expected = getattr(index.astype(object), op_name)(other, sort=sort) | ||||
|             if op_name == "difference": | ||||
|                 expected = index | ||||
|             result = set_op(other, sort=sort) | ||||
|             tm.assert_index_equal(result, expected) | ||||
|  | ||||
|         # GH 19016: incompatible dtypes -> cast to object | ||||
|         other = interval_range(Timestamp("20180101"), periods=9, closed=closed) | ||||
|         expected = getattr(index.astype(object), op_name)(other, sort=sort) | ||||
|         if op_name == "difference": | ||||
|             expected = index | ||||
|         result = set_op(other, sort=sort) | ||||
|         tm.assert_index_equal(result, expected) | ||||
		Reference in New Issue
	
	Block a user