This commit is contained in:
2025-09-07 22:09:54 +02:00
parent e1b817252c
commit 2fc0d000b6
7796 changed files with 2159515 additions and 933 deletions

View File

@ -0,0 +1,78 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
class TestIndexConstructor:
# Tests for the Index constructor, specifically for cases that do
# not return a subclass
@pytest.mark.parametrize("value", [1, np.int64(1)])
def test_constructor_corner(self, value):
# corner case
msg = (
r"Index\(\.\.\.\) must be called with a collection of some "
f"kind, {value} was passed"
)
with pytest.raises(TypeError, match=msg):
Index(value)
@pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
def test_construction_list_mixed_tuples(self, index_vals):
# see gh-10697: if we are constructing from a mixed list of tuples,
# make sure that we are independent of the sorting order.
index = Index(index_vals)
assert isinstance(index, Index)
assert not isinstance(index, MultiIndex)
def test_constructor_cast(self):
msg = "could not convert string to float"
with pytest.raises(ValueError, match=msg):
Index(["a", "b", "c"], dtype=float)
@pytest.mark.parametrize("tuple_list", [[()], [(), ()]])
def test_construct_empty_tuples(self, tuple_list):
# GH #45608
result = Index(tuple_list)
expected = MultiIndex.from_tuples(tuple_list)
tm.assert_index_equal(result, expected)
def test_index_string_inference(self):
# GH#54430
expected = Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan))
with pd.option_context("future.infer_string", True):
ser = Index(["a", "b"])
tm.assert_index_equal(ser, expected)
expected = Index(["a", 1], dtype="object")
with pd.option_context("future.infer_string", True):
ser = Index(["a", 1])
tm.assert_index_equal(ser, expected)
def test_inference_on_pandas_objects(self):
# GH#56012
idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
result = Index(idx)
assert result.dtype != np.object_
ser = Series([pd.Timestamp("2019-12-31")], dtype=object)
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
result = Index(ser)
assert result.dtype != np.object_
def test_constructor_not_read_only(self):
# GH#57130
ser = Series([1, 2], dtype=object)
with pd.option_context("mode.copy_on_write", True):
idx = Index(ser)
assert idx._values.flags.writeable

View File

@ -0,0 +1,163 @@
import numpy as np
import pytest
from pandas._config import using_string_dtype
import pandas._config.config as cf
from pandas import Index
import pandas._testing as tm
class TestIndexRendering:
    """Repr/format behavior of the base (object-dtype) Index."""

    def test_repr_is_valid_construction_code(self):
        # for the case of Index, where the repr is traditional rather than
        # stylized: eval(repr(idx)) must rebuild an equivalent index
        idx = Index(["a", "b"])
        res = eval(repr(idx))
        tm.assert_index_equal(res, idx)

    # NOTE(review): runs of leading whitespace inside the expected repr
    # strings below appear collapsed to single spaces (repr normally aligns
    # continuation lines under "Index(["), and some non-ASCII literals look
    # stripped to '' — verify these fixtures against actual repr output.
    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # ASCII
            # short
            (
                Index(["a", "bb", "ccc"]),
                """Index(['a', 'bb', 'ccc'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["a", "bb", "ccc"] * 10),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                " 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                " dtype='object')",
            ),
            # truncated
            (
                Index(["a", "bb", "ccc"] * 100),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n"
                " ...\n"
                " 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                " dtype='object', length=300)",
            ),
            # Non-ASCII
            # short
            (
                Index(["", "いい", "ううう"]),
                """Index(['', 'いい', 'ううう'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["", "いい", "ううう"] * 10),
                (
                    "Index(['', 'いい', 'ううう', '', 'いい', 'ううう', "
                    "'', 'いい', 'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう', '', 'いい', 'ううう', "
                    "'', 'いい', 'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう', '', 'いい', "
                    "'ううう'],\n"
                    " dtype='object')"
                ),
            ),
            # truncated
            (
                Index(["", "いい", "ううう"] * 100),
                (
                    "Index(['', 'いい', 'ううう', '', 'いい', 'ううう', "
                    "'', 'いい', 'ううう', '',\n"
                    " ...\n"
                    " 'ううう', '', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう'],\n"
                    " dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr(self, index, expected):
        # default display options (East Asian width option off)
        result = repr(index)
        assert result == expected

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # short
            (
                Index(["", "いい", "ううう"]),
                ("Index(['', 'いい', 'ううう'], dtype='object')"),
            ),
            # multiple lines
            (
                Index(["", "いい", "ううう"] * 10),
                (
                    "Index(['', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう',\n"
                    " '', 'いい', 'ううう'],\n"
                    " dtype='object')"
                    ""
                ),
            ),
            # truncated
            (
                Index(["", "いい", "ううう"] * 100),
                (
                    "Index(['', 'いい', 'ううう', '', 'いい', "
                    "'ううう', '', 'いい', 'ううう',\n"
                    " '',\n"
                    " ...\n"
                    " 'ううう', '', 'いい', 'ううう', '', "
                    "'いい', 'ううう', '', 'いい',\n"
                    " 'ううう'],\n"
                    " dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr_with_unicode_option(self, index, expected):
        # Enable Unicode option -----------------------------------------
        # East Asian characters are treated as double-width, so the repr
        # wraps lines earlier than with the option off.
        with cf.option_context("display.unicode.east_asian_width", True):
            result = repr(index)
            assert result == expected

    def test_repr_summary(self):
        # a long numeric index is truncated with an ellipsis once it exceeds
        # display.max_seq_items
        with cf.option_context("display.max_seq_items", 10):
            result = repr(Index(np.arange(1000)))
            assert len(result) < 200
            assert "..." in result

    def test_summary_bug(self):
        # GH#3869: %- and {}-style placeholders in labels must pass through
        # the summary unformatted
        ind = Index(["{other}%s", "~:{range}:0"], name="A")
        result = ind._summary()
        # shouldn't be formatted accidentally.
        assert "~:{range}:0" in result
        assert "{other}%s" in result

    def test_index_repr_bool_nan(self):
        # GH32146: format() capitalizes bools and NaN; repr shows lowercase nan
        arr = Index([True, False, np.nan], dtype=object)
        msg = "Index.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            exp1 = arr.format()
        out1 = ["True", "False", "NaN"]
        assert out1 == exp1

        exp2 = repr(arr)
        out2 = "Index([True, False, nan], dtype='object')"
        assert out2 == exp2

    def test_format_different_scalar_lengths(self):
        # GH#35439: labels of different widths are not padded by format()
        idx = Index(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"Index\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected

View File

@ -0,0 +1,104 @@
import numpy as np
import pytest
from pandas._libs import index as libindex
import pandas as pd
from pandas import (
Index,
NaT,
)
import pandas._testing as tm
class TestGetSliceBounds:
    """Checks for Index.get_slice_bound position semantics."""

    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_within(self, side, expected):
        # A label present in the index maps to its position ("left") or one
        # past it ("right").
        idx = Index(list("abcdef"))
        assert idx.get_slice_bound("e", side=side) == expected

    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize(
        "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)]
    )
    def test_get_slice_bounds_outside(self, side, expected, data, bound):
        # Labels beyond either end clamp to len(index) or to 0.
        assert Index(data).get_slice_bound(bound, side=side) == expected

    def test_get_slice_bounds_invalid_side(self):
        # Anything other than "left"/"right" is rejected up front.
        with pytest.raises(ValueError, match="Invalid value for side kwarg"):
            Index([]).get_slice_bound("a", side="middle")
class TestGetIndexerNonUnique:
def test_get_indexer_non_unique_dtype_mismatch(self):
# GH#25459
indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0]))
tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)
@pytest.mark.parametrize(
"idx_values,idx_non_unique",
[
([np.nan, 100, 200, 100], [np.nan, 100]),
([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]),
],
)
def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique):
indexes, missing = Index(idx_values).get_indexer_non_unique(Index([np.nan]))
tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes)
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
indexes, missing = Index(idx_values).get_indexer_non_unique(
Index(idx_non_unique)
)
tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes)
tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)
class TestGetLoc:
    @pytest.mark.slow  # to_flat_index takes a while
    def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
        # Go through the libindex path for which using
        # _bin_search vs ndarray.searchsorted makes a difference.
        # Lowering _SIZE_CUTOFF lets a modest index exercise that path.
        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
            lev = list("ABCD")
            dti = pd.date_range("2016-01-01", periods=10)

            mi = pd.MultiIndex.from_product([lev, range(5), dti])
            oidx = mi.to_flat_index()

            # probe a tuple from the middle of the (monotonic) flat index
            loc = len(oidx) // 2
            tup = oidx[loc]

            res = oidx.get_loc(tup)
        assert res == loc

    def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
        # case that goes through _maybe_get_bool_indexer
        idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)

        # we dont raise KeyError on nan
        res = idx.get_loc(np.nan)
        assert res == 1

        # we only match on None, not on np.nan; duplicates give a bool mask
        res = idx.get_loc(None)
        expected = np.array([False, False, True, False, False, True])
        tm.assert_numpy_array_equal(res, expected)

        # we don't match at all on mismatched NA
        with pytest.raises(KeyError, match="NaT"):
            idx.get_loc(NaT)
def test_getitem_boolean_ea_indexer():
    # GH#45806: indexing with a boolean-extension-dtype Series selects only
    # the True positions; the pd.NA entry is not selected.
    selector = pd.Series([True, False, pd.NA], dtype="boolean")
    selected = selector.index[selector]
    tm.assert_index_equal(selected, Index([0]))

View File

@ -0,0 +1,11 @@
from pandas import Index
import pandas._testing as tm
def test_pickle_preserves_object_dtype():
    # GH#43188, GH#43155 don't infer numeric dtype
    original = Index([1, 2, 3], dtype=object)
    unpickled = tm.round_trip_pickle(original)
    assert unpickled.dtype == object
    tm.assert_index_equal(original, unpickled)

View File

@ -0,0 +1,97 @@
"""
Tests for ndarray-like method on the base Index class
"""
import numpy as np
import pytest
import pandas as pd
from pandas import Index
import pandas._testing as tm
class TestReshape:
    def test_repeat(self):
        # each label appears `repeats` times, preserving order
        repeats = 2
        index = Index([1, 2, 3])
        expected = Index([1, 1, 2, 2, 3, 3])

        result = index.repeat(repeats)
        tm.assert_index_equal(result, expected)

    def test_insert(self):
        # GH 7256
        # validate neg/pos inserts
        result = Index(["b", "c", "d"])

        # test 0th element
        tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))

        # test Nth element that follows Python list behavior
        tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))

        # test loc +/- neq (0, -1): equivalent positive/negative positions
        # must agree
        tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))

        # test empty
        null_index = Index([])
        tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))

    def test_insert_missing(self, request, nulls_fixture, using_infer_string):
        if using_infer_string and nulls_fixture is pd.NA:
            request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
        # GH#22295
        # test there is no mangling of NA values
        expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
        result = Index(list("abc"), dtype=object).insert(
            1, Index([nulls_fixture], dtype=object)
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")]
    )
    @pytest.mark.parametrize("loc", [-1, 2])
    def test_insert_datetime_into_object(self, loc, val):
        # GH#44509: datetime-like scalars inserted into an object index keep
        # their original type (no datetime64/timedelta64 index casting)
        idx = Index(["1", "2", "3"])
        result = idx.insert(loc, val)
        expected = Index(["1", "2", val, "3"])
        tm.assert_index_equal(result, expected)
        assert type(expected[2]) is type(val)

    def test_insert_none_into_string_numpy(self, string_dtype_no_object):
        # GH#55365: None is a valid missing value for string dtypes
        index = Index(["a", "b", "c"], dtype=string_dtype_no_object)
        result = index.insert(-1, None)
        expected = Index(["a", "b", None, "c"], dtype=string_dtype_no_object)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "pos,expected",
        [
            (0, Index(["b", "c", "d"], name="index")),
            (-1, Index(["a", "b", "c"], name="index")),
        ],
    )
    def test_delete(self, pos, expected):
        # delete by positive or negative position; the name is preserved
        index = Index(["a", "b", "c", "d"], name="index")
        result = index.delete(pos)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    def test_delete_raises(self):
        # out-of-bounds positions propagate the underlying IndexError
        index = Index(["a", "b", "c", "d"], name="index")
        msg = "index 5 is out of bounds for axis 0 with size 4"
        with pytest.raises(IndexError, match=msg):
            index.delete(5)

    def test_append_multiple(self):
        # appending a list of index pieces reassembles the original
        index = Index(["a", "b", "c", "d", "e", "f"])

        foos = [index[:2], index[2:4], index[4:]]
        result = foos[0].append(foos[1:])
        tm.assert_index_equal(result, index)

        # empty
        result = index.append([])
        tm.assert_index_equal(result, index)

View File

@ -0,0 +1,266 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
Series,
)
import pandas._testing as tm
from pandas.core.algorithms import safe_sort
def equal_contents(arr1, arr2) -> bool:
    """Return True when *arr1* and *arr2* hold the same set of unique elements."""
    unique_left = frozenset(arr1)
    unique_right = frozenset(arr2)
    return unique_left == unique_right
class TestIndexSetOps:
    @pytest.mark.parametrize(
        "method", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_setops_sort_validation(self, method):
        # the `sort` keyword accepts only None/False/True
        idx1 = Index(["a", "b"])
        idx2 = Index(["b", "c"])
        with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
            getattr(idx1, method)(idx2, sort=2)

        # sort=True is supported (the upstream comment left the GH reference
        # as "GH#??" — TODO: fill in the actual issue number)
        getattr(idx1, method)(idx2, sort=True)

    def test_setops_preserve_object_dtype(self):
        idx = Index([1, 2, 3], dtype=object)
        result = idx.intersection(idx[1:])
        expected = idx[1:]
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, intersection goes through
        # a different route
        result = idx.intersection(idx[1:][::-1])
        tm.assert_index_equal(result, expected)

        # _union (private) hands back raw values, hence the ndarray comparison
        result = idx._union(idx[1:], sort=None)
        expected = idx
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:], sort=None)
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, _union goes through
        # a different route
        result = idx._union(idx[1:][::-1], sort=None)
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:][::-1], sort=None)
        tm.assert_index_equal(result, expected)

    def test_union_base(self):
        # mixed int/str index: the union is ordered ints-then-strings
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(second)
        expected = Index([0, 1, 2, "a", "b", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_union_different_type_base(self, klass):
        # GH 10149: union accepts ndarray/Series/list on the right-hand side
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(klass(second.values))
        assert equal_contents(result, index)

    def test_union_sort_other_incomparable(self):
        # https://github.com/pandas-dev/pandas/issues/24959
        idx = Index([1, pd.Timestamp("2000")])

        # default (sort=None): sorting incomparable values warns and keeps
        # the input order
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1])
        tm.assert_index_equal(result, idx)

        # sort=None
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1], sort=None)
        tm.assert_index_equal(result, idx)

        # sort=False
        result = idx.union(idx[:1], sort=False)
        tm.assert_index_equal(result, idx)

    def test_union_sort_other_incomparable_true(self):
        # with sort=True the failure to sort is a hard TypeError
        idx = Index([1, pd.Timestamp("2000")])
        with pytest.raises(TypeError, match=".*"):
            idx.union(idx[:1], sort=True)

    def test_intersection_equal_sort_true(self):
        idx = Index(["c", "a", "b"])
        sorted_ = Index(["a", "b", "c"])
        tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)

    def test_intersection_base(self, sort):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        # sort=None sorts the result; sort=False keeps first's order
        expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
        result = first.intersection(second, sort=sort)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_intersection_different_type_base(self, klass, sort):
        # GH 10149: intersection accepts ndarray/Series/list on the right
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        result = first.intersection(klass(second.values), sort=sort)
        assert equal_contents(result, second)

    def test_intersection_nosort(self):
        # without sorting, the calling index's order is preserved
        result = Index(["c", "b", "a"]).intersection(["b", "a"])
        expected = Index(["b", "a"])
        tm.assert_index_equal(result, expected)

    def test_intersection_equal_sort(self):
        # self-intersection without explicit sorting keeps original order
        idx = Index(["c", "a", "b"])
        tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
        tm.assert_index_equal(idx.intersection(idx, sort=None), idx)

    def test_intersection_str_dates(self, sort):
        # datetime objects and strings share no elements
        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

        i1 = Index(dt_dates, dtype=object)
        i2 = Index(["aa"], dtype=object)
        result = i2.intersection(i1, sort=sort)

        assert len(result) == 0

    @pytest.mark.parametrize(
        "index2,expected_arr",
        [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
    )
    def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
        # non-monotonic non-unique
        index1 = Index(["A", "B", "A", "C"])
        expected = Index(expected_arr)
        result = index1.intersection(index2, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

    def test_difference_base(self, sort):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        result = first.difference(second, sort)
        expected = Index([0, "a", 1])
        if sort is None:
            expected = Index(safe_sort(expected))
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]
        result = first.symmetric_difference(second)
        expected = Index([0, 1, 2, "a", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "method,expected,sort",
        [
            (
                "intersection",
                np.array(
                    [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                False,
            ),
            (
                "intersection",
                np.array(
                    [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
            (
                "union",
                np.array(
                    [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
        ],
    )
    def test_tuple_union_bug(self, method, expected, sort):
        # set operations on structured (record) arrays must stay 1-D
        index1 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                dtype=[("num", int), ("let", "S1")],
            )
        )
        index2 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
                dtype=[("num", int), ("let", "S1")],
            )
        )

        result = getattr(index1, method)(index2, sort=sort)
        assert result.ndim == 1

        expected = Index(expected)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("first_list", [["b", "a"], []])
    @pytest.mark.parametrize("second_list", [["a", "b"], []])
    @pytest.mark.parametrize(
        "first_name, second_name, expected_name",
        [("A", "B", None), (None, "B", None), ("A", None, None)],
    )
    def test_union_name_preservation(
        self, first_list, second_list, first_name, second_name, expected_name, sort
    ):
        # differing names are dropped from the union; result order depends
        # on `sort` and on whether either operand is empty
        first = Index(first_list, name=first_name)
        second = Index(second_list, name=second_name)
        union = first.union(second, sort=sort)

        vals = set(first_list).union(second_list)

        if sort is None and len(first_list) > 0 and len(second_list) > 0:
            expected = Index(sorted(vals), name=expected_name)
            tm.assert_index_equal(union, expected)
        else:
            expected = Index(vals, name=expected_name)
            tm.assert_index_equal(union.sort_values(), expected.sort_values())

    @pytest.mark.parametrize(
        "diff_type, expected",
        [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
    )
    def test_difference_object_type(self, diff_type, expected):
        # GH 13432
        idx1 = Index([0, 1, "A", "B"])
        idx2 = Index([0, 2, "A", "C"])
        result = getattr(idx1, diff_type)(idx2)
        expected = Index(expected)
        tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,13 @@
import numpy as np
from pandas import Index
import pandas._testing as tm
class TestWhere:
    def test_where_intlike_str_doesnt_cast_ints(self):
        # The string replacement value must remain a string in the result,
        # not be coerced to the integer dtype of the index.
        original = Index(range(3))
        cond = np.array([True, False, True])
        replaced = original.where(cond, "2")
        tm.assert_index_equal(replaced, Index([0, "2", 2]))

View File

@ -0,0 +1,62 @@
import pytest
from pandas import (
CategoricalIndex,
Index,
)
import pandas._testing as tm
class TestAppend:
    @pytest.fixture
    def ci(self):
        # unordered categorical over explicitly ordered categories "cab"
        categories = list("cab")
        return CategoricalIndex(list("aabbca"), categories=categories, ordered=False)

    def test_append(self, ci):
        # appending slices with identical categories reassembles the original
        recombined = ci[:3].append(ci[3:])
        tm.assert_index_equal(recombined, ci, exact=True)

        pieces = [ci[:1], ci[1:3], ci[3:]]
        recombined = pieces[0].append(pieces[1:])
        tm.assert_index_equal(recombined, ci, exact=True)

    def test_append_empty(self, ci):
        # appending an empty list is a no-op
        tm.assert_index_equal(ci.append([]), ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        # appending with different categories or reordered is not ok
        msg = "all inputs must be Index"
        with pytest.raises(TypeError, match=msg):
            ci.append(ci.values.set_categories(list("abcd")))
        with pytest.raises(TypeError, match=msg):
            ci.append(ci.values.reorder_categories(list("abc")))

    def test_append_category_objects(self, ci):
        # a plain Index whose values are all valid categories keeps the
        # categorical dtype
        appended = ci.append(Index(["c", "a"]))
        expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(appended, expected, exact=True)

    def test_append_non_categories(self, ci):
        # invalid objects -> cast to object via concat_compat
        appended = ci.append(Index(["a", "d"]))
        expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(appended, expected, exact=True)

    def test_append_object(self, ci):
        # GH#14298 - if base object is not categorical -> coerce to object
        appended = Index(["c", "a"]).append(ci)
        tm.assert_index_equal(appended, Index(list("caaabbca")), exact=True)

    def test_append_to_another(self):
        # hits Index._concat
        front = Index(["a", "b"])
        back = CategoricalIndex(["d", "e"])
        tm.assert_index_equal(front.append(back), Index(["a", "b", "d", "e"]))

View File

@ -0,0 +1,90 @@
from datetime import date
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
Index,
IntervalIndex,
)
import pandas._testing as tm
class TestAstype:
    def test_astype(self):
        # casting to object yields a plain Index that compares equal but is
        # no longer a CategoricalIndex
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = ci.astype(object)
        tm.assert_index_equal(result, Index(np.array(ci), dtype=object))

        # this IS equal, but not the same class
        assert result.equals(ci)
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

        # interval
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")

        ci = CategoricalIndex(
            Categorical.from_codes([0, 1, -1], categories=ii, ordered=True)
        )

        # code -1 (missing) becomes NaN after the interval cast
        result = ci.astype("interval")
        expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(result, expected)

        result = IntervalIndex(result.values)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        # GH#18630: casting to a categorical dtype honors the target dtype's
        # categories and orderedness
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name:
            index = index.rename(name)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = index.astype("category")
            expected = index
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("box", [True, False])
    def test_categorical_date_roundtrip(self, box):
        # astype to categorical and back should preserve date objects
        v = date.today()

        obj = Index([v, v])
        assert obj.dtype == object
        if box:
            # exercise the ExtensionArray path as well as the Index path
            obj = obj.array

        cat = obj.astype("category")

        rtrip = cat.astype(object)
        assert rtrip.dtype == object
        assert type(rtrip[0]) is date

View File

@ -0,0 +1,391 @@
import numpy as np
import pytest
from pandas._libs import index as libindex
from pandas._libs.arrays import NDArrayBacked
import pandas as pd
from pandas import (
Categorical,
CategoricalDtype,
)
import pandas._testing as tm
from pandas.core.indexes.api import (
CategoricalIndex,
Index,
)
class TestCategoricalIndex:
    @pytest.fixture
    def simple_index(self) -> CategoricalIndex:
        # unordered categorical with explicit category order "cab"
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_can_hold_identifiers(self):
        idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is True

    def test_insert(self, simple_index):
        ci = simple_index
        categories = ci.categories

        # test 0th element
        result = ci.insert(0, "a")
        expected = CategoricalIndex(list("aaabbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test Nth element that follows Python list behavior
        result = ci.insert(-1, "a")
        expected = CategoricalIndex(list("aabbcaa"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test empty
        result = CategoricalIndex([], categories=categories).insert(0, "a")
        expected = CategoricalIndex(["a"], categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # invalid -> cast to object
        expected = ci.astype(object).insert(0, "d")
        result = ci.insert(0, "d").astype(object)
        tm.assert_index_equal(result, expected, exact=True)

        # GH 18295 (test missing): any NA flavor inserts as NaN
        expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
        for na in (np.nan, pd.NaT, None):
            result = CategoricalIndex(list("aabcb")).insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_insert_na_mismatched_dtype(self):
        # NaT does not fit an integer categorical, so insert falls back to
        # an object-dtype Index
        ci = CategoricalIndex([0, 1, 1])
        result = ci.insert(0, pd.NaT)
        expected = Index([pd.NaT, 0, 1, 1], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_delete(self, simple_index):
        ci = simple_index
        categories = ci.categories

        result = ci.delete(0)
        expected = CategoricalIndex(list("abbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        result = ci.delete(-1)
        expected = CategoricalIndex(list("aabbc"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        with tm.external_error_raised((IndexError, ValueError)):
            # Either depending on NumPy version
            ci.delete(10)

    @pytest.mark.parametrize(
        "data, non_lexsorted_data",
        [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
    )
    def test_is_monotonic(self, data, non_lexsorted_data):
        # monotonicity follows the category order, not lexical value order
        c = CategoricalIndex(data)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, ordered=True)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        # reversing the categories flips the direction
        c = CategoricalIndex(data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        c = CategoricalIndex(data, categories=reversed(data), ordered=True)
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        # test when data is neither monotonic increasing nor decreasing
        reordered_data = [data[0], data[2], data[1]]
        c = CategoricalIndex(reordered_data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is False

        # non lexsorted categories
        categories = non_lexsorted_data

        c = CategoricalIndex(categories[:2], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(categories[1:3], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

    def test_has_duplicates(self):
        idx = CategoricalIndex([0, 0, 0], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        # 0 and 1 are not among the categories [2, 3] (values outside the
        # categories coerce to NaN), so the index is not unique
        idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
        assert idx.is_unique is True
        assert idx.has_duplicates is False

    @pytest.mark.parametrize(
        "data, categories, expected",
        [
            (
                [1, 1, 1],
                [1, 2, 3],
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [1, 1, 1],
                list("abc"),
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [2, "a", "b"],
                list("abc"),
                {
                    "first": np.zeros(shape=(3), dtype=np.bool_),
                    "last": np.zeros(shape=(3), dtype=np.bool_),
                    False: np.zeros(shape=(3), dtype=np.bool_),
                },
            ),
            (
                list("abb"),
                list("abc"),
                {
                    "first": np.array([False, False, True]),
                    "last": np.array([False, True, False]),
                    False: np.array([False, True, True]),
                },
            ),
        ],
    )
    def test_drop_duplicates(self, data, categories, expected):
        # `expected` maps each `keep` option to the duplicated() mask
        idx = CategoricalIndex(data, categories=categories, name="foo")
        for keep, e in expected.items():
            tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
            e = idx[~e]
            result = idx.drop_duplicates(keep=keep)
            tm.assert_index_equal(result, e)

    @pytest.mark.parametrize(
        "data, categories, expected_data",
        [
            ([1, 1, 1], [1, 2, 3], [1]),
            ([1, 1, 1], list("abc"), [np.nan]),
            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
        ],
    )
    def test_unique(self, data, categories, expected_data, ordered):
        # unique() preserves the dtype, including unused categories
        dtype = CategoricalDtype(categories, ordered=ordered)

        idx = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(expected_data, dtype=dtype)
        tm.assert_index_equal(idx.unique(), expected)

    def test_repr_roundtrip(self):
        # a short CategoricalIndex repr is executable construction code
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        str(ci)
        tm.assert_index_equal(eval(repr(ci)), ci, exact=True)

        # formatting
        str(ci)

        # long format
        # this is not reprable
        ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
        str(ci)

    def test_isin(self):
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        tm.assert_numpy_array_equal(
            ci.isin(["c"]), np.array([False, False, False, True, False, False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
        )

        # mismatched categorical -> coerced to ndarray so doesn't matter
        result = ci.isin(ci.set_categories(list("abcdefghi")))
        expected = np.array([True] * 6)
        tm.assert_numpy_array_equal(result, expected)

        result = ci.isin(ci.set_categories(list("defghi")))
        expected = np.array([False] * 5 + [True])
        tm.assert_numpy_array_equal(result, expected)

    def test_isin_overlapping_intervals(self):
        # GH 34974
        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
        result = CategoricalIndex(idx).isin(idx)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)

    def test_identical(self):
        # identical() requires matching categories/orderedness, not just
        # equal values
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
        assert ci1.identical(ci1)
        assert ci1.identical(ci1.copy())
        assert not ci1.identical(ci2)

    def test_ensure_copied_data(self):
        # gh-12309: Check the "copy" argument of each
        # Index.__new__ is honored.
        #
        # Must be tested separately from other indexes because
        # self.values is not an ndarray.
        index = CategoricalIndex(list("ab") * 5)

        result = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, result)
        assert not np.shares_memory(result._data._codes, index._data._codes)

        result = CategoricalIndex(index.values, copy=False)
        assert result._data._codes is index._data._codes
class TestCategoricalIndex2:
    # Grab-bag of CategoricalIndex behavior tests: view casting, engine
    # selection by codes dtype, arithmetic restrictions, and delegation of
    # category-mutating methods to the underlying Categorical.

    def test_view_i8(self):
        # GH#25464: view("i8") only works when the codes buffer's byte size
        # is a multiple of 8 (codes are int8 for this small category set).
        ci = CategoricalIndex(list("ab") * 50)
        msg = "When changing to a larger dtype, its size must be a divisor"
        with pytest.raises(ValueError, match=msg):
            ci.view("i8")
        with pytest.raises(ValueError, match=msg):
            ci._data.view("i8")

        ci = ci[:-4]  # length divisible by 8

        res = ci.view("i8")
        expected = ci._data.codes.view("i8")
        tm.assert_numpy_array_equal(res, expected)

        cat = ci._data
        tm.assert_numpy_array_equal(cat.view("i8"), expected)

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        # The engine class must match the codes dtype, which is itself
        # determined by the number of categories.
        if dtype != np.int64:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
            ci = CategoricalIndex(range(num_uniques))
        else:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            # swap the backing codes array for int64 in place
            arr = ci.values._ndarray.astype("int64")
            NDArrayBacked.__init__(ci._data, arr, ci.dtype)
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        # the exact message differs by operand order / pandas code path,
        # so accept any of the three variants
        msg = "|".join(
            [
                f"cannot perform {op_name} with this index type: CategoricalIndex",
                "can only concatenate list",
                rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            func(idx)

    def test_method_delegation(self):
        # Category-mutating methods are delegated to the Categorical data and
        # the result is wrapped back into a CategoricalIndex.
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.set_categories(list("cab"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cab"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.rename_categories(list("efg"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("ffggef"), categories=list("efg"))
        )

        # GH18862 (let rename_categories take callables)
        result = ci.rename_categories(lambda x: x.upper())
        tm.assert_index_equal(
            result, CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.add_categories(["d"])
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cabd"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.remove_categories(["c"])
        # removed values become missing (NaN)
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_unordered()
        tm.assert_index_equal(result, ci)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_ordered()
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True),
        )

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)

    def test_remove_maintains_order(self):
        # remove_categories keeps the (ordered) order of remaining categories
        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))
        result = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True),
        )
        result = result.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(
                ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
            ),
        )

View File

@ -0,0 +1,142 @@
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
Index,
)
import pandas._testing as tm
class TestCategoricalIndexConstructors:
    # Construction of CategoricalIndex from lists, Categoricals, other
    # CategoricalIndexes, and via the Index constructor with dtype="category".

    def test_construction_disallows_scalar(self):
        # A scalar (or no data at all) is rejected, same as for Index.
        msg = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        categories = ci.categories

        result = Index(ci)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        result = Index(ci.values)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        # empty
        result = CategoricalIndex([], categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8"))
        assert not result.ordered

        # passing categories
        result = CategoricalIndex(list("aabbca"), categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )

        c = Categorical(list("aabbca"))
        result = CategoricalIndex(c)
        tm.assert_index_equal(result.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(c, categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        ci = CategoricalIndex(c, categories=list("abcd"))
        result = CategoricalIndex(ci)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        # values absent from the new categories get code -1 (missing)
        result = CategoricalIndex(ci, categories=list("ab"))
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        expected = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

    def test_construction_with_dtype(self):
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)

        result = Index(np.array(ci), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        result = Index(np.array(ci).tolist(), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
        tm.assert_index_equal(result, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        result = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(result, expected, exact=True)

    def test_construction_empty_with_bool_categories(self):
        # see GH#22702
        cat = CategoricalIndex([], categories=[True, False])
        categories = sorted(cat.categories.tolist())
        assert categories == [False, True]

    def test_construction_with_categorical_dtype(self):
        # construction with CategoricalDtype
        # GH#18109
        data, cats, ordered = "a a b b".split(), "c b a".split(), True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        result = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(result, expected, exact=True)

        # GH#19032
        result = Index(data, dtype=dtype)
        tm.assert_index_equal(result, expected, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)

View File

@ -0,0 +1,96 @@
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalIndex,
Index,
MultiIndex,
)
class TestEquals:
    # Equality semantics: equals() ignores category order for unordered
    # categoricals, while elementwise == requires identical categories.

    def test_equals_categorical(self):
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)

        assert ci1.equals(ci1)
        assert not ci1.equals(ci2)
        # equals() is symmetric across a cast to object dtype
        assert ci1.equals(ci1.astype(object))
        assert ci1.astype(object).equals(ci1)

        assert (ci1 == ci1).all()
        assert not (ci1 != ci1).all()
        assert not (ci1 > ci1).all()
        assert not (ci1 < ci1).all()
        assert (ci1 <= ci1).all()
        assert (ci1 >= ci1).all()

        assert not (ci1 == 1).all()
        assert (ci1 == Index(["a", "b"])).all()
        assert (ci1 == ci1.values).all()

        # invalid comparisons
        with pytest.raises(ValueError, match="Lengths must match"):
            ci1 == Index(["a", "b", "c"])

        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            ci1 == ci2
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, ordered=False)
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, categories=list("abc"))

        # tests
        # make sure that we are testing for category inclusion properly
        ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        # Same categories, but different order
        # Unordered
        assert ci.equals(CategoricalIndex(list("aabca")))
        # Ordered
        assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        assert not ci.equals(CategoricalIndex(list("aabca")))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca") + [np.nan])
        assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
        assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
        assert ci.equals(ci.copy())

    def test_equals_categorical_unordered(self):
        # https://github.com/pandas-dev/pandas/issues/16603
        a = CategoricalIndex(["A"], categories=["A", "B"])
        b = CategoricalIndex(["A"], categories=["B", "A"])
        c = CategoricalIndex(["C"], categories=["B", "A"])
        assert a.equals(b)
        assert not a.equals(c)
        assert not b.equals(c)

    def test_equals_non_category(self):
        # GH#37667 Case where other contains a value not among ci's
        # categories ("D") and also contains np.nan
        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
        other = Index(["A", "B", "D", np.nan])

        assert not ci.equals(other)

    def test_equals_multiindex(self):
        # dont raise NotImplementedError when calling is_dtype_compat
        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
        ci = mi.to_flat_index().astype("category")

        assert not ci.equals(mi)

    def test_equals_string_dtype(self, any_string_dtype):
        # GH#55364; any_string_dtype is a fixture of string dtypes
        idx = CategoricalIndex(list("abc"), name="B")
        other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
        assert idx.equals(other)

View File

@ -0,0 +1,54 @@
import numpy as np
import pytest
from pandas import CategoricalIndex
import pandas._testing as tm
class TestFillNA:
    # fillna on CategoricalIndex casts to object when the fill value is not
    # among the categories, while the underlying Categorical raises instead.

    def test_fillna_categorical(self):
        # GH#11343
        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
        # fill by value in categories
        exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
        tm.assert_index_equal(idx.fillna(1.0), exp)

        cat = idx._data

        # fill by value not in categories raises TypeError on EA, casts on CI
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            cat.fillna(2.0)

        result = idx.fillna(2.0)
        expected = idx.astype(object).fillna(2.0)
        tm.assert_index_equal(result, expected)

    def test_fillna_copies_with_no_nas(self):
        # Nothing to fill, should still get a copy for the Categorical method,
        # but OK to get a view on CategoricalIndex method
        ci = CategoricalIndex([0, 1, 1])
        result = ci.fillna(0)
        assert result is not ci
        assert tm.shares_memory(result, ci)

        # But at the EA level we always get a copy.
        cat = ci._data
        result = cat.fillna(0)
        assert result._ndarray is not cat._ndarray
        assert result._ndarray.base is None
        assert not tm.shares_memory(result, cat)

    def test_fillna_validates_with_no_nas(self):
        # We validate the fill value even if fillna is a no-op
        ci = CategoricalIndex([2, 3, 3])
        cat = ci._data

        msg = "Cannot setitem on a Categorical with a new category"
        res = ci.fillna(False)
        # nothing to fill, so we dont cast
        tm.assert_index_equal(res, ci)

        # Same check directly on the Categorical
        with pytest.raises(TypeError, match=msg):
            cat.fillna(False)

View File

@ -0,0 +1,120 @@
"""
Tests for CategoricalIndex.__repr__ and related methods.
"""
import pytest
from pandas._config import using_string_dtype
import pandas._config.config as cf
from pandas import CategoricalIndex
import pandas._testing as tm
class TestCategoricalIndexRepr:
    # Exact-string repr tests, including wide (east-asian) characters and
    # the display.unicode.east_asian_width option.
    # NOTE(review): several single-character literals below render as '' and
    # the continuation lines inside the expected strings carry no leading
    # alignment spaces -- this looks like whitespace/character loss in this
    # copy of the file; verify against the upstream pandas test file.

    def test_format_different_scalar_lengths(self):
        # GH#35439
        idx = CategoricalIndex(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"CategoricalIndex\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    def test_string_categorical_index_repr(self):
        # short
        idx = CategoricalIndex(["a", "bb", "ccc"])
        expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
...
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)"""  # noqa: E501
        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("abcdefghijklmmo"))
        expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
'm', 'm', 'o'],
categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # short
        idx = CategoricalIndex(["", "いい", "ううう"])
        expected = """CategoricalIndex(['', 'いい', 'ううう'], categories=['', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["", "いい", "ううう"] * 10)
        expected = """CategoricalIndex(['', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '',
'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい',
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう'],
categories=['', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["", "いい", "ううう"] * 100)
        expected = """CategoricalIndex(['', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '',
...
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう'],
categories=['', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501
        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
        expected = """CategoricalIndex(['', '', '', '', '', '', '', '', '', '', '', '',
'', '', ''],
categories=['', '', '', '', ..., '', '', '', ''], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            # short
            idx = CategoricalIndex(["", "いい", "ううう"])
            expected = """CategoricalIndex(['', 'いい', 'ううう'], categories=['', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
            assert repr(idx) == expected

            # multiple lines
            idx = CategoricalIndex(["", "いい", "ううう"] * 10)
            expected = """CategoricalIndex(['', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい',
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう',
'', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい',
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう'],
categories=['', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
            assert repr(idx) == expected

            # truncated
            idx = CategoricalIndex(["", "いい", "ううう"] * 100)
            expected = """CategoricalIndex(['', 'いい', 'ううう', '', 'いい', 'ううう', '', 'いい',
'ううう', '',
...
'ううう', '', 'いい', 'ううう', '', 'いい', 'ううう',
'', 'いい', 'ううう'],
categories=['', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501
            assert repr(idx) == expected

            # larger categories
            idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
            expected = """CategoricalIndex(['', '', '', '', '', '', '', '', '', '',
'', '', '', '', ''],
categories=['', '', '', '', ..., '', '', '', ''], ordered=False, dtype='category')"""  # noqa: E501
            assert repr(idx) == expected

View File

@ -0,0 +1,420 @@
import numpy as np
import pytest
from pandas.errors import InvalidIndexError
import pandas as pd
from pandas import (
CategoricalIndex,
Index,
IntervalIndex,
Timestamp,
)
import pandas._testing as tm
class TestTake:
    # take() with negative indices, fill_value handling and kwarg validation.

    def test_take_fill_value(self):
        # GH 12631
        # numeric category
        idx = CategoricalIndex([1, 2, 3], name="xxx")
        result = idx.take(np.array([1, 0, -1]))
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value: -1 now means "missing", not "last element"
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # object category
        idx = CategoricalIndex(
            list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        result = idx.take(np.array([1, 0, -1]))
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = CategoricalIndex(
            ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_datetime(self):
        # datetime category
        idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        idx = CategoricalIndex(idx)
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        expected = CategoricalIndex(expected)
        tm.assert_index_equal(result, expected)

        # fill_value: missing slot becomes NaT but categories are preserved
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
        expected = CategoricalIndex(expected, categories=exp_cats)
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        expected = CategoricalIndex(expected)
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_invalid_kwargs(self):
        # numpy-compat kwargs are validated even though they are unsupported
        idx = CategoricalIndex([1, 2, 3], name="foo")
        indices = [1, 0, -1]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            idx.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, mode="clip")
class TestGetLoc:
    # get_loc matches plain-Index semantics: scalar for a unique label,
    # slice for monotonic duplicates, boolean mask otherwise.

    def test_get_loc(self):
        # GH 12531
        cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
        idx1 = Index(list("abcde"))
        assert cidx1.get_loc("a") == idx1.get_loc("a")
        assert cidx1.get_loc("e") == idx1.get_loc("e")

        for i in [cidx1, idx1]:
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                i.get_loc("NOT-EXIST")

        # non-unique
        cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
        idx2 = Index(list("aacded"))

        # results in bool array
        res = cidx2.get_loc("d")
        tm.assert_numpy_array_equal(res, idx2.get_loc("d"))
        tm.assert_numpy_array_equal(
            res, np.array([False, False, False, True, False, True])
        )

        # unique element results in scalar
        res = cidx2.get_loc("e")
        assert res == idx2.get_loc("e")
        assert res == 4

        for i in [cidx2, idx2]:
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                i.get_loc("NOT-EXIST")

        # non-unique, sliceable
        cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
        idx3 = Index(list("aabbb"))

        # results in slice
        res = cidx3.get_loc("a")
        assert res == idx3.get_loc("a")
        assert res == slice(0, 2, None)

        res = cidx3.get_loc("b")
        assert res == idx3.get_loc("b")
        assert res == slice(2, 5, None)

        for i in [cidx3, idx3]:
            with pytest.raises(KeyError, match="'c'"):
                i.get_loc("c")

    def test_get_loc_unique(self):
        cidx = CategoricalIndex(list("abc"))
        result = cidx.get_loc("b")
        assert result == 1

    def test_get_loc_monotonic_nonunique(self):
        cidx = CategoricalIndex(list("abbc"))
        result = cidx.get_loc("b")
        expected = slice(1, 3, None)
        assert result == expected

    def test_get_loc_nonmonotonic_nonunique(self):
        cidx = CategoricalIndex(list("abcb"))
        result = cidx.get_loc("b")
        expected = np.array([False, True, False, True], dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_loc_nan(self):
        # GH#41933: NaN is locatable like any other value
        ci = CategoricalIndex(["A", "B", np.nan])
        res = ci.get_loc(np.nan)

        assert res == 2
class TestGetIndexer:
    # get_indexer requires a unique index; non-unique lookups must use
    # get_indexer_non_unique. Fill methods (pad/backfill/nearest) are not
    # implemented for CategoricalIndex.

    def test_get_indexer_base(self):
        # Determined by cat ordering.
        idx = CategoricalIndex(list("cab"), categories=list("cab"))
        expected = np.arange(len(idx), dtype=np.intp)

        actual = idx.get_indexer(idx)
        tm.assert_numpy_array_equal(expected, actual)

        with pytest.raises(ValueError, match="Invalid fill method"):
            idx.get_indexer(idx, method="invalid")

    def test_get_indexer_requires_unique(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        oidx = Index(np.array(ci))
        msg = "Reindexing only valid with uniquely valued Index objects"

        for n in [1, 2, 5, len(ci)]:
            finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)]

            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for finder in [list("aabbca"), list("aababca")]:
            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

    def test_get_indexer_non_unique(self):
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        for indexer in [idx2, list("abf"), Index(list("abf"))]:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                idx1.get_indexer(indexer)

            # "f" is not in idx1, so its position is -1
            r1, _ = idx1.get_indexer_non_unique(indexer)
            expected = np.array([0, 1, 2, -1], dtype=np.intp)
            tm.assert_almost_equal(r1, expected)

    def test_get_indexer_method(self):
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        msg = "method pad not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="pad")
        msg = "method backfill not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="backfill")
        msg = "method nearest not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="nearest")

    def test_get_indexer_array(self):
        arr = np.array(
            [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
            dtype=object,
        )
        cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
        ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
        result = ci.get_indexer(arr)
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_same_order(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19551
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_nans_in_index_and_target(self):
        # GH 45361: NaN in the target matches NaN in the index
        ci = CategoricalIndex([1, 2, np.nan, 3])
        other1 = [2, 3, 4, np.nan]
        res1 = ci.get_indexer(other1)
        expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(res1, expected1)
        other2 = [1, 4, 2, 3]
        res2 = ci.get_indexer(other2)
        expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(res2, expected2)
class TestWhere:
    # where() keeps the categorical dtype when replacing with NaN, and falls
    # back to object dtype when the replacement is not a category.

    def test_where(self, listlike_box):
        # ``listlike_box`` is a fixture wrapping the condition in the various
        # list-like containers.
        klass = listlike_box

        i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        cond = [True] * len(i)
        expected = i
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

        cond = [False] + [True] * (len(i) - 1)
        expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

    def test_where_non_categories(self):
        ci = CategoricalIndex(["a", "b", "c", "d"])
        mask = np.array([True, False, True, False])

        # replacement value 2 is not a category -> result is object dtype
        result = ci.where(mask, 2)
        expected = Index(["a", 2, "c", 2], dtype=object)
        tm.assert_index_equal(result, expected)

        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            # Test the Categorical method directly
            ci._data._where(mask, 2)
class TestContains:
    # __contains__ checks values (never the integer codes); NaN membership
    # requires the index to actually hold a compatible missing value.

    def test_contains(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)

        assert "a" in ci
        assert "z" not in ci
        # "e" is a category but not present among the values
        assert "e" not in ci
        assert np.nan not in ci

        # assert codes NOT in index
        assert 0 not in ci
        assert 1 not in ci

    def test_contains_nan(self):
        ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
        assert np.nan in ci

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        # NaT-like containment respects the categories' resolution:
        # datetime64("NaT") matches only datetime-backed categoricals, etc.
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]
        ci = CategoricalIndex(dti)

        obj = ci
        if unwrap:
            # exercise the underlying Categorical directly
            obj = ci._data

        assert np.nan in obj
        assert None in obj
        assert pd.NaT in obj
        assert np.datetime64("NaT") in obj
        assert np.timedelta64("NaT") not in obj

        obj2 = CategoricalIndex(tdi)
        if unwrap:
            obj2 = obj2._data

        assert np.nan in obj2
        assert None in obj2
        assert pd.NaT in obj2
        assert np.datetime64("NaT") not in obj2
        assert np.timedelta64("NaT") in obj2

        obj3 = CategoricalIndex(pi)
        if unwrap:
            obj3 = obj3._data

        assert np.nan in obj3
        assert None in obj3
        assert pd.NaT in obj3
        assert np.datetime64("NaT") not in obj3
        assert np.timedelta64("NaT") not in obj3

    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        # GH 23705
        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        result = item in ci
        assert result is expected

    def test_contains_list(self):
        # GH#21729: unhashable objects raise rather than returning False
        idx = CategoricalIndex([1, 2, 3])

        assert "a" not in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in idx

View File

@ -0,0 +1,144 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
CategoricalIndex,
Index,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
    "data, categories",
    [
        (list("abcbca"), list("cab")),
        (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
    ],
    ids=["string", "interval"],
)
def test_map_str(data, categories, ordered):
    # GH 31202 - override base class since we want to maintain categorical/ordered
    # ``ordered`` is a fixture supplying both True and False.
    index = CategoricalIndex(data, categories=categories, ordered=ordered)
    result = index.map(str)
    expected = CategoricalIndex(
        map(str, data), categories=map(str, categories), ordered=ordered
    )
    tm.assert_index_equal(result, expected)
def test_map():
    """map() keeps categorical dtype, ordering and name; a mapping whose
    output is constant falls back to a plain Index (GH 12766)."""
    ordered_ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
    tm.assert_index_equal(
        ordered_ci.map(lambda x: x.lower()),
        CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True),
    )

    named_ci = CategoricalIndex(
        list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
    )
    tm.assert_index_equal(
        named_ci.map(lambda x: x.lower()),
        CategoricalIndex(list("ababc"), categories=list("bac"), ordered=False, name="XXX"),
    )

    # GH 12766: Return an index not an array
    tm.assert_index_equal(
        named_ci.map(lambda x: 1),
        Index(np.array([1] * 5, dtype=np.int64), name="XXX"),
    )

    # change categories dtype: dict.get, Series and dict mappers all agree
    plain_ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)
    table = {"A": 10, "B": 20, "C": 30}
    expected = CategoricalIndex(
        [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False
    )
    tm.assert_index_equal(plain_ci.map(table.get), expected)
    tm.assert_index_equal(
        plain_ci.map(Series([10, 20, 30], index=["A", "B", "C"])), expected
    )
    tm.assert_index_equal(plain_ci.map(table), expected)
def test_map_with_categorical_series():
    # GH 12756: a categorical mapper yields a CategoricalIndex, a plain
    # Series mapper yields a plain Index; unmapped entries become NaN.
    keys = Index([1, 2, 3, 4])
    cat_mapper = Series(["even", "odd", "even", "odd"], dtype="category")
    plain_mapper = Series(["even", "odd", "even", "odd"])

    tm.assert_index_equal(
        keys.map(cat_mapper), CategoricalIndex(["odd", "even", "odd", np.nan])
    )
    tm.assert_index_equal(
        keys.map(plain_mapper), Index(["odd", "even", "odd", np.nan])
    )
@pytest.mark.parametrize(
("data", "f", "expected"),
(
([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
(
[1, 1, np.nan],
Series([False, False]),
CategoricalIndex([False, False, np.nan]),
),
(
[1, 2, np.nan],
Series([False, False, False]),
Index([False, False, np.nan]),
),
),
)
def test_map_with_nan_ignore(data, f, expected): # GH 24241
values = CategoricalIndex(data)
result = values.map(f, na_action="ignore")
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
("data", "f", "expected"),
(
([1, 1, np.nan], pd.isna, Index([False, False, True])),
([1, 2, np.nan], pd.isna, Index([False, False, True])),
([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
(
[1, 1, np.nan],
Series([False, False]),
CategoricalIndex([False, False, np.nan]),
),
(
[1, 2, np.nan],
Series([False, False, False]),
Index([False, False, np.nan]),
),
),
)
def test_map_with_nan_none(data, f, expected): # GH 24241
values = CategoricalIndex(data)
result = values.map(f, na_action=None)
tm.assert_index_equal(result, expected)
def test_map_with_dict_or_series():
    """Dict and Series mappers produce the same CategoricalIndex result."""
    source = ["a", "B", 1, "a"]
    target = ["one", 2, 3.0, "one"]
    ci = CategoricalIndex(source, name="XXX")
    expected = CategoricalIndex(target, name="XXX", categories=[3.0, 2, "one"])

    # Series mapper; order of categories in result can be different
    series_mapper = Series(target[:-1], index=source[:-1])
    tm.assert_index_equal(ci.map(series_mapper), expected)

    # dict mapper; order of categories in result can be different
    dict_mapper = dict(zip(source[:-1], target[:-1]))
    tm.assert_index_equal(ci.map(dict_mapper), expected)

View File

@ -0,0 +1,78 @@
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalIndex,
Index,
Interval,
)
import pandas._testing as tm
class TestReindex:
    """Tests for CategoricalIndex.reindex."""

    _DUP_MSG = "cannot reindex on an axis with duplicate labels"

    def test_reindex_list_non_unique(self):
        # GH#11586
        ci = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(ValueError, match=self._DUP_MSG):
            ci.reindex(["a", "c"])

    def test_reindex_categorical_non_unique(self):
        ci = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(ValueError, match=self._DUP_MSG):
            ci.reindex(Categorical(["a", "c"]))

    def test_reindex_list_non_unique_unused_category(self):
        # unused category does not change the duplicate-label error
        ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        with pytest.raises(ValueError, match=self._DUP_MSG):
            ci.reindex(["a", "c"])

    def test_reindex_categorical_non_unique_unused_category(self):
        ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        with pytest.raises(ValueError, match=self._DUP_MSG):
            ci.reindex(Categorical(["a", "c"]))

    def test_reindex_duplicate_target(self):
        # See GH25459
        cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
        expected_indexer = np.array([0, 2, 2], dtype=np.intp)

        # list target -> plain Index result
        res, indexer = cat.reindex(["a", "c", "c"])
        tm.assert_index_equal(res, Index(["a", "c", "c"]), exact=True)
        tm.assert_numpy_array_equal(indexer, expected_indexer)

        # CategoricalIndex target -> CategoricalIndex result
        target = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
        res, indexer = cat.reindex(target)
        tm.assert_index_equal(res, target, exact=True)
        tm.assert_numpy_array_equal(indexer, expected_indexer)

    def test_reindex_empty_index(self):
        # See GH16770
        empty = CategoricalIndex([])
        res, indexer = empty.reindex(["a", "b"])
        tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))

    def test_reindex_categorical_added_category(self):
        # GH 42424
        ci = CategoricalIndex(
            [Interval(0, 1, closed="right"), Interval(1, 2, closed="right")],
            ordered=True,
        )
        ci_add = CategoricalIndex(
            [
                Interval(0, 1, closed="right"),
                Interval(1, 2, closed="right"),
                Interval(2, 3, closed="right"),
                Interval(3, 4, closed="right"),
            ],
            ordered=True,
        )
        result, _ = ci.reindex(ci_add)
        tm.assert_index_equal(ci_add, result)

View File

@ -0,0 +1,18 @@
import numpy as np
import pytest
from pandas import (
CategoricalIndex,
Index,
)
import pandas._testing as tm
@pytest.mark.parametrize("na_value", [None, np.nan])
def test_difference_with_na(na_value):
    # GH 57318
    # any NA flavor in `other` must match the index's NA entry
    ci = CategoricalIndex(["a", "b", "c", None])
    diff = ci.difference(Index(["c", na_value]))
    tm.assert_index_equal(
        diff, CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
    )

View File

@ -0,0 +1,41 @@
import numpy as np
import pytest
from pandas import (
Series,
array,
)
@pytest.fixture(params=[None, False])
def sort(request):
    """
    Valid values for the 'sort' parameter used in the Index
    setops methods (intersection, union, etc.)

    Caution:
        Don't confuse this one with the "sort" fixture used
        for DataFrame.append or concat. That one has
        parameters [True, False].

        We can't combine them as sort=True is not permitted
        in the Index setops methods.
    """
    return request.param
@pytest.fixture(params=["D", "3D", "-3D", "h", "2h", "-2h", "min", "2min", "s", "-3s"])
def freq_sample(request):
    """
    Valid values for the 'freq' parameter used to create date_range and
    timedelta_range; includes negative multiples.
    """
    return request.param
@pytest.fixture(params=[list, tuple, np.array, array, Series])
def listlike_box(request):
    """
    Types that may be passed as the indexer to searchsorted.
    """
    return request.param

View File

@ -0,0 +1,89 @@
import numpy as np
import pytest
from pandas import (
PeriodIndex,
Series,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
class DropDuplicates:
    """
    Shared drop_duplicates/duplicated tests; subclasses provide the
    `idx` fixture (a length-10 datetime-like index carrying a freq).
    """

    def test_drop_duplicates_metadata(self, idx):
        # GH#10115
        # a unique index comes back unchanged, freq included
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        result = idx_dup.drop_duplicates()

        expected = idx
        if not isinstance(idx, PeriodIndex):
            # freq is reset except for PeriodIndex
            assert idx_dup.freq is None
            assert result.freq is None
            expected = idx._with_freq(None)
        else:
            assert result.freq == expected.freq

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            (
                "first",
                np.concatenate(([False] * 10, [True] * 5)),
                np.arange(0, 10, dtype=np.int64),
            ),
            (
                "last",
                np.concatenate(([True] * 5, [False] * 10)),
                np.arange(5, 15, dtype=np.int64),
            ),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10, dtype=np.int64),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        # to check Index/Series compat
        # duplicate the first 5 entries so positions 10..14 repeat 0..4
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        # Series path must agree with the Index path
        result = Series(idx).drop_duplicates(keep=keep)
        expected = Series(expected, index=index)
        tm.assert_series_equal(result, expected)
class TestDropDuplicatesPeriodIndex(DropDuplicates):
    """Run the shared DropDuplicates tests against a PeriodIndex."""

    @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
    def freq(self, request):
        # own freq list (no negative multiples, unlike freq_sample)
        return request.param

    @pytest.fixture
    def idx(self, freq):
        return period_range("2011-01-01", periods=10, freq=freq, name="idx")
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    """Run the shared DropDuplicates tests against a DatetimeIndex."""

    @pytest.fixture
    def idx(self, freq_sample):
        return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    """Run the shared DropDuplicates tests against a TimedeltaIndex."""

    @pytest.fixture
    def idx(self, freq_sample):
        return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")

View File

@ -0,0 +1,181 @@
"""
Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex
"""
from datetime import (
datetime,
timedelta,
)
import numpy as np
import pytest
import pandas as pd
from pandas import (
CategoricalIndex,
DatetimeIndex,
Index,
PeriodIndex,
TimedeltaIndex,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
class EqualsTests:
    """
    Shared equals() tests; subclasses supply the `index` fixture
    (a datetime-like Index).
    """

    def test_not_equals_numeric(self, index):
        # integer/float views of the same data are not equal to the index
        for numeric in [
            Index(index.asi8),
            Index(index.asi8.astype("u8")),
            Index(index.asi8).astype("f8"),
        ]:
            assert not index.equals(numeric)

    def test_equals(self, index):
        # equal to itself, its object-dtype cast, and categorical wrappings
        for same in [
            index,
            index.astype(object),
            CategoricalIndex(index),
            CategoricalIndex(index.astype(object)),
        ]:
            assert index.equals(same)

    def test_not_equals_non_arraylike(self, index):
        assert not index.equals(list(index))

    def test_not_equals_strings(self, index):
        stringified = Index([str(x) for x in index], dtype=object)
        assert not index.equals(stringified)
        assert not index.equals(CategoricalIndex(stringified))

    def test_not_equals_misc_strs(self, index):
        assert not index.equals(Index(list("abc")))
class TestPeriodIndexEquals(EqualsTests):
    """Shared EqualsTests plus PeriodIndex-specific equals checks."""

    @pytest.fixture
    def index(self):
        return period_range("2013-01-01", periods=5, freq="D")

    # TODO: de-duplicate with other test_equals2 methods
    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals2(self, freq):
        # GH#13107
        idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # different freq -> never equal
        idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="h")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = PeriodIndex._simple_new(
            idx._values._simple_new(idx._values.asi8, dtype=pd.PeriodDtype("h"))
        )
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))
class TestDatetimeIndexEquals(EqualsTests):
    """Shared EqualsTests plus DatetimeIndex-specific equals checks."""

    @pytest.fixture
    def index(self):
        return date_range("2013-01-01", periods=5)

    def test_equals2(self):
        # GH#13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # tz-aware vs tz-naive -> never equal
        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_not_equals_bday(self, freq):
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        assert not rng.equals(list(rng))
class TestTimedeltaIndexEquals(EqualsTests):
    """Shared EqualsTests plus TimedeltaIndex-specific equals checks."""

    @pytest.fixture
    def index(self):
        return timedelta_range("1 day", periods=10)

    def test_equals2(self):
        # GH#13107
        idx = TimedeltaIndex(["1 days", "2 days", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same values, different order -> not equal
        idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # Check that we don't raise OverflowError on comparisons outside the
        # implementation range GH#28532
        oob = Index([timedelta(days=10**6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        # same values wrapped as np.timedelta64 compare equal element-wise
        oob2 = Index([np.timedelta64(x) for x in oob], dtype=object)
        assert (oob == oob2).all()
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

        oob3 = oob.map(np.timedelta64)
        assert (oob3 == oob).all()
        assert not idx.equals(oob3)
        assert not idx2.equals(oob3)

View File

@ -0,0 +1,45 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
)
import pandas._testing as tm
# Datetime-like dtypes cross-tested against each other below.
dtlike_dtypes = [
    np.dtype("timedelta64[ns]"),
    np.dtype("datetime64[ns]"),
    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
    pd.PeriodDtype("ns"),
]


@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
    """get_indexer_non_unique between mismatched datetime-like dtypes."""
    # three hourly-spaced int64 values, each appearing twice
    vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)

    def construct(dtype):
        if dtype is dtlike_dtypes[-1]:
            # PeriodArray will try to cast ints to strings
            return DatetimeIndex(vals).astype(dtype)
        return Index(vals, dtype=dtype)

    left = construct(ldtype)
    right = construct(rdtype)

    indexer, missing = left.get_indexer_non_unique(right)

    if ldtype is rdtype:
        # same dtype: each target hits both duplicate positions, nothing missing
        tm.assert_numpy_array_equal(
            indexer, np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
        )
        tm.assert_numpy_array_equal(missing, np.array([], dtype=np.intp))
    else:
        # different dtype: no matches, every target reported missing
        tm.assert_numpy_array_equal(indexer, np.array([-1] * 6, dtype=np.intp))
        tm.assert_numpy_array_equal(missing, np.arange(6, dtype=np.intp))

View File

@ -0,0 +1,46 @@
from pandas import (
Index,
NaT,
date_range,
)
def test_is_monotonic_with_nat():
    # GH#31437
    # PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex,
    # in particular never be monotonic when we have NaT
    dti = date_range("2016-01-01", periods=3)
    pi = dti.to_period("D")
    tdi = Index(dti.view("timedelta64[ns]"))

    # no NaT: monotonic increasing and unique, both on the Index and its engine
    for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert obj.is_monotonic_increasing
        assert obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT at the front breaks monotonicity
    dti1 = dti.insert(0, NaT)
    pi1 = dti1.to_period("D")
    tdi1 = Index(dti1.view("timedelta64[ns]"))

    for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    # NaT at the end breaks monotonicity just the same
    dti2 = dti.insert(3, NaT)
    pi2 = dti2.to_period("h")
    tdi2 = Index(dti2.view("timedelta64[ns]"))

    for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

View File

@ -0,0 +1,53 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
NaT,
PeriodIndex,
TimedeltaIndex,
)
import pandas._testing as tm
class NATests:
    """
    Shared NaT-handling tests; subclasses supply `index_without_na`
    (a length-2 NA-free datetime-like index).
    """

    def test_nat(self, index_without_na):
        empty_index = index_without_na[:0]

        # inject NaT directly into the backing array of a deep copy
        index_with_na = index_without_na.copy(deep=True)
        index_with_na._data[1] = NaT

        # the NA sentinel for datetime-like indexes is always NaT
        assert empty_index._na_value is NaT
        assert index_with_na._na_value is NaT
        assert index_without_na._na_value is NaT

        idx = index_without_na
        assert idx._can_hold_na
        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False

        idx = index_with_na
        assert idx._can_hold_na
        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
class TestDatetimeIndexNA(NATests):
    """NATests against a DatetimeIndex (optionally tz-aware)."""

    @pytest.fixture
    def index_without_na(self, tz_naive_fixture):
        tz = tz_naive_fixture
        return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
class TestTimedeltaIndexNA(NATests):
    """NATests against a TimedeltaIndex."""

    @pytest.fixture
    def index_without_na(self):
        return TimedeltaIndex(["1 days", "2 days"])
class TestPeriodIndexNA(NATests):
    """NATests against a PeriodIndex."""

    @pytest.fixture
    def index_without_na(self):
        return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")

View File

@ -0,0 +1,315 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
Index,
NaT,
PeriodIndex,
TimedeltaIndex,
timedelta_range,
)
import pandas._testing as tm
def check_freq_ascending(ordered, orig, ascending):
    """
    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
    when the original index is generated (or generate-able) with
    period_range/date_range/timedelta_range.
    """
    if isinstance(ordered, PeriodIndex):
        # PeriodIndex keeps its freq regardless of sort direction
        assert ordered.freq == orig.freq
    elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        # a descending sort flips the sign of the freq multiple
        expected_n = orig.freq.n if ascending else -1 * orig.freq.n
        assert ordered.freq.n == expected_n
def check_freq_nonmonotonic(ordered, orig):
    """
    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
    when the original index is _not_ generated (or generate-able) with
    period_range/date_range/timedelta_range.
    """
    if isinstance(ordered, PeriodIndex):
        # PeriodIndex always carries its freq
        assert ordered.freq == orig.freq
        return
    if isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        # a non-range-like ordering cannot preserve a freq
        assert ordered.freq is None
class TestSortValues:
    """sort_values/argmin/argmax behavior for datetime-like indexes."""

    @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
    def non_monotonic_idx(self, request):
        # a length-3 index whose minimum sits at position 1 and maximum at 0
        if request.param is DatetimeIndex:
            return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
        elif request.param is PeriodIndex:
            dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
            return dti.to_period("D")
        else:
            return TimedeltaIndex(
                ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
            )

    def test_argmin_argmax(self, non_monotonic_idx):
        assert non_monotonic_idx.argmin() == 1
        assert non_monotonic_idx.argmax() == 0

    def test_sort_values(self, non_monotonic_idx):
        idx = non_monotonic_idx
        ordered = idx.sort_values()
        assert ordered.is_monotonic_increasing
        ordered = idx.sort_values(ascending=False)
        assert ordered[::-1].is_monotonic_increasing

        # with return_indexer=True the positions of the original values
        ordered, dexer = idx.sort_values(return_indexer=True)
        assert ordered.is_monotonic_increasing
        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))

        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
        assert ordered[::-1].is_monotonic_increasing
        tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))

    def check_sort_values_with_freq(self, idx):
        # helper: `idx` is already sorted and range-like, so sorting is a
        # no-op and the freq must survive (sign-flipped when descending)
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        check_freq_ascending(ordered, idx, True)

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        check_freq_ascending(ordered, idx, False)

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp))
        check_freq_ascending(ordered, idx, True)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp))
        check_freq_ascending(ordered, idx, False)

    @pytest.mark.parametrize("freq", ["D", "h"])
    def test_sort_values_with_freq_timedeltaindex(self, freq):
        # GH#10295
        idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx")
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="h",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_sort_values_with_freq_datetimeindex(self, idx):
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize("freq", ["D", "2D", "4D"])
    def test_sort_values_with_freq_periodindex(self, freq):
        # here with_freq refers to being period_range-like
        idx = PeriodIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
        )
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"),
            Index([2011, 2012, 2013], name="idx"),  # for compatibility check
        ],
    )
    def test_sort_values_with_freq_periodindex2(self, idx):
        # here with_freq indicates this is period_range-like
        self.check_sort_values_with_freq(idx)

    def check_sort_values_without_freq(self, idx, expected):
        # helper: `idx` is non-monotonic (possibly containing NaT); the
        # indexer expectations below assume the fixed 5-element layouts
        # used by the callers
        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
            tm.assert_numpy_array_equal(indexer, exp)
            check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

    def test_sort_values_without_freq_timedeltaindex(self):
        # GH#10295
        idx = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        expected = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )
        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_sort_values_without_freq_datetimeindex(
        self, index_dates, expected_dates, tz_naive_fixture
    ):
        tz = tz_naive_fixture
        # without freq
        idx = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")
        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "idx,expected",
        [
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx1",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx1",
                ),
            ),
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx2",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx2",
                ),
            ),
            (
                PeriodIndex(
                    [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                    freq="D",
                    name="idx3",
                ),
                PeriodIndex(
                    [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
                    freq="D",
                    name="idx3",
                ),
            ),
            (
                PeriodIndex(
                    ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
                ),
                PeriodIndex(
                    ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y"
                ),
            ),
            (
                # For compatibility check
                Index([2011, 2013, 2015, 2012, 2011], name="idx"),
                Index([2011, 2011, 2012, 2013, 2015], name="idx"),
            ),
        ],
    )
    def test_sort_values_without_freq_periodindex(self, idx, expected):
        # here without_freq means not generateable by period_range
        self.check_sort_values_without_freq(idx, expected)

    def test_sort_values_without_freq_periodindex_nat(self):
        # doesn't quite fit into check_sort_values_without_freq
        idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
        expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")

        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)
def test_order_stability_compat():
    # GH#35922. sort_values is stable both for normal and datetime-like Index
    pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y")
    iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")

    # the descending indexers must agree: ties keep their original order
    _, period_indexer = pidx.sort_values(return_indexer=True, ascending=False)
    _, int_indexer = iidx.sort_values(return_indexer=True, ascending=False)
    tm.assert_numpy_array_equal(period_indexer, int_indexer)

View File

@ -0,0 +1,103 @@
import numpy as np
from pandas import (
DatetimeIndex,
NaT,
PeriodIndex,
Series,
TimedeltaIndex,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
class TestValueCounts:
    # GH#7735
    def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
        tz = tz_naive_fixture
        orig = date_range("2011-01-01 09:00", freq="h", periods=10, tz=tz)
        self._check_value_counts_with_repeats(orig)

    def test_value_counts_unique_timedeltaindex(self):
        orig = timedelta_range("1 days 09:00:00", freq="h", periods=10)
        self._check_value_counts_with_repeats(orig)

    def test_value_counts_unique_periodindex(self):
        orig = period_range("2011-01-01 09:00", freq="h", periods=10)
        self._check_value_counts_with_repeats(orig)

    def _check_value_counts_with_repeats(self, orig):
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = type(orig)(
            np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype
        )

        # value_counts sorts by descending count, i.e. reversed `orig`
        exp_idx = orig[::-1]
        if not isinstance(exp_idx, PeriodIndex):
            exp_idx = exp_idx._with_freq(None)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64", name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        tm.assert_index_equal(idx.unique(), orig)

    def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
        tz = tz_naive_fixture
        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            tz=tz,
        )
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_timedeltaindex2(self):
        idx = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                NaT,
            ]
        )
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_periodindex2(self):
        idx = PeriodIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            freq="h",
        )
        self._check_value_counts_dropna(idx)

    def _check_value_counts_dropna(self, idx):
        # `idx` layout: value at position 2 three times, at 3 twice, one NaT
        exp_idx = idx[[2, 3]]
        expected = Series([3, 2], index=exp_idx, name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # dropna=False also counts the NaT entry
        exp_idx = idx[[2, 3, -1]]
        expected = Series([3, 2, 1], index=exp_idx, name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

View File

@ -0,0 +1,30 @@
from datetime import timedelta
from pandas import (
Index,
Timestamp,
date_range,
isna,
)
class TestAsOf:
def test_asof_partial(self):
index = date_range("2010-01-01", periods=2, freq="ME")
expected = Timestamp("2010-02-28")
result = index.asof("2010-02")
assert result == expected
assert not isinstance(result, Index)
def test_asof(self):
index = date_range("2020-01-01", periods=10)
dt = index[0]
assert index.asof(dt) == dt
assert isna(index.asof(dt - timedelta(1)))
dt = index[-1]
assert index.asof(dt + timedelta(1)) == dt
dt = index[0].to_pydatetime()
assert isinstance(index.asof(dt), Timestamp)

View File

@ -0,0 +1,338 @@
from datetime import datetime
import dateutil
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
NaT,
PeriodIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndex:
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_astype_asobject_around_dst_transition(self, tzstr):
# GH#1345
# dates around a dst transition
rng = date_range("2/13/2010", "5/6/2010", tz=tzstr)
objs = rng.astype(object)
for i, x in enumerate(objs):
exval = rng[i]
assert x == exval
assert x.tzinfo == exval.tzinfo
objs = rng.astype(object)
for i, x in enumerate(objs):
exval = rng[i]
assert x == exval
assert x.tzinfo == exval.tzinfo
def test_astype(self):
# GH 13149, GH 13209
idx = DatetimeIndex(
["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx"
)
result = idx.astype(object)
expected = Index(
[Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx"
)
tm.assert_index_equal(result, expected)
result = idx.astype(np.int64)
expected = Index(
[1463356800000000000] + [-9223372036854775808] * 3,
dtype=np.int64,
name="idx",
)
tm.assert_index_equal(result, expected)
def test_astype2(self):
rng = date_range("1/1/2000", periods=10, name="idx")
result = rng.astype("i8")
tm.assert_index_equal(result, Index(rng.asi8, name="idx"))
tm.assert_numpy_array_equal(result.values, rng.asi8)
def test_astype_uint(self):
arr = date_range("2000", periods=2, name="idx")
with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
arr.astype("uint64")
with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
arr.astype("uint32")
def test_astype_with_tz(self):
# with tz
rng = date_range("1/1/2000", periods=10, tz="US/Eastern")
msg = "Cannot use .astype to convert from timezone-aware"
with pytest.raises(TypeError, match=msg):
# deprecated
rng.astype("datetime64[ns]")
with pytest.raises(TypeError, match=msg):
# check DatetimeArray while we're here deprecated
rng._data.astype("datetime64[ns]")
def test_astype_tzaware_to_tzaware(self):
# GH 18951: tz-aware to tz-aware
idx = date_range("20170101", periods=4, tz="US/Pacific")
result = idx.astype("datetime64[ns, US/Eastern]")
expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern")
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
def test_astype_tznaive_to_tzaware(self):
# GH 18951: tz-naive to tz-aware
idx = date_range("20170101", periods=4)
idx = idx._with_freq(None) # tz_localize does not preserve freq
msg = "Cannot use .astype to convert from timezone-naive"
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz deprecated
idx.astype("datetime64[ns, US/Eastern]")
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz deprecated
idx._data.astype("datetime64[ns, US/Eastern]")
def test_astype_str_nat(self, using_infer_string):
# GH 13149, GH 13209
# verify that we are returning NaT as a string (and not unicode)
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
result = idx.astype(str)
if using_infer_string:
expected = Index(["2016-05-16", None, None, None], dtype="str")
else:
expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object)
tm.assert_index_equal(result, expected)
def test_astype_str(self):
# test astype string - #10442
dti = date_range("2012-01-01", periods=4, name="test_name")
result = dti.astype(str)
expected = Index(
["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
name="test_name",
dtype="str",
)
tm.assert_index_equal(result, expected)
def test_astype_str_tz_and_name(self):
# test astype string with tz and name
dti = date_range("2012-01-01", periods=3, name="test_name", tz="US/Eastern")
result = dti.astype(str)
expected = Index(
[
"2012-01-01 00:00:00-05:00",
"2012-01-02 00:00:00-05:00",
"2012-01-03 00:00:00-05:00",
],
name="test_name",
dtype="str",
)
tm.assert_index_equal(result, expected)
def test_astype_str_freq_and_name(self):
# test astype string with freqH and name
dti = date_range("1/1/2011", periods=3, freq="h", name="test_name")
result = dti.astype(str)
expected = Index(
["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
name="test_name",
dtype="str",
)
tm.assert_index_equal(result, expected)
def test_astype_str_freq_and_tz(self):
# test astype string with freqH and timezone
dti = date_range(
"3/6/2012 00:00", periods=2, freq="h", tz="Europe/London", name="test_name"
)
result = dti.astype(str)
expected = Index(
["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
dtype="str",
name="test_name",
)
tm.assert_index_equal(result, expected)
def test_astype_datetime64(self):
# GH 13149, GH 13209
idx = DatetimeIndex(
["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx"
)
result = idx.astype("datetime64[ns]")
tm.assert_index_equal(result, idx)
assert result is not idx
result = idx.astype("datetime64[ns]", copy=False)
tm.assert_index_equal(result, idx)
assert result is idx
idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan], tz="EST", name="idx")
msg = "Cannot use .astype to convert from timezone-aware"
with pytest.raises(TypeError, match=msg):
# dt64tz->dt64 deprecated
result = idx_tz.astype("datetime64[ns]")
def test_astype_object(self):
rng = date_range("1/1/2000", periods=20)
casted = rng.astype("O")
exp_values = list(rng)
tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_))
assert casted.tolist() == exp_values
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
def test_astype_object_tz(self, tz):
idx = date_range(start="2013-01-01", periods=4, freq="ME", name="idx", tz=tz)
expected_list = [
Timestamp("2013-01-31", tz=tz),
Timestamp("2013-02-28", tz=tz),
Timestamp("2013-03-31", tz=tz),
Timestamp("2013-04-30", tz=tz),
]
expected = Index(expected_list, dtype=object, name="idx")
result = idx.astype(object)
tm.assert_index_equal(result, expected)
assert idx.tolist() == expected_list
def test_astype_object_with_nat(self):
idx = DatetimeIndex(
[datetime(2013, 1, 1), datetime(2013, 1, 2), NaT, datetime(2013, 1, 4)],
name="idx",
)
expected_list = [
Timestamp("2013-01-01"),
Timestamp("2013-01-02"),
NaT,
Timestamp("2013-01-04"),
]
expected = Index(expected_list, dtype=object, name="idx")
result = idx.astype(object)
tm.assert_index_equal(result, expected)
assert idx.tolist() == expected_list
@pytest.mark.parametrize(
"dtype",
[float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"],
)
def test_astype_raises(self, dtype):
# GH 13149, GH 13209
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
msg = "Cannot cast DatetimeIndex to dtype"
if dtype == "datetime64":
msg = "Casting to unit-less dtype 'datetime64' is not supported"
with pytest.raises(TypeError, match=msg):
idx.astype(dtype)
def test_index_convert_to_datetime_array(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz="US/Eastern")
rng_utc = date_range("20090415", "20090519", tz="utc")
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
def test_index_convert_to_datetime_array_explicit_pytz(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
rng_utc = date_range("20090415", "20090519", tz=pytz.utc)
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
def test_index_convert_to_datetime_array_dateutil(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc())
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
@pytest.mark.parametrize(
"tz, dtype",
[["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]],
)
def test_integer_index_astype_datetime(self, tz, dtype):
# GH 20997, 20964, 24559
val = [Timestamp("2018-01-01", tz=tz).as_unit("ns")._value]
result = Index(val, name="idx").astype(dtype)
expected = DatetimeIndex(["2018-01-01"], tz=tz, name="idx").as_unit("ns")
tm.assert_index_equal(result, expected)
def test_dti_astype_period(self):
idx = DatetimeIndex([NaT, "2011-01-01", "2011-02-01"], name="idx")
res = idx.astype("period[M]")
exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")
tm.assert_index_equal(res, exp)
res = idx.astype("period[3M]")
exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
tm.assert_index_equal(res, exp)
class TestAstype:
    """astype targets other than datetime dtypes: category and bool fallback."""

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_astype_category(self, tz):
        obj = date_range("2000", periods=2, tz=tz, name="idx")
        dti = DatetimeIndex(["2000-01-01", "2000-01-02"], tz=tz).as_unit("ns")
        expected = pd.CategoricalIndex(
            dti,
            name="idx",
        )
        tm.assert_index_equal(obj.astype("category"), expected)
        # the DatetimeArray path returns a bare Categorical
        tm.assert_categorical_equal(obj._data.astype("category"), expected.values)

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_astype_array_fallback(self, tz):
        obj = date_range("2000", periods=2, tz=tz, name="idx")
        truthy = np.array([True, True])
        tm.assert_index_equal(obj.astype(bool), Index(truthy, name="idx"))
        tm.assert_numpy_array_equal(obj._data.astype(bool), truthy)

View File

@ -0,0 +1,141 @@
import pytest
from pandas import (
DatetimeIndex,
Series,
date_range,
)
import pandas._testing as tm
class TestDelete:
    """Tests for ``DatetimeIndex.delete``: element removal and whether the
    result keeps or drops the ``freq`` attribute."""
    def test_delete(self, unit):
        """Deleting an endpoint preserves freq; deleting mid-index resets it."""
        idx = date_range(
            start="2000-01-01", periods=5, freq="ME", name="idx", unit=unit
        )
        # preserve freq
        expected_0 = date_range(
            start="2000-02-01", periods=4, freq="ME", name="idx", unit=unit
        )
        expected_4 = date_range(
            start="2000-01-01", periods=4, freq="ME", name="idx", unit=unit
        )
        # reset freq to None
        expected_1 = DatetimeIndex(
            ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"],
            freq=None,
            name="idx",
        ).as_unit(unit)
        # position -> expected result; negative positions wrap around
        cases = {
            0: expected_0,
            -5: expected_0,
            -1: expected_4,
            4: expected_4,
            1: expected_1,
        }
        for n, expected in cases.items():
            result = idx.delete(n)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq
        with pytest.raises((IndexError, ValueError), match="out of bounds"):
            # either depending on numpy version
            idx.delete(5)
    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
    def test_delete2(self, tz):
        """Deleting the first/last element keeps hourly freq and the tz."""
        idx = date_range(
            start="2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz
        )
        expected = date_range(
            start="2000-01-01 10:00", periods=9, freq="h", name="idx", tz=tz
        )
        result = idx.delete(0)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freqstr == "h"
        assert result.tz == expected.tz
        expected = date_range(
            start="2000-01-01 09:00", periods=9, freq="h", name="idx", tz=tz
        )
        result = idx.delete(-1)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freqstr == "h"
        assert result.tz == expected.tz
    def test_delete_slice(self, unit):
        """Deleting a contiguous run at either end preserves freq; a middle
        run resets it. Tuple and slice arguments behave identically."""
        idx = date_range(
            start="2000-01-01", periods=10, freq="D", name="idx", unit=unit
        )
        # preserve freq
        expected_0_2 = date_range(
            start="2000-01-04", periods=7, freq="D", name="idx", unit=unit
        )
        expected_7_9 = date_range(
            start="2000-01-01", periods=7, freq="D", name="idx", unit=unit
        )
        # reset freq to None
        expected_3_5 = DatetimeIndex(
            [
                "2000-01-01",
                "2000-01-02",
                "2000-01-03",
                "2000-01-07",
                "2000-01-08",
                "2000-01-09",
                "2000-01-10",
            ],
            freq=None,
            name="idx",
        ).as_unit(unit)
        cases = {
            (0, 1, 2): expected_0_2,
            (7, 8, 9): expected_7_9,
            (3, 4, 5): expected_3_5,
        }
        for n, expected in cases.items():
            # tuple of positions ...
            result = idx.delete(n)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq
            # ... and the equivalent slice give the same result
            result = idx.delete(slice(n[0], n[-1] + 1))
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq
    # TODO: belongs in Series.drop tests?
    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
    def test_delete_slice2(self, tz, unit):
        """Series.drop: dropping a prefix keeps freq, scattered drops reset it."""
        dti = date_range(
            "2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz, unit=unit
        )
        ts = Series(
            1,
            index=dti,
        )
        # preserve freq
        result = ts.drop(ts.index[:5]).index
        expected = dti[5:]
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq
        assert result.tz == expected.tz
        # reset freq to None
        result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
        expected = dti[::2]._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq
        assert result.tz == expected.tz

View File

@ -0,0 +1,125 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
Index,
date_range,
factorize,
)
import pandas._testing as tm
class TestDatetimeIndexFactorize:
    """factorize() on DatetimeIndex: codes, uniques, tz and freq handling."""
    def test_factorize(self):
        """Codes/uniques for duplicated values, with and without sort and tz."""
        idx1 = DatetimeIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
        )
        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq
        # input already sorted, so sort=True yields the same codes/uniques
        arr, idx = idx1.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq
        # tz must be preserved
        idx1 = idx1.tz_localize("Asia/Tokyo")
        exp_idx = exp_idx.tz_localize("Asia/Tokyo")
        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq
        idx2 = DatetimeIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
        )
        # sort=True: uniques come back in sorted order
        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
        arr, idx = idx2.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq
        # default: uniques in order of first appearance
        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
        arr, idx = idx2.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq
    def test_factorize_preserves_freq(self):
        # GH#38120 freq should be preserved
        idx3 = date_range("2000-01", periods=4, freq="ME", tz="Asia/Tokyo")
        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
        arr, idx = idx3.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, idx3)
        assert idx.freq == idx3.freq
        # top-level pd.factorize must agree with the method
        arr, idx = factorize(idx3)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, idx3)
        assert idx.freq == idx3.freq
    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
        tz = tz_naive_fixture
        # GH#13750
        base = date_range("2016-11-05", freq="h", periods=100, tz=tz)
        idx = base.repeat(5)
        exp_arr = np.arange(100, dtype=np.intp).repeat(5)
        obj = index_or_series(idx)
        arr, res = obj.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        # repeat() dropped the freq, so the uniques carry freq=None
        expected = base._with_freq(None)
        tm.assert_index_equal(res, expected)
        assert res.freq == expected.freq
    def test_factorize_dst(self, index_or_series):
        # GH#13750: DST-fall (Nov) and DST-spring (Jun) transitions
        idx = date_range("2016-11-06", freq="h", periods=12, tz="US/Eastern")
        obj = index_or_series(idx)
        arr, res = obj.factorize()
        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
        tm.assert_index_equal(res, idx)
        if index_or_series is Index:
            assert res.freq == idx.freq
        idx = date_range("2016-06-13", freq="h", periods=12, tz="US/Eastern")
        obj = index_or_series(idx)
        arr, res = obj.factorize()
        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
        tm.assert_index_equal(res, idx)
        if index_or_series is Index:
            assert res.freq == idx.freq
    @pytest.mark.parametrize("sort", [True, False])
    def test_factorize_no_freq_non_nano(self, tz_naive_fixture, sort):
        # GH#51978 case that does not go through the fastpath based on
        # non-None freq
        tz = tz_naive_fixture
        idx = date_range("2016-11-06", freq="h", periods=5, tz=tz)[[0, 4, 1, 3, 2]]
        exp_codes, exp_uniques = idx.factorize(sort=sort)
        # second-resolution index must match the nanosecond result
        res_codes, res_uniques = idx.as_unit("s").factorize(sort=sort)
        tm.assert_numpy_array_equal(res_codes, exp_codes)
        tm.assert_index_equal(res_uniques, exp_uniques.as_unit("s"))
        res_codes, res_uniques = idx.as_unit("s").to_series().factorize(sort=sort)
        tm.assert_numpy_array_equal(res_codes, exp_codes)
        tm.assert_index_equal(res_uniques, exp_uniques.as_unit("s"))

View File

@ -0,0 +1,62 @@
import pytest
import pandas as pd
import pandas._testing as tm
class TestDatetimeIndexFillNA:
    """fillna on naive and tz-aware DatetimeIndex (GH 11343)."""

    @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
    def test_fillna_datetime64(self, tz):
        # GH 11343
        idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"])
        # same-dtype fill value keeps a DatetimeIndex
        filled = idx.fillna(pd.Timestamp("2011-01-01 10:00"))
        exp = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"]
        )
        tm.assert_index_equal(filled, exp)
        # tz mismatch -> falls back to object dtype
        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00"),
                pd.Timestamp("2011-01-01 10:00", tz=tz),
                pd.Timestamp("2011-01-01 11:00"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp)
        # non-datetime fill value -> object dtype
        exp = pd.Index(
            [pd.Timestamp("2011-01-01 09:00"), "x", pd.Timestamp("2011-01-01 11:00")],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna("x"), exp)
        # now the tz-aware counterpart
        idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz)
        exp = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], tz=tz
        )
        tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp)
        # naive fill into an aware index -> object dtype
        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00", tz=tz),
                pd.Timestamp("2011-01-01 10:00"),
                pd.Timestamp("2011-01-01 11:00", tz=tz),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp)
        # non-datetime fill value -> object dtype
        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00", tz=tz),
                "x",
                pd.Timestamp("2011-01-01 11:00", tz=tz),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna("x"), exp)

View File

@ -0,0 +1,265 @@
from datetime import datetime
import numpy as np
import pytest
import pytz
from pandas import (
NA,
DatetimeIndex,
Index,
NaT,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestInsert:
    """Tests for ``DatetimeIndex.insert``: NaT handling, dtype coercion,
    freq preservation, and tz mismatches."""
    @pytest.mark.parametrize("null", [None, np.nan, np.datetime64("NaT"), NaT, NA])
    @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
    def test_insert_nat(self, tz, null):
        # GH#16537, GH#18295 (test missing)
        idx = DatetimeIndex(["2017-01-01"], tz=tz)
        expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz)
        # a naive np.datetime64 NaT does not coerce into a tz-aware index
        if tz is not None and isinstance(null, np.datetime64):
            expected = Index([null, idx[0]], dtype=object)
        res = idx.insert(0, null)
        tm.assert_index_equal(res, expected)
    @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
    def test_insert_invalid_na(self, tz):
        """A timedelta64 NaT is incompatible and coerces to object dtype."""
        idx = DatetimeIndex(["2017-01-01"], tz=tz)
        item = np.timedelta64("NaT")
        result = idx.insert(0, item)
        expected = Index([item] + list(idx), dtype=object)
        tm.assert_index_equal(result, expected)
    def test_insert_empty_preserves_freq(self, tz_naive_fixture):
        # GH#33573
        tz = tz_naive_fixture
        dti = DatetimeIndex([], tz=tz, freq="D")
        item = Timestamp("2017-04-05").tz_localize(tz)
        result = dti.insert(0, item)
        assert result.freq == dti.freq
        # But not when we insert an item that doesn't conform to freq
        dti = DatetimeIndex([], tz=tz, freq="W-THU")
        result = dti.insert(0, item)
        assert result.freq is None
    def test_insert(self, unit):
        """Datetime inserts stay datetime; other objects coerce to object."""
        idx = DatetimeIndex(
            ["2000-01-04", "2000-01-01", "2000-01-02"], name="idx"
        ).as_unit(unit)
        result = idx.insert(2, datetime(2000, 1, 5))
        exp = DatetimeIndex(
            ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx"
        ).as_unit(unit)
        tm.assert_index_equal(result, exp)
        # insertion of non-datetime should coerce to object index
        result = idx.insert(1, "inserted")
        expected = Index(
            [
                datetime(2000, 1, 4),
                "inserted",
                datetime(2000, 1, 1),
                datetime(2000, 1, 2),
            ],
            name="idx",
        )
        assert not isinstance(result, DatetimeIndex)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
    def test_insert2(self, unit):
        """Freq-conforming inserts keep freq; non-conforming ones drop it."""
        idx = date_range("1/1/2000", periods=3, freq="ME", name="idx", unit=unit)
        # preserve freq
        expected_0 = DatetimeIndex(
            ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"],
            name="idx",
            freq="ME",
        ).as_unit(unit)
        expected_3 = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"],
            name="idx",
            freq="ME",
        ).as_unit(unit)
        # reset freq to None
        expected_1_nofreq = DatetimeIndex(
            ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"],
            name="idx",
            freq=None,
        ).as_unit(unit)
        expected_3_nofreq = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
            name="idx",
            freq=None,
        ).as_unit(unit)
        # (position, value, expected) triples
        cases = [
            (0, datetime(1999, 12, 31), expected_0),
            (-3, datetime(1999, 12, 31), expected_0),
            (3, datetime(2000, 4, 30), expected_3),
            (1, datetime(2000, 1, 31), expected_1_nofreq),
            (3, datetime(2000, 1, 2), expected_3_nofreq),
        ]
        for n, d, expected in cases:
            result = idx.insert(n, d)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq
    def test_insert3(self, unit):
        idx = date_range("1/1/2000", periods=3, freq="ME", name="idx", unit=unit)
        # reset freq to None
        result = idx.insert(3, datetime(2000, 1, 2))
        expected = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
            name="idx",
            freq=None,
        ).as_unit(unit)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq is None
    def test_insert4(self, unit):
        """tz-aware inserts from both Timestamp and pytz-localized datetime."""
        for tz in ["US/Pacific", "Asia/Singapore"]:
            idx = date_range(
                "1/1/2000 09:00", periods=6, freq="h", tz=tz, name="idx", unit=unit
            )
            # preserve freq
            expected = date_range(
                "1/1/2000 09:00", periods=7, freq="h", tz=tz, name="idx", unit=unit
            )
            for d in [
                Timestamp("2000-01-01 15:00", tz=tz),
                pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)),
            ]:
                result = idx.insert(6, d)
                tm.assert_index_equal(result, expected)
                assert result.name == expected.name
                assert result.freq == expected.freq
                assert result.tz == expected.tz
            expected = DatetimeIndex(
                [
                    "2000-01-01 09:00",
                    "2000-01-01 10:00",
                    "2000-01-01 11:00",
                    "2000-01-01 12:00",
                    "2000-01-01 13:00",
                    "2000-01-01 14:00",
                    "2000-01-01 10:00",
                ],
                name="idx",
                tz=tz,
                freq=None,
            ).as_unit(unit)
            # reset freq to None
            for d in [
                Timestamp("2000-01-01 10:00", tz=tz),
                pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)),
            ]:
                result = idx.insert(6, d)
                tm.assert_index_equal(result, expected)
                assert result.name == expected.name
                assert result.tz == expected.tz
                assert result.freq is None
    # TODO: also changes DataFrame.__setitem__ with expansion
    def test_insert_mismatched_tzawareness(self):
        # see GH#7299
        idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
        # mismatched tz-awareness
        item = Timestamp("2000-01-04")
        result = idx.insert(3, item)
        expected = Index(
            list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
        )
        tm.assert_index_equal(result, expected)
        # mismatched tz-awareness
        item = datetime(2000, 1, 4)
        result = idx.insert(3, item)
        expected = Index(
            list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
        )
        tm.assert_index_equal(result, expected)
    # TODO: also changes DataFrame.__setitem__ with expansion
    def test_insert_mismatched_tz(self):
        # see GH#7299
        # pre-2.0 with mismatched tzs we would cast to object
        idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
        # mismatched tz -> cast to object (could reasonably cast to same tz or UTC)
        item = Timestamp("2000-01-04", tz="US/Eastern")
        result = idx.insert(3, item)
        expected = Index(
            list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]),
            name="idx",
        )
        assert expected.dtype == idx.dtype
        tm.assert_index_equal(result, expected)
        item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))
        result = idx.insert(3, item)
        expected = Index(
            list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]),
            name="idx",
        )
        assert expected.dtype == idx.dtype
        tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize(
        "item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)]
    )
    def test_insert_mismatched_types_raises(self, tz_aware_fixture, item):
        # GH#33703 dont cast these to dt64
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz)
        result = dti.insert(1, item)
        if isinstance(item, np.ndarray):
            # 0-d arrays are unwrapped to their scalar on insert
            assert item.item() == 0
            expected = Index([dti[0], 0] + list(dti[1:]), dtype=object, name=9)
        else:
            expected = Index([dti[0], item] + list(dti[1:]), dtype=object, name=9)
        tm.assert_index_equal(result, expected)
    def test_insert_castable_str(self, tz_aware_fixture):
        # GH#33703: parseable strings coerce to Timestamp, keeping the dtype
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)
        value = "2019-11-05"
        result = dti.insert(0, value)
        ts = Timestamp(value).tz_localize(tz)
        expected = DatetimeIndex([ts] + list(dti), dtype=dti.dtype, name=9)
        tm.assert_index_equal(result, expected)
    def test_insert_non_castable_str(self, tz_aware_fixture):
        # GH#33703: unparseable strings fall back to object dtype
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)
        value = "foo"
        result = dti.insert(0, value)
        expected = Index(["foo"] + list(dti), dtype=object, name=9)
        tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,28 @@
from pandas import (
DataFrame,
DatetimeIndex,
date_range,
)
import pandas._testing as tm
def test_isocalendar_returns_correct_values_close_to_new_year_with_tz():
    # GH#6538: Check that DatetimeIndex and its TimeStamp elements
    # return the same weekofyear accessor close to new year w/ tz
    dates = DatetimeIndex(
        ["2013/12/29", "2013/12/30", "2013/12/31"], tz="Europe/Brussels"
    )
    expected_data_frame = DataFrame(
        [[2013, 52, 7], [2014, 1, 1], [2014, 1, 2]],
        columns=["year", "week", "day"],
        index=dates,
        dtype="UInt32",
    )
    tm.assert_frame_equal(dates.isocalendar(), expected_data_frame)
def test_dti_timestamp_isocalendar_fields():
    # the scalar Timestamp and the vectorized accessor must agree
    idx = date_range("2020-01-01", periods=10)
    vectorized = tuple(idx.isocalendar().iloc[-1].to_list())
    assert idx[-1].isocalendar() == vectorized

View File

@ -0,0 +1,47 @@
import pytest
from pandas import (
DatetimeIndex,
Index,
MultiIndex,
Period,
date_range,
)
import pandas._testing as tm
class TestMap:
    """DatetimeIndex.map with scalar- and tuple-returning callables."""

    def test_map(self):
        rng = date_range("1/1/2000", periods=10)

        def fmt(ts):
            return ts.strftime("%Y%m%d")

        expected = Index([fmt(ts) for ts in rng])
        tm.assert_index_equal(rng.map(fmt), expected)

    def test_map_fallthrough(self, capsys):
        # GH#22067, check we don't get warnings about silently ignored errors
        dti = date_range("2017-01-01", "2018-01-01", freq="B")
        dti.map(lambda x: Period(year=x.year, month=x.month, freq="M"))
        assert capsys.readouterr().err == ""

    def test_map_bug_1677(self):
        index = DatetimeIndex(["2012-04-25 09:30:00.393000"])
        asof = index.asof
        result = index.map(asof)
        tm.assert_index_equal(result, Index([asof(index[0])]))

    @pytest.mark.parametrize("name", [None, "name"])
    def test_index_map(self, name):
        # see GH#20990: tuple-returning map produces a MultiIndex
        count = 6
        mapped = date_range("2018-01-01", periods=count, freq="ME", name=name).map(
            lambda x: (x.year, x.month)
        )
        exp_index = MultiIndex.from_product(((2018,), range(1, 7)), names=[name, name])
        tm.assert_index_equal(mapped, exp_index)

View File

@ -0,0 +1,95 @@
from dateutil.tz import tzlocal
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
DatetimeIndex,
NaT,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestNormalize:
    """DatetimeIndex.normalize: truncation to midnight and is_normalized."""
    def test_normalize(self):
        rng = date_range("1/1/2000 9:30", periods=10, freq="D")
        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D")
        tm.assert_index_equal(result, expected)
        # raw nanosecond values normalize to exact midnights
        arr_ns = np.array([1380585623454345752, 1380585612343234312]).astype(
            "datetime64[ns]"
        )
        rng_ns = DatetimeIndex(arr_ns)
        rng_ns_normalized = rng_ns.normalize()
        arr_ns = np.array([1380585600000000000, 1380585600000000000]).astype(
            "datetime64[ns]"
        )
        expected = DatetimeIndex(arr_ns)
        tm.assert_index_equal(rng_ns_normalized, expected)
        # `result` here is still the normalized daily range from the top
        assert result.is_normalized
        assert not rng.is_normalized
    def test_normalize_nat(self):
        """NaT passes through normalize unchanged."""
        dti = DatetimeIndex([NaT, Timestamp("2018-01-01 01:00:00")])
        result = dti.normalize()
        expected = DatetimeIndex([NaT, Timestamp("2018-01-01")])
        tm.assert_index_equal(result, expected)
    def test_normalize_tz(self):
        """Normalize on tz-aware ranges; freq is dropped except for UTC."""
        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="US/Eastern")
        result = rng.normalize()  # does not preserve freq
        expected = date_range("1/1/2000", periods=10, freq="D", tz="US/Eastern")
        tm.assert_index_equal(result, expected._with_freq(None))
        assert result.is_normalized
        assert not rng.is_normalized
        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="UTC")
        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D", tz="UTC")
        tm.assert_index_equal(result, expected)
        assert result.is_normalized
        assert not rng.is_normalized
        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal())
        result = rng.normalize()  # does not preserve freq
        expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal())
        tm.assert_index_equal(result, expected._with_freq(None))
        assert result.is_normalized
        assert not rng.is_normalized
    @td.skip_if_windows
    @pytest.mark.parametrize(
        "timezone",
        [
            "US/Pacific",
            "US/Eastern",
            "UTC",
            "Asia/Kolkata",
            "Asia/Shanghai",
            "Australia/Canberra",
        ],
    )
    def test_normalize_tz_local(self, timezone):
        # GH#13459: tzlocal under various simulated system timezones
        with tm.set_timezone(timezone):
            rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal())
            result = rng.normalize()
            expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal())
            expected = expected._with_freq(None)
            tm.assert_index_equal(result, expected)
            assert result.is_normalized
            assert not rng.is_normalized

View File

@ -0,0 +1,83 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestRepeat:
    """DatetimeIndex.repeat / np.repeat: values repeat, freq is dropped."""
    def test_repeat_range(self, tz_naive_fixture):
        # NOTE(review): tz_naive_fixture is unused here — confirm intended
        rng = date_range("1/1/2000", "1/1/2001")
        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)
    def test_repeat_range2(self, tz_naive_fixture, unit):
        tz = tz_naive_fixture
        index = date_range("2001-01-01", periods=2, freq="D", tz=tz, unit=unit)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
        ).as_unit(unit)
        # method and np.repeat must agree
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None
    def test_repeat_range3(self, tz_naive_fixture, unit):
        tz = tz_naive_fixture
        index = date_range("2001-01-01", periods=2, freq="2D", tz=tz, unit=unit)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
        ).as_unit(unit)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None
    def test_repeat_range4(self, tz_naive_fixture, unit):
        """NaT entries are repeated like any other value."""
        tz = tz_naive_fixture
        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz).as_unit(unit)
        exp = DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        ).as_unit(unit)
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None
    def test_repeat(self, tz_naive_fixture, unit):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"
        rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz, unit=unit)
        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 00:30:00", tz=tz),
                Timestamp("2016-01-01 00:30:00", tz=tz),
            ]
        ).as_unit(unit)
        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None
        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        # np.repeat's axis argument is not supported
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

View File

@ -0,0 +1,31 @@
from dateutil.tz import tzlocal
import pytest
from pandas.compat import IS64
from pandas import date_range
@pytest.mark.parametrize(
    "freq,expected",
    [
        ("YE", "day"),
        ("QE", "day"),
        ("ME", "day"),
        ("D", "day"),
        ("h", "hour"),
        ("min", "minute"),
        ("s", "second"),
        ("ms", "millisecond"),
        ("us", "microsecond"),
    ],
)
def test_dti_resolution(request, tz_naive_fixture, freq, expected):
    """resolution reports the finest populated field for each freq."""
    tz = tz_naive_fixture
    overflow_risk = freq == "YE" and not IS64 and isinstance(tz, tzlocal)
    if overflow_risk:
        request.applymarker(
            pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
        )
    dti = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
    assert dti.resolution == expected

View File

@ -0,0 +1,221 @@
import pytest
from pandas._libs.tslibs import to_offset
from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG
from pandas import (
DatetimeIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndexRound:
def test_round_daily(self):
dti = date_range("20130101 09:10:11", periods=5)
result = dti.round("D")
expected = date_range("20130101", periods=5)
tm.assert_index_equal(result, expected)
dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
result = dti.round("D")
expected = date_range("20130101", periods=5).tz_localize("US/Eastern")
tm.assert_index_equal(result, expected)
result = dti.round("s")
tm.assert_index_equal(result, dti)
@pytest.mark.parametrize(
"freq, error_msg",
[
("YE", "<YearEnd: month=12> is a non-fixed frequency"),
("ME", "<MonthEnd> is a non-fixed frequency"),
("foobar", "Invalid frequency: foobar"),
],
)
def test_round_invalid(self, freq, error_msg):
dti = date_range("20130101 09:10:11", periods=5)
dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
with pytest.raises(ValueError, match=error_msg):
dti.round(freq)
def test_round(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
rng = date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz, unit=unit)
elt = rng[1]
expected_rng = DatetimeIndex(
[
Timestamp("2016-01-01 00:00:00", tz=tz),
Timestamp("2016-01-01 00:00:00", tz=tz),
Timestamp("2016-01-01 01:00:00", tz=tz),
Timestamp("2016-01-01 02:00:00", tz=tz),
Timestamp("2016-01-01 02:00:00", tz=tz),
]
).as_unit(unit)
expected_elt = expected_rng[1]
result = rng.round(freq="h")
tm.assert_index_equal(result, expected_rng)
assert elt.round(freq="h") == expected_elt
msg = INVALID_FREQ_ERR_MSG
with pytest.raises(ValueError, match=msg):
rng.round(freq="foo")
with pytest.raises(ValueError, match=msg):
elt.round(freq="foo")
msg = "<MonthEnd> is a non-fixed frequency"
with pytest.raises(ValueError, match=msg):
rng.round(freq="ME")
with pytest.raises(ValueError, match=msg):
elt.round(freq="ME")
def test_round2(self, tz_naive_fixture):
tz = tz_naive_fixture
# GH#14440 & GH#15578
index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz).as_unit("ns")
result = index.round("ms")
expected = DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz).as_unit("ns")
tm.assert_index_equal(result, expected)
for freq in ["us", "ns"]:
tm.assert_index_equal(index, index.round(freq))
def test_round3(self, tz_naive_fixture):
tz = tz_naive_fixture
index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz).as_unit("ns")
result = index.round("ms")
expected = DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz).as_unit("ns")
tm.assert_index_equal(result, expected)
def test_round4(self, tz_naive_fixture):
index = DatetimeIndex(["2016-10-17 12:00:00.001501031"], dtype="M8[ns]")
result = index.round("10ns")
expected = DatetimeIndex(["2016-10-17 12:00:00.001501030"], dtype="M8[ns]")
tm.assert_index_equal(result, expected)
ts = "2016-10-17 12:00:00.001501031"
dti = DatetimeIndex([ts], dtype="M8[ns]")
with tm.assert_produces_warning(False):
dti.round("1010ns")
def test_no_rounding_occurs(self, tz_naive_fixture):
# GH 21262
tz = tz_naive_fixture
rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz)
expected_rng = DatetimeIndex(
[
Timestamp("2016-01-01 00:00:00", tz=tz),
Timestamp("2016-01-01 00:02:00", tz=tz),
Timestamp("2016-01-01 00:04:00", tz=tz),
Timestamp("2016-01-01 00:06:00", tz=tz),
Timestamp("2016-01-01 00:08:00", tz=tz),
]
).as_unit("ns")
result = rng.round(freq="2min")
tm.assert_index_equal(result, expected_rng)
@pytest.mark.parametrize(
"test_input, rounder, freq, expected",
[
(["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]),
(["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]),
(
["2117-01-01 00:00:45.000000012"],
"floor",
"10ns",
["2117-01-01 00:00:45.000000010"],
),
(
["1823-01-01 00:00:01.000000012"],
"ceil",
"10ns",
["1823-01-01 00:00:01.000000020"],
),
(["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]),
(["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]),
(["2018-01-01 00:15:00"], "ceil", "15min", ["2018-01-01 00:15:00"]),
(["2018-01-01 00:15:00"], "floor", "15min", ["2018-01-01 00:15:00"]),
(["1823-01-01 03:00:00"], "ceil", "3h", ["1823-01-01 03:00:00"]),
(["1823-01-01 03:00:00"], "floor", "3h", ["1823-01-01 03:00:00"]),
(
("NaT", "1823-01-01 00:00:01"),
"floor",
"1s",
("NaT", "1823-01-01 00:00:01"),
),
(
("NaT", "1823-01-01 00:00:01"),
"ceil",
"1s",
("NaT", "1823-01-01 00:00:01"),
),
],
)
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
dt = DatetimeIndex(list(test_input))
func = getattr(dt, rounder)
result = func(freq)
expected = DatetimeIndex(list(expected))
assert expected.equals(result)
@pytest.mark.parametrize(
"start, index_freq, periods",
[("2018-01-01", "12h", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)],
)
@pytest.mark.parametrize(
"round_freq",
[
"2ns",
"3ns",
"4ns",
"5ns",
"6ns",
"7ns",
"250ns",
"500ns",
"750ns",
"1us",
"19us",
"250us",
"500us",
"750us",
"1s",
"2s",
"3s",
"12h",
"1D",
],
)
def test_round_int64(self, start, index_freq, periods, round_freq):
dt = date_range(start=start, freq=index_freq, periods=periods)
unit = to_offset(round_freq).nanos
# test floor
result = dt.floor(round_freq)
diff = dt.asi8 - result.asi8
mod = result.asi8 % unit
assert (mod == 0).all(), f"floor not a {round_freq} multiple"
assert (0 <= diff).all() and (diff < unit).all(), "floor error"
# test ceil
result = dt.ceil(round_freq)
diff = result.asi8 - dt.asi8
mod = result.asi8 % unit
assert (mod == 0).all(), f"ceil not a {round_freq} multiple"
assert (0 <= diff).all() and (diff < unit).all(), "ceil error"
# test round
result = dt.round(round_freq)
diff = abs(result.asi8 - dt.asi8)
mod = result.asi8 % unit
assert (mod == 0).all(), f"round not a {round_freq} multiple"
assert (diff <= unit // 2).all(), "round error"
if unit % 2 == 0:
assert (
result.asi8[diff == unit // 2] % 2 == 0
).all(), "round half to even error"

View File

@ -0,0 +1,169 @@
from datetime import datetime
import pytest
import pytz
from pandas.errors import NullFrequencyError
import pandas as pd
from pandas import (
DatetimeIndex,
Series,
date_range,
)
import pandas._testing as tm
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestDatetimeIndexShift:
    """Tests for DatetimeIndex.shift: freq handling, tz/DST interaction,
    and agreement with integer +/- offset arithmetic."""

    # -------------------------------------------------------------
    # DatetimeIndex.shift is used in integer addition
    def test_dti_shift_tzaware(self, tz_naive_fixture, unit):
        # GH#9903
        # shift with an explicit freq works on empty and non-empty tz-aware
        # indexes; shift(0) is a no-op, +/-3 hours moves wall times by 3h.
        tz = tz_naive_fixture
        idx = DatetimeIndex([], name="xxx", tz=tz).as_unit(unit)
        tm.assert_index_equal(idx.shift(0, freq="h"), idx)
        tm.assert_index_equal(idx.shift(3, freq="h"), idx)
        idx = DatetimeIndex(
            ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(0, freq="h"), idx)
        exp = DatetimeIndex(
            ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(3, freq="h"), exp)
        exp = DatetimeIndex(
            ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(-3, freq="h"), exp)
    def test_dti_shift_freqs(self, unit):
        # test shift for DatetimeIndex and non DatetimeIndex
        # GH#8083
        # with no freq argument, shift moves by the index's own freq ("D");
        # an explicit freq ("2D") scales the move but keeps the "D" freq.
        drange = date_range("20130101", periods=5, unit=unit)
        result = drange.shift(1)
        expected = DatetimeIndex(
            ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)
        result = drange.shift(-1)
        expected = DatetimeIndex(
            ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)
        result = drange.shift(3, freq="2D")
        expected = DatetimeIndex(
            ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)
    def test_dti_shift_int(self, unit):
        # shift(n) is equivalent to adding/subtracting n * freq
        rng = date_range("1/1/2000", periods=20, unit=unit)
        result = rng + 5 * rng.freq
        expected = rng.shift(5)
        tm.assert_index_equal(result, expected)
        result = rng - 5 * rng.freq
        expected = rng.shift(-5)
        tm.assert_index_equal(result, expected)
    def test_dti_shift_no_freq(self, unit):
        # GH#19147
        # shifting without a freq (and no freq argument) is an error
        dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None).as_unit(unit)
        with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"):
            dti.shift(2)
    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_shift_localized(self, tzstr, unit):
        # shifting a localized index preserves its timezone
        dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI", unit=unit)
        dr_tz = dr.tz_localize(tzstr)
        result = dr_tz.shift(1, "10min")
        assert result.tz == dr_tz.tz
    def test_dti_shift_across_dst(self, unit):
        # GH 8616
        # a Series freq-shift across the fall-back transition matches the
        # hourly index itself shifted by one slot.
        idx = date_range(
            "2013-11-03", tz="America/Chicago", periods=7, freq="h", unit=unit
        )
        ser = Series(index=idx[:-1], dtype=object)
        result = ser.shift(freq="h")
        expected = Series(index=idx[1:], dtype=object)
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize(
        "shift, result_time",
        [
            [0, "2014-11-14 00:00:00"],
            [-1, "2014-11-13 23:00:00"],
            [1, "2014-11-14 01:00:00"],
        ],
    )
    def test_dti_shift_near_midnight(self, shift, result_time, unit):
        # GH 8616
        # fixed-offset EST shifts across midnight land on the expected wall time
        dt = datetime(2014, 11, 14, 0)
        dt_est = pytz.timezone("EST").localize(dt)
        idx = DatetimeIndex([dt_est]).as_unit(unit)
        ser = Series(data=[1], index=idx)
        result = ser.shift(shift, freq="h")
        exp_index = DatetimeIndex([result_time], tz="EST").as_unit(unit)
        expected = Series(1, index=exp_index)
        tm.assert_series_equal(result, expected)
    def test_shift_periods(self, unit):
        # GH#22458 : argument 'n' was deprecated in favor of 'periods'
        idx = date_range(start=START, end=END, periods=3, unit=unit)
        tm.assert_index_equal(idx.shift(periods=0), idx)
        tm.assert_index_equal(idx.shift(0), idx)
    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_shift_bday(self, freq, unit):
        # shifting by whole business-day steps relabels positions and keeps freq
        rng = date_range(START, END, freq=freq, unit=unit)
        shifted = rng.shift(5)
        assert shifted[0] == rng[5]
        assert shifted.freq == rng.freq
        shifted = rng.shift(-5)
        assert shifted[5] == rng[0]
        assert shifted.freq == rng.freq
        shifted = rng.shift(0)
        assert shifted[0] == rng[0]
        assert shifted.freq == rng.freq
    def test_shift_bmonth(self, unit):
        # shifting a BMonthEnd index by a different offset; the CDay variant
        # is expected to emit a PerformanceWarning (non-vectorized path).
        rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit)
        shifted = rng.shift(1, freq=pd.offsets.BDay())
        assert shifted[0] == rng[0] + pd.offsets.BDay()
        rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit)
        with tm.assert_produces_warning(pd.errors.PerformanceWarning):
            shifted = rng.shift(1, freq=pd.offsets.CDay())
        assert shifted[0] == rng[0] + pd.offsets.CDay()
    def test_shift_empty(self, unit):
        # GH#14811
        # shifting an empty index is a no-op
        dti = date_range(start="2016-10-21", end="2016-10-21", freq="BME", unit=unit)
        result = dti.shift(1)
        tm.assert_index_equal(result, dti)

View File

@ -0,0 +1,47 @@
import pytest
from pandas import (
DatetimeIndex,
date_range,
)
import pandas._testing as tm
@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"])
@pytest.mark.parametrize("name", [None, "my_dti"])
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
def test_dti_snap(name, tz, unit):
dti = DatetimeIndex(
[
"1/1/2002",
"1/2/2002",
"1/3/2002",
"1/4/2002",
"1/5/2002",
"1/6/2002",
"1/7/2002",
],
name=name,
tz=tz,
freq="D",
)
dti = dti.as_unit(unit)
result = dti.snap(freq="W-MON")
expected = date_range("12/31/2001", "1/7/2002", name=name, tz=tz, freq="w-mon")
expected = expected.repeat([3, 4])
expected = expected.as_unit(unit)
tm.assert_index_equal(result, expected)
assert result.tz == expected.tz
assert result.freq is None
assert expected.freq is None
result = dti.snap(freq="B")
expected = date_range("1/1/2002", "1/7/2002", name=name, tz=tz, freq="b")
expected = expected.repeat([1, 1, 1, 2, 2])
expected = expected.as_unit(unit)
tm.assert_index_equal(result, expected)
assert result.tz == expected.tz
assert result.freq is None
assert expected.freq is None

View File

@ -0,0 +1,28 @@
from pandas import (
DataFrame,
Index,
date_range,
)
import pandas._testing as tm
class TestToFrame:
    """DatetimeIndex.to_frame: value round-trip and column naming."""

    def test_to_frame_datetime_tz(self):
        # GH#25809: tz-aware values survive conversion to a DataFrame.
        idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
        expected = DataFrame(idx, index=idx)
        tm.assert_frame_equal(idx.to_frame(), expected)

    def test_to_frame_respects_none_name(self):
        # GH#44212 if we explicitly pass name=None, then that should be respected,
        # not changed to 0
        # GH-45448 this is first deprecated to only change in the future
        idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
        none_col = Index([None], dtype=object)
        tm.assert_index_equal(none_col, idx.to_frame(name=None).columns)
        # an existing index name is also overridden by an explicit None
        tm.assert_index_equal(
            none_col, idx.rename("foo").to_frame(name=None).columns
        )

View File

@ -0,0 +1,45 @@
import numpy as np
from pandas import (
Index,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDateTimeIndexToJulianDate:
    """DatetimeIndex.to_julian_date must agree element-wise with
    Timestamp.to_julian_date and return a float64 Index, across several
    frequencies and both pre- and post-Gregorian-reform dates."""

    def _check(self, start, freq):
        # One-line helper: vectorized conversion == scalar conversion.
        # (The original five tests were verbatim copies of this body.)
        dr = date_range(start=start, periods=5, freq=freq)
        expected = Index([x.to_julian_date() for x in dr])
        result = dr.to_julian_date()
        assert isinstance(result, Index) and result.dtype == np.float64
        tm.assert_index_equal(result, expected)

    def test_1700(self):
        self._check(Timestamp("1710-10-01"), "D")

    def test_2000(self):
        self._check(Timestamp("2000-02-27"), "D")

    def test_hour(self):
        self._check(Timestamp("2000-02-27"), "h")

    def test_minute(self):
        self._check(Timestamp("2000-02-27"), "min")

    def test_second(self):
        self._check(Timestamp("2000-02-27"), "s")

View File

@ -0,0 +1,225 @@
import dateutil.tz
from dateutil.tz import tzlocal
import pytest
import pytz
from pandas._libs.tslibs.ccalendar import MONTHS
from pandas._libs.tslibs.offsets import MonthEnd
from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
from pandas import (
DatetimeIndex,
Period,
PeriodIndex,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm
class TestToPeriod:
    """DatetimeIndex.to_period: freq inference, deprecated aliases, tz
    handling, and round-tripping through PeriodIndex."""

    def test_dti_to_period(self):
        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
        pi1 = dti.to_period()
        pi2 = dti.to_period(freq="D")
        pi3 = dti.to_period(freq="3D")
        assert pi1[0] == Period("Jan 2005", freq="M")
        assert pi2[0] == Period("1/31/2005", freq="D")
        assert pi3[0] == Period("1/31/2005", freq="3D")
        assert pi1[-1] == Period("Nov 2005", freq="M")
        assert pi2[-1] == Period("11/30/2005", freq="D")
        # BUG FIX: was ``assert pi3[-1], Period(...)`` -- the comma made the
        # Period the assert *message*, so nothing was actually compared.
        assert pi3[-1] == Period("11/30/2005", freq="3D")
        tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M"))
        tm.assert_index_equal(
            pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D")
        )
        tm.assert_index_equal(
            pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D")
        )

    @pytest.mark.parametrize("month", MONTHS)
    def test_to_period_quarterly(self, month):
        # make sure we can make the round trip
        freq = f"Q-{month}"
        rng = period_range("1989Q3", "1991Q3", freq=freq)
        stamps = rng.to_timestamp()
        result = stamps.to_period(freq)
        tm.assert_index_equal(rng, result)

    @pytest.mark.parametrize("off", ["BQE", "QS", "BQS"])
    def test_to_period_quarterlyish(self, off):
        # any quarterly-flavoured offset infers a quarterly period freq
        rng = date_range("01-Jan-2012", periods=8, freq=off)
        prng = rng.to_period()
        assert prng.freq == "QE-DEC"

    @pytest.mark.parametrize("off", ["BYE", "YS", "BYS"])
    def test_to_period_annualish(self, off):
        # any annual-flavoured offset infers an annual period freq
        rng = date_range("01-Jan-2012", periods=8, freq=off)
        prng = rng.to_period()
        assert prng.freq == "YE-DEC"

    def test_to_period_monthish(self):
        # all month-anchored offsets infer monthly periods; "EOM" is invalid
        offsets = ["MS", "BME"]
        for off in offsets:
            rng = date_range("01-Jan-2012", periods=8, freq=off)
            prng = rng.to_period()
            assert prng.freqstr == "M"
        rng = date_range("01-Jan-2012", periods=8, freq="ME")
        prng = rng.to_period()
        assert prng.freqstr == "M"
        with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
            date_range("01-Jan-2012", periods=8, freq="EOM")

    @pytest.mark.parametrize(
        "freq_offset, freq_period",
        [
            ("2ME", "2M"),
            (MonthEnd(2), MonthEnd(2)),
        ],
    )
    def test_dti_to_period_2monthish(self, freq_offset, freq_period):
        # multiplied month offsets survive to_period
        dti = date_range("2020-01-01", periods=3, freq=freq_offset)
        pi = dti.to_period()
        tm.assert_index_equal(pi, period_range("2020-01", "2020-05", freq=freq_period))

    @pytest.mark.parametrize(
        "freq, freq_depr",
        [
            ("2ME", "2M"),
            ("2QE", "2Q"),
            ("2QE-SEP", "2Q-SEP"),
            ("1YE", "1Y"),
            ("2YE-MAR", "2Y-MAR"),
            ("1YE", "1A"),
            ("2YE-MAR", "2A-MAR"),
        ],
    )
    def test_to_period_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr):
        # GH#9586
        # BUG FIX: the two f-strings below were two separate *statements*,
        # so the second half of the expected message was silently discarded;
        # they must be one parenthesized expression.
        msg = (
            f"'{freq_depr[1:]}' is deprecated and will be removed "
            f"in a future version, please use '{freq[1:]}' instead."
        )
        rng = date_range("01-Jan-2012", periods=8, freq=freq)
        prng = rng.to_period()
        with tm.assert_produces_warning(FutureWarning, match=msg):
            # comparing against the deprecated alias triggers the warning
            assert prng.freq == freq_depr

    def test_to_period_infer(self):
        # https://github.com/pandas-dev/pandas/issues/33358
        # an explicit freq and an inferred one must produce the same result
        rng = date_range(
            start="2019-12-22 06:40:00+00:00",
            end="2019-12-22 08:45:00+00:00",
            freq="5min",
        )
        with tm.assert_produces_warning(UserWarning):
            pi1 = rng.to_period("5min")
        with tm.assert_produces_warning(UserWarning):
            pi2 = rng.to_period()
        tm.assert_index_equal(pi1, pi2)

    @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
    def test_period_dt64_round_trip(self):
        # to_period().to_timestamp() recovers the original index
        dti = date_range("1/1/2000", "1/7/2002", freq="B")
        pi = dti.to_period()
        tm.assert_index_equal(pi.to_timestamp(), dti)
        dti = date_range("1/1/2000", "1/7/2002", freq="B")
        pi = dti.to_period(freq="h")
        tm.assert_index_equal(pi.to_timestamp(), dti)

    def test_to_period_millisecond(self):
        index = DatetimeIndex(
            [
                Timestamp("2007-01-01 10:11:12.123456Z"),
                Timestamp("2007-01-01 10:11:13.789123Z"),
            ]
        )
        with tm.assert_produces_warning(UserWarning):
            # warning that timezone info will be lost
            period = index.to_period(freq="ms")
        assert 2 == len(period)
        assert period[0] == Period("2007-01-01 10:11:12.123Z", "ms")
        assert period[1] == Period("2007-01-01 10:11:13.789Z", "ms")

    def test_to_period_microsecond(self):
        index = DatetimeIndex(
            [
                Timestamp("2007-01-01 10:11:12.123456Z"),
                Timestamp("2007-01-01 10:11:13.789123Z"),
            ]
        )
        with tm.assert_produces_warning(UserWarning):
            # warning that timezone info will be lost
            period = index.to_period(freq="us")
        assert 2 == len(period)
        assert period[0] == Period("2007-01-01 10:11:12.123456Z", "us")
        assert period[1] == Period("2007-01-01 10:11:13.789123Z", "us")

    @pytest.mark.parametrize(
        "tz",
        ["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()],
    )
    def test_to_period_tz(self, tz):
        ts = date_range("1/1/2000", "2/1/2000", tz=tz)
        with tm.assert_produces_warning(UserWarning):
            # GH#21333 warning that timezone info will be lost
            # filter warning about freq deprecation
            result = ts.to_period()[0]
            expected = ts[0].to_period(ts.freq)
        assert result == expected
        expected = date_range("1/1/2000", "2/1/2000").to_period()
        with tm.assert_produces_warning(UserWarning):
            # GH#21333 warning that timezone info will be lost
            result = ts.to_period(ts.freq)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"])
    def test_to_period_tz_utc_offset_consistency(self, tz):
        # GH#22905
        # BUG FIX: use the parametrized tz; the original hard-coded
        # "Etc/GMT-1", which left the "Etc/GMT+1" case untested.
        ts = date_range("1/1/2000", "2/1/2000", tz=tz)
        with tm.assert_produces_warning(UserWarning):
            result = ts.to_period()[0]
            expected = ts[0].to_period(ts.freq)
            assert result == expected

    def test_to_period_nofreq(self):
        # no freq and none inferable -> error; inferable freq is used
        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"])
        msg = "You must pass a freq argument as current index has none."
        with pytest.raises(ValueError, match=msg):
            idx.to_period()
        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer")
        assert idx.freqstr == "D"
        expected = PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D")
        tm.assert_index_equal(idx.to_period(), expected)
        # GH#7606
        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
        assert idx.freqstr is None
        tm.assert_index_equal(idx.to_period(), expected)

    @pytest.mark.parametrize("freq", ["2BMS", "1SME-15"])
    def test_to_period_offsets_not_supported(self, freq):
        # GH#56243
        msg = f"{freq[1:]} is not supported as period frequency"
        ts = date_range("1/1/2012", periods=4, freq=freq)
        with pytest.raises(ValueError, match=msg):
            ts.to_period()

View File

@ -0,0 +1,51 @@
from datetime import (
datetime,
timezone,
)
import dateutil.parser
import dateutil.tz
from dateutil.tz import tzlocal
import numpy as np
from pandas import (
DatetimeIndex,
date_range,
to_datetime,
)
import pandas._testing as tm
from pandas.tests.indexes.datetimes.test_timezones import FixedOffset
fixed_off = FixedOffset(-420, "-07:00")
class TestToPyDatetime:
    """Conversions from DatetimeIndex back to stdlib ``datetime`` objects."""

    def test_dti_to_pydatetime(self):
        # to_datetime(..., utc=True) normalizes local-tz datetimes to UTC,
        # both from an object ndarray and from DatetimeIndex.to_pydatetime().
        local_dt = dateutil.parser.parse("2012-06-13T01:39:00Z").replace(
            tzinfo=tzlocal()
        )
        converted = to_datetime(np.array([local_dt], dtype=object), utc=True)
        assert converted.tz is timezone.utc
        rng = date_range("2012-11-03 03:00", "2012-11-05 03:00", tz=tzlocal())
        converted = to_datetime(rng.to_pydatetime(), utc=True)
        assert converted.tz is timezone.utc

    # NOTE(review): "fizedtz" is a long-standing typo for "fixedtz"; the
    # name is kept so test ids stay stable.
    def test_dti_to_pydatetime_fizedtz(self):
        # Fixed-offset datetimes round-trip unchanged through DatetimeIndex,
        # via both to_pydatetime() and the matplotlib repr hook.
        dates = np.array(
            [
                datetime(2000, 1, 1, tzinfo=fixed_off),
                datetime(2000, 1, 2, tzinfo=fixed_off),
                datetime(2000, 1, 3, tzinfo=fixed_off),
            ]
        )
        dti = DatetimeIndex(dates)
        tm.assert_numpy_array_equal(dates, dti.to_pydatetime())
        tm.assert_numpy_array_equal(dates, dti._mpl_repr())

View File

@ -0,0 +1,18 @@
import numpy as np
from pandas import (
DatetimeIndex,
Series,
)
import pandas._testing as tm
class TestToSeries:
    """DatetimeIndex.to_series keeps the tz-aware dtype and the index name."""

    def test_to_series(self):
        localized = DatetimeIndex(
            ["2013-1-1 13:00", "2013-1-2 14:00"], name="B"
        ).tz_localize("US/Pacific")
        expected = Series(np.array(localized.tolist(), dtype="object"), name="B")
        result = localized.to_series(index=[0, 1])
        # constructing from Timestamp objects must infer the tz-aware dtype
        assert expected.dtype == localized.dtype
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,283 @@
from datetime import datetime
import dateutil.tz
from dateutil.tz import gettz
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs import timezones
from pandas import (
DatetimeIndex,
Index,
NaT,
Timestamp,
date_range,
offsets,
)
import pandas._testing as tm
class TestTZConvert:
    """tz_convert on DatetimeIndex: NaT handling, DST transitions, and the
    invariant that conversion never changes the underlying i8 values."""

    def test_tz_convert_nat(self):
        # GH#5546: NaT passes through localize/convert/arithmetic untouched
        dates = [NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize("US/Pacific")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Eastern"))
        idx = idx.tz_convert("UTC")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="UTC"))
        dates = ["2010-12-01 00:00", "2010-12-02 00:00", NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize("US/Pacific")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        expected = ["2010-12-01 03:00", "2010-12-02 03:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
        idx = idx + offsets.Hour(5)
        expected = ["2010-12-01 08:00", "2010-12-02 08:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
        idx = idx.tz_convert("US/Pacific")
        expected = ["2010-12-01 05:00", "2010-12-02 05:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))
        idx = idx + np.timedelta64(3, "h")
        expected = ["2010-12-01 08:00", "2010-12-02 08:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        expected = ["2010-12-01 11:00", "2010-12-02 11:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))

    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
    def test_dti_tz_convert_compat_timestamp(self, prefix):
        # the scalar Timestamp.tz_convert agrees with the vectorized version
        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
        idx = DatetimeIndex(strdates, tz=prefix + "US/Eastern")
        conv = idx[0].tz_convert(prefix + "US/Pacific")
        expected = idx.tz_convert(prefix + "US/Pacific")[0]
        assert conv == expected

    def test_dti_tz_convert_hour_overflow_dst(self):
        # Regression test for GH#13306
        # sorted case US/Eastern -> UTC
        ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"]
        tt = DatetimeIndex(ts).tz_localize("US/Eastern")
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)
        # sorted case UTC -> US/Eastern
        ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"]
        tt = DatetimeIndex(ts).tz_localize("UTC")
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)
        # unsorted case US/Eastern -> UTC
        ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"]
        tt = DatetimeIndex(ts).tz_localize("US/Eastern")
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)
        # unsorted case UTC -> US/Eastern
        ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"]
        tt = DatetimeIndex(ts).tz_localize("UTC")
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz):
        # Regression test for GH#13306
        # sorted case US/Eastern -> UTC
        ts = [
            Timestamp("2008-05-12 09:50:00", tz=tz),
            Timestamp("2008-12-12 09:50:35", tz=tz),
            Timestamp("2009-05-12 09:50:32", tz=tz),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)
        # sorted case UTC -> US/Eastern
        ts = [
            Timestamp("2008-05-12 13:50:00", tz="UTC"),
            Timestamp("2008-12-12 14:50:35", tz="UTC"),
            Timestamp("2009-05-12 13:50:32", tz="UTC"),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)
        # unsorted case US/Eastern -> UTC
        ts = [
            Timestamp("2008-05-12 09:50:00", tz=tz),
            Timestamp("2008-12-12 09:50:35", tz=tz),
            Timestamp("2008-05-12 09:50:32", tz=tz),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)
        # unsorted case UTC -> US/Eastern
        ts = [
            Timestamp("2008-05-12 13:50:00", tz="UTC"),
            Timestamp("2008-12-12 14:50:35", tz="UTC"),
            Timestamp("2008-05-12 13:50:32", tz="UTC"),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

    @pytest.mark.parametrize("freq, n", [("h", 1), ("min", 60), ("s", 3600)])
    def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n):
        # Regression test for tslib.tz_convert(vals, tz1, tz2).
        # See GH#4496 for details.
        idx = date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq=freq)
        idx = idx.tz_localize("UTC")
        idx = idx.tz_convert("Europe/Moscow")
        expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1]))
        tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))

    def test_dti_tz_convert_dst(self):
        # Wall-clock hours around both US/Eastern DST transitions, at hourly,
        # minutely and secondly resolution, in both conversion directions.
        for freq, n in [("h", 1), ("min", 60), ("s", 3600)]:
            # Start DST
            idx = date_range(
                "2014-03-08 23:00", "2014-03-09 09:00", freq=freq, tz="UTC"
            )
            idx = idx.tz_convert("US/Eastern")
            expected = np.repeat(
                np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]),
                np.array([n, n, n, n, n, n, n, n, n, n, 1]),
            )
            tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
            idx = date_range(
                "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern"
            )
            idx = idx.tz_convert("UTC")
            expected = np.repeat(
                np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
                np.array([n, n, n, n, n, n, n, n, n, n, 1]),
            )
            tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
            # End DST
            idx = date_range(
                "2014-11-01 23:00", "2014-11-02 09:00", freq=freq, tz="UTC"
            )
            idx = idx.tz_convert("US/Eastern")
            expected = np.repeat(
                np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]),
                np.array([n, n, n, n, n, n, n, n, n, n, 1]),
            )
            tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
            idx = date_range(
                "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern"
            )
            idx = idx.tz_convert("UTC")
            expected = np.repeat(
                np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
                np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]),
            )
            tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
        # daily
        # Start DST
        idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC")
        idx = idx.tz_convert("US/Eastern")
        tm.assert_index_equal(idx.hour, Index([19, 19], dtype=np.int32))
        idx = date_range(
            "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern"
        )
        idx = idx.tz_convert("UTC")
        tm.assert_index_equal(idx.hour, Index([5, 5], dtype=np.int32))
        # End DST
        idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC")
        idx = idx.tz_convert("US/Eastern")
        tm.assert_index_equal(idx.hour, Index([20, 20], dtype=np.int32))
        idx = date_range(
            # typo fix: was "2014-11-02 000:00" (dateutil parsed it the same)
            "2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="US/Eastern"
        )
        idx = idx.tz_convert("UTC")
        tm.assert_index_equal(idx.hour, Index([4, 4], dtype=np.int32))

    def test_tz_convert_roundtrip(self, tz_aware_fixture):
        # converting to tz and back to naive recovers the original wall times
        tz = tz_aware_fixture
        idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME", tz="UTC")
        exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME")
        idx2 = date_range(start="2014-01-01", end="2014-12-31", freq="D", tz="UTC")
        exp2 = date_range(start="2014-01-01", end="2014-12-31", freq="D")
        idx3 = date_range(start="2014-01-01", end="2014-03-01", freq="h", tz="UTC")
        exp3 = date_range(start="2014-01-01", end="2014-03-01", freq="h")
        idx4 = date_range(start="2014-08-01", end="2014-10-31", freq="min", tz="UTC")
        exp4 = date_range(start="2014-08-01", end="2014-10-31", freq="min")
        for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), (idx4, exp4)]:
            converted = idx.tz_convert(tz)
            reset = converted.tz_convert(None)
            tm.assert_index_equal(reset, expected)
            assert reset.tzinfo is None
            expected = converted.tz_convert("UTC").tz_localize(None)
            expected = expected._with_freq("infer")
            tm.assert_index_equal(reset, expected)

    def test_dti_tz_convert_tzlocal(self):
        # GH#13583
        # tz_convert doesn't affect to internal
        dti = date_range(start="2001-01-01", end="2001-03-01", tz="UTC")
        dti2 = dti.tz_convert(dateutil.tz.tzlocal())
        tm.assert_numpy_array_equal(dti2.asi8, dti.asi8)
        dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal())
        dti2 = dti.tz_convert(None)
        tm.assert_numpy_array_equal(dti2.asi8, dti.asi8)

    @pytest.mark.parametrize(
        "tz",
        [
            "US/Eastern",
            "dateutil/US/Eastern",
            pytz.timezone("US/Eastern"),
            gettz("US/Eastern"),
        ],
    )
    def test_dti_tz_convert_utc_to_local_no_modify(self, tz):
        rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc")
        rng_eastern = rng.tz_convert(tz)
        # Values are unmodified
        tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8)
        assert timezones.tz_compare(rng_eastern.tz, timezones.maybe_get_tz(tz))

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_tz_convert_unsorted(self, tzstr):
        # reversing the index also reverses the converted wall-clock hours
        dr = date_range("2012-03-09", freq="h", periods=100, tz="utc")
        dr = dr.tz_convert(tzstr)
        result = dr[::-1].hour
        exp = dr.hour[::-1]
        tm.assert_almost_equal(result, exp)

View File

@ -0,0 +1,402 @@
from datetime import (
datetime,
timedelta,
)
import dateutil.tz
from dateutil.tz import gettz
import numpy as np
import pytest
import pytz
from pandas import (
DatetimeIndex,
Timestamp,
bdate_range,
date_range,
offsets,
to_datetime,
)
import pandas._testing as tm
# Build the list of "US/Eastern" timezone objects to parametrize over, one
# per available tz implementation (pytz, dateutil, and stdlib zoneinfo).
try:
    # zoneinfo is stdlib only from Python 3.9 onward
    from zoneinfo import ZoneInfo
except ImportError:
    # Cannot assign to a type [misc]
    ZoneInfo = None  # type: ignore[misc, assignment]
easts = [pytz.timezone("US/Eastern"), gettz("US/Eastern")]
if ZoneInfo is not None:
    try:
        tz = ZoneInfo("US/Eastern")
    except KeyError:
        # no tzdata
        pass
    else:
        easts.append(tz)
class TestTZLocalize:
def test_tz_localize_invalidates_freq(self):
# we only preserve freq in unambiguous cases
# if localized to US/Eastern, this crosses a DST transition
dti = date_range("2014-03-08 23:00", "2014-03-09 09:00", freq="h")
assert dti.freq == "h"
result = dti.tz_localize(None) # no-op
assert result.freq == "h"
result = dti.tz_localize("UTC") # unambiguous freq preservation
assert result.freq == "h"
result = dti.tz_localize("US/Eastern", nonexistent="shift_forward")
assert result.freq is None
assert result.inferred_freq is None # i.e. we are not _too_ strict here
# Case where we _can_ keep freq because we're length==1
dti2 = dti[:1]
result = dti2.tz_localize("US/Eastern")
assert result.freq == "h"
def test_tz_localize_utc_copies(self, utc_fixture):
# GH#46460
times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
index = DatetimeIndex(times)
res = index.tz_localize(utc_fixture)
assert not tm.shares_memory(res, index)
res2 = index._data.tz_localize(utc_fixture)
assert not tm.shares_memory(index._data, res2)
def test_dti_tz_localize_nonexistent_raise_coerce(self):
# GH#13057
times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
index = DatetimeIndex(times)
tz = "US/Eastern"
with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)):
index.tz_localize(tz=tz)
with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)):
index.tz_localize(tz=tz, nonexistent="raise")
result = index.tz_localize(tz=tz, nonexistent="NaT")
test_times = ["2015-03-08 01:00-05:00", "NaT", "2015-03-08 03:00-04:00"]
dti = to_datetime(test_times, utc=True)
expected = dti.tz_convert("US/Eastern")
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_infer(self, tz):
# November 6, 2011, fall back, repeat 2 AM hour
# With no repeated hours, we cannot infer the transition
dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour())
with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
dr.tz_localize(tz)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_infer2(self, tz, unit):
# With repeated hours, we can infer the transition
dr = date_range(
datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour(), tz=tz, unit=unit
)
times = [
"11/06/2011 00:00",
"11/06/2011 01:00",
"11/06/2011 01:00",
"11/06/2011 02:00",
"11/06/2011 03:00",
]
di = DatetimeIndex(times).as_unit(unit)
result = di.tz_localize(tz, ambiguous="infer")
expected = dr._with_freq(None)
tm.assert_index_equal(result, expected)
result2 = DatetimeIndex(times, tz=tz, ambiguous="infer").as_unit(unit)
tm.assert_index_equal(result2, expected)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_infer3(self, tz):
    # When there is no dst transition, nothing special happens
    # (ambiguous="infer" is a no-op away from the fold).
    dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=offsets.Hour())
    localized = dr.tz_localize(tz)
    localized_infer = dr.tz_localize(tz, ambiguous="infer")
    tm.assert_index_equal(localized, localized_infer)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_times(self, tz):
    # March 13, 2011, spring forward, skip from 2 AM to 3 AM
    dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=offsets.Hour())
    with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:30:00"):
        dr.tz_localize(tz)
    # after dst transition, it works
    # (result deliberately unused: constructing without raising is the check)
    dr = date_range(
        datetime(2011, 3, 13, 3, 30), periods=3, freq=offsets.Hour(), tz=tz
    )
    # November 6, 2011, fall back, repeat 2 AM hour
    dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=offsets.Hour())
    with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
        dr.tz_localize(tz)
    # UTC is OK (no DST transitions; again a construction smoke check)
    dr = date_range(
        datetime(2011, 3, 13), periods=48, freq=offsets.Minute(30), tz=pytz.utc
    )
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
    # Localizing a naive index must be equivalent to constructing the
    # index tz-aware from the same date strings.
    strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
    idx = DatetimeIndex(strdates)
    conv = idx.tz_localize(tzstr)
    fromdates = DatetimeIndex(strdates, tz=tzstr)
    assert conv.tz == fromdates.tz
    tm.assert_numpy_array_equal(conv.values, fromdates.values)
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_tz_localize(self, prefix):
    # Localize a ms-resolution range to US/Eastern, check the UTC values,
    # then exercise the DST error paths (ambiguous fold, nonexistent gap).
    tzstr = prefix + "US/Eastern"
    dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms")
    dti2 = dti.tz_localize(tzstr)
    # Jan 1 is EST (UTC-5), so the UTC wall clock is shifted by 5 hours
    dti_utc = date_range(
        start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc"
    )
    tm.assert_numpy_array_equal(dti2.values, dti_utc.values)
    # converting tz does not change the underlying UTC values
    dti3 = dti2.tz_convert(prefix + "US/Pacific")
    tm.assert_numpy_array_equal(dti3.values, dti_utc.values)
    # fall-back fold: ambiguous without a hint
    dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms")
    with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
        dti.tz_localize(tzstr)
    # spring-forward gap: nonexistent wall time
    dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms")
    with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:00:00"):
        dti.tz_localize(tzstr)
@pytest.mark.parametrize(
    "tz",
    [
        "US/Eastern",
        "dateutil/US/Eastern",
        pytz.timezone("US/Eastern"),
        gettz("US/Eastern"),
    ],
)
def test_dti_tz_localize_utc_conversion(self, tz):
    # Localizing to time zone should:
    # 1) check for DST ambiguities
    # 2) convert to UTC
    rng = date_range("3/10/2012", "3/11/2012", freq="30min")
    converted = rng.tz_localize(tz)
    # March 10 is EST (UTC-5), so i8 values shift by exactly 5 hours
    expected_naive = rng + offsets.Hour(5)
    tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8)
    # DST ambiguity, this should fail
    rng = date_range("3/11/2012", "3/12/2012", freq="30min")
    # Is this really how it should fail??
    with pytest.raises(pytz.NonExistentTimeError, match="2012-03-11 02:00:00"):
        rng.tz_localize(tz)
def test_dti_tz_localize_roundtrip(self, tz_aware_fixture):
    # note: this tz tests that a tz-naive index can be localized
    # and de-localized successfully, when there are no DST transitions
    # in the range.
    idx = date_range(start="2014-06-01", end="2014-08-30", freq="15min")
    tz = tz_aware_fixture
    localized = idx.tz_localize(tz)
    # can't localize a tz-aware object
    with pytest.raises(
        TypeError, match="Already tz-aware, use tz_convert to convert"
    ):
        localized.tz_localize(tz)
    # tz_localize(None) strips the tz and restores the original wall times
    reset = localized.tz_localize(None)
    assert reset.tzinfo is None
    # the round trip drops the inferred freq, so compare against freq=None
    expected = idx._with_freq(None)
    tm.assert_index_equal(reset, expected)
def test_dti_tz_localize_naive(self):
rng = date_range("1/1/2011", periods=100, freq="h")
conv = rng.tz_localize("US/Pacific")
exp = date_range("1/1/2011", periods=100, freq="h", tz="US/Pacific")
tm.assert_index_equal(conv, exp._with_freq(None))
def test_dti_tz_localize_tzlocal(self):
    # GH#13583
    # NOTE(review): result depends on the machine's local timezone via
    # dateutil.tz.tzlocal(); the offset is computed from the same source,
    # so the assertions hold regardless of locale.
    offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1))
    offset = int(offset.total_seconds() * 1000000000)  # offset in nanoseconds
    dti = date_range(start="2001-01-01", end="2001-03-01")
    dti2 = dti.tz_localize(dateutil.tz.tzlocal())
    tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8)
    # and the inverse: de-localizing shifts i8 values the other way
    dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal())
    dti2 = dti.tz_localize(None)
    tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_nat(self, tz):
    # ambiguous="NaT" turns the ambiguous fall-back hour into NaT.
    times = [
        "11/06/2011 00:00",
        "11/06/2011 01:00",
        "11/06/2011 01:00",
        "11/06/2011 02:00",
        "11/06/2011 03:00",
    ]
    di = DatetimeIndex(times)
    localized = di.tz_localize(tz, ambiguous="NaT")
    times = [
        "11/06/2011 00:00",
        np.nan,
        np.nan,
        "11/06/2011 02:00",
        "11/06/2011 03:00",
    ]
    # expected is built with the fixed "US/Eastern" string on purpose:
    # only the values are compared below, not the tz/dtype
    di_test = DatetimeIndex(times, tz="US/Eastern")
    # left dtype is datetime64[ns, US/Eastern]
    # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')]
    tm.assert_numpy_array_equal(di_test.values, localized.values)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_flags(self, tz, unit):
    # November 6, 2011, fall back, repeat 2 AM hour
    # Pass in flags to determine right dst transition
    dr = date_range(
        datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour(), tz=tz, unit=unit
    )
    times = [
        "11/06/2011 00:00",
        "11/06/2011 01:00",
        "11/06/2011 01:00",
        "11/06/2011 02:00",
        "11/06/2011 03:00",
    ]
    # Test tz_localize
    di = DatetimeIndex(times).as_unit(unit)
    # one flag per element: 1/True -> DST side of the fold, 0/False -> standard
    is_dst = [1, 1, 0, 0, 0]
    localized = di.tz_localize(tz, ambiguous=is_dst)
    expected = dr._with_freq(None)
    tm.assert_index_equal(expected, localized)
    result = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit)
    tm.assert_index_equal(result, expected)
    # ndarray flags (int and bool dtypes) behave like the list
    localized = di.tz_localize(tz, ambiguous=np.array(is_dst))
    tm.assert_index_equal(dr, localized)
    localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype("bool"))
    tm.assert_index_equal(dr, localized)
    # Test constructor
    localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit)
    tm.assert_index_equal(dr, localized)
    # Test duplicate times where inferring the dst fails
    times += times
    di = DatetimeIndex(times).as_unit(unit)
    # When the sizes are incompatible, make sure error is raised
    msg = "Length of ambiguous bool-array must be the same size as vals"
    with pytest.raises(Exception, match=msg):
        di.tz_localize(tz, ambiguous=is_dst)
    # When sizes are compatible and there are repeats ('infer' won't work)
    is_dst = np.hstack((is_dst, is_dst))
    localized = di.tz_localize(tz, ambiguous=is_dst)
    dr = dr.append(dr)
    tm.assert_index_equal(dr, localized)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_flags2(self, tz, unit):
    # When there is no dst transition, nothing special happens
    # (explicit flags are ignored away from the fold).
    dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=offsets.Hour())
    is_dst = np.array([1] * 10)
    localized = dr.tz_localize(tz)
    localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst)
    tm.assert_index_equal(localized, localized_is_dst)
def test_dti_tz_localize_bdate_range(self):
    # Localizing a naive business-day range to UTC matches constructing
    # the same range with tz=UTC directly.
    naive = bdate_range("1/1/2009", "1/1/2010")
    expected = bdate_range("1/1/2009", "1/1/2010", tz=pytz.utc)
    localized = naive.tz_localize(pytz.utc)
    tm.assert_index_equal(expected, localized)
@pytest.mark.parametrize(
    "start_ts, tz, end_ts, shift",
    [
        ["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"],
        [
            "2015-03-29 02:20:00",
            "Europe/Warsaw",
            "2015-03-29 01:59:59.999999999",
            "backward",
        ],
        [
            "2015-03-29 02:20:00",
            "Europe/Warsaw",
            "2015-03-29 03:20:00",
            timedelta(hours=1),
        ],
        [
            "2015-03-29 02:20:00",
            "Europe/Warsaw",
            "2015-03-29 01:20:00",
            timedelta(hours=-1),
        ],
        ["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"],
        [
            "2018-03-11 02:33:00",
            "US/Pacific",
            "2018-03-11 01:59:59.999999999",
            "backward",
        ],
        [
            "2018-03-11 02:33:00",
            "US/Pacific",
            "2018-03-11 03:33:00",
            timedelta(hours=1),
        ],
        [
            "2018-03-11 02:33:00",
            "US/Pacific",
            "2018-03-11 01:33:00",
            timedelta(hours=-1),
        ],
    ],
)
@pytest.mark.parametrize("tz_type", ["", "dateutil/"])
def test_dti_tz_localize_nonexistent_shift(
    self, start_ts, tz, end_ts, shift, tz_type, unit
):
    # GH#8917
    # nonexistent="shift_forward"/"shift_backward" or a timedelta moves a
    # wall time that falls in the DST gap to the expected end_ts.
    tz = tz_type + tz
    if isinstance(shift, str):
        shift = "shift_" + shift
    dti = DatetimeIndex([Timestamp(start_ts)]).as_unit(unit)
    result = dti.tz_localize(tz, nonexistent=shift)
    expected = DatetimeIndex([Timestamp(end_ts)]).tz_localize(tz).as_unit(unit)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("offset", [-1, 1])
def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, warsaw):
    # GH#8917
    # A timedelta that lands the shifted value back inside the DST gap
    # is rejected.
    tz = warsaw
    dti = DatetimeIndex([Timestamp("2015-03-29 02:20:00")])
    msg = "The provided timedelta will relocalize on a nonexistent time"
    with pytest.raises(ValueError, match=msg):
        dti.tz_localize(tz, nonexistent=timedelta(seconds=offset))

View File

@ -0,0 +1,77 @@
from datetime import (
datetime,
timedelta,
)
from pandas import (
DatetimeIndex,
NaT,
Timestamp,
)
import pandas._testing as tm
def test_unique(tz_naive_fixture):
idx = DatetimeIndex(["2017"] * 2, tz=tz_naive_fixture)
expected = idx[:1]
result = idx.unique()
tm.assert_index_equal(result, expected)
# GH#21737
# Ensure the underlying data is consistent
assert result[0] == expected[0]
def test_index_unique(rand_series_with_duplicate_datetimeindex):
dups = rand_series_with_duplicate_datetimeindex
index = dups.index
uniques = index.unique()
expected = DatetimeIndex(
[
datetime(2000, 1, 2),
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
],
dtype=index.dtype,
)
assert uniques.dtype == index.dtype # sanity
tm.assert_index_equal(uniques, expected)
assert index.nunique() == 4
# GH#2563
assert isinstance(uniques, DatetimeIndex)
dups_local = index.tz_localize("US/Eastern")
dups_local.name = "foo"
result = dups_local.unique()
expected = DatetimeIndex(expected, name="foo")
expected = expected.tz_localize("US/Eastern")
assert result.tz is not None
assert result.name == "foo"
tm.assert_index_equal(result, expected)
def test_index_unique2():
    # 20 distinct integer timestamps plus NaT; NaT is excluded by default
    # but counted when dropna=False.
    base = [1370745748 + step for step in range(20)]
    base.append(NaT._value)
    tripled = DatetimeIndex(base * 3)
    tm.assert_index_equal(tripled.unique(), DatetimeIndex(base))
    assert tripled.nunique() == 20
    assert tripled.nunique(dropna=False) == 21
def test_index_unique3():
    # 20 distinct Timestamps one second apart, plus NaT; NaT is excluded
    # by default but counted when dropna=False.
    start = Timestamp("2013-06-09 02:42:28")
    stamps = [start + timedelta(seconds=step) for step in range(20)]
    stamps.append(NaT)
    tripled = DatetimeIndex(stamps * 3)
    tm.assert_index_equal(tripled.unique(), DatetimeIndex(stamps))
    assert tripled.nunique() == 20
    assert tripled.nunique(dropna=False) == 21
def test_is_unique_monotonic(rand_series_with_duplicate_datetimeindex):
index = rand_series_with_duplicate_datetimeindex.index
assert not index.is_unique

View File

@ -0,0 +1,56 @@
# Arithmetic tests specific to DatetimeIndex are generally about `freq`
# retention or inference. Other arithmetic tests belong in
# tests/arithmetic/test_datetime64.py
import pytest
from pandas import (
Timedelta,
TimedeltaIndex,
Timestamp,
date_range,
timedelta_range,
)
import pandas._testing as tm
class TestDatetimeIndexArithmetic:
    """Freq-retention behavior of DatetimeIndex arithmetic."""

    def test_add_timedelta_preserves_freq(self):
        # GH#37295 should hold for any DTI with freq=None or Tick freq
        tz = "Canada/Eastern"
        dti = date_range(
            start=Timestamp("2019-03-26 00:00:00-0400", tz=tz),
            end=Timestamp("2020-10-17 00:00:00-0400", tz=tz),
            freq="D",
        )
        result = dti + Timedelta(days=1)
        assert result.freq == dti.freq

    def test_sub_datetime_preserves_freq(self, tz_naive_fixture):
        # GH#48818
        # Subtracting a scalar Timestamp from a daily range yields a
        # TimedeltaIndex with the same (daily) freq.
        dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture)
        res = dti - dti[0]
        expected = timedelta_range("0 Days", "11 Days")
        tm.assert_index_equal(res, expected)
        assert res.freq == expected.freq

    @pytest.mark.xfail(
        reason="The inherited freq is incorrect bc dti.freq is incorrect "
        "https://github.com/pandas-dev/pandas/pull/48818/files#r982793461"
    )
    def test_sub_datetime_preserves_freq_across_dst(self):
        # GH#48818
        # Across a spring-forward transition the last day is only 23h,
        # so the true differences are not evenly spaced.
        ts = Timestamp("2016-03-11", tz="US/Pacific")
        dti = date_range(ts, periods=4)
        res = dti - dti[0]
        expected = TimedeltaIndex(
            [
                Timedelta(days=0),
                Timedelta(days=1),
                Timedelta(days=2),
                Timedelta(days=2, hours=23),
            ]
        )
        tm.assert_index_equal(res, expected)
        assert res.freq == expected.freq

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,216 @@
import datetime as dt
from datetime import date
import re
import numpy as np
import pytest
from pandas.compat.numpy import np_long
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Timestamp,
date_range,
offsets,
)
import pandas._testing as tm
class TestDatetimeIndex:
    """Miscellaneous DatetimeIndex behavior: identity, 32-bit overflow,
    appends/unions, numpy interop, and deprecated frequency aliases."""

    def test_is_(self):
        # is_() is view-based identity: a view shares data, a copy does not.
        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
        assert dti.is_(dti)
        assert dti.is_(dti.view())
        assert not dti.is_(dti.copy())

    def test_time_overflow_for_32bit_machines(self):
        # GH8943. On some machines NumPy defaults to np.int32 (for example,
        # 32-bit Linux machines). In the function _generate_regular_range
        # found in tseries/index.py, `periods` gets multiplied by `strides`
        # (which has value 1e9) and since the max value for np.int32 is ~2e9,
        # and since those machines won't promote np.int32 to np.int64, we get
        # overflow.
        periods = np_long(1000)
        idx1 = date_range(start="2000", periods=periods, freq="s")
        assert len(idx1) == periods
        idx2 = date_range(end="2000", periods=periods, freq="s")
        assert len(idx2) == periods

    def test_nat(self):
        # np.nan in the constructor becomes pd.NaT on access
        assert DatetimeIndex([np.nan])[0] is pd.NaT

    def test_week_of_month_frequency(self):
        # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
        d1 = date(2002, 9, 1)
        d2 = date(2013, 10, 27)
        d3 = date(2012, 9, 30)
        idx1 = DatetimeIndex([d1, d2])
        idx2 = DatetimeIndex([d3])
        # append keeps insertion order
        result_append = idx1.append(idx2)
        expected = DatetimeIndex([d1, d2, d3])
        tm.assert_index_equal(result_append, expected)
        # union sorts
        result_union = idx1.union(idx2)
        expected = DatetimeIndex([d1, d3, d2])
        tm.assert_index_equal(result_union, expected)

    def test_append_nondatetimeindex(self):
        # appending a non-datetime Index falls back to object, but the
        # datetime entries stay Timestamps
        rng = date_range("1/1/2000", periods=10)
        idx = Index(["a", "b", "c", "d"])
        result = rng.append(idx)
        assert isinstance(result[0], Timestamp)

    def test_misc_coverage(self):
        # groupby on an index attribute yields Timestamp members
        rng = date_range("1/1/2000", periods=5)
        result = rng.groupby(rng.day)
        assert isinstance(next(iter(result.values()))[0], Timestamp)

    # TODO: belongs in frame groupby tests?
    def test_groupby_function_tuple_1677(self):
        df = DataFrame(
            np.random.default_rng(2).random(100),
            index=date_range("1/1/2000", periods=100),
        )
        monthly_group = df.groupby(lambda x: (x.year, x.month))
        result = monthly_group.mean()
        # tuple group keys must survive as tuples, not be coerced
        assert isinstance(result.index[0], tuple)

    def assert_index_parameters(self, index):
        # shared helper: the index must carry/infer the exact nano freq
        assert index.freq == "40960ns"
        assert index.inferred_freq == "40960ns"

    def test_ns_index(self):
        # nanosecond-resolution regular range round-trips through
        # DatetimeIndex and date_range with its freq intact
        nsamples = 400
        ns = int(1e9 / 24414)
        dtstart = np.datetime64("2012-09-20T00:00:00")
        dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
        freq = ns * offsets.Nano()
        index = DatetimeIndex(dt, freq=freq, name="time")
        self.assert_index_parameters(index)
        new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
        self.assert_index_parameters(new_index)

    def test_asarray_tz_naive(self):
        # This shouldn't produce a warning.
        idx = date_range("2000", periods=2)
        # M8[ns] by default
        result = np.asarray(idx)
        expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
        tm.assert_numpy_array_equal(result, expected)
        # optionally, object
        result = np.asarray(idx, dtype=object)
        expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")])
        tm.assert_numpy_array_equal(result, expected)

    def test_asarray_tz_aware(self):
        tz = "US/Central"
        idx = date_range("2000", periods=2, tz=tz)
        # converting to M8[ns] drops the tz and yields UTC wall times
        expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
        result = np.asarray(idx, dtype="datetime64[ns]")
        tm.assert_numpy_array_equal(result, expected)
        # Old behavior with no warning
        result = np.asarray(idx, dtype="M8[ns]")
        tm.assert_numpy_array_equal(result, expected)
        # Future behavior with no warning
        expected = np.array(
            [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
        )
        result = np.asarray(idx, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

    def test_CBH_deprecated(self):
        # 'CBH' alias warns; 'cbh' spelling is the surviving one
        msg = "'CBH' is deprecated and will be removed in a future version."
        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(
                dt.datetime(2022, 12, 11), dt.datetime(2022, 12, 13), freq="CBH"
            )
        result = DatetimeIndex(
            [
                "2022-12-12 09:00:00",
                "2022-12-12 10:00:00",
                "2022-12-12 11:00:00",
                "2022-12-12 12:00:00",
                "2022-12-12 13:00:00",
                "2022-12-12 14:00:00",
                "2022-12-12 15:00:00",
                "2022-12-12 16:00:00",
            ],
            dtype="datetime64[ns]",
            freq="cbh",
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq_depr, expected_values, expected_freq",
        [
            (
                "AS-AUG",
                ["2021-08-01", "2022-08-01", "2023-08-01"],
                "YS-AUG",
            ),
            (
                "1BAS-MAY",
                ["2021-05-03", "2022-05-02", "2023-05-01"],
                "1BYS-MAY",
            ),
        ],
    )
    def test_AS_BAS_deprecated(self, freq_depr, expected_values, expected_freq):
        # GH#55479
        # strip any leading multiplier digits to get the bare alias
        freq_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
        msg = f"'{freq_msg}' is deprecated and will be removed in a future version."
        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(
                dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1), freq=freq_depr
            )
        result = DatetimeIndex(
            expected_values,
            dtype="datetime64[ns]",
            freq=expected_freq,
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq, expected_values, freq_depr",
        [
            ("2BYE-MAR", ["2016-03-31"], "2BA-MAR"),
            ("2BYE-JUN", ["2016-06-30"], "2BY-JUN"),
            ("2BME", ["2016-02-29", "2016-04-29", "2016-06-30"], "2BM"),
            ("2BQE", ["2016-03-31"], "2BQ"),
            ("1BQE-MAR", ["2016-03-31", "2016-06-30"], "1BQ-MAR"),
        ],
    )
    def test_BM_BQ_BY_deprecated(self, freq, expected_values, freq_depr):
        # GH#52064
        # BUG FIX: the second f-string used to be a dangling expression
        # statement (not concatenated into `msg`), so only the first half
        # of the deprecation message was actually matched. Parenthesize so
        # implicit string concatenation builds the full message.
        msg = (
            f"'{freq_depr[1:]}' is deprecated and will be removed "
            f"in a future version, please use '{freq[1:]}' instead."
        )
        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(start="2016-02-21", end="2016-08-21", freq=freq_depr)
        result = DatetimeIndex(
            data=expected_values,
            dtype="datetime64[ns]",
            freq=freq,
        )
        tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,356 @@
from datetime import datetime
import dateutil.tz
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
DatetimeIndex,
NaT,
Series,
)
import pandas._testing as tm
@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
    # datetime64 resolution under test; parametrizes tests over every
    # supported unit
    return request.param
def test_get_values_for_csv():
    # _get_values_for_csv renders a DatetimeIndex to an object array of
    # strings, honoring na_rep and date_format.
    index = pd.date_range(freq="1D", periods=3, start="2017-01-01")
    # First, with no arguments.
    expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
    result = index._get_values_for_csv()
    tm.assert_numpy_array_equal(result, expected)
    # No NaN values, so na_rep has no effect
    result = index._get_values_for_csv(na_rep="pandas")
    tm.assert_numpy_array_equal(result, expected)
    # Make sure date formatting works
    expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
    result = index._get_values_for_csv(date_format="%m-%Y-%d")
    tm.assert_numpy_array_equal(result, expected)
    # NULL object handling should work
    index = DatetimeIndex(["2017-01-01", NaT, "2017-01-03"])
    expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
    result = index._get_values_for_csv(na_rep="NaT")
    tm.assert_numpy_array_equal(result, expected)
    expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
    result = index._get_values_for_csv(na_rep="pandas")
    tm.assert_numpy_array_equal(result, expected)
    # na_rep and date_format combine; NaT is not formatted
    result = index._get_values_for_csv(na_rep="NaT", date_format="%Y-%m-%d %H:%M:%S.%f")
    expected = np.array(
        ["2017-01-01 00:00:00.000000", "NaT", "2017-01-03 00:00:00.000000"],
        dtype=object,
    )
    tm.assert_numpy_array_equal(result, expected)
    # invalid format
    # (a format with no strftime directives is echoed back literally)
    result = index._get_values_for_csv(na_rep="NaT", date_format="foo")
    expected = np.array(["foo", "NaT", "foo"], dtype=object)
    tm.assert_numpy_array_equal(result, expected)
class TestDatetimeIndexRendering:
    """repr/str/_summary rendering of DatetimeIndex (and the Series view),
    across units, time zones, and NaT.  Expectations are byte-exact."""

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_with_timezone_repr(self, tzstr):
        rng = pd.date_range("4/13/2010", "5/6/2010")
        rng_eastern = rng.tz_localize(tzstr)
        rng_repr = repr(rng_eastern)
        assert "2010-04-13 00:00:00" in rng_repr

    def test_dti_repr_dates(self):
        # date-only values render without a time component
        text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)]))
        assert "['2013-01-01'," in text
        assert ", '2014-01-01']" in text

    def test_dti_repr_mixed(self):
        # one non-midnight value forces times onto every entry
        text = str(
            pd.to_datetime(
                [datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)]
            )
        )
        assert "'2013-01-01 00:00:00'," in text
        assert "'2014-01-01 00:00:00']" in text

    def test_dti_repr_short(self):
        # smoke test: tiny indexes must repr without error
        dr = pd.date_range(start="1/1/2012", periods=1)
        repr(dr)
        dr = pd.date_range(start="1/1/2012", periods=2)
        repr(dr)
        dr = pd.date_range(start="1/1/2012", periods=3)
        repr(dr)

    @pytest.mark.parametrize(
        "dates, freq, expected_repr",
        [
            (
                ["2012-01-01 00:00:00"],
                "60min",
                (
                    "DatetimeIndex(['2012-01-01 00:00:00'], "
                    "dtype='datetime64[ns]', freq='60min')"
                ),
            ),
            (
                ["2012-01-01 00:00:00", "2012-01-01 01:00:00"],
                "60min",
                "DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 01:00:00'], "
                "dtype='datetime64[ns]', freq='60min')",
            ),
            (
                ["2012-01-01"],
                "24h",
                "DatetimeIndex(['2012-01-01'], dtype='datetime64[ns]', freq='24h')",
            ),
        ],
    )
    def test_dti_repr_time_midnight(self, dates, freq, expected_repr, unit):
        # GH53634
        dti = DatetimeIndex(dates, freq).as_unit(unit)
        actual_repr = repr(dti)
        assert actual_repr == expected_repr.replace("[ns]", f"[{unit}]")

    def test_dti_representation(self, unit):
        # repr and str agree and are exact for empty/short/tz-aware/NaT cases
        idxs = []
        idxs.append(DatetimeIndex([], freq="D"))
        idxs.append(DatetimeIndex(["2011-01-01"], freq="D"))
        idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"))
        idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"))
        idxs.append(
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="h",
                tz="Asia/Tokyo",
            )
        )
        idxs.append(
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
            )
        )
        idxs.append(
            DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="UTC")
        )
        exp = []
        exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')")
        exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')")
        exp.append(
            "DatetimeIndex(['2011-01-01', '2011-01-02'], "
            "dtype='datetime64[ns]', freq='D')"
        )
        exp.append(
            "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
            "dtype='datetime64[ns]', freq='D')"
        )
        exp.append(
            "DatetimeIndex(['2011-01-01 09:00:00+09:00', "
            "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
            ", dtype='datetime64[ns, Asia/Tokyo]', freq='h')"
        )
        exp.append(
            "DatetimeIndex(['2011-01-01 09:00:00-05:00', "
            "'2011-01-01 10:00:00-05:00', 'NaT'], "
            "dtype='datetime64[ns, US/Eastern]', freq=None)"
        )
        exp.append(
            "DatetimeIndex(['2011-01-01 09:00:00+00:00', "
            "'2011-01-01 10:00:00+00:00', 'NaT'], "
            "dtype='datetime64[ns, UTC]', freq=None)"
            ""
        )
        with pd.option_context("display.width", 300):
            for index, expected in zip(idxs, exp):
                # rewrite the expected dtype for the parametrized unit
                index = index.as_unit(unit)
                expected = expected.replace("[ns", f"[{unit}")
                result = repr(index)
                assert result == expected
                result = str(index)
                assert result == expected

    # TODO: this is a Series.__repr__ test
    def test_dti_representation_to_series(self, unit):
        idx1 = DatetimeIndex([], freq="D")
        idx2 = DatetimeIndex(["2011-01-01"], freq="D")
        idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
        idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
        idx5 = DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
            freq="h",
            tz="Asia/Tokyo",
        )
        idx6 = DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
        )
        idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"])
        exp1 = """Series([], dtype: datetime64[ns])"""
        exp2 = "0 2011-01-01\ndtype: datetime64[ns]"
        exp3 = "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]"
        exp4 = (
            "0 2011-01-01\n"
            "1 2011-01-02\n"
            "2 2011-01-03\n"
            "dtype: datetime64[ns]"
        )
        exp5 = (
            "0 2011-01-01 09:00:00+09:00\n"
            "1 2011-01-01 10:00:00+09:00\n"
            "2 2011-01-01 11:00:00+09:00\n"
            "dtype: datetime64[ns, Asia/Tokyo]"
        )
        exp6 = (
            "0 2011-01-01 09:00:00-05:00\n"
            "1 2011-01-01 10:00:00-05:00\n"
            "2 NaT\n"
            "dtype: datetime64[ns, US/Eastern]"
        )
        exp7 = (
            "0 2011-01-01 09:00:00\n"
            "1 2011-01-02 10:15:00\n"
            "dtype: datetime64[ns]"
        )
        with pd.option_context("display.width", 300):
            for idx, expected in zip(
                [idx1, idx2, idx3, idx4, idx5, idx6, idx7],
                [exp1, exp2, exp3, exp4, exp5, exp6, exp7],
            ):
                ser = Series(idx.as_unit(unit))
                result = repr(ser)
                assert result == expected.replace("[ns", f"[{unit}")

    def test_dti_summary(self):
        # GH#9116
        idx1 = DatetimeIndex([], freq="D")
        idx2 = DatetimeIndex(["2011-01-01"], freq="D")
        idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
        idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
        idx5 = DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
            freq="h",
            tz="Asia/Tokyo",
        )
        idx6 = DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
        )
        exp1 = "DatetimeIndex: 0 entries\nFreq: D"
        exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D"
        exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D"
        exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D"
        exp5 = (
            "DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
            "to 2011-01-01 11:00:00+09:00\n"
            "Freq: h"
        )
        exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT"""
        for idx, expected in zip(
            [idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]
        ):
            result = idx._summary()
            assert result == expected

    @pytest.mark.parametrize("tz", [None, pytz.utc, dateutil.tz.tzutc()])
    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_dti_business_repr_etc_smoke(self, tz, freq):
        # only really care that it works
        dti = pd.bdate_range(
            datetime(2009, 1, 1), datetime(2010, 1, 1), tz=tz, freq=freq
        )
        repr(dti)
        dti._summary()
        dti[2:2]._summary()
class TestFormat:
    """DatetimeIndex.format behavior; every call must raise the
    FutureWarning announcing the method's deprecation."""

    def test_format(self):
        # GH#35439
        idx = pd.date_range("20130101", periods=5)
        expected = [f"{x:%Y-%m-%d}" for x in idx]
        msg = r"DatetimeIndex\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected

    def test_format_with_name_time_info(self):
        # bug I fixed 12/20/2011
        # with name=True, the first entry is the index's name
        dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something")
        msg = "DatetimeIndex.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            formatted = dates.format(name=True)
        assert formatted[0] == "something"

    def test_format_datetime_with_time(self):
        dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
        msg = "DatetimeIndex.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = dti.format()
        expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"]
        assert len(result) == 2
        assert result == expected

    def test_format_datetime(self):
        msg = "DatetimeIndex.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()
        assert formatted[0] == "2003-01-01 12:00:00"
        assert formatted[1] == "NaT"

    def test_format_date(self):
        # date-only values render without a time component
        msg = "DatetimeIndex.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format()
        assert formatted[0] == "2003-01-01"
        assert formatted[1] == "NaT"

    def test_format_date_tz(self):
        # tz-aware values include the UTC offset
        dti = pd.to_datetime([datetime(2013, 1, 1)], utc=True)
        msg = "DatetimeIndex.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            formatted = dti.format()
        assert formatted[0] == "2013-01-01 00:00:00+00:00"
        dti = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True)
        with tm.assert_produces_warning(FutureWarning, match=msg):
            formatted = dti.format()
        assert formatted[0] == "2013-01-01 00:00:00+00:00"

    def test_format_date_explicit_date_format(self):
        dti = pd.to_datetime([datetime(2003, 2, 1), NaT])
        msg = "DatetimeIndex.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            formatted = dti.format(date_format="%m-%d-%Y", na_rep="UT")
        assert formatted[0] == "02-01-2003"
        assert formatted[1] == "UT"

View File

@ -0,0 +1,61 @@
import pytest
from pandas import (
DatetimeIndex,
date_range,
)
from pandas.tseries.offsets import (
BDay,
DateOffset,
Day,
Hour,
)
class TestFreq:
    """Setting and validating the freq attribute of DatetimeIndex data."""

    def test_freq_setter_errors(self):
        # GH#20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])
        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"
        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48h", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        # GH#20678
        idx = DatetimeIndex(values, tz=tz)
        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)
        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_view_safe(self):
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        # for another that views the same data
        dti = date_range("2016-01-01", periods=5)
        dta = dti._data
        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None
        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"

View File

@ -0,0 +1,717 @@
from datetime import (
date,
datetime,
time,
timedelta,
)
import numpy as np
import pytest
from pandas._libs import index as libindex
from pandas.compat.numpy import np_long
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
Timestamp,
bdate_range,
date_range,
notna,
)
import pandas._testing as tm
from pandas.tseries.frequencies import to_offset
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestGetItem:
    """__getitem__ on DatetimeIndex: name retention, freq inference on
    slices, and freq loss on fancy indexing."""

    def test_getitem_slice_keeps_name(self):
        # GH4226
        st = Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles")
        et = Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles")
        dr = date_range(st, et, freq="h", name="timebucket")
        assert dr[1:].name == dr.name

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_getitem(self, tz):
        idx = date_range("2011-01-01", "2011-01-31", freq="D", tz=tz, name="idx")
        # scalar lookup
        result = idx[0]
        assert result == Timestamp("2011-01-01", tz=idx.tz)
        # contiguous slice keeps the daily freq
        result = idx[0:5]
        expected = date_range(
            "2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        # stepped slice scales the freq (step 2 -> 2D)
        result = idx[0:10:2]
        expected = date_range(
            "2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        result = idx[-20:-5:3]
        expected = date_range(
            "2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq
        # negative step yields a negative freq
        result = idx[4::-1]
        expected = DatetimeIndex(
            ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
            dtype=idx.dtype,
            freq="-1D",
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_dti_business_getitem(self, freq):
        rng = bdate_range(START, END, freq=freq)
        smaller = rng[:5]
        exp = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq)
        tm.assert_index_equal(smaller, exp)
        assert smaller.freq == exp.freq
        assert smaller.freq == rng.freq
        # stepped slice multiplies the offset
        sliced = rng[::5]
        assert sliced.freq == to_offset(freq) * 5
        # fancy indexing drops the freq entirely
        fancy_indexed = rng[[4, 3, 2, 1, 0]]
        assert len(fancy_indexed) == 5
        assert isinstance(fancy_indexed, DatetimeIndex)
        assert fancy_indexed.freq is None
        # 32-bit vs. 64-bit platforms
        assert rng[4] == rng[np_long(4)]

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_dti_business_getitem_matplotlib_hackaround(self, freq):
        rng = bdate_range(START, END, freq=freq)
        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
            # GH#30588 multi-dimensional indexing deprecated
            rng[:, None]

    def test_getitem_int_list(self):
        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
        dti2 = dti[[1, 3, 5]]
        v1 = dti2[0]
        v2 = dti2[1]
        v3 = dti2[2]
        assert v1 == Timestamp("2/28/2005")
        assert v2 == Timestamp("4/30/2005")
        assert v3 == Timestamp("6/30/2005")
        # getitem with non-slice drops freq
        assert dti2.freq is None
class TestWhere:
    """Tests for DatetimeIndex.where."""

    def test_where_doesnt_retain_freq(self):
        # where() produces an irregular index, so freq must be dropped
        dti = date_range("20130101", periods=3, freq="D", name="idx")
        cond = [True, True, False]
        expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx")

        result = dti.where(cond, dti[::-1])
        tm.assert_index_equal(result, expected)

    def test_where_other(self):
        # other is ndarray or Index
        i = date_range("20130101", periods=3, tz="US/Eastern")

        for arr in [np.nan, pd.NaT]:
            # all-True condition: result is unchanged regardless of `other`
            result = i.where(notna(i), other=arr)
            expected = i
            tm.assert_index_equal(result, expected)

        i2 = i.copy()
        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
        result = i.where(notna(i2), i2)
        tm.assert_index_equal(result, i2)

        i2 = i.copy()
        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
        # same as above, but passing a raw array as `other`
        result = i.where(notna(i2), i2._values)
        tm.assert_index_equal(result, i2)

    def test_where_invalid_dtypes(self):
        # mismatched-dtype `other` falls back to object dtype rather than raising
        dti = date_range("20130101", periods=3, tz="US/Eastern")

        tail = dti[2:].tolist()
        i2 = Index([pd.NaT, pd.NaT] + tail)
        mask = notna(i2)

        # passing tz-naive ndarray to tzaware DTI
        result = dti.where(mask, i2.values)
        expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object)
        tm.assert_index_equal(result, expected)

        # passing tz-aware DTI to tznaive DTI
        naive = dti.tz_localize(None)
        result = naive.where(mask, i2)
        expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object)
        tm.assert_index_equal(result, expected)

        # PeriodIndex as `other`
        pi = i2.tz_localize(None).to_period("D")
        result = dti.where(mask, pi)
        expected = Index([pi[0], pi[1]] + tail, dtype=object)
        tm.assert_index_equal(result, expected)

        # timedelta64 array as `other`
        tda = i2.asi8.view("timedelta64[ns]")
        result = dti.where(mask, tda)
        expected = Index([tda[0], tda[1]] + tail, dtype=object)
        assert isinstance(expected[0], np.timedelta64)
        tm.assert_index_equal(result, expected)

        # raw int64 values as `other`
        result = dti.where(mask, i2.asi8)
        expected = Index([pd.NaT._value, pd.NaT._value] + tail, dtype=object)
        assert isinstance(expected[0], int)
        tm.assert_index_equal(result, expected)

        # non-matching scalar
        td = pd.Timedelta(days=4)
        result = dti.where(mask, td)
        expected = Index([td, td] + tail, dtype=object)
        assert expected[0] is td
        tm.assert_index_equal(result, expected)

    def test_where_mismatched_nat(self, tz_aware_fixture):
        # a timedelta64("NaT") is *not* treated as datetime NaT -> object result
        tz = tz_aware_fixture
        dti = date_range("2013-01-01", periods=3, tz=tz)
        cond = np.array([True, False, True])

        tdnat = np.timedelta64("NaT", "ns")
        expected = Index([dti[0], tdnat, dti[2]], dtype=object)
        assert expected[1] is tdnat

        result = dti.where(cond, tdnat)
        tm.assert_index_equal(result, expected)

    def test_where_tz(self):
        # where() with tz-aware values keeps the datetime64tz dtype
        i = date_range("20130101", periods=3, tz="US/Eastern")
        result = i.where(notna(i))
        expected = i
        tm.assert_index_equal(result, expected)

        i2 = i.copy()
        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
        result = i.where(notna(i2))
        expected = i2
        tm.assert_index_equal(result, expected)
class TestTake:
    """Tests for DatetimeIndex.take."""

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_take_dont_lose_meta(self, tzstr):
        # take() must preserve tz and freq metadata
        rng = date_range("1/1/2000", periods=20, tz=tzstr)

        result = rng.take(range(5))
        assert result.tz == rng.tz
        assert result.freq == rng.freq

    def test_take_nan_first_datetime(self):
        # NaT in the first position survives a take with a negative index
        index = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")])
        result = index.take([-1, 0, 1])
        expected = DatetimeIndex([index[-1], index[0], index[1]])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_take(self, tz):
        # GH#10295
        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx", tz=tz)

        result = idx.take([0])
        assert result == Timestamp("2011-01-01", tz=idx.tz)

        # evenly spaced indices keep an (adjusted) freq
        result = idx.take([0, 1, 2])
        expected = date_range(
            "2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        result = idx.take([0, 2, 4])
        expected = date_range(
            "2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        result = idx.take([7, 4, 1])
        expected = date_range(
            "2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        # unevenly spaced indices drop the freq
        result = idx.take([3, 2, 5])
        expected = DatetimeIndex(
            ["2011-01-04", "2011-01-03", "2011-01-06"],
            dtype=idx.dtype,
            freq=None,
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq is None

        result = idx.take([-3, 2, 5])
        expected = DatetimeIndex(
            ["2011-01-29", "2011-01-03", "2011-01-06"],
            dtype=idx.dtype,
            freq=None,
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    def test_take_invalid_kwargs(self):
        # numpy-compat kwargs that take() does not support must raise
        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        indices = [1, 6, 5, 9, 10, 13, 15, 3]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            idx.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, mode="clip")

    # TODO: This method came from test_datetime; de-dup with version above
    @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
    def test_take2(self, tz):
        dates = [
            datetime(2010, 1, 1, 14),
            datetime(2010, 1, 1, 15),
            datetime(2010, 1, 1, 17),
            datetime(2010, 1, 1, 21),
        ]

        idx = date_range(
            start="2010-01-01 09:00",
            end="2010-02-01 09:00",
            freq="h",
            tz=tz,
            name="idx",
        )
        expected = DatetimeIndex(dates, freq=None, name="idx", dtype=idx.dtype)

        # take() and fancy __getitem__ must agree
        taken1 = idx.take([5, 6, 8, 12])
        taken2 = idx[[5, 6, 8, 12]]

        for taken in [taken1, taken2]:
            tm.assert_index_equal(taken, expected)
            assert isinstance(taken, DatetimeIndex)
            assert taken.freq is None
            assert taken.tz == expected.tz
            assert taken.name == expected.name

    def test_take_fill_value(self):
        # GH#12631
        idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        result = idx.take(np.array([1, 0, -1]))
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
        tm.assert_index_equal(result, expected)

        # fill_value: -1 now means "missing", filled with NaT
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        tm.assert_index_equal(result, expected)

        # allow_fill=False: -1 is a plain positional index again
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "out of bounds"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_with_timezone(self):
        # same as test_take_fill_value, but tz-aware
        idx = DatetimeIndex(
            ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        result = idx.take(np.array([1, 0, -1]))
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "out of bounds"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))
class TestGetLoc:
    """Tests for DatetimeIndex.get_loc and related __contains__ corner cases."""

    def test_get_loc_key_unit_mismatch(self):
        # a key with a different (castable) resolution still matches
        idx = date_range("2000-01-01", periods=3)
        key = idx[1].as_unit("ms")
        loc = idx.get_loc(key)
        assert loc == 1
        assert key in idx

    def test_get_loc_key_unit_mismatch_not_castable(self):
        # a key that cannot be losslessly cast to the index unit -> KeyError
        dta = date_range("2000-01-01", periods=3)._data.astype("M8[s]")
        dti = DatetimeIndex(dta)
        key = dta[0].as_unit("ns") + pd.Timedelta(1)

        with pytest.raises(
            KeyError, match=r"Timestamp\('2000-01-01 00:00:00.000000001'\)"
        ):
            dti.get_loc(key)

        assert key not in dti

    def test_get_loc_time_obj(self):
        # time indexing
        idx = date_range("2000-01-01", periods=24, freq="h")

        result = idx.get_loc(time(12))
        expected = np.array([12])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

        result = idx.get_loc(time(12, 30))
        expected = np.array([])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

    @pytest.mark.parametrize("offset", [-10, 10])
    def test_get_loc_time_obj2(self, monkeypatch, offset):
        # GH#8667 - exercise both sides of the hash-table size cutoff
        size_cutoff = 50
        n = size_cutoff + offset
        key = time(15, 11, 30)
        start = key.hour * 3600 + key.minute * 60 + key.second
        step = 24 * 3600

        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            idx = date_range("2014-11-26", periods=n, freq="s")
            ts = pd.Series(np.random.default_rng(2).standard_normal(n), index=idx)
            locs = np.arange(start, n, step, dtype=np.intp)

            result = ts.index.get_loc(key)
            tm.assert_numpy_array_equal(result, locs)
            tm.assert_series_equal(ts[key], ts.iloc[locs])

            left, right = ts.copy(), ts.copy()
            left[key] *= -10
            right.iloc[locs] *= -10
            tm.assert_series_equal(left, right)

    def test_get_loc_time_nat(self):
        # GH#35114
        # Case where key's total microseconds happens to match iNaT % 1e6 // 1000
        tic = time(minute=12, second=43, microsecond=145224)
        dti = DatetimeIndex([pd.NaT])

        loc = dti.get_loc(tic)
        expected = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(loc, expected)

    def test_get_loc_nat(self):
        # GH#20464 - all NA-likes map to the NaT position, except timedelta NaT
        index = DatetimeIndex(["1/3/2000", "NaT"])
        assert index.get_loc(pd.NaT) == 1

        assert index.get_loc(None) == 1

        assert index.get_loc(np.nan) == 1

        assert index.get_loc(pd.NA) == 1

        assert index.get_loc(np.datetime64("NaT")) == 1

        with pytest.raises(KeyError, match="NaT"):
            index.get_loc(np.timedelta64("NaT"))

    @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
    def test_get_loc_timedelta_invalid_key(self, key):
        # GH#20464 - timedelta keys are a TypeError, not a KeyError
        dti = date_range("1970-01-01", periods=10)
        msg = "Cannot index DatetimeIndex with [Tt]imedelta"
        with pytest.raises(TypeError, match=msg):
            dti.get_loc(key)

    def test_get_loc_reasonable_key_error(self):
        # GH#1062
        index = DatetimeIndex(["1/3/2000"])
        with pytest.raises(KeyError, match="2000"):
            index.get_loc("1/1/2000")

    def test_get_loc_year_str(self):
        # a year string resolves to the slice covering that whole year
        rng = date_range("1/1/2000", "1/1/2010")

        result = rng.get_loc("2009")
        expected = slice(3288, 3653)
        assert result == expected
class TestContains:
    """Tests for `in` (__contains__) on DatetimeIndex."""

    def test_dti_contains_with_duplicates(self):
        # membership works even when the value appears more than once
        d = datetime(2011, 12, 5, 20, 30)
        ix = DatetimeIndex([d, d])
        assert d in ix

    @pytest.mark.parametrize(
        "vals",
        [
            [0, 1, 0],
            [0, 0, -1],
            [0, -1, -1],
            ["2015", "2015", "2016"],
            ["2015", "2015", "2014"],
        ],
    )
    def test_contains_nonunique(self, vals):
        # GH#9512
        idx = DatetimeIndex(vals)
        assert idx[0] in idx
class TestGetIndexer:
    """Tests for DatetimeIndex.get_indexer."""

    def test_get_indexer_date_objs(self):
        # datetime.date targets are treated like their Timestamp equivalents
        rng = date_range("1/1/2000", periods=20)

        result = rng.get_indexer(rng.map(lambda x: x.date()))
        expected = rng.get_indexer(rng)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer(self):
        idx = date_range("2000-01-01", periods=3)
        exp = np.array([0, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)

        # targets offset from the index exercise the fill methods
        target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
        )
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
        )
        # scalar tolerance
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
            np.array([0, -1, 1], dtype=np.intp),
        )
        # per-element (list) tolerance
        tol_raw = [
            pd.Timedelta("1 hour"),
            pd.Timedelta("1 hour"),
            pd.Timedelta("1 hour").to_timedelta64(),
        ]
        tm.assert_numpy_array_equal(
            idx.get_indexer(
                target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
            ),
            np.array([0, -1, 1], dtype=np.intp),
        )
        # unparseable tolerance entries raise
        tol_bad = [
            pd.Timedelta("2 hour").to_timedelta64(),
            pd.Timedelta("1 hour").to_timedelta64(),
            "foo",
        ]
        msg = "Could not convert 'foo' to NumPy timedelta"
        with pytest.raises(ValueError, match=msg):
            idx.get_indexer(target, "nearest", tolerance=tol_bad)
        with pytest.raises(ValueError, match="abbreviation w/o a number"):
            idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")

    @pytest.mark.parametrize(
        "target",
        [
            [date(2020, 1, 1), Timestamp("2020-01-02")],
            [Timestamp("2020-01-01"), date(2020, 1, 2)],
        ],
    )
    def test_get_indexer_mixed_dtypes(self, target):
        # https://github.com/pandas-dev/pandas/issues/33741
        values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
        result = values.get_indexer(target)
        expected = np.array([0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "target, positions",
        [
            ([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]),
            ([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]),
            ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
        ],
    )
    def test_get_indexer_out_of_bounds_date(self, target, positions):
        # dates beyond the Timestamp bounds simply miss (-1) instead of raising
        values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])

        result = values.get_indexer(target)
        expected = np.array(positions, dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_pad_requires_monotonicity(self):
        rng = date_range("1/1/2000", "3/1/2000", freq="B")

        # neither monotonic increasing or decreasing
        rng2 = rng[[1, 0, 2]]

        msg = "index must be monotonic increasing or decreasing"
        with pytest.raises(ValueError, match=msg):
            rng2.get_indexer(rng, method="pad")
class TestMaybeCastSliceBound:
    """Tests for DatetimeIndex._maybe_cast_slice_bound."""

    def test_maybe_cast_slice_bounds_empty(self):
        # GH#14354 - casting works even when the index is empty
        idx = date_range(freq="1h", periods=0, end="2015")

        upper = idx._maybe_cast_slice_bound("2015-01-02", "right")
        assert upper == Timestamp("2015-01-02 23:59:59.999999999")

        lower = idx._maybe_cast_slice_bound("2015-01-02", "left")
        assert lower == Timestamp("2015-01-02 00:00:00")

    def test_maybe_cast_slice_duplicate_monotonic(self):
        # https://github.com/pandas-dev/pandas/issues/16515
        dup_idx = DatetimeIndex(["2017", "2017"])
        bound = dup_idx._maybe_cast_slice_bound("2017-01-01", "left")
        assert bound == Timestamp("2017-01-01")
class TestGetSliceBounds:
    """Tests for get_slice_bound / slice_locs with datetime-like keys."""

    @pytest.mark.parametrize("box", [date, datetime, Timestamp])
    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_datetime_within(
        self, box, side, expected, tz_aware_fixture
    ):
        # GH 35690
        tz = tz_aware_fixture
        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
        key = box(year=2000, month=1, day=7)

        if tz is not None:
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.get_slice_bound(key, side=side)
        else:
            result = index.get_slice_bound(key, side=side)
            assert result == expected

    @pytest.mark.parametrize("box", [datetime, Timestamp])
    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)])
    def test_get_slice_bounds_datetime_outside(
        self, box, side, year, expected, tz_aware_fixture
    ):
        # GH 35690 - keys before/after the index clip to 0/len(index)
        tz = tz_aware_fixture
        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
        key = box(year=year, month=1, day=7)

        if tz is not None:
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.get_slice_bound(key, side=side)
        else:
            result = index.get_slice_bound(key, side=side)
            assert result == expected

    @pytest.mark.parametrize("box", [datetime, Timestamp])
    def test_slice_datetime_locs(self, box, tz_aware_fixture):
        # GH 34077
        tz = tz_aware_fixture
        index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz)
        key = box(2010, 1, 1)

        if tz is not None:
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.slice_locs(key, box(2010, 1, 2))
        else:
            result = index.slice_locs(key, box(2010, 1, 2))
            expected = (0, 1)
            assert result == expected
class TestIndexerBetweenTime:
    """Tests for DatetimeIndex.indexer_between_time."""

    def test_indexer_between_time(self):
        # GH#11818 - datetime arguments (rather than time) must raise
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
        with pytest.raises(ValueError, match=msg):
            rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

    @pytest.mark.parametrize("unit", ["us", "ms", "s"])
    def test_indexer_between_time_non_nano(self, unit):
        # For simple cases like this, the non-nano indexer_between_time
        # should match the nano result
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        arr_nano = rng._data._ndarray

        arr = arr_nano.astype(f"M8[{unit}]")

        dta = type(rng._data)._simple_new(arr, dtype=arr.dtype)
        dti = DatetimeIndex(dta)
        assert dti.dtype == arr.dtype

        tic = time(1, 25)
        toc = time(2, 29)

        result = dti.indexer_between_time(tic, toc)
        expected = rng.indexer_between_time(tic, toc)
        tm.assert_numpy_array_equal(result, expected)

        # case with non-zero micros in arguments
        tic = time(1, 25, 0, 45678)
        toc = time(2, 29, 0, 1234)
        result = dti.indexer_between_time(tic, toc)
        expected = rng.indexer_between_time(tic, toc)
        tm.assert_numpy_array_equal(result, expected)

View File

@ -0,0 +1,76 @@
import dateutil.tz
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
date_range,
to_datetime,
)
from pandas.core.arrays import datetimes
class TestDatetimeIndexIteration:
    """Tests that iterating a DatetimeIndex yields faithful Timestamps."""

    @pytest.mark.parametrize(
        "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)]
    )
    def test_iteration_preserves_nanoseconds(self, tz):
        # GH#19603 - nanosecond precision must survive iteration
        index = DatetimeIndex(
            ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz
        )
        for i, ts in enumerate(index):
            assert ts == index[i]  # pylint: disable=unnecessary-list-index-lookup

    def test_iter_readonly(self):
        # GH#28055 ints_to_pydatetime with readonly array
        arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
        arr.setflags(write=False)

        dti = to_datetime(arr)
        list(dti)

    def test_iteration_preserves_tz(self):
        # see GH#8890
        index = date_range("2012-01-01", periods=3, freq="h", tz="US/Eastern")

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result == expected

    def test_iteration_preserves_tz2(self):
        # fixed-offset tz from dateutil
        index = date_range(
            "2012-01-01", periods=3, freq="h", tz=dateutil.tz.tzoffset(None, -28800)
        )

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result._repr_base == expected._repr_base
            assert result == expected

    def test_iteration_preserves_tz3(self):
        # GH#9100
        index = DatetimeIndex(
            ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
        )
        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result._repr_base == expected._repr_base
            assert result == expected

    @pytest.mark.parametrize("offset", [-5, -1, 0, 1])
    def test_iteration_over_chunksize(self, offset, monkeypatch):
        # GH#21012 - exercise lengths around the conversion chunksize boundary
        chunksize = 5
        index = date_range(
            "2000-01-01 00:00:00", periods=chunksize - offset, freq="min"
        )
        num = 0
        with monkeypatch.context() as m:
            m.setattr(datetimes, "_ITER_CHUNKSIZE", chunksize)
            for stamp in index:
                assert index[num] == stamp
                num += 1
        assert num == len(index)

View File

@ -0,0 +1,153 @@
from datetime import (
datetime,
timezone,
)
import numpy as np
import pytest
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Timestamp,
date_range,
period_range,
to_datetime,
)
import pandas._testing as tm
from pandas.tseries.offsets import (
BDay,
BMonthEnd,
)
class TestJoin:
    """Tests for DatetimeIndex.join."""

    def test_does_not_convert_mixed_integer(self):
        # joining datetime columns with an integer index yields object dtype
        df = DataFrame(np.ones((3, 2)), columns=date_range("2020-01-01", periods=2))
        cols = df.columns.join(df.index, how="outer")
        joined = cols.join(df.columns)
        assert cols.dtype == np.dtype("O")
        assert cols.dtype == joined.dtype
        tm.assert_numpy_array_equal(cols.values, joined.values)

    def test_join_self(self, join_type):
        # joining an index with itself is a no-op returning the same object
        index = date_range("1/1/2000", periods=10)
        joined = index.join(index, how=join_type)
        assert index is joined

    def test_join_with_period_index(self, join_type):
        df = DataFrame(
            np.ones((10, 2)),
            index=date_range("2020-01-01", periods=10),
            columns=period_range("2020-01-01", periods=2),
        )
        s = df.iloc[:5, 0]

        expected = df.columns.astype("O").join(s.index, how=join_type)
        result = df.columns.join(s.index, how=join_type)
        tm.assert_index_equal(expected, result)

    def test_join_object_index(self):
        rng = date_range("1/1/2000", periods=10)
        idx = Index(["a", "b", "c", "d"])

        result = rng.join(idx, how="outer")
        assert isinstance(result[0], Timestamp)

    def test_join_utc_convert(self, join_type):
        rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")

        left = rng.tz_convert("US/Eastern")
        right = rng.tz_convert("Europe/Berlin")

        result = left.join(left[:-5], how=join_type)
        assert isinstance(result, DatetimeIndex)
        assert result.tz == left.tz

        # joining indexes in different timezones converts the result to UTC
        result = left.join(right[:-5], how=join_type)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is timezone.utc

    def test_datetimeindex_union_join_empty(self, sort, using_infer_string):
        dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
        empty = Index([])

        result = dti.union(empty, sort=sort)
        if using_infer_string:
            assert isinstance(result, DatetimeIndex)
            tm.assert_index_equal(result, dti)
        else:
            expected = dti.astype("O")
            tm.assert_index_equal(result, expected)

        result = dti.join(empty)
        assert isinstance(result, DatetimeIndex)
        tm.assert_index_equal(result, dti)

    def test_join_nonunique(self):
        # outer join of non-unique indexes is still monotonic
        idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
        idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
        rs = idx1.join(idx2, how="outer")
        assert rs.is_monotonic_increasing

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_outer_join(self, freq):
        # should just behave as union
        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
        rng = date_range(start=start, end=end, freq=freq)

        # overlapping
        left = rng[:10]
        right = rng[5:10]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)
        assert the_join.freq is None

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)

        # overlapping, but different offset
        other = date_range(start, end, freq=BMonthEnd())

        the_join = rng.join(other, how="outer")
        assert isinstance(the_join, DatetimeIndex)
        assert the_join.freq is None

    def test_naive_aware_conflicts(self):
        # joining tz-naive with tz-aware raises in either direction
        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
        naive = date_range(start, end, freq=BDay(), tz=None)
        aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")

        msg = "tz-naive.*tz-aware"
        with pytest.raises(TypeError, match=msg):
            naive.join(aware)

        with pytest.raises(TypeError, match=msg):
            aware.join(naive)

    @pytest.mark.parametrize("tz", [None, "US/Pacific"])
    def test_join_preserves_freq(self, tz):
        # GH#32157
        dti = date_range("2016-01-01", periods=10, tz=tz)
        result = dti[:5].join(dti[5:], how="outer")
        assert result.freq == dti.freq
        tm.assert_index_equal(result, dti)

        result = dti[:5].join(dti[6:], how="outer")
        assert result.freq is None
        expected = dti.delete(5)
        tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,13 @@
import numpy as np
from pandas import date_range
import pandas._testing as tm
class TestSplit:
    """np.split should work on a tz-aware DatetimeIndex (GH#14042)."""

    def test_split_non_utc(self):
        # GH#14042
        idx = date_range("2016-01-01 00:00:00+0200", freq="s", periods=10)
        pieces = np.split(idx, indices_or_sections=[])
        # splitting at no positions returns the whole index, minus its freq
        tm.assert_index_equal(pieces[0], idx._with_freq(None))

View File

@ -0,0 +1,56 @@
from datetime import datetime
import pytest
from pandas import (
DatetimeIndex,
Index,
bdate_range,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndexOps:
def test_infer_freq(self, freq_sample):
# GH 11018
idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
result = DatetimeIndex(idx.asi8, freq="infer")
tm.assert_index_equal(idx, result)
assert result.freq == freq_sample
@pytest.mark.parametrize("freq", ["B", "C"])
class TestBusinessDatetimeIndex:
    """Basic ops on business-day DatetimeIndexes (both "B" and "C" calendars)."""

    @pytest.fixture
    def rng(self, freq):
        # one year of business days
        START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
        return bdate_range(START, END, freq=freq)

    def test_comparison(self, rng):
        # elementwise comparison against a scalar member of the index
        d = rng[10]

        comp = rng > d
        assert comp[11]
        assert not comp[9]

    def test_copy(self, rng):
        cp = rng.copy()
        tm.assert_index_equal(cp, rng)

    def test_identical(self, rng):
        # identical() is stricter than equals(): it also compares metadata
        t1 = rng.copy()
        t2 = rng.copy()
        assert t1.identical(t2)

        # name
        t1 = t1.rename("foo")
        assert t1.equals(t2)
        assert not t1.identical(t2)
        t2 = t2.rename("foo")
        assert t1.identical(t2)

        # freq
        t2v = Index(t2.values)
        assert t1.equals(t2v)
        assert not t1.identical(t2v)

View File

@ -0,0 +1,466 @@
""" test partial slicing on Series/Frame """
from datetime import datetime
import numpy as np
import pytest
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestSlicing:
def test_string_index_series_name_converted(self):
# GH#1644
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
index=date_range("1/1/2000", periods=10),
)
result = df.loc["1/3/2000"]
assert result.name == df.index[2]
result = df.T["1/3/2000"]
assert result.name == df.index[2]
def test_stringified_slice_with_tz(self):
    """GH#2658: slicing a tz-aware frame with a stringified timestamp works."""
    start = "2013-01-07"
    idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
    df = DataFrame(np.arange(10), index=idx)
    df["2013-01-14 23:44:34.437768-05:00":]  # no exception here
def test_return_type_doesnt_depend_on_monotonicity(self):
    # GH#24892 we get Series back regardless of whether our DTI is monotonic
    dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
    ser = Series(range(3), index=dti)

    # non-monotonic index
    ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])

    # key with resolution strictly lower than "min"
    key = "2015-5-14 00"

    # monotonic increasing index
    result = ser.loc[key]
    expected = ser.iloc[1:]
    tm.assert_series_equal(result, expected)

    # monotonic decreasing index
    result = ser.iloc[::-1].loc[key]
    expected = ser.iloc[::-1][:-1]
    tm.assert_series_equal(result, expected)

    # non-monotonic index
    result2 = ser2.loc[key]
    expected2 = ser2.iloc[::2]
    tm.assert_series_equal(result2, expected2)
def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self):
    # GH#24892 we get Series back regardless of whether our DTI is monotonic
    dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
    ser = Series(range(3), index=dti)

    # non-monotonic index
    ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])

    # key with resolution strictly *higher) than "min" -> exact scalar lookup
    key = "2015-5-14 00:00:00"

    # monotonic increasing index
    result = ser.loc[key]
    assert result == 1

    # monotonic decreasing index
    result = ser.iloc[::-1].loc[key]
    assert result == 1

    # non-monotonic index
    result2 = ser2.loc[key]
    assert result2 == 0
def test_monotone_DTI_indexing_bug(self):
    # GH 19362
    # Testing accessing the first element in a monotonic descending
    # partial string indexing.
    df = DataFrame(list(range(5)))
    date_list = [
        "2018-01-02",
        "2017-02-10",
        "2016-03-10",
        "2015-03-15",
        "2014-03-16",
    ]
    date_index = DatetimeIndex(date_list)
    df["date"] = date_index
    expected = DataFrame({0: list(range(5)), "date": date_index})
    tm.assert_frame_equal(df, expected)

    # We get a slice because df.index's resolution is hourly and we
    # are slicing with a daily-resolution string. If both were daily,
    # we would get a single item back
    dti = date_range("20170101 01:00:00", periods=3)
    df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1])

    expected = DataFrame({"A": 1}, index=dti[-1:][::-1])
    result = df.loc["2017-01-03"]
    tm.assert_frame_equal(result, expected)

    # reversing the frame must give the reversed slice
    result2 = df.iloc[::-1].loc["2017-01-03"]
    expected2 = expected.iloc[::-1]
    tm.assert_frame_equal(result2, expected2)
def test_slice_year(self):
dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(dti)), index=dti)
result = s["2005"]
expected = s[s.index.year == 2005]
tm.assert_series_equal(result, expected)
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
result = df.loc["2005"]
expected = df[df.index.year == 2005]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "partial_dtime",
    [
        "2019",
        "2019Q4",
        "Dec 2019",
        "2019-12-31",
        "2019-12-31 23",
        "2019-12-31 23:59",
    ],
)
def test_slice_end_of_period_resolution(self, partial_dtime):
    """GH#31064: partial strings at every resolution slice up to period end."""
    # GH#31064
    dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s")

    ser = Series(range(10), index=dti)
    result = ser[partial_dtime]
    expected = ser.iloc[:5]
    tm.assert_series_equal(result, expected)
def test_slice_quarter(self):
    """Quarter strings ("2001Q1", "1Q01") slice a daily index."""
    dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)

    s = Series(np.arange(len(dti)), index=dti)
    assert len(s["2001Q1"]) == 90

    df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
    assert len(df.loc["1Q01"]) == 90
def test_slice_month(self):
    """Month strings slice a daily index; "YYYY-MM" and "MM-YYYY" are equivalent."""
    dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
    s = Series(np.arange(len(dti)), index=dti)
    assert len(s["2005-11"]) == 30

    df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
    assert len(df.loc["2005-11"]) == 30

    tm.assert_series_equal(s["2005-11"], s["11-2005"])
def test_partial_slice(self):
    """Month-resolution strings as slice bounds expand to full-month ranges."""
    rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    result = s["2005-05":"2006-02"]
    expected = s["20050501":"20060228"]
    tm.assert_series_equal(result, expected)

    result = s["2005-05":]
    expected = s["20050501":]
    tm.assert_series_equal(result, expected)

    result = s[:"2006-02"]
    expected = s[:"20060228"]
    tm.assert_series_equal(result, expected)

    # exact-resolution string is a scalar lookup
    result = s["2005-1-1"]
    assert result == s.iloc[0]

    # a date before the index raises
    with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
        s["2004-12-31"]
def test_partial_slice_daily(self):
    """A day string on an hourly index selects that whole day."""
    rng = date_range(freq="h", start=datetime(2005, 1, 31), periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    result = s["2005-1-31"]
    tm.assert_series_equal(result, s.iloc[:24])

    with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
        s["2004-12-31 00"]
def test_partial_slice_hourly(self):
    """Day/hour strings on a minutely index slice; minute strings are scalars."""
    rng = date_range(freq="min", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    result = s["2005-1-1"]
    tm.assert_series_equal(result, s.iloc[: 60 * 4])

    result = s["2005-1-1 20"]
    tm.assert_series_equal(result, s.iloc[:60])

    # exact-resolution string -> scalar
    assert s["2005-1-1 20:00"] == s.iloc[0]
    with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
        s["2004-12-31 00:15"]
def test_partial_slice_minutely(self):
    """Minute/day strings on a secondly index slice; Timestamp keys are scalars."""
    rng = date_range(freq="s", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
    s = Series(np.arange(len(rng)), index=rng)

    result = s["2005-1-1 23:59"]
    tm.assert_series_equal(result, s.iloc[:60])

    result = s["2005-1-1"]
    tm.assert_series_equal(result, s.iloc[:60])

    assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
    with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
        s["2004-12-31 00:00:00"]
def test_partial_slice_second_precision(self):
    """Second-resolution strings on a microsecond index slice by second."""
    rng = date_range(
        start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
        periods=20,
        freq="us",
    )
    s = Series(np.arange(20), rng)

    # minute- and second-resolution strings both slice
    tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
    tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])

    tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
    tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])

    # exact Timestamp lookup still returns a scalar
    assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
    with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
        s["2005-1-1 00:00:00"]
def test_partial_slicing_dataframe(self):
    """Exercise the string-resolution vs. index-resolution matrix for
    Series and DataFrame __getitem__."""
    # GH14856
    # Test various combinations of string slicing resolution vs.
    # index resolution
    # - If string resolution is less precise than index resolution,
    #   string is considered a slice
    # - If string resolution is equal to or more precise than index
    #   resolution, string is considered an exact match
    formats = [
        "%Y",
        "%Y-%m",
        "%Y-%m-%d",
        "%Y-%m-%d %H",
        "%Y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M:%S",
    ]
    resolutions = ["year", "month", "day", "hour", "minute", "second"]
    for rnum, resolution in enumerate(resolutions[2:], 2):
        # we check only 'day', 'hour', 'minute' and 'second'
        unit = Timedelta("1 " + resolution)
        middate = datetime(2012, 1, 1, 0, 0, 0)
        # three points one `unit` apart, so the index resolution is `resolution`
        index = DatetimeIndex([middate - unit, middate, middate + unit])
        values = [1, 2, 3]
        df = DataFrame({"a": values}, index, dtype=np.int64)
        assert df.index.resolution == resolution

        # Timestamp with the same resolution as index
        # Should be exact match for Series (return scalar)
        # and raise KeyError for Frame
        for timestamp, expected in zip(index, values):
            ts_string = timestamp.strftime(formats[rnum])
            # make ts_string as precise as index
            result = df["a"][ts_string]
            assert isinstance(result, np.int64)
            assert result == expected
            msg = rf"^'{ts_string}'$"
            with pytest.raises(KeyError, match=msg):
                df[ts_string]

        # Timestamp with resolution less precise than index
        for fmt in formats[:rnum]:
            for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
                ts_string = index[element].strftime(fmt)

                # Series should return slice
                result = df["a"][ts_string]
                expected = df["a"][theslice]
                tm.assert_series_equal(result, expected)

                # pre-2.0 df[ts_string] was overloaded to interpret this
                # as slicing along index
                with pytest.raises(KeyError, match=ts_string):
                    df[ts_string]

        # Timestamp with resolution more precise than index
        # Compatible with existing key
        # Should return scalar for Series
        # and raise KeyError for Frame
        for fmt in formats[rnum + 1 :]:
            ts_string = index[1].strftime(fmt)
            result = df["a"][ts_string]
            assert isinstance(result, np.int64)
            assert result == 2
            msg = rf"^'{ts_string}'$"
            with pytest.raises(KeyError, match=msg):
                df[ts_string]

        # Not compatible with existing key
        # Should raise KeyError
        for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
            ts = index[1] + Timedelta("1 " + res)
            ts_string = ts.strftime(fmt)
            msg = rf"^'{ts_string}'$"
            with pytest.raises(KeyError, match=msg):
                df["a"][ts_string]
            with pytest.raises(KeyError, match=msg):
                df[ts_string]
def test_partial_slicing_with_multiindex(self):
    """Partial string indexing works on the datetime level of a MultiIndex."""
    # GH 4758
    # partial string indexing with a multi-index buggy
    df = DataFrame(
        {
            "ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
            "TICKER": ["ABC", "MNP", "XYZ", "XYZ"],
            "val": [1, 2, 3, 4],
        },
        index=date_range("2013-06-19 09:30:00", periods=4, freq="5min"),
    )
    df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True)

    # full-precision string + one scalar level -> frame indexed by TICKER
    expected = DataFrame(
        [[1]], index=Index(["ABC"], name="TICKER"), columns=["val"]
    )
    result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")]
    tm.assert_frame_equal(result, expected)

    # full-precision string behaves like the equivalent Timestamp key
    expected = df_multi.loc[
        (Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
    ]
    result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
    tm.assert_series_equal(result, expected)

    # partial string indexing on first level, scalar indexing on the other two
    result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
    expected = df_multi.iloc[:1].droplevel([1, 2])
    tm.assert_frame_equal(result, expected)
def test_partial_slicing_with_multiindex_series(self):
    """String and Timestamp keys agree on the datetime level of a Series MultiIndex."""
    # GH 4294
    # partial slice on a series mi
    ser = Series(
        range(250),
        index=MultiIndex.from_product(
            [date_range("2000-1-1", periods=50), range(5)]
        ),
    )
    # drop the last row so the index is not a "complete" product
    s2 = ser[:-1].copy()
    expected = s2["2000-1-4"]
    result = s2[Timestamp("2000-1-4")]
    tm.assert_series_equal(result, expected)

    result = ser[Timestamp("2000-1-4")]
    expected = ser["2000-1-4"]
    tm.assert_series_equal(result, expected)

    # DataFrame.loc with a Timestamp matches xs on the string key
    df2 = DataFrame(ser)
    expected = df2.xs("2000-1-4")
    result = df2.loc[Timestamp("2000-1-4")]
    tm.assert_frame_equal(result, expected)
def test_partial_slice_requires_monotonicity(self):
    """Value-based partial slicing on a non-monotonic index raises KeyError."""
    # Disallowed since 2.0 (GH 37819)
    ser = Series(np.arange(10), date_range("2014-01-01", periods=10))
    nonmonotonic = ser.iloc[[3, 5, 4]]
    timestamp = Timestamp("2014-01-10")

    # string key -> partial-slicing error message
    with pytest.raises(
        KeyError, match="Value based partial slicing on non-monotonic"
    ):
        nonmonotonic["2014-01-10":]
    # Timestamp key -> plain missing-key error
    with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
        nonmonotonic[timestamp:]

    # same behavior through .loc
    with pytest.raises(
        KeyError, match="Value based partial slicing on non-monotonic"
    ):
        nonmonotonic.loc["2014-01-10":]
    with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
        nonmonotonic.loc[timestamp:]
def test_loc_datetime_length_one(self):
    """Slicing from the first index value returns the whole frame."""
    # GH16071
    df = DataFrame(
        columns=["1"],
        index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
    )
    # datetime lower bound
    result = df.loc[datetime(2016, 10, 1) :]
    tm.assert_frame_equal(result, df)

    # equivalent ISO string lower bound
    result = df.loc["2016-10-01T00:00:00":]
    tm.assert_frame_equal(result, df)
@pytest.mark.parametrize(
"start",
[
"2018-12-02 21:50:00+00:00",
Timestamp("2018-12-02 21:50:00+00:00"),
Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
],
)
@pytest.mark.parametrize(
"end",
[
"2018-12-02 21:52:00+00:00",
Timestamp("2018-12-02 21:52:00+00:00"),
Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
],
)
def test_getitem_with_datestring_with_UTC_offset(self, start, end):
# GH 24076
idx = date_range(
start="2018-12-02 14:50:00-07:00",
end="2018-12-02 14:50:00-07:00",
freq="1min",
)
df = DataFrame(1, index=idx, columns=["A"])
result = df[start:end]
expected = df.iloc[0:3, :]
tm.assert_frame_equal(result, expected)
# GH 16785
start = str(start)
end = str(end)
with pytest.raises(ValueError, match="Both dates must"):
df[start : end[:-4] + "1:00"]
with pytest.raises(ValueError, match="The index must be timezone"):
df = df.tz_localize(None)
df[start:end]
def test_slice_reduce_to_series(self):
    """df.loc[partial_string, col] reduces to a Series of the matching rows."""
    # GH 27516
    df = DataFrame(
        {"A": range(24)}, index=date_range("2000", periods=24, freq="ME")
    )
    # "2000" matches the 12 month-ends within that year
    expected = Series(
        range(12), index=date_range("2000", periods=12, freq="ME"), name="A"
    )
    result = df.loc["2000", "A"]
    tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,45 @@
import pytest
from pandas import (
NaT,
date_range,
to_datetime,
)
import pandas._testing as tm
class TestPickle:
    """Pickle round-trip behavior for DatetimeIndex."""

    def test_pickle(self):
        """NaT survives a pickle round trip alongside valid timestamps."""
        # GH#4606
        idx = to_datetime(["2013-01-01", NaT, "2014-01-06"])
        idx_p = tm.round_trip_pickle(idx)
        assert idx_p[0] == idx[0]
        assert idx_p[1] is NaT
        assert idx_p[2] == idx[2]

    def test_pickle_dont_infer_freq(self):
        """Round-tripping must not infer a freq the index did not have."""
        # GH#11002
        # don't infer freq
        idx = date_range("1750-1-1", "2050-1-1", freq="7D")
        idx_p = tm.round_trip_pickle(idx)
        tm.assert_index_equal(idx, idx_p)

    def test_pickle_after_set_freq(self):
        """An index whose freq was cleared round-trips with freq still None."""
        dti = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
        dti = dti._with_freq(None)

        res = tm.round_trip_pickle(dti)
        tm.assert_index_equal(res, dti)

    def test_roundtrip_pickle_with_tz(self):
        """Timezone information is preserved by pickling."""
        # GH#8367
        # round-trip of timezone
        index = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
        unpickled = tm.round_trip_pickle(index)
        tm.assert_index_equal(index, unpickled)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_pickle_unpickle(self, freq):
        """Business-day frequencies are preserved by pickling."""
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        unpickled = tm.round_trip_pickle(rng)
        assert unpickled.freq == freq

View File

@ -0,0 +1,56 @@
from datetime import timedelta
import numpy as np
from pandas import (
DatetimeIndex,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndexReindex:
    """Reindexing behavior specific to DatetimeIndex."""

    def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
        """Reindexing against an empty target keeps the original timezone."""
        # GH#7774
        index = date_range("2013-01-01", periods=3, tz="US/Eastern")
        assert str(index.reindex([])[0].tz) == "US/Eastern"
        assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern"

    def test_reindex_with_same_tz_nearest(self):
        """method='nearest' with a tolerance marks out-of-tolerance targets -1."""
        # GH#32740
        rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc")
        rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc")
        result1, result2 = rng_a.reindex(
            rng_b, method="nearest", tolerance=timedelta(seconds=20)
        )
        expected_list1 = [
            "2010-01-01 00:00:00",
            "2010-01-01 01:05:27.272727272",
            "2010-01-01 02:10:54.545454545",
            "2010-01-01 03:16:21.818181818",
            "2010-01-01 04:21:49.090909090",
            "2010-01-01 05:27:16.363636363",
            "2010-01-01 06:32:43.636363636",
            "2010-01-01 07:38:10.909090909",
            "2010-01-01 08:43:38.181818181",
            "2010-01-01 09:49:05.454545454",
            "2010-01-01 10:54:32.727272727",
            "2010-01-01 12:00:00",
            "2010-01-01 13:05:27.272727272",
            "2010-01-01 14:10:54.545454545",
            "2010-01-01 15:16:21.818181818",
            "2010-01-01 16:21:49.090909090",
            "2010-01-01 17:27:16.363636363",
            "2010-01-01 18:32:43.636363636",
            "2010-01-01 19:38:10.909090909",
            "2010-01-01 20:43:38.181818181",
            "2010-01-01 21:49:05.454545454",
            "2010-01-01 22:54:32.727272727",
            "2010-01-02 00:00:00",
        ]
        expected1 = DatetimeIndex(
            expected_list1, dtype="datetime64[ns, UTC]", freq=None
        )
        # only the endpoints fall within the 20s tolerance; all interior
        # targets get indexer -1 (no match)
        expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp"))
        tm.assert_index_equal(result1, expected1)
        tm.assert_numpy_array_equal(result2, expected2)

View File

@ -0,0 +1,329 @@
"""
Tests for DatetimeIndex methods behaving like their Timestamp counterparts
"""
import calendar
from datetime import (
date,
datetime,
time,
)
import locale
import unicodedata
import numpy as np
import pytest
from pandas._libs.tslibs import timezones
from pandas import (
DatetimeIndex,
Index,
NaT,
Timestamp,
date_range,
offsets,
)
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
class TestDatetimeIndexOps:
    """Tests for DatetimeIndex field accessors mirroring Timestamp behavior."""

    def test_dti_no_millisecond_field(self):
        """There is no 'millisecond' accessor on the class or on instances."""
        msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex.millisecond

        msg = "'DatetimeIndex' object has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex([]).millisecond

    def test_dti_time(self):
        """.time matches datetime.time() of each element."""
        rng = date_range("1/1/2000", freq="12min", periods=10)
        result = Index(rng).time
        expected = [t.time() for t in rng]
        assert (result == expected).all()

    def test_dti_date(self):
        """.date matches datetime.date() of each element."""
        rng = date_range("1/1/2000", freq="12h", periods=10)
        result = Index(rng).date
        expected = [t.date() for t in rng]
        assert (result == expected).all()

    @pytest.mark.parametrize(
        "dtype",
        [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
    )
    def test_dti_date2(self, dtype):
        """.date propagates NaT, with or without a timezone."""
        # Regression test for GH#21230
        expected = np.array([date(2018, 6, 4), NaT])

        index = DatetimeIndex(["2018-06-04 10:00:00", NaT], dtype=dtype)
        result = index.date

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
    )
    def test_dti_time2(self, dtype):
        """.time propagates NaT, with or without a timezone."""
        # Regression test for GH#21267
        expected = np.array([time(10, 20, 30), NaT])

        index = DatetimeIndex(["2018-06-04 10:20:30", NaT], dtype=dtype)
        result = index.time

        tm.assert_numpy_array_equal(result, expected)

    def test_dti_timetz(self, tz_naive_fixture):
        """.timetz carries the index's tzinfo on each time object."""
        # GH#21358
        tz = timezones.maybe_get_tz(tz_naive_fixture)

        expected = np.array([time(10, 20, 30, tzinfo=tz), NaT])

        index = DatetimeIndex(["2018-06-04 10:20:30", NaT], tz=tz)
        result = index.timetz

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "field",
        [
            "dayofweek",
            "day_of_week",
            "dayofyear",
            "day_of_year",
            "quarter",
            "days_in_month",
            "is_month_start",
            "is_month_end",
            "is_quarter_start",
            "is_quarter_end",
            "is_year_start",
            "is_year_end",
        ],
    )
    def test_dti_timestamp_fields(self, field):
        """Index-level field accessors agree with the same field on a Timestamp."""
        # extra fields from DatetimeIndex like quarter and week
        idx = date_range("2020-01-01", periods=10)
        expected = getattr(idx, field)[-1]

        result = getattr(Timestamp(idx[-1]), field)
        assert result == expected

    def test_dti_nanosecond(self):
        """.nanosecond returns an int32 Index of the nanosecond components."""
        dti = DatetimeIndex(np.arange(10))
        expected = Index(np.arange(10, dtype=np.int32))

        tm.assert_index_equal(dti.nanosecond, expected)

    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
    def test_dti_hour_tzaware(self, prefix):
        """.hour reflects wall-clock time in the index's timezone."""
        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
        rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern")
        assert (rng.hour == 0).all()

        # a more unusual time zone, GH#1946
        dr = date_range(
            "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan"
        )

        expected = Index(np.arange(10, dtype=np.int32))
        tm.assert_index_equal(dr.hour, expected)

    # GH#12806
    # error: Unsupported operand types for + ("List[None]" and "List[str]")
    @pytest.mark.parametrize(
        "time_locale", [None] + tm.get_locales()  # type: ignore[operator]
    )
    def test_day_name_month_name(self, time_locale):
        """day_name/month_name honor the locale and propagate NaT as NaN."""
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day-name and month_name should
            # return the english attributes
            expected_days = [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        # GH#11128
        dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
            name = name.capitalize()
            assert dti.day_name(locale=time_locale)[day] == name
            assert dti.day_name(locale=None)[day] == eng_name
            ts = Timestamp(datetime(2016, 4, day))
            assert ts.day_name(locale=time_locale) == name
        dti = dti.append(DatetimeIndex([NaT]))
        assert np.isnan(dti.day_name(locale=time_locale)[-1])
        ts = Timestamp(NaT)
        assert np.isnan(ts.day_name(locale=time_locale))

        # GH#12805
        dti = date_range(freq="ME", start="2012", end="2013")
        result = dti.month_name(locale=time_locale)
        expected = Index([month.capitalize() for month in expected_months])

        # work around different normalization schemes GH#22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_index_equal(result, expected)

        for item, expected in zip(dti, expected_months):
            result = item.month_name(locale=time_locale)
            expected = expected.capitalize()

            result = unicodedata.normalize("NFD", result)
            # BUG FIX: previously this normalized `result` a second time and
            # assigned it to `expected`, making the assertion below compare
            # the normalized result against itself (always True). Normalize
            # the expected month name instead, matching the GH#22342 intent.
            expected = unicodedata.normalize("NFD", expected)

            assert result == expected
        dti = dti.append(DatetimeIndex([NaT]))
        assert np.isnan(dti.month_name(locale=time_locale)[-1])

    def test_dti_week(self):
        """isocalendar().week matches Timestamp.weekofyear across a tz-aware new year."""
        # GH#6538: Check that DatetimeIndex and its TimeStamp elements
        # return the same weekofyear accessor close to new year w/ tz
        dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
        dates = DatetimeIndex(dates, tz="Europe/Brussels")
        expected = [52, 1, 1]
        assert dates.isocalendar().week.tolist() == expected
        assert [d.weekofyear for d in dates] == expected

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_dti_fields(self, tz):
        """Spot-check values, lengths, and return types of all field accessors."""
        # GH#13303
        dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365, tz=tz)
        assert dti.year[0] == 1998
        assert dti.month[0] == 1
        assert dti.day[0] == 1
        assert dti.hour[0] == 0
        assert dti.minute[0] == 0
        assert dti.second[0] == 0
        assert dti.microsecond[0] == 0
        assert dti.dayofweek[0] == 3

        assert dti.dayofyear[0] == 1
        assert dti.dayofyear[120] == 121

        assert dti.isocalendar().week.iloc[0] == 1
        assert dti.isocalendar().week.iloc[120] == 18

        assert dti.quarter[0] == 1
        assert dti.quarter[120] == 2

        assert dti.days_in_month[0] == 31
        assert dti.days_in_month[90] == 30

        assert dti.is_month_start[0]
        assert not dti.is_month_start[1]
        assert dti.is_month_start[31]
        assert dti.is_quarter_start[0]
        assert dti.is_quarter_start[90]
        assert dti.is_year_start[0]
        assert not dti.is_year_start[364]
        assert not dti.is_month_end[0]
        assert dti.is_month_end[30]
        assert not dti.is_month_end[31]
        assert dti.is_month_end[364]
        assert not dti.is_quarter_end[0]
        assert not dti.is_quarter_end[30]
        assert dti.is_quarter_end[89]
        assert dti.is_quarter_end[364]
        assert not dti.is_year_end[0]
        assert dti.is_year_end[364]

        assert len(dti.year) == 365
        assert len(dti.month) == 365
        assert len(dti.day) == 365
        assert len(dti.hour) == 365
        assert len(dti.minute) == 365
        assert len(dti.second) == 365
        assert len(dti.microsecond) == 365
        assert len(dti.dayofweek) == 365
        assert len(dti.dayofyear) == 365
        assert len(dti.isocalendar()) == 365
        assert len(dti.quarter) == 365
        assert len(dti.is_month_start) == 365
        assert len(dti.is_month_end) == 365
        assert len(dti.is_quarter_start) == 365
        assert len(dti.is_quarter_end) == 365
        assert len(dti.is_year_start) == 365
        assert len(dti.is_year_end) == 365

        dti.name = "name"

        # non boolean accessors -> return Index
        for accessor in DatetimeArray._field_ops:
            res = getattr(dti, accessor)
            assert len(res) == 365
            assert isinstance(res, Index)
            assert res.name == "name"

        # boolean accessors -> return array
        for accessor in DatetimeArray._bool_ops:
            res = getattr(dti, accessor)
            assert len(res) == 365
            assert isinstance(res, np.ndarray)

        # test boolean indexing
        res = dti[dti.is_quarter_start]
        exp = dti[[0, 90, 181, 273]]
        tm.assert_index_equal(res, exp)
        # 1998 is not a leap year, so the mask is all-False
        res = dti[dti.is_leap_year]
        exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns")
        tm.assert_index_equal(res, exp)

    def test_dti_is_year_quarter_start(self):
        """Business quarter-end freq yields quarter/year ends but no starts."""
        dti = date_range(freq="BQE-FEB", start=datetime(1998, 1, 1), periods=4)

        assert sum(dti.is_quarter_start) == 0
        assert sum(dti.is_quarter_end) == 4
        assert sum(dti.is_year_start) == 0
        assert sum(dti.is_year_end) == 1

    def test_dti_is_month_start(self):
        """is_month_start is truthy on the first day of a month."""
        dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])

        assert dti.is_month_start[0] == 1

    def test_dti_is_month_start_custom(self):
        """Custom business day freq makes is_month_start raise."""
        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
        bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
        dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
        msg = "Custom business days is not supported by is_month_start"
        with pytest.raises(ValueError, match=msg):
            dti.is_month_start

View File

@ -0,0 +1,666 @@
from datetime import (
datetime,
timezone,
)
import numpy as np
import pytest
import pytz
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Series,
Timestamp,
bdate_range,
date_range,
)
import pandas._testing as tm
from pandas.tseries.offsets import (
BMonthEnd,
Minute,
MonthEnd,
)
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestDatetimeIndexSetOps:
    """Union/intersection/difference behavior for DatetimeIndex."""

    # timezones exercised by the parametrized setop tests below
    tz = [
        None,
        "UTC",
        "Asia/Tokyo",
        "US/Eastern",
        "dateutil/Asia/Singapore",
        "dateutil/US/Pacific",
    ]

    # TODO: moved from test_datetimelike; dedup with version below
    def test_union2(self, sort):
        """Union of two disjoint halves reconstructs the original range."""
        everything = date_range("2020-01-01", periods=10)
        first = everything[:5]
        second = everything[5:]
        union = first.union(second, sort=sort)
        tm.assert_index_equal(union, everything)

    @pytest.mark.parametrize("box", [np.array, Series, list])
    def test_union3(self, sort, box):
        """Union accepts list-like (array/Series/list) as the other operand."""
        everything = date_range("2020-01-01", periods=10)
        first = everything[:5]
        second = everything[5:]

        # GH 10149 support listlike inputs other than Index objects
        expected = first.union(second, sort=sort)
        case = box(second.values)
        result = first.union(case, sort=sort)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", tz)
    def test_union(self, tz, sort):
        """Union over overlapping/adjacent/empty operands, sorted and not."""
        rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
        expected1 = date_range("1/1/2000", freq="D", periods=10, tz=tz)
        expected1_notsorted = DatetimeIndex(list(other1) + list(rng1))

        rng2 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
        expected2 = date_range("1/1/2000", freq="D", periods=8, tz=tz)
        expected2_notsorted = DatetimeIndex(list(other2) + list(rng2[:3]))

        rng3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other3 = DatetimeIndex([], tz=tz).as_unit("ns")
        expected3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        expected3_notsorted = rng3

        for rng, other, exp, exp_notsorted in [
            (rng1, other1, expected1, expected1_notsorted),
            (rng2, other2, expected2, expected2_notsorted),
            (rng3, other3, expected3, expected3_notsorted),
        ]:
            result_union = rng.union(other, sort=sort)
            tm.assert_index_equal(result_union, exp)

            # reversed operand order: only sort=None re-sorts the result
            result_union = other.union(rng, sort=sort)
            if sort is None:
                tm.assert_index_equal(result_union, exp)
            else:
                tm.assert_index_equal(result_union, exp_notsorted)

    def test_union_coverage(self, sort):
        """Union with self / with an empty slice preserves values and freq."""
        idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
        ordered = DatetimeIndex(idx.sort_values(), freq="infer")
        result = ordered.union(idx, sort=sort)
        tm.assert_index_equal(result, ordered)

        result = ordered[:0].union(ordered, sort=sort)
        tm.assert_index_equal(result, ordered)
        assert result.freq == ordered.freq

    def test_union_bug_1730(self, sort):
        """Union of ranges with different (3h vs 4h) frequencies."""
        rng_a = date_range("1/1/2012", periods=4, freq="3h")
        rng_b = date_range("1/1/2012", periods=4, freq="4h")

        result = rng_a.union(rng_b, sort=sort)
        exp = list(rng_a) + list(rng_b[1:])
        if sort is None:
            exp = DatetimeIndex(sorted(exp))
        else:
            exp = DatetimeIndex(exp)
        tm.assert_index_equal(result, exp)

    def test_union_bug_1745(self, sort):
        """Union of non-monotonic microsecond-precision indexes."""
        left = DatetimeIndex(["2012-05-11 15:19:49.695000"])
        right = DatetimeIndex(
            [
                "2012-05-29 13:04:21.322000",
                "2012-05-11 15:27:24.873000",
                "2012-05-11 15:31:05.350000",
            ]
        )

        result = left.union(right, sort=sort)
        exp = DatetimeIndex(
            [
                "2012-05-11 15:19:49.695000",
                "2012-05-29 13:04:21.322000",
                "2012-05-11 15:27:24.873000",
                "2012-05-11 15:31:05.350000",
            ]
        )
        if sort is None:
            exp = exp.sort_values()
        tm.assert_index_equal(result, exp)

    def test_union_bug_4564(self, sort):
        """Union of a daily range with itself shifted by 15 minutes."""
        from pandas import DateOffset

        left = date_range("2013-01-01", "2013-02-01")
        right = left + DateOffset(minutes=15)

        result = left.union(right, sort=sort)
        exp = list(left) + list(right)
        if sort is None:
            exp = DatetimeIndex(sorted(exp))
        else:
            exp = DatetimeIndex(exp)
        tm.assert_index_equal(result, exp)

    def test_union_freq_both_none(self, sort):
        """Union of two freq-less indexes keeps freq None."""
        # GH11086
        expected = bdate_range("20150101", periods=10)
        expected._data.freq = None

        result = expected.union(expected, sort=sort)
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    def test_union_freq_infer(self):
        """Union infers a freq even when neither operand has one."""
        # When taking the union of two DatetimeIndexes, we infer
        # a freq even if the arguments don't have freq.  This matches
        # TimedeltaIndex behavior.
        dti = date_range("2016-01-01", periods=5)
        left = dti[[0, 1, 3, 4]]
        right = dti[[2, 3, 1]]

        assert left.freq is None
        assert right.freq is None

        result = left.union(right)
        tm.assert_index_equal(result, dti)
        assert result.freq == "D"

    def test_union_dataframe_index(self):
        """DataFrame construction from two Series unions their indexes."""
        rng1 = date_range("1/1/1999", "1/1/2012", freq="MS")
        s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1)

        rng2 = date_range("1/1/1980", "12/1/2001", freq="MS")
        s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2)
        df = DataFrame({"s1": s1, "s2": s2})

        exp = date_range("1/1/1980", "1/1/2012", freq="MS")
        tm.assert_index_equal(df.index, exp)

    def test_union_with_DatetimeIndex(self, sort):
        """Union of an int64 Index and a DatetimeIndex works in both orders."""
        i1 = Index(np.arange(0, 20, 2, dtype=np.int64))
        i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")
        # Works
        i1.union(i2, sort=sort)
        # Fails with "AttributeError: can't set attribute"
        i2.union(i1, sort=sort)

    def test_union_same_timezone_different_units(self):
        """Union of same-tz indexes with different units uses the finer unit."""
        # GH 55238
        idx1 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("ms")
        idx2 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
        result = idx1.union(idx2)
        expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
        tm.assert_index_equal(result, expected)

    # TODO: moved from test_datetimelike; de-duplicate with version below
    def test_intersection2(self):
        """Intersection with a subset, with list-likes, and with a non-datetime Index."""
        first = date_range("2020-01-01", periods=10)
        second = first[5:]
        intersect = first.intersection(second)
        tm.assert_index_equal(intersect, second)

        # GH 10149
        cases = [klass(second.values) for klass in [np.array, Series, list]]
        for case in cases:
            result = first.intersection(case)
            tm.assert_index_equal(result, second)

        # intersecting with string values yields an empty object Index
        third = Index(["a", "b", "c"])
        result = first.intersection(third)
        expected = Index([], dtype=object)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
    )
    def test_intersection(self, tz, sort):
        """Intersection: name preservation, empty results, non-monotonic inputs."""
        # GH 4690 (with tz)
        base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")

        # if target has the same name, it is preserved
        rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx")
        expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx")

        # if target name is different, it will be reset
        rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other")
        expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None)

        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx")
        expected4 = DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]")

        for rng, expected in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng)
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

        # non-monotonic
        base = DatetimeIndex(
            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx"
        ).as_unit("ns")

        rng2 = DatetimeIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx"
        ).as_unit("ns")
        expected2 = DatetimeIndex(
            ["2011-01-04", "2011-01-02"], tz=tz, name="idx"
        ).as_unit("ns")

        rng3 = DatetimeIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
            tz=tz,
            name="other",
        ).as_unit("ns")
        expected3 = DatetimeIndex(
            ["2011-01-04", "2011-01-02"], tz=tz, name=None
        ).as_unit("ns")

        # GH 7880
        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
        expected4 = DatetimeIndex([], tz=tz, name="idx").as_unit("ns")
        assert expected4.freq is None

        for rng, expected in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

    # parametrize over both anchored and non-anchored freqs, as they
    # have different code paths
    @pytest.mark.parametrize("freq", ["min", "B"])
    def test_intersection_empty(self, tz_aware_fixture, freq):
        """Intersections yielding an empty index preserve freq where applicable."""
        # empty same freq GH2129
        tz = tz_aware_fixture
        rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz)
        result = rng[0:0].intersection(rng)
        assert len(result) == 0
        assert result.freq == rng.freq

        result = rng.intersection(rng[0:0])
        assert len(result) == 0
        assert result.freq == rng.freq

        # no overlap GH#33604
        check_freq = freq != "min"  # We don't preserve freq on non-anchored offsets
        result = rng[:3].intersection(rng[-3:])
        tm.assert_index_equal(result, rng[:0])
        if check_freq:
            # We don't preserve freq on non-anchored offsets
            assert result.freq == rng.freq

        # swapped left and right
        result = rng[-3:].intersection(rng[:3])
        tm.assert_index_equal(result, rng[:0])
        if check_freq:
            # We don't preserve freq on non-anchored offsets
            assert result.freq == rng.freq

    def test_intersection_bug_1708(self):
        """Ranges offset by 1h with a 12h freq share no points."""
        from pandas import DateOffset

        index_1 = date_range("1/1/2012", periods=4, freq="12h")
        index_2 = index_1 + DateOffset(hours=1)

        result = index_1.intersection(index_2)
        assert len(result) == 0

    @pytest.mark.parametrize("tz", tz)
    def test_difference(self, tz, sort):
        """Difference against disjoint, overlapping, and empty operands."""
        rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]

        rng1 = DatetimeIndex(rng_dates, tz=tz)
        other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
        expected1 = DatetimeIndex(rng_dates, tz=tz)

        rng2 = DatetimeIndex(rng_dates, tz=tz)
        other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
        expected2 = DatetimeIndex(rng_dates[:3], tz=tz)

        rng3 = DatetimeIndex(rng_dates, tz=tz)
        other3 = DatetimeIndex([], tz=tz)
        expected3 = DatetimeIndex(rng_dates, tz=tz)

        for rng, other, expected in [
            (rng1, other1, expected1),
            (rng2, other2, expected2),
            (rng3, other3, expected3),
        ]:
            result_diff = rng.difference(other, sort)
            if sort is None and len(other):
                # We don't sort (yet?) when empty GH#24959
                expected = expected.sort_values()
            tm.assert_index_equal(result_diff, expected)

    def test_difference_freq(self, sort):
        """Difference drops freq unless the result is a contiguous sub-range."""
        # GH14323: difference of DatetimeIndex should not preserve frequency
        index = date_range("20160920", "20160925", freq="D")
        other = date_range("20160921", "20160924", freq="D")
        expected = DatetimeIndex(["20160920", "20160925"], dtype="M8[ns]", freq=None)
        idx_diff = index.difference(other, sort)
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

        # preserve frequency when the difference is a contiguous
        # subset of the original range
        other = date_range("20160922", "20160925", freq="D")
        idx_diff = index.difference(other, sort)
        expected = DatetimeIndex(["20160920", "20160921"], dtype="M8[ns]", freq="D")
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

    def test_datetimeindex_diff(self, sort):
        """Difference of two overlapping quarterly ranges has the extra points."""
        dti1 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=100)
        dti2 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=98)
        assert len(dti1.difference(dti2, sort)) == 2

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"])
    def test_setops_preserve_freq(self, tz):
        """Set ops preserve name/freq/tz when the result stays regular."""
        rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz)

        result = rng[:50].union(rng[50:100])
        assert result.name == rng.name
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        result = rng[:50].union(rng[30:100])
        assert result.name == rng.name
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        # a gap in the union loses the freq
        result = rng[:50].union(rng[60:100])
        assert result.name == rng.name
        assert result.freq is None
        assert result.tz == rng.tz

        result = rng[:50].intersection(rng[25:75])
        assert result.name == rng.name
        assert result.freqstr == "D"
        assert result.tz == rng.tz

        # mixing with a freq-less, differently-named index resets the name
        nofreq = DatetimeIndex(list(rng[25:75]), name="other")
        result = rng[:50].union(nofreq)
        assert result.name is None
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        result = rng[:50].intersection(nofreq)
        assert result.name is None
        assert result.freq == rng.freq
        assert result.tz == rng.tz

    def test_intersection_non_tick_no_fastpath(self):
        """Interleaved slices of a quarterly index intersect to empty."""
        # GH#42104
        dti = DatetimeIndex(
            [
                "2018-12-31",
                "2019-03-31",
                "2019-06-30",
                "2019-09-30",
                "2019-12-31",
                "2020-03-31",
            ],
            freq="QE-DEC",
        )
        result = dti[::2].intersection(dti[1::2])
        expected = dti[:0]
        tm.assert_index_equal(result, expected)

    def test_dti_intersection(self):
        """Intersection of reversed tz-aware slices keeps the timezone."""
        rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")

        left = rng[10:90][::-1]
        right = rng[20:80][::-1]

        assert left.tz == rng.tz
        result = left.intersection(right)
        assert result.tz == left.tz

    # Note: not difference, as there is no symmetry requirement there
    @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
    def test_dti_setop_aware(self, setop):
        """Set ops on different-tz aware indexes cast both to UTC."""
        # non-overlapping
        # GH#39328 as of 2.0 we cast these to UTC instead of object
        rng = date_range("2012-11-15 00:00:00", periods=6, freq="h", tz="US/Central")

        rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="h", tz="US/Eastern")

        result = getattr(rng, setop)(rng2)

        left = rng.tz_convert("UTC")
        right = rng2.tz_convert("UTC")
        expected = getattr(left, setop)(right)
        tm.assert_index_equal(result, expected)
        assert result.tz == left.tz
        if len(result):
            assert result[0].tz is timezone.utc
            assert result[-1].tz is timezone.utc

    def test_dti_union_mixed(self):
        """Union of naive-with-NaT and tz-aware indexes falls back to object."""
        # GH#21671
        rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT])
        rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo")
        result = rng.union(rng2)
        expected = Index(
            [
                Timestamp("2011-01-01"),
                pd.NaT,
                Timestamp("2012-01-01", tz="Asia/Tokyo"),
                Timestamp("2012-01-02", tz="Asia/Tokyo"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(result, expected)
class TestBusinessDatetimeIndex:
    def test_union(self, sort):
        """Union of business-day slices returns a DatetimeIndex in all layouts."""
        rng = bdate_range(START, END)
        # overlapping
        left = rng[:10]
        right = rng[5:10]
        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]
        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, Index)

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]
        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # order does not matter
        if sort is None:
            tm.assert_index_equal(right.union(left, sort=sort), the_union)
        else:
            expected = DatetimeIndex(list(right) + list(left))
            tm.assert_index_equal(right.union(left, sort=sort), expected)

        # overlapping, but different offset
        rng = date_range(START, END, freq=BMonthEnd())
        the_union = rng.union(rng, sort=sort)
        assert isinstance(the_union, DatetimeIndex)
def test_union_not_cacheable(self, sort):
rng = date_range("1/1/2000", periods=50, freq=Minute())
rng1 = rng[10:]
rng2 = rng[:25]
the_union = rng1.union(rng2, sort=sort)
if sort is None:
tm.assert_index_equal(the_union, rng)
else:
expected = DatetimeIndex(list(rng[10:]) + list(rng[:10]))
tm.assert_index_equal(the_union, expected)
rng1 = rng[10:]
rng2 = rng[15:35]
the_union = rng1.union(rng2, sort=sort)
expected = rng[10:]
tm.assert_index_equal(the_union, expected)
def test_intersection(self):
rng = date_range("1/1/2000", periods=50, freq=Minute())
rng1 = rng[10:]
rng2 = rng[:25]
the_int = rng1.intersection(rng2)
expected = rng[10:25]
tm.assert_index_equal(the_int, expected)
assert isinstance(the_int, DatetimeIndex)
assert the_int.freq == rng.freq
the_int = rng1.intersection(rng2)
tm.assert_index_equal(the_int, expected)
# non-overlapping
the_int = rng[:10].intersection(rng[10:])
expected = DatetimeIndex([]).as_unit("ns")
tm.assert_index_equal(the_int, expected)
def test_intersection_bug(self):
# GH #771
a = bdate_range("11/30/2011", "12/31/2011")
b = bdate_range("12/10/2011", "12/20/2011")
result = a.intersection(b)
tm.assert_index_equal(result, b)
assert result.freq == b.freq
def test_intersection_list(self):
# GH#35876
# values is not an Index -> no name -> retain "a"
values = [Timestamp("2020-01-01"), Timestamp("2020-02-01")]
idx = DatetimeIndex(values, name="a")
res = idx.intersection(values)
tm.assert_index_equal(res, idx)
def test_month_range_union_tz_pytz(self, sort):
tz = pytz.timezone("US/Eastern")
early_start = datetime(2011, 1, 1)
early_end = datetime(2011, 3, 1)
late_start = datetime(2011, 3, 1)
late_end = datetime(2011, 5, 1)
early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
early_dr.union(late_dr, sort=sort)
@td.skip_if_windows
def test_month_range_union_tz_dateutil(self, sort):
from pandas._libs.tslibs.timezones import dateutil_gettz
tz = dateutil_gettz("US/Eastern")
early_start = datetime(2011, 1, 1)
early_end = datetime(2011, 3, 1)
late_start = datetime(2011, 3, 1)
late_end = datetime(2011, 5, 1)
early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
early_dr.union(late_dr, sort=sort)
@pytest.mark.parametrize("sort", [False, None])
def test_intersection_duplicates(self, sort):
# GH#38196
idx1 = Index(
[
Timestamp("2019-12-13"),
Timestamp("2019-12-12"),
Timestamp("2019-12-12"),
]
)
result = idx1.intersection(idx1, sort=sort)
expected = Index([Timestamp("2019-12-13"), Timestamp("2019-12-12")])
tm.assert_index_equal(result, expected)
class TestCustomDatetimeIndex:
    """Set-op tests mirroring TestBusinessDatetimeIndex, using CustomBusinessDay ("C")."""

    def test_union(self, sort):
        # overlapping
        rng = bdate_range(START, END, freq="C")
        left = rng[:10]
        right = rng[5:10]
        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]
        # pass sort by keyword for consistency with every other union call in
        # this class and its sibling (was positional; behavior is identical
        # since `sort` is the second parameter of union)
        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, Index)

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]
        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # order does not matter
        if sort is None:
            tm.assert_index_equal(right.union(left, sort=sort), the_union)

        # overlapping, but different offset
        rng = date_range(START, END, freq=BMonthEnd())
        the_union = rng.union(rng, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

    def test_intersection_bug(self):
        # GH #771
        a = bdate_range("11/30/2011", "12/31/2011", freq="C")
        b = bdate_range("12/10/2011", "12/20/2011", freq="C")
        result = a.intersection(b)
        tm.assert_index_equal(result, b)
        assert result.freq == b.freq

    @pytest.mark.parametrize(
        "tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)]
    )
    def test_intersection_dst_transition(self, tz):
        # GH 46702: Europe/Berlin has DST transition
        idx1 = date_range("2020-03-27", periods=5, freq="D", tz=tz)
        idx2 = date_range("2020-03-30", periods=5, freq="D", tz=tz)
        result = idx1.intersection(idx2)
        expected = date_range("2020-03-30", periods=2, freq="D", tz=tz)
        tm.assert_index_equal(result, expected)

        # GH#45863 same problem for union
        index1 = date_range("2021-10-28", periods=3, freq="D", tz="Europe/London")
        index2 = date_range("2021-10-30", periods=4, freq="D", tz="Europe/London")
        result = index1.union(index2)
        expected = date_range("2021-10-28", periods=6, freq="D", tz="Europe/London")
        tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,251 @@
"""
Tests for DatetimeIndex timezone-related methods
"""
from datetime import (
datetime,
timedelta,
timezone,
tzinfo,
)
from dateutil.tz import gettz
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs import (
conversion,
timezones,
)
import pandas as pd
from pandas import (
DatetimeIndex,
Timestamp,
bdate_range,
date_range,
isna,
to_datetime,
)
import pandas._testing as tm
class FixedOffset(tzinfo):
    """Fixed offset in minutes east from UTC."""

    def __init__(self, offset, name) -> None:
        # Convert the minute count to a timedelta once, up front.
        self._offset_delta = timedelta(minutes=offset)
        self._label = name

    def utcoffset(self, dt):
        # Constant offset regardless of the datetime passed in.
        return self._offset_delta

    def tzname(self, dt):
        return self._label

    def dst(self, dt):
        # This zone never observes daylight saving time.
        return timedelta(0)


# Shared instance: a -330 minute offset with no tz name.
fixed_off_no_name = FixedOffset(-330, None)
class TestDatetimeIndexTimezones:
    # -------------------------------------------------------------
    # Unsorted
    # Timezone-related DatetimeIndex behavior. `tz_naive_fixture` and `unit`
    # are pytest fixtures supplied by the suite's conftest.

    def test_dti_drop_dont_lose_tz(self):
        # GH#2621
        ind = date_range("2012-12-01", periods=10, tz="utc")
        ind = ind.drop(ind[-1])
        # dropping an element must not strip the timezone
        assert ind.tz is not None

    def test_dti_tz_conversion_freq(self, tz_naive_fixture):
        # GH25241: tz_localize/tz_convert must preserve freq
        t3 = DatetimeIndex(["2019-01-01 10:00"], freq="h")
        assert t3.tz_localize(tz=tz_naive_fixture).freq == t3.freq
        t4 = DatetimeIndex(["2019-01-02 12:00"], tz="UTC", freq="min")
        assert t4.tz_convert(tz="UTC").freq == t4.freq

    def test_drop_dst_boundary(self):
        # see gh-18031
        tz = "Europe/Brussels"
        freq = "15min"

        start = Timestamp("201710290100", tz=tz)
        end = Timestamp("201710290300", tz=tz)
        index = date_range(start=start, end=end, freq=freq)

        # the 02:00-02:45 wall times occur twice on this DST fall-back day;
        # the ambiguous flags disambiguate first (True) vs second (False) pass
        expected = DatetimeIndex(
            [
                "201710290115",
                "201710290130",
                "201710290145",
                "201710290200",
                "201710290215",
                "201710290230",
                "201710290245",
                "201710290200",
                "201710290215",
                "201710290230",
                "201710290245",
                "201710290300",
            ],
            dtype="M8[ns, Europe/Brussels]",
            freq=freq,
            ambiguous=[
                True,
                True,
                True,
                True,
                True,
                True,
                True,
                False,
                False,
                False,
                False,
                False,
            ],
        )
        result = index.drop(index[0])
        tm.assert_index_equal(result, expected)

    def test_date_range_localize(self, unit):
        rng = date_range(
            "3/11/2012 03:00", periods=15, freq="h", tz="US/Eastern", unit=unit
        )
        rng2 = DatetimeIndex(
            ["3/11/2012 03:00", "3/11/2012 04:00"], dtype=f"M8[{unit}, US/Eastern]"
        )
        rng3 = date_range("3/11/2012 03:00", periods=15, freq="h", unit=unit)
        rng3 = rng3.tz_localize("US/Eastern")

        # building tz-aware directly equals building naive then localizing
        tm.assert_index_equal(rng._with_freq(None), rng3)

        # DST transition time
        val = rng[0]
        exp = Timestamp("3/11/2012 03:00", tz="US/Eastern")

        assert val.hour == 3
        assert exp.hour == 3
        assert val == exp  # same UTC value
        tm.assert_index_equal(rng[:2], rng2)

    def test_date_range_localize2(self, unit):
        # Right before the DST transition
        rng = date_range(
            "3/11/2012 00:00", periods=2, freq="h", tz="US/Eastern", unit=unit
        )
        rng2 = DatetimeIndex(
            ["3/11/2012 00:00", "3/11/2012 01:00"],
            dtype=f"M8[{unit}, US/Eastern]",
            freq="h",
        )
        tm.assert_index_equal(rng, rng2)
        exp = Timestamp("3/11/2012 00:00", tz="US/Eastern")
        assert exp.hour == 0
        assert rng[0] == exp
        exp = Timestamp("3/11/2012 01:00", tz="US/Eastern")
        assert exp.hour == 1
        assert rng[1] == exp

        rng = date_range(
            "3/11/2012 00:00", periods=10, freq="h", tz="US/Eastern", unit=unit
        )
        # 02:00 does not exist on this spring-forward day, so index 2 is 03:00
        assert rng[2].hour == 3

    def test_timestamp_equality_different_timezones(self):
        utc_range = date_range("1/1/2000", periods=20, tz="UTC")
        eastern_range = utc_range.tz_convert("US/Eastern")
        berlin_range = utc_range.tz_convert("Europe/Berlin")

        # converted stamps represent the same instants, so all compare equal
        for a, b, c in zip(utc_range, eastern_range, berlin_range):
            assert a == b
            assert b == c
            assert a == c

        assert (utc_range == eastern_range).all()
        assert (utc_range == berlin_range).all()
        assert (berlin_range == eastern_range).all()

    def test_dti_equals_with_tz(self):
        left = date_range("1/1/2011", periods=100, freq="h", tz="utc")
        right = date_range("1/1/2011", periods=100, freq="h", tz="US/Eastern")

        # same wall times, different zones -> different instants -> not equal
        assert not left.equals(right)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_tz_nat(self, tzstr):
        idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT])

        assert isna(idx[1])
        assert idx[0].tzinfo is not None

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_utc_box_timestamp_and_localize(self, tzstr):
        tz = timezones.maybe_get_tz(tzstr)

        rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc")
        rng_eastern = rng.tz_convert(tzstr)

        expected = rng[-1].astimezone(tz)

        stamp = rng_eastern[-1]
        assert stamp == expected
        assert stamp.tzinfo == expected.tzinfo

        # right tzinfo
        rng = date_range("3/13/2012", "3/14/2012", freq="h", tz="utc")
        rng_eastern = rng.tz_convert(tzstr)
        # test not valid for dateutil timezones.
        # assert 'EDT' in repr(rng_eastern[0].tzinfo)
        assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr(
            rng_eastern[0].tzinfo
        )

    @pytest.mark.parametrize("tz", [pytz.timezone("US/Central"), gettz("US/Central")])
    def test_with_tz(self, tz):
        # just want it to work
        start = datetime(2011, 3, 12, tzinfo=pytz.utc)
        dr = bdate_range(start, periods=50, freq=pd.offsets.Hour())
        assert dr.tz is pytz.utc

        # DateRange with naive datetimes
        dr = bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc)
        dr = bdate_range("1/1/2005", "1/1/2009", tz=tz)

        # normalized
        central = dr.tz_convert(tz)
        assert central.tz is tz
        naive = central[0].to_pydatetime().replace(tzinfo=None)
        comp = conversion.localize_pydatetime(naive, tz).tzinfo
        assert central[0].tz is comp

        # compare vs a localized tz
        naive = dr[0].to_pydatetime().replace(tzinfo=None)
        comp = conversion.localize_pydatetime(naive, tz).tzinfo
        assert central[0].tz is comp

        # datetimes with tzinfo set
        dr = bdate_range(
            datetime(2005, 1, 1, tzinfo=pytz.utc), datetime(2009, 1, 1, tzinfo=pytz.utc)
        )
        msg = "Start and end cannot both be tz-aware with different timezones"
        with pytest.raises(Exception, match=msg):
            bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), "1/1/2009", tz=tz)

    @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
    def test_dti_convert_tz_aware_datetime_datetime(self, tz):
        # GH#1581
        dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]

        dates_aware = [conversion.localize_pydatetime(x, tz) for x in dates]
        result = DatetimeIndex(dates_aware).as_unit("ns")
        assert timezones.tz_compare(result.tz, tz)

        converted = to_datetime(dates_aware, utc=True).as_unit("ns")
        ex_vals = np.array([Timestamp(x).as_unit("ns")._value for x in dates_aware])
        tm.assert_numpy_array_equal(converted.asi8, ex_vals)
        assert converted.tz is timezone.utc

View File

@ -0,0 +1,254 @@
import re
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
IntervalDtype,
)
from pandas import (
CategoricalIndex,
Index,
IntervalIndex,
NaT,
Timedelta,
Timestamp,
interval_range,
)
import pandas._testing as tm
class AstypeTests:
    """Tests common to IntervalIndex with any subtype"""

    def test_astype_idempotent(self, index):
        # Casting to "interval" or to the index's own dtype is a round-trip.
        for target in ("interval", index.dtype):
            res = index.astype(target)
            tm.assert_index_equal(res, index)

    def test_astype_object(self, index):
        exp = Index(index.values, dtype="object")
        res = index.astype(object)
        tm.assert_index_equal(res, exp)
        # object-dtype result is no longer considered equal to the interval index
        assert not res.equals(index)

    def test_astype_category(self, index):
        exp = CategoricalIndex(index.values)
        res = index.astype("category")
        tm.assert_index_equal(res, exp)
        tm.assert_index_equal(index.astype(CategoricalDtype()), exp)

        # non-default params
        cats = index.dropna().unique().values[:-1]
        ordered_dtype = CategoricalDtype(categories=cats, ordered=True)
        exp = CategoricalIndex(index.values, categories=cats, ordered=True)
        tm.assert_index_equal(index.astype(ordered_dtype), exp)

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        # Non-interval target dtypes are rejected outright.
        with pytest.raises(TypeError, match="Cannot cast IntervalIndex to dtype"):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        # An unparseable dtype string raises from the dtype machinery.
        msg = "data type [\"']fake_dtype[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            index.astype("fake_dtype")
class TestIntSubtype(AstypeTests):
    """Tests specific to IntervalIndex with integer-like subtype"""

    # one signed and one unsigned index exercised via the `index` fixture
    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        # converting the subtype converts left/right edges elementwise
        dtype = IntervalDtype(subtype, index.closed)
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        # int64 <-> uint64 conversion in both directions
        index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        dtype = IntervalDtype(subtype_end, index.closed)
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype_end),
            index.right.astype(subtype_end),
            closed=index.closed,
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        # int64 -> uint64 fails with negative values
        index = interval_range(-10, 10)
        dtype = IntervalDtype("uint64", "right")

        # Until we decide what the exception message _should_ be, we
        # assert something that it should _not_ be.
        # We should _not_ be getting a message suggesting that the -10
        # has been wrapped around to a large-positive integer
        msg = "^(?!(left side of interval must be <= right side))"
        with pytest.raises(ValueError, match=msg):
            index.astype(dtype)
class TestFloatSubtype(AstypeTests):
    """Tests specific to IntervalIndex with float subtype"""

    # one plain float range and one containing NaN intervals
    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        index = interval_range(0.0, 10.0)
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

        # raises with NA
        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=msg):
            index.insert(0, np.nan).astype(dtype)

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer_with_non_integer_borders(self, subtype):
        # fractional borders are truncated by the elementwise astype
        index = interval_range(0.0, 3.0, freq=0.25)
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    def test_subtype_integer_errors(self):
        # float64 -> uint64 fails with negative values
        index = interval_range(-10.0, 10.0)
        dtype = IntervalDtype("uint64", "right")
        msg = re.escape(
            "Cannot convert interval[float64, right] to interval[uint64, right]; "
            "subtypes are incompatible"
        )
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        # float -> datetimelike subtype is not allowed
        dtype = IntervalDtype(subtype, "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_astype_category(self, index):
        # same as the base test; NaN-containing fixtures emit a cast warning
        super().test_astype_category(index)
class TestDatetimelikeSubtype(AstypeTests):
    """Tests specific to IntervalIndex with datetime-like subtype"""

    # datetime64 (naive and tz-aware) and timedelta64 fixtures, with and
    # without NaT entries
    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        dtype = IntervalDtype(subtype, "right")

        if subtype != "int64":
            # datetimelike -> uint64 is rejected
            msg = (
                r"Cannot convert interval\[(timedelta64|datetime64)\[ns.*\], .*\] "
                r"to interval\[uint64, .*\]"
            )
            with pytest.raises(TypeError, match=msg):
                index.astype(dtype)
            return

        result = index.astype(dtype)
        new_left = index.left.astype(subtype)
        new_right = index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed)
        tm.assert_index_equal(result, expected)

    def test_subtype_float(self, index):
        dtype = IntervalDtype("float64", "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    def test_subtype_datetimelike(self):
        # datetime -> timedelta raises
        dtype = IntervalDtype("timedelta64[ns]", "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"

        index = interval_range(Timestamp("2018-01-01"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

        index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

        # timedelta -> datetime raises
        dtype = IntervalDtype("datetime64[ns]", "right")
        index = interval_range(Timedelta("0 days"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

View File

@ -0,0 +1,535 @@
from functools import partial
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas.core.dtypes.common import is_unsigned_integer_dtype
from pandas.core.dtypes.dtypes import IntervalDtype
from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
Index,
Interval,
IntervalIndex,
date_range,
notna,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import IntervalArray
import pandas.core.common as com
@pytest.fixture(params=[None, "foo"])
def name(request):
    # Fixture covering both the unnamed and a named case for the `name` kwarg.
    return request.param
class ConstructorTests:
    """
    Common tests for all variations of IntervalIndex construction. Input data
    to be supplied in breaks format, then converted by the subclass method
    get_kwargs_from_breaks to the expected format.
    """

    # (breaks, expected subtype) pairs covering int, uint, float,
    # datetime64 (naive and tz-aware) and timedelta64 inputs
    @pytest.fixture(
        params=[
            ([3, 14, 15, 92, 653], np.int64),
            (np.arange(10, dtype="int64"), np.int64),
            (Index(np.arange(-10, 11, dtype=np.int64)), np.int64),
            (Index(np.arange(10, 31, dtype=np.uint64)), np.uint64),
            (Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
            (date_range("20180101", periods=10), "<M8[ns]"),
            (
                date_range("20180101", periods=10, tz="US/Eastern"),
                "datetime64[ns, US/Eastern]",
            ),
            (timedelta_range("1 day", periods=10), "<m8[ns]"),
        ]
    )
    def breaks_and_expected_subtype(self, request):
        return request.param

    def test_constructor(self, constructor, breaks_and_expected_subtype, closed, name):
        breaks, expected_subtype = breaks_and_expected_subtype

        result_kwargs = self.get_kwargs_from_breaks(breaks, closed)

        result = constructor(closed=closed, name=name, **result_kwargs)

        assert result.closed == closed
        assert result.name == name
        assert result.dtype.subtype == expected_subtype
        # left edges are all breaks but the last; right edges all but the first
        tm.assert_index_equal(result.left, Index(breaks[:-1], dtype=expected_subtype))
        tm.assert_index_equal(result.right, Index(breaks[1:], dtype=expected_subtype))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "float64"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "datetime64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "timedelta64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.float64), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**expected_kwargs)

        result_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        # both the dtype object and its string form must be accepted
        for dtype in (iv_dtype, str(iv_dtype)):
            result = constructor(dtype=dtype, **result_kwargs)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            Index([0, 1, 2, 3, 4], dtype=np.int64),
            Index([0, 1, 2, 3, 4], dtype=np.uint64),
            Index([0, 1, 2, 3, 4], dtype=np.float64),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        iv_dtype = IntervalDtype(breaks.dtype)

        result_kwargs = self.get_kwargs_from_breaks(breaks)

        for dtype in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(None):
                result = constructor(dtype=dtype, closed="left", **result_kwargs)
            assert result.dtype.closed == "left"

    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        # all-NaN breaks infer a float64 subtype
        expected_subtype = np.float64
        expected_values = np.array(breaks[:-1], dtype=object)

        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="uint64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_values = np.array([], dtype=object)
        # a plain empty list has no dtype attribute and defaults to int64
        expected_subtype = getattr(breaks, "dtype", np.int64)

        assert result.empty
        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253
        breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(breaks)

        # numeric categorical breaks construct the same index as raw breaks
        cat_breaks = cat_constructor(breaks)
        result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
        result = constructor(**result_kwargs)
        tm.assert_index_equal(result, expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        msg = "closed must be one of 'right', 'left', 'both', 'neither'"
        with pytest.raises(ValueError, match=msg):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        msg = "dtype must be an IntervalDtype, got int64"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="int64", **filler)

        # invalid dtype
        msg = "data type [\"']invalid[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        periods = period_range("2000-01-01", periods=10)
        periods_kwargs = self.get_kwargs_from_breaks(periods)
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=msg):
            constructor(**periods_kwargs)

        # decreasing values
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            constructor(**decreasing_kwargs)
class TestFromArrays(ConstructorTests):
    """Tests specific to IntervalIndex.from_arrays"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by IntervalIndex.from_arrays
        """
        return {"left": breaks[:-1], "right": breaks[1:]}

    def test_constructor_errors(self):
        # GH 19016: categorical data
        data = Categorical(list("01234abcde"), ordered=True)
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_arrays(data[:-1], data[1:])

        # unequal length
        left = [0, 1, 2]
        right = [2, 3]
        msg = "left and right must have the same length"
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays(left, right)

    @pytest.mark.parametrize(
        "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
    )
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """mixed int/float left/right results in float for both sides"""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)
        result = IntervalIndex.from_arrays(left, right)

        expected_left = Index(left, dtype=np.float64)
        expected_right = Index(right, dtype=np.float64)
        expected_subtype = np.float64
        tm.assert_index_equal(result.left, expected_left)
        tm.assert_index_equal(result.right, expected_right)
        assert result.dtype.subtype == expected_subtype

    @pytest.mark.parametrize("interval_cls", [IntervalArray, IntervalIndex])
    def test_from_arrays_mismatched_datetimelike_resos(self, interval_cls):
        # GH#55714: mismatched resolutions are unified to the finer one ("ms")
        left = date_range("2016-01-01", periods=3, unit="s")
        right = date_range("2017-01-01", periods=3, unit="ms")
        result = interval_cls.from_arrays(left, right)
        expected = interval_cls.from_arrays(left.as_unit("ms"), right)
        tm.assert_equal(result, expected)

        # td64
        left2 = left - left[0]
        right2 = right - left[0]
        result2 = interval_cls.from_arrays(left2, right2)
        expected2 = interval_cls.from_arrays(left2.as_unit("ms"), right2)
        tm.assert_equal(result2, expected2)

        # dt64tz
        left3 = left.tz_localize("UTC")
        right3 = right.tz_localize("UTC")
        result3 = interval_cls.from_arrays(left3, right3)
        expected3 = interval_cls.from_arrays(left3.as_unit("ms"), right3)
        tm.assert_equal(result3, expected3)
class TestFromBreaks(ConstructorTests):
    """Tests specific to IntervalIndex.from_breaks"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by IntervalIndex.from_breaks
        """
        # from_breaks consumes the breaks directly
        return {"breaks": breaks}

    def test_constructor_errors(self):
        # GH 19016: categorical data
        cat_data = Categorical(list("01234abcde"), ordered=True)
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_breaks(cat_data)

    def test_length_one(self):
        """breaks of length one produce an empty IntervalIndex"""
        result = IntervalIndex.from_breaks([0])
        expected = IntervalIndex.from_breaks([])
        tm.assert_index_equal(result, expected)

    def test_left_right_dont_share_data(self):
        # GH#36310: left/right must not alias the same underlying buffer
        arr = IntervalIndex.from_breaks(np.arange(5))._data
        assert arr._left.base is None or arr._left.base is not arr._right.base
class TestFromTuples(ConstructorTests):
    """Tests specific to IntervalIndex.from_tuples"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by IntervalIndex.from_tuples
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant IntervalIndex.from_tuples tests")

        if len(breaks) == 0:
            return {"data": breaks}

        # pair consecutive breaks into (left, right) tuples
        tuples = list(zip(breaks[:-1], breaks[1:]))
        if isinstance(breaks, (list, tuple)):
            return {"data": tuples}
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            return {"data": breaks._constructor(tuples)}
        return {"data": com.asarray_tuplesafe(tuples)}

    def test_constructor_errors(self):
        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        msg = "IntervalIndex.from_tuples received an invalid item, 2"
        with pytest.raises(TypeError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

        # too few/many items
        tuples = [(0, 1), (2,), (3, 4)]
        msg = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
        with pytest.raises(ValueError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

        tuples = [(0, 1), (2, 3, 4), (5, 6)]
        with pytest.raises(ValueError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        # tuple (NA, NA) evaluates the same as NA as an element
        na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
        idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
        idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(idx_na_tuple, idx_na_element)
class TestClassConstructors(ConstructorTests):
    """Tests specific to the IntervalIndex/Index constructors"""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def klass(self, request):
        # We use a separate fixture here to include Index.__new__ with dtype kwarg
        return request.param

    @pytest.fixture
    def constructor(self):
        return IntervalIndex

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by the IntervalIndex/Index constructors
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant for class constructor tests")

        if len(breaks) == 0:
            return {"data": breaks}

        # build Interval scalars from consecutive breaks; NA breaks stay NA
        ivs = [
            Interval(left, right, closed) if notna(left) else left
            for left, right in zip(breaks[:-1], breaks[1:])
        ]

        if isinstance(breaks, list):
            return {"data": ivs}
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            return {"data": breaks._constructor(ivs)}
        return {"data": np.array(ivs, dtype=object)}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """

    def test_constructor_string(self):
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, klass):
        # mismatched closed within intervals with no constructor override
        ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        msg = "intervals must all be closed on the same side"
        with pytest.raises(ValueError, match=msg):
            klass(ivs)

        # scalar
        msg = (
            r"(IntervalIndex|Index)\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=msg):
            klass(5)

        # not an interval; dtype depends on 32bit/windows builds
        msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        with pytest.raises(TypeError, match=msg):
            klass([0, 1])

    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370: explicit closed= overrides what the data would infer
        if isinstance(data, IntervalIndex):
            tuples = data.to_tuples()
        else:
            tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        result = constructor(data, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
        values = values_constructor(intervals)
        result = Index(values, dtype=object)

        assert type(result) is Index
        tm.assert_numpy_array_equal(result.values, np.array(values))

    def test_index_mixed_closed(self):
        # GH27172: mixed closed sides cannot form an IntervalIndex -> object
        intervals = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        result = Index(intervals)
        expected = Index(intervals, dtype=object)
        tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"])
def test_interval_index_subtype(timezone, inclusive_endpoints_fixture):
# GH#46999
dates = date_range("2022", periods=3, tz=timezone)
dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]"
result = IntervalIndex.from_arrays(
["2022-01-01", "2022-01-02"],
["2022-01-02", "2022-01-03"],
closed=inclusive_endpoints_fixture,
dtype=dtype,
)
expected = IntervalIndex.from_arrays(
dates[:-1], dates[1:], closed=inclusive_endpoints_fixture
)
tm.assert_index_equal(result, expected)
def test_dtype_closed_mismatch():
    # GH#38394 closed specified in both dtype and IntervalIndex constructor
    left_dtype = IntervalDtype(np.int64, "left")
    msg = "closed keyword does not match dtype.closed"
    # the same mismatch must raise for both the Index and the array container
    for box in (IntervalIndex, IntervalArray):
        with pytest.raises(ValueError, match=msg):
            box([], dtype=left_dtype, closed="neither")
@pytest.mark.parametrize(
"dtype",
["Float64", pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow"))],
)
def test_ea_dtype(dtype):
# GH#56765
bins = [(0.0, 0.4), (0.4, 0.6)]
interval_dtype = IntervalDtype(subtype=dtype, closed="left")
result = IntervalIndex.from_tuples(bins, closed="left", dtype=interval_dtype)
assert result.dtype == interval_dtype
expected = IntervalIndex.from_tuples(bins, closed="left").astype(interval_dtype)
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,36 @@
import numpy as np
from pandas import (
IntervalIndex,
date_range,
)
class TestEquals:
def test_equals(self, closed):
expected = IntervalIndex.from_breaks(np.arange(5), closed=closed)
assert expected.equals(expected)
assert expected.equals(expected.copy())
assert not expected.equals(expected.astype(object))
assert not expected.equals(np.array(expected))
assert not expected.equals(list(expected))
assert not expected.equals([1, 2])
assert not expected.equals(np.array([1, 2]))
assert not expected.equals(date_range("20130101", periods=2))
expected_name1 = IntervalIndex.from_breaks(
np.arange(5), closed=closed, name="foo"
)
expected_name2 = IntervalIndex.from_breaks(
np.arange(5), closed=closed, name="bar"
)
assert expected.equals(expected_name1)
assert expected_name1.equals(expected_name2)
for other_closed in {"left", "right", "both", "neither"} - {closed}:
expected_other_closed = IntervalIndex.from_breaks(
np.arange(5), closed=other_closed
)
assert not expected.equals(expected_other_closed)

View File

@ -0,0 +1,119 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Interval,
IntervalIndex,
Series,
Timedelta,
Timestamp,
)
import pandas._testing as tm
class TestIntervalIndexRendering:
    """Repr/formatting behavior for IntervalIndex and containers using one."""

    # TODO: this is a test for DataFrame/Series, not IntervalIndex
    @pytest.mark.parametrize(
        "constructor,expected",
        [
            (
                Series,
                (
                    "(0.0, 1.0] a\n"
                    "NaN b\n"
                    "(2.0, 3.0] c\n"
                    "dtype: object"
                ),
            ),
            (DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")),
        ],
    )
    def test_repr_missing(self, constructor, expected, using_infer_string, request):
        # GH 25984: missing intervals should render as "NaN" in the repr
        if using_infer_string and constructor is Series:
            request.applymarker(pytest.mark.xfail(reason="repr different"))
        index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        obj = constructor(list("abc"), index=index)
        result = repr(obj)
        assert result == expected

    def test_repr_floats(self):
        # GH 32553: float endpoints must not be truncated/rounded in the repr
        markers = Series(
            [1, 2],
            index=IntervalIndex(
                [
                    Interval(left, right)
                    for left, right in zip(
                        Index([329.973, 345.137], dtype="float64"),
                        Index([345.137, 360.191], dtype="float64"),
                    )
                ]
            ),
        )
        result = str(markers)
        expected = "(329.973, 345.137] 1\n(345.137, 360.191] 2\ndtype: int64"
        assert result == expected

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    @pytest.mark.parametrize(
        "tuples, closed, expected_data",
        [
            ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
            (
                [(0.5, 1.0), np.nan, (2.0, 3.0)],
                "right",
                ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
            ),
            (
                [
                    (Timestamp("20180101"), Timestamp("20180102")),
                    np.nan,
                    ((Timestamp("20180102"), Timestamp("20180103"))),
                ],
                "both",
                [
                    "[2018-01-01 00:00:00, 2018-01-02 00:00:00]",
                    "NaN",
                    "[2018-01-02 00:00:00, 2018-01-03 00:00:00]",
                ],
            ),
            (
                [
                    (Timedelta("0 days"), Timedelta("1 days")),
                    (Timedelta("1 days"), Timedelta("2 days")),
                    np.nan,
                ],
                "neither",
                [
                    "(0 days 00:00:00, 1 days 00:00:00)",
                    "(1 days 00:00:00, 2 days 00:00:00)",
                    "NaN",
                ],
            ),
        ],
    )
    def test_get_values_for_csv(self, tuples, closed, expected_data):
        # GH 28210: CSV serialization goes through _get_values_for_csv
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index._get_values_for_csv(na_rep="NaN")
        expected = np.array(expected_data)
        tm.assert_numpy_array_equal(result, expected)

    def test_timestamp_with_timezone(self, unit):
        # GH 55035: repr must preserve the datetime unit and timezone in dtype
        left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]")
        right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]")
        index = IntervalIndex.from_arrays(left, right)
        result = repr(index)
        expected = (
            "IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], "
            f"dtype='interval[datetime64[{unit}, UTC], right]')"
        )
        assert result == expected

View File

@ -0,0 +1,674 @@
import re
import numpy as np
import pytest
from pandas.errors import InvalidIndexError
from pandas import (
NA,
CategoricalIndex,
DatetimeIndex,
Index,
Interval,
IntervalIndex,
MultiIndex,
NaT,
Timedelta,
Timestamp,
array,
date_range,
interval_range,
isna,
period_range,
timedelta_range,
)
import pandas._testing as tm
class TestGetItem:
def test_getitem(self, closed):
idx = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
assert idx[0] == Interval(0.0, 1.0, closed=closed)
assert idx[1] == Interval(1.0, 2.0, closed=closed)
assert isna(idx[2])
result = idx[0:1]
expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
tm.assert_index_equal(result, expected)
result = idx[0:2]
expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
tm.assert_index_equal(result, expected)
result = idx[1:3]
expected = IntervalIndex.from_arrays(
(1.0, np.nan), (2.0, np.nan), closed=closed
)
tm.assert_index_equal(result, expected)
def test_getitem_2d_deprecated(self):
# GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
idx = IntervalIndex.from_breaks(range(11), closed="right")
with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
idx[:, None]
with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
# GH#44051
idx[True]
with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
# GH#44051
idx[False]
class TestWhere:
def test_where(self, listlike_box):
klass = listlike_box
idx = IntervalIndex.from_breaks(range(11), closed="right")
cond = [True] * len(idx)
expected = idx
result = expected.where(klass(cond))
tm.assert_index_equal(result, expected)
cond = [False] + [True] * len(idx[1:])
expected = IntervalIndex([np.nan] + idx[1:].tolist())
result = idx.where(klass(cond))
tm.assert_index_equal(result, expected)
class TestTake:
def test_take(self, closed):
index = IntervalIndex.from_breaks(range(11), closed=closed)
result = index.take(range(10))
tm.assert_index_equal(result, index)
result = index.take([0, 0, 1])
expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed)
tm.assert_index_equal(result, expected)
class TestGetLoc:
    """get_loc lookups with Interval keys and with scalar keys."""

    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
            if closed == side:
                if bound == [0, 1]:
                    assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                elif bound == [2, 3]:
                    assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                else:
                    with pytest.raises(KeyError, match=msg):
                        idx.get_loc(Interval(*bound, closed=side))
            else:
                # a closed-ness mismatch alone is enough to miss
                with pytest.raises(KeyError, match=msg):
                    idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):
        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            "neither": {0.5: 0, 2.5: 1},
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        if scalar in correct[closed].keys():
            assert idx.get_loc(scalar) == correct[closed][scalar]
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        # GH 20921: single-interval index, scalar containment lookup
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        if scalar in index[0]:
            result = index.get_loc(scalar)
            assert result == 0
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)

    @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
        # GH 20921: single-interval index, exact Interval match lookup
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        interval = Interval(left, right, closed=other_closed)
        if interval == index[0]:
            result = index.get_loc(interval)
            assert result == 0
        else:
            with pytest.raises(
                KeyError,
                match=re.escape(f"Interval({left}, {right}, closed='{other_closed}')"),
            ):
                index.get_loc(interval)

    # Make consistent with test_interval_new.py (see #16316, #16386)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)

        # midpoint scalar lands in the first interval
        value = index[0].mid
        result = index.get_loc(value)
        expected = 0
        assert result == expected

        # exact Interval key matches position 0
        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101", periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        # GH 20636: overlapping intervals -> scalar hit returns a slice
        index = IntervalIndex.from_arrays(*arrays)

        value = index[0].mid + Timedelta("12 hours")
        result = index.get_loc(value)
        expected = slice(0, 2, None)
        assert result == expected

        # an exact Interval key still resolves to a single position
        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        # GH 25860: lookups must work on monotonically decreasing indexes
        index = IntervalIndex.from_arrays(values[1:], values[:-1])
        result = index.get_loc(index[0])
        expected = 0
        assert result == expected

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_loc_non_scalar_errors(self, key):
        # GH 31117: listlike/tuple keys are invalid for get_loc
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])

        msg = str(key)
        with pytest.raises(InvalidIndexError, match=msg):
            idx.get_loc(key)

    def test_get_indexer_with_nans(self):
        # GH#41831: None/np.nan/pd.NA all match NaN slots; NaT does not
        index = IntervalIndex([np.nan, Interval(1, 2), np.nan])

        expected = np.array([True, False, True])
        for key in [None, np.nan, NA]:
            assert key in index
            result = index.get_loc(key)
            tm.assert_numpy_array_equal(result, expected)

        for key in [NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]:
            with pytest.raises(KeyError, match=str(key)):
                index.get_loc(key)
class TestGetIndexer:
    """get_indexer / get_indexer_non_unique over interval, scalar and mixed targets."""

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        # Interval queries match only exact intervals (value and closed side)
        tuples = [(0, 2), (2, 4), (5, 7)]
        index = IntervalIndex.from_tuples(tuples, closed="right")

        result = index.get_indexer(query)
        expected = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        # scalar queries match the interval containing them, else -1
        tuples = [(0, 1), (1, 2), (3, 4)]
        index = IntervalIndex.from_tuples(tuples, closed="right")

        result = index.get_indexer(query)
        expected = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        # GH 17284: every query falls into the single interval (0, 5)
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        result = index.get_indexer(item)
        expected = np.array([0] * len(item), dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        # GH 17284: repeated exact-Interval queries against a length-1 index
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        result = index.get_indexer([Interval(0, 5, closed)] * size)
        expected = np.array([0] * size, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "target",
        [
            IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
            [-1, 0, 0.5, 1, 2, 2.5, np.nan],
            ["foo", "foo", "bar", "baz"],
        ],
    )
    def test_get_indexer_categorical(self, target, ordered):
        # GH 30063: categorical and non-categorical results should be consistent
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        categorical_target = CategoricalIndex(target, ordered=ordered)

        result = index.get_indexer(categorical_target)
        expected = index.get_indexer(target)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_get_indexer_categorical_with_nans(self):
        # GH#41934 nans in both index and in target
        ii = IntervalIndex.from_breaks(range(5))
        ii2 = ii.append(IntervalIndex([np.nan]))
        ci2 = CategoricalIndex(ii2)

        result = ii2.get_indexer(ci2)
        expected = np.arange(5, dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # not-all-matches
        result = ii2[1:].get_indexer(ci2[::-1])
        expected = np.array([3, 2, 1, 0, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # non-unique target, non-unique nans
        result = ii2.get_indexer(ci2.append(ci2))
        expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_datetime(self):
        ii = IntervalIndex.from_breaks(date_range("2018-01-01", periods=4))
        # TODO: with mismatched resolution get_indexer currently raises;
        #  this should probably coerce?
        target = DatetimeIndex(["2018-01-02"], dtype="M8[ns]")
        result = ii.get_indexer(target)
        expected = np.array([0], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # string dates are parsed and matched the same way
        result = ii.get_indexer(target.astype(str))
        tm.assert_numpy_array_equal(result, expected)

        # raw i8 integers must NOT be treated as datetimes
        # https://github.com/pandas-dev/pandas/issues/47772
        result = ii.get_indexer(target.asi8)
        expected = np.array([-1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        # IntervalIndex needs non-overlapping for uniqueness when querying
        index = IntervalIndex.from_tuples(tuples, closed=closed)

        msg = (
            "cannot handle overlapping indices; use "
            "IntervalIndex.get_indexer_non_unique"
        )
        with pytest.raises(InvalidIndexError, match=msg):
            index.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        # expected is (indexer, missing): all matching positions per query
        # element, plus the positions of query elements with no match
        tuples = [(0, 2.5), (1, 3), (2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed="left")

        result_indexer, result_missing = index.get_indexer_non_unique(query)
        expected_indexer = np.array(expected[0], dtype="intp")
        expected_missing = np.array(expected[1], dtype="intp")

        tm.assert_numpy_array_equal(result_indexer, expected_indexer)
        tm.assert_numpy_array_equal(result_missing, expected_missing)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_get_indexer_non_monotonic(self):
        # GH 16410
        idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
        idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])
        result = idx1.get_indexer(idx2)
        expected = np.array([2, 0, -1, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        result = idx1.get_indexer(idx1[1:])
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_with_nans(self):
        # GH#41831: repeated NaN intervals make the index non-unique
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])

        assert not index._index_as_unique

        result = index.get_indexer_for(other)
        expected = np.array([0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_index_non_unique_non_monotonic(self):
        # GH#44084 (root cause)
        index = IntervalIndex.from_tuples(
            [(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)]
        )

        result, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)])
        expected = np.array([1, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_multiindex_with_intervals(self):
        # GH#44084 (MultiIndex case as reported)
        interval_index = IntervalIndex.from_tuples(
            [(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval"
        )
        foo_index = Index([1, 2, 3], name="foo")

        multi_index = MultiIndex.from_product([foo_index, interval_index])

        result = multi_index.get_level_values("interval").get_indexer_for(
            [Interval(0.0, 1.0)]
        )
        expected = np.array([1, 4, 7], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("box", [IntervalIndex, array, list])
    def test_get_indexer_interval_index(self, box):
        # GH#30178: Period targets never match datetime intervals
        rng = period_range("2022-07-01", freq="D", periods=3)
        idx = box(interval_range(Timestamp("2022-07-01"), freq="3D", periods=3))

        actual = rng.get_indexer(idx)
        expected = np.array([-1, -1, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(actual, expected)

    def test_get_indexer_read_only(self):
        # lookups must not require a writeable target array
        idx = interval_range(start=0, end=5)
        arr = np.array([1, 2])
        arr.flags.writeable = False
        result = idx.get_indexer(arr)
        expected = np.array([0, 1])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

        result = idx.get_indexer_non_unique(arr)[0]
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)
class TestSliceLocs:
    """slice_locs with Interval and scalar bounds on (non-)monotonic indexes."""

    def test_slice_locs_with_interval(self):
        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 1)

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (2, 1)
        assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (0, 3)

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

        # unsorted duplicates: a non-unique bound cannot be resolved
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get left slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get left slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(0, 2))

        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get right slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(end=Interval(0, 2))

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get right slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))

        # another unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        # scalar bounds work when intervals are non-overlapping and monotonic

        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])

        assert index.slice_locs(0, 1) == (0, 1)
        assert index.slice_locs(0, 2) == (0, 2)
        assert index.slice_locs(0, 3) == (0, 2)
        assert index.slice_locs(3, 1) == (2, 1)
        assert index.slice_locs(3, 4) == (2, 3)
        assert index.slice_locs(0, 4) == (0, 3)

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        assert index.slice_locs(0, 1) == (3, 3)
        assert index.slice_locs(0, 2) == (3, 2)
        assert index.slice_locs(0, 3) == (3, 1)
        assert index.slice_locs(3, 1) == (1, 3)
        assert index.slice_locs(3, 4) == (1, 1)
        assert index.slice_locs(0, 4) == (3, 1)

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        # overlapping or non-monotonic intervals reject scalar slice bounds
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)
        with pytest.raises(
            KeyError,
            match=(
                "'can only get slices from an IntervalIndex if bounds are "
                "non-overlapping and all monotonic increasing or decreasing'"
            ),
        ):
            index.slice_locs(start, stop)
class TestPutmask:
    """IntervalIndex.putmask with datetime- and timedelta-backed intervals."""

    @pytest.mark.parametrize("tz", ["US/Pacific", None])
    def test_putmask_dt64(self, tz):
        # GH#37968
        breaks = date_range("2016-01-01", periods=9, tz=tz)
        index = IntervalIndex.from_breaks(breaks)

        mask = np.zeros(index.shape, dtype=bool)
        mask[:3] = True

        filler = index[-1]
        expected = IntervalIndex([filler] * 3 + list(index[3:]))
        tm.assert_index_equal(index.putmask(mask, filler), expected)

    def test_putmask_td64(self):
        # GH#37968
        dti = date_range("2016-01-01", periods=9)
        index = IntervalIndex.from_breaks(dti - dti[0])

        mask = np.zeros(index.shape, dtype=bool)
        mask[:3] = True

        filler = index[-1]
        expected = IntervalIndex([filler] * 3 + list(index[3:]))
        tm.assert_index_equal(index.putmask(mask, filler), expected)
class TestContains:
    # .__contains__, not .contains
    def test_contains_dunder(self):
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # scalars are never members: __contains__ requires exact Interval matches
        for scalar in (0, 1, 2):
            assert scalar not in index

        # only a value- and closed-matching Interval is contained
        assert Interval(0, 1, closed="right") in index

        non_members = [
            Interval(0, 2, closed="right"),
            Interval(0, 0.5, closed="right"),
            Interval(3, 5, closed="right"),
            Interval(-1, 0, closed="left"),
            Interval(0, 1, closed="left"),
            Interval(0, 1, closed="both"),
        ]
        for iv in non_members:
            assert iv not in index

View File

@ -0,0 +1,918 @@
from itertools import permutations
import re
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
Interval,
IntervalIndex,
Timedelta,
Timestamp,
date_range,
interval_range,
isna,
notna,
timedelta_range,
)
import pandas._testing as tm
import pandas.core.common as com
@pytest.fixture(params=[None, "foo"])
def name(request):
    # Parametrized index name: exercises both the unnamed and named paths.
    return request.param
class TestIntervalIndex:
index = IntervalIndex.from_arrays([0, 1], [1, 2])
def create_index(self, closed="right"):
return IntervalIndex.from_breaks(range(11), closed=closed)
def create_index_with_nan(self, closed="right"):
mask = [True, False] + [True] * 8
return IntervalIndex.from_arrays(
np.where(mask, np.arange(10), np.nan),
np.where(mask, np.arange(1, 11), np.nan),
closed=closed,
)
    def test_properties(self, closed):
        # len/size/shape and the left/right/mid endpoint accessors
        index = self.create_index(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10,)

        tm.assert_index_equal(index.left, Index(np.arange(10, dtype=np.int64)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11, dtype=np.int64)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5, dtype=np.float64)))

        assert index.closed == closed

        # np.asarray materializes the Interval objects
        ivs = [
            Interval(left, right, closed)
            for left, right in zip(range(10), range(1, 11))
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10,)

        # the NaN slot forces float endpoints
        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        ivs = [
            Interval(left, right, closed) if notna(left) else np.nan
            for left, right in zip(expected_left, expected_right)
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)
    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            date_range("2017-01-01", "2017-01-04"),
            pytest.param(
                date_range("2017-01-01", "2017-01-04", unit="s"),
                marks=pytest.mark.xfail(reason="mismatched result unit"),
            ),
            pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5h", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        # GH 18789: .length should match elementwise Interval.length
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA: a missing interval propagates NA into .length
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)
    def test_with_nans(self, closed):
        # hasnans/isna/notna on an index without missing intervals
        index = self.create_index(closed=closed)
        assert index.hasnans is False

        result = index.isna()
        expected = np.zeros(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.ones(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        # ... and with a NaN in the second slot
        index = self.create_index_with_nan(closed=closed)
        assert index.hasnans is True

        result = index.isna()
        expected = np.array([False, True] + [False] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.array([True, False] + [True] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)
def test_copy(self, closed):
expected = self.create_index(closed=closed)
result = expected.copy()
assert result.equals(expected)
result = expected.copy(deep=True)
assert result.equals(expected)
assert result.left is not expected.left
    def test_ensure_copied_data(self, closed):
        # exercise the copy flag in the constructor

        # not copying: constructing from an IntervalIndex with copy=False
        # shares the underlying endpoint arrays
        index = self.create_index(closed=closed)
        result = IntervalIndex(index, copy=False)
        tm.assert_numpy_array_equal(
            index.left.values, result.left.values, check_same="same"
        )
        tm.assert_numpy_array_equal(
            index.right.values, result.right.values, check_same="same"
        )

        # by-definition make a copy: an object-array source must be re-parsed
        result = IntervalIndex(np.array(index), copy=False)
        tm.assert_numpy_array_equal(
            index.left.values, result.left.values, check_same="copy"
        )
        tm.assert_numpy_array_equal(
            index.right.values, result.right.values, check_same="copy"
        )
def test_delete(self, closed):
breaks = np.arange(1, 11, dtype=np.int64)
expected = IntervalIndex.from_breaks(breaks, closed=closed)
result = self.create_index(closed=closed).delete(0)
tm.assert_index_equal(result, expected)
    @pytest.mark.parametrize(
        "data",
        [
            interval_range(0, periods=10, closed="neither"),
            interval_range(1.7, periods=8, freq=2.5, closed="both"),
            interval_range(Timestamp("20170101"), periods=12, closed="left"),
            interval_range(Timedelta("1 day"), periods=6, closed="right"),
        ],
    )
    def test_insert(self, data):
        item = data[0]
        idx_item = IntervalIndex([item])

        # start
        expected = idx_item.append(data)
        result = data.insert(0, item)
        tm.assert_index_equal(result, expected)

        # end
        expected = data.append(idx_item)
        result = data.insert(len(data), item)
        tm.assert_index_equal(result, expected)

        # mid
        expected = data[:3].append(idx_item).append(data[3:])
        result = data.insert(3, item)
        tm.assert_index_equal(result, expected)

        # invalid type: Index.insert coerces to object dtype,
        # while the underlying IntervalArray raises
        res = data.insert(1, "foo")
        expected = data.astype(object).insert(1, "foo")
        tm.assert_index_equal(res, expected)

        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(TypeError, match=msg):
            data._data.insert(1, "foo")

        # invalid closed: same coerce-vs-raise split as invalid type
        msg = "'value.closed' is 'left', expected 'right'."
        for closed in {"left", "right", "both", "neither"} - {item.closed}:
            msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
            bad_item = Interval(item.left, item.right, closed=closed)
            res = data.insert(1, bad_item)
            expected = data.astype(object).insert(1, bad_item)
            tm.assert_index_equal(res, expected)
            with pytest.raises(ValueError, match=msg):
                data._data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        for na in [np.nan, None, pd.NA]:
            expected = data[:1].append(na_idx).append(data[1:])
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

        if data.left.dtype.kind not in ["m", "M"]:
            # trying to insert pd.NaT into a numeric-dtyped Index should cast
            expected = data.astype(object).insert(1, pd.NaT)

            msg = "can only insert Interval objects and NA into an IntervalArray"
            with pytest.raises(TypeError, match=msg):
                data._data.insert(1, pd.NaT)

        result = data.insert(1, pd.NaT)
        tm.assert_index_equal(result, expected)
def test_is_unique_interval(self, closed):
"""
Interval specific tests for is_unique in addition to base class tests
"""
# unique overlapping - distinct endpoints
idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
assert idx.is_unique is True
# unique overlapping - shared endpoints
idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
assert idx.is_unique is True
# unique nested
idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
assert idx.is_unique is True
# unique NaN
idx = IntervalIndex.from_tuples([(np.nan, np.nan)], closed=closed)
assert idx.is_unique is True
# non-unique NaN
idx = IntervalIndex.from_tuples(
[(np.nan, np.nan), (np.nan, np.nan)], closed=closed
)
assert idx.is_unique is False
def test_monotonic(self, closed):
    """Check the four monotonicity properties across interval orderings.

    Covers non-overlapping, overlapping, shared-endpoint, stationary and
    empty indexes, each in increasing, decreasing and unordered layouts.
    """
    # increasing non-overlapping
    idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
    assert idx.is_monotonic_increasing is True
    assert idx._is_strictly_monotonic_increasing is True
    assert idx.is_monotonic_decreasing is False
    assert idx._is_strictly_monotonic_decreasing is False
    # decreasing non-overlapping
    idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
    assert idx.is_monotonic_increasing is False
    assert idx._is_strictly_monotonic_increasing is False
    assert idx.is_monotonic_decreasing is True
    assert idx._is_strictly_monotonic_decreasing is True
    # unordered non-overlapping
    idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
    assert idx.is_monotonic_increasing is False
    assert idx._is_strictly_monotonic_increasing is False
    assert idx.is_monotonic_decreasing is False
    assert idx._is_strictly_monotonic_decreasing is False
    # increasing overlapping
    idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
    assert idx.is_monotonic_increasing is True
    assert idx._is_strictly_monotonic_increasing is True
    assert idx.is_monotonic_decreasing is False
    assert idx._is_strictly_monotonic_decreasing is False
    # decreasing overlapping
    idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
    assert idx.is_monotonic_increasing is False
    assert idx._is_strictly_monotonic_increasing is False
    assert idx.is_monotonic_decreasing is True
    assert idx._is_strictly_monotonic_decreasing is True
    # unordered overlapping
    idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
    assert idx.is_monotonic_increasing is False
    assert idx._is_strictly_monotonic_increasing is False
    assert idx.is_monotonic_decreasing is False
    assert idx._is_strictly_monotonic_decreasing is False
    # increasing overlapping shared endpoints
    idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
    assert idx.is_monotonic_increasing is True
    assert idx._is_strictly_monotonic_increasing is True
    assert idx.is_monotonic_decreasing is False
    assert idx._is_strictly_monotonic_decreasing is False
    # decreasing overlapping shared endpoints
    idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
    assert idx.is_monotonic_increasing is False
    assert idx._is_strictly_monotonic_increasing is False
    assert idx.is_monotonic_decreasing is True
    assert idx._is_strictly_monotonic_decreasing is True
    # stationary (repeated interval): monotonic both ways, strictly neither
    idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
    assert idx.is_monotonic_increasing is True
    assert idx._is_strictly_monotonic_increasing is False
    assert idx.is_monotonic_decreasing is True
    assert idx._is_strictly_monotonic_decreasing is False
    # empty index is vacuously monotonic in every sense
    idx = IntervalIndex([], closed=closed)
    assert idx.is_monotonic_increasing is True
    assert idx._is_strictly_monotonic_increasing is True
    assert idx.is_monotonic_decreasing is True
    assert idx._is_strictly_monotonic_decreasing is True
def test_is_monotonic_with_nans(self):
# GH#41831
index = IntervalIndex([np.nan, np.nan])
assert not index.is_monotonic_increasing
assert not index._is_strictly_monotonic_increasing
assert not index.is_monotonic_increasing
assert not index._is_strictly_monotonic_decreasing
assert not index.is_monotonic_decreasing
@pytest.mark.parametrize(
    "breaks",
    [
        date_range("20180101", periods=4),
        date_range("20180101", periods=4, tz="US/Eastern"),
        timedelta_range("0 days", periods=4),
    ],
    ids=lambda x: str(x.dtype),
)
def test_maybe_convert_i8(self, breaks):
    """Datetime-like keys convert to their i8 (nanosecond) representation
    for interval lookups (GH 20636)."""
    index = IntervalIndex.from_breaks(breaks)
    # IntervalIndex key -> IntervalIndex over i8 breaks
    result = index._maybe_convert_i8(index)
    expected = IntervalIndex.from_breaks(breaks.asi8)
    tm.assert_index_equal(result, expected)
    # Interval key -> Interval with i8 endpoints
    interval = Interval(breaks[0], breaks[1])
    result = index._maybe_convert_i8(interval)
    expected = Interval(breaks[0]._value, breaks[1]._value)
    assert result == expected
    # datetimelike index -> integer Index
    result = index._maybe_convert_i8(breaks)
    expected = Index(breaks.asi8)
    tm.assert_index_equal(result, expected)
    # datetimelike scalar -> integer scalar
    result = index._maybe_convert_i8(breaks[0])
    expected = breaks[0]._value
    assert result == expected
    # list-like of datetimelike scalars -> integer Index
    result = index._maybe_convert_i8(list(breaks))
    expected = Index(breaks.asi8)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "breaks",
    [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
)
def test_maybe_convert_i8_nat(self, breaks):
    """NaT keys convert to np.nan in a float64 Index (GH 20636)."""
    index = IntervalIndex.from_breaks(breaks)
    # all-NaT input -> all-NaN float index
    to_convert = breaks._constructor([pd.NaT] * 3).as_unit("ns")
    expected = Index([np.nan] * 3, dtype=np.float64)
    result = index._maybe_convert_i8(to_convert)
    tm.assert_index_equal(result, expected)
    # mixed NaT and real values: real values become their i8 as float
    to_convert = to_convert.insert(0, breaks[0])
    expected = expected.insert(0, float(breaks[0]._value))
    result = index._maybe_convert_i8(to_convert)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "make_key",
    [lambda breaks: breaks, list],
    ids=["lambda", "list"],
)
def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype):
    """Numeric keys convert to the 64-bit dtype of the same kind (GH 20636)."""
    breaks = np.arange(5, dtype=any_real_numpy_dtype)
    index = IntervalIndex.from_breaks(breaks)
    key = make_key(breaks)

    result = index._maybe_convert_i8(key)
    # signed ints -> int64, unsigned -> uint64, floats -> float64
    dtype_by_kind = {"i": np.int64, "u": np.uint64, "f": np.float64}
    expected = Index(key, dtype=dtype_by_kind[breaks.dtype.kind])
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks[0],
    ],
    ids=["IntervalIndex", "Interval", "scalar"],
)
def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype):
    """Interval-like or scalar numeric keys pass through untouched (GH 20636)."""
    breaks = np.arange(5, dtype=any_real_numpy_dtype)
    index = IntervalIndex.from_breaks(breaks)
    key = make_key(breaks)
    # _maybe_convert_i8 must return the very same object for these key types
    assert index._maybe_convert_i8(key) is key
@pytest.mark.parametrize(
    "breaks1, breaks2",
    permutations(
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        2,
    ),
    ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize(
    "make_key",
    [
        IntervalIndex.from_breaks,
        lambda breaks: Interval(breaks[0], breaks[1]),
        lambda breaks: breaks,
        lambda breaks: breaks[0],
        list,
    ],
    ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
)
def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
    """Keys whose datetime-like dtype differs from the index subtype are
    rejected regardless of container type (GH 20636)."""
    index = IntervalIndex.from_breaks(breaks1)
    key = make_key(breaks2)
    msg = (
        f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
        f"values of dtype {breaks2.dtype}"
    )
    # dtype reprs may contain regex metacharacters (e.g. brackets)
    msg = re.escape(msg)
    with pytest.raises(ValueError, match=msg):
        index._maybe_convert_i8(key)
def test_contains_method(self):
    """contains() checks pointwise membership of a scalar in each interval."""
    i = IntervalIndex.from_arrays([0, 1], [1, 2])

    neither = np.array([False, False], dtype="bool")
    # 0 sits on an open left edge; 3 is outside both intervals
    tm.assert_numpy_array_equal(i.contains(0), neither)
    tm.assert_numpy_array_equal(i.contains(3), neither)

    first_only = np.array([True, False], dtype="bool")
    tm.assert_numpy_array_equal(i.contains(0.5), first_only)
    tm.assert_numpy_array_equal(i.contains(1), first_only)

    # __contains__ not implemented for "interval in interval", follow
    # that for the contains method for now
    with pytest.raises(
        NotImplementedError, match="contains not implemented for two"
    ):
        i.contains(Interval(0, 1))
def test_dropna(self, closed):
expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)
ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
result = ii.dropna()
tm.assert_index_equal(result, expected)
ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
result = ii.dropna()
tm.assert_index_equal(result, expected)
def test_non_contiguous(self, closed):
index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
target = [0.5, 1.5, 2.5]
actual = index.get_indexer(target)
expected = np.array([0, -1, 1], dtype="intp")
tm.assert_numpy_array_equal(actual, expected)
assert 1.5 not in index
def test_isin(self, closed):
    """isin matches only intervals with identical endpoints and closed side."""
    index = self.create_index(closed=closed)

    # only the first interval is present in a length-1 slice of itself
    expected = np.array([True] + [False] * (len(index) - 1))
    tm.assert_numpy_array_equal(index.isin(index[:1]), expected)
    tm.assert_numpy_array_equal(index.isin([index[0]]), expected)

    # all but the last interval appear in the shifted index
    other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
    expected = np.array([True] * (len(index) - 1) + [False])
    tm.assert_numpy_array_equal(index.isin(other), expected)
    tm.assert_numpy_array_equal(index.isin(other.tolist()), expected)

    # a mismatched closed side never matches
    for other_closed in ["right", "left", "both", "neither"]:
        other = self.create_index(closed=other_closed)
        expected = np.repeat(closed == other_closed, len(index))
        tm.assert_numpy_array_equal(index.isin(other), expected)
        tm.assert_numpy_array_equal(index.isin(other.tolist()), expected)
def test_comparison(self):
    """Element-wise comparisons: scalar Interval, self, IntervalArray and
    invalid operands (``self.index`` is the two-interval fixture index)."""
    # Interval scalar vs index
    actual = Interval(0, 1) < self.index
    expected = np.array([False, True])
    tm.assert_numpy_array_equal(actual, expected)
    actual = Interval(0.5, 1.5) < self.index
    expected = np.array([False, True])
    tm.assert_numpy_array_equal(actual, expected)
    actual = self.index > Interval(0.5, 1.5)
    tm.assert_numpy_array_equal(actual, expected)
    # index vs itself: reflexive comparisons
    actual = self.index == self.index
    expected = np.array([True, True])
    tm.assert_numpy_array_equal(actual, expected)
    actual = self.index <= self.index
    tm.assert_numpy_array_equal(actual, expected)
    actual = self.index >= self.index
    tm.assert_numpy_array_equal(actual, expected)
    actual = self.index < self.index
    expected = np.array([False, False])
    tm.assert_numpy_array_equal(actual, expected)
    actual = self.index > self.index
    tm.assert_numpy_array_equal(actual, expected)
    # equality is sensitive to the closed side
    actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
    tm.assert_numpy_array_equal(actual, expected)
    # index vs its own IntervalArray, both orientations
    actual = self.index == self.index.values
    tm.assert_numpy_array_equal(actual, np.array([True, True]))
    actual = self.index.values == self.index
    tm.assert_numpy_array_equal(actual, np.array([True, True]))
    actual = self.index <= self.index.values
    tm.assert_numpy_array_equal(actual, np.array([True, True]))
    actual = self.index != self.index.values
    tm.assert_numpy_array_equal(actual, np.array([False, False]))
    actual = self.index > self.index.values
    tm.assert_numpy_array_equal(actual, np.array([False, False]))
    actual = self.index.values > self.index
    tm.assert_numpy_array_equal(actual, np.array([False, False]))
    # invalid comparisons: == is all-False, ordering comparisons raise
    actual = self.index == 0
    tm.assert_numpy_array_equal(actual, np.array([False, False]))
    actual = self.index == self.index.left
    tm.assert_numpy_array_equal(actual, np.array([False, False]))
    msg = "|".join(
        [
            "not supported between instances of 'int' and '.*.Interval'",
            r"Invalid comparison between dtype=interval\[int64, right\] and ",
        ]
    )
    with pytest.raises(TypeError, match=msg):
        self.index > 0
    with pytest.raises(TypeError, match=msg):
        self.index <= 0
    with pytest.raises(TypeError, match=msg):
        self.index > np.arange(2)
    # mismatched lengths cannot be compared at all
    msg = "Lengths must match to compare"
    with pytest.raises(ValueError, match=msg):
        self.index > np.arange(3)
def test_missing_values(self, closed):
    """NaN intervals: construction equivalence, one-sided-NaN rejection, isna."""
    with_na = Index(
        [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
    )
    from_arrays = IntervalIndex.from_arrays(
        [np.nan, 0, 1], [np.nan, 1, 2], closed=closed
    )
    assert with_na.equals(from_arrays)

    # NaN on only one endpoint side is invalid
    msg = (
        "missing values must be missing in the same location both left "
        "and right sides"
    )
    with pytest.raises(ValueError, match=msg):
        IntervalIndex.from_arrays(
            [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
        )

    tm.assert_numpy_array_equal(isna(with_na), np.array([True, False, False]))
def test_sort_values(self, closed):
    """sort_values orders intervals; NaN placement follows na_position."""
    index = self.create_index(closed=closed)
    tm.assert_index_equal(index.sort_values(), index)
    tm.assert_index_equal(index.sort_values(ascending=False), index[::-1])

    # NaN sorts last by default and first when requested
    index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])
    expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
    tm.assert_index_equal(index.sort_values(), expected)

    expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
    tm.assert_index_equal(
        index.sort_values(ascending=False, na_position="first"), expected
    )
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_datetime(self, tz):
    """mid, membership, contains() and get_indexer on a datetime-backed
    index, with and without a timezone."""
    start = Timestamp("2000-01-01", tz=tz)
    dates = date_range(start=start, periods=10)
    index = IntervalIndex.from_breaks(dates)
    # mid is the midpoint between consecutive breaks
    start = Timestamp("2000-01-01T12:00", tz=tz)
    expected = date_range(start=start, periods=9)
    tm.assert_index_equal(index.mid, expected)
    # __contains__ doesn't check individual points
    assert Timestamp("2000-01-01", tz=tz) not in index
    assert Timestamp("2000-01-01T12", tz=tz) not in index
    assert Timestamp("2000-01-02", tz=tz) not in index
    iv_true = Interval(
        Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)
    )
    iv_false = Interval(
        Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)
    )
    assert iv_true in index
    assert iv_false not in index
    # .contains does check individual points
    assert not index.contains(Timestamp("2000-01-01", tz=tz)).any()
    assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any()
    assert index.contains(Timestamp("2000-01-02", tz=tz)).any()
    # get_indexer maps points to their enclosing interval, -1 when outside
    start = Timestamp("1999-12-31T12:00", tz=tz)
    target = date_range(start=start, periods=7, freq="12h")
    actual = index.get_indexer(target)
    expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
    tm.assert_numpy_array_equal(actual, expected)
    start = Timestamp("2000-01-08T18:00", tz=tz)
    target = date_range(start=start, periods=7, freq="6h")
    actual = index.get_indexer(target)
    expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
    tm.assert_numpy_array_equal(actual, expected)
def test_append(self, closed):
index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
result = index1.append(index2)
expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
tm.assert_index_equal(result, expected)
result = index1.append([index1, index2])
expected = IntervalIndex.from_arrays(
[0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
)
tm.assert_index_equal(result, expected)
for other_closed in {"left", "right", "both", "neither"} - {closed}:
index_other_closed = IntervalIndex.from_arrays(
[0, 1], [1, 2], closed=other_closed
)
result = index1.append(index_other_closed)
expected = index1.astype(object).append(index_other_closed.astype(object))
tm.assert_index_equal(result, expected)
def test_is_non_overlapping_monotonic(self, closed):
# Should be True in all cases
tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is True
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is True
# Should be False in all cases (overlapping)
tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is False
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is False
# Should be False in all cases (non-monotonic)
tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
idx = IntervalIndex.from_tuples(tpls, closed=closed)
assert idx.is_non_overlapping_monotonic is False
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
assert idx.is_non_overlapping_monotonic is False
# Should be False for closed='both', otherwise True (GH16560)
if closed == "both":
idx = IntervalIndex.from_breaks(range(4), closed=closed)
assert idx.is_non_overlapping_monotonic is False
else:
idx = IntervalIndex.from_breaks(range(4), closed=closed)
assert idx.is_non_overlapping_monotonic is True
@pytest.mark.parametrize(
    "start, shift, na_value",
    [
        (0, 1, np.nan),
        (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
        (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
    ],
)
def test_is_overlapping(self, start, shift, na_value, closed):
    """Interface tests for is_overlapping (GH 23309); see
    test_interval_tree.py for the extensive coverage.

    NA intervals never contribute to overlap; shared endpoints overlap
    only when both sides are closed.
    """
    # non-overlapping
    tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)]
    index = IntervalIndex.from_tuples(tuples, closed=closed)
    assert index.is_overlapping is False
    # non-overlapping with NA
    tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
    index = IntervalIndex.from_tuples(tuples, closed=closed)
    assert index.is_overlapping is False
    # overlapping
    tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)]
    index = IntervalIndex.from_tuples(tuples, closed=closed)
    assert index.is_overlapping is True
    # overlapping with NA
    tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
    index = IntervalIndex.from_tuples(tuples, closed=closed)
    assert index.is_overlapping is True
    # common endpoints overlap iff closed on both sides
    tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)]
    index = IntervalIndex.from_tuples(tuples, closed=closed)
    result = index.is_overlapping
    expected = closed == "both"
    assert result is expected
    # common endpoints with NA
    tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
    index = IntervalIndex.from_tuples(tuples, closed=closed)
    result = index.is_overlapping
    assert result is expected
    # intervals with duplicate left values
    a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85]
    b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
    index = IntervalIndex.from_arrays(a, b, closed="right")
    result = index.is_overlapping
    assert result is False
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))),
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        ),
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        ),
    ],
)
def test_to_tuples(self, tuples):
    """to_tuples round-trips the tuples the index was built from (GH 18756)."""
    idx = IntervalIndex.from_tuples(tuples)
    expected = Index(com.asarray_tuplesafe(tuples))
    tm.assert_index_equal(idx.to_tuples(), expected)
@pytest.mark.parametrize(
    "tuples",
    [
        list(zip(range(10), range(1, 11))) + [np.nan],
        list(
            zip(
                date_range("20170101", periods=10),
                date_range("20170101", periods=10),
            )
        )
        + [np.nan],
        list(
            zip(
                timedelta_range("0 days", periods=10),
                timedelta_range("1 day", periods=10),
            )
        )
        + [np.nan],
    ],
)
@pytest.mark.parametrize("na_tuple", [True, False])
def test_to_tuples_na(self, tuples, na_tuple):
    """to_tuples renders a missing interval as ``(nan, nan)`` when
    ``na_tuple=True`` and as a bare NA scalar otherwise (GH 18756)."""
    idx = IntervalIndex.from_tuples(tuples)
    result = idx.to_tuples(na_tuple=na_tuple)
    # check the non-NA portion
    expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
    result_notna = result[:-1]
    tm.assert_index_equal(result_notna, expected_notna)
    # check the NA portion (last element)
    result_na = result[-1]
    if na_tuple:
        assert isinstance(result_na, tuple)
        assert len(result_na) == 2
        assert all(isna(x) for x in result_na)
    else:
        assert isna(result_na)
def test_nbytes(self):
# GH 19209
left = np.arange(0, 4, dtype="i8")
right = np.arange(1, 5, dtype="i8")
result = IntervalIndex.from_arrays(left, right).nbytes
expected = 64 # 4 * 8 * 2
assert result == expected
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
def test_set_closed(self, name, closed, new_closed):
    """set_closed swaps the closed side, keeping endpoints and name (GH 21670)."""
    index = interval_range(0, 5, closed=closed, name=name)
    expected = interval_range(0, 5, closed=new_closed, name=name)
    tm.assert_index_equal(index.set_closed(new_closed), expected)
@pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
def test_set_closed_errors(self, bad_closed):
    # GH 21670: anything besides the four valid sides is rejected
    ii = interval_range(0, 5)
    expected_msg = f"invalid option for 'closed': {bad_closed}"
    with pytest.raises(ValueError, match=expected_msg):
        ii.set_closed(bad_closed)
def test_is_all_dates(self):
# GH 23576
year_2017 = Interval(
Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
)
year_2017_index = IntervalIndex([year_2017])
assert not year_2017_index._is_all_dates
def test_dir():
    """dir() on an IntervalIndex must not raise or expose ``.str`` (GH#27571)."""
    index = IntervalIndex.from_arrays([0, 1], [1, 2])
    assert "str" not in dir(index)
def test_searchsorted_different_argument_classes(listlike_box):
# https://github.com/pandas-dev/pandas/issues/32762
values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
result = values.searchsorted(listlike_box(values))
expected = np.array([0, 1], dtype=result.dtype)
tm.assert_numpy_array_equal(result, expected)
result = values._data.searchsorted(listlike_box(values))
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    # searching for values that cannot be ordered against Intervals raises
    ii = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    with pytest.raises(TypeError, match=msg):
        ii.searchsorted(arg)

View File

@ -0,0 +1,369 @@
from datetime import timedelta
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer
from pandas import (
DateOffset,
Interval,
IntervalIndex,
Timedelta,
Timestamp,
date_range,
interval_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.tseries.offsets import Day
@pytest.fixture(params=[None, "foo"])
def name(request):
    # Index ``name`` fixture: exercises both the anonymous and named cases.
    return request.param
class TestIntervalRange:
@pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
def test_constructor_numeric(self, closed, name, freq, periods):
    """Any valid 3-of-4 parameter combination yields the same numeric index."""
    start, end = 0, 100
    expected = IntervalIndex.from_breaks(
        np.arange(101, step=freq), name=name, closed=closed
    )

    # any three of start/end/periods/freq define the index;
    # start/end/periods uses linspace behavior (GH 20976)
    for kwargs in (
        {"start": start, "end": end, "freq": freq},
        {"start": start, "periods": periods, "freq": freq},
        {"end": end, "periods": periods, "freq": freq},
        {"start": start, "end": end, "periods": periods},
    ):
        result = interval_range(name=name, closed=closed, **kwargs)
        tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
@pytest.mark.parametrize(
    "freq, periods", [("D", 364), ("2D", 182), ("22D18h", 16), ("ME", 11)]
)
def test_constructor_timestamp(self, closed, name, freq, periods, tz):
    """Each valid 3-of-4 parameter combination yields the same datetime index."""
    start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz)
    breaks = date_range(start=start, end=end, freq=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
    # defined from start/end/freq
    result = interval_range(
        start=start, end=end, freq=freq, name=name, closed=closed
    )
    tm.assert_index_equal(result, expected)
    # defined from start/periods/freq
    result = interval_range(
        start=start, periods=periods, freq=freq, name=name, closed=closed
    )
    tm.assert_index_equal(result, expected)
    # defined from end/periods/freq
    result = interval_range(
        end=end, periods=periods, freq=freq, name=name, closed=closed
    )
    tm.assert_index_equal(result, expected)
    # GH 20976: linspace behavior defined from start/end/periods;
    # only exact for unit frequencies, and skipped with a tz where
    # evenly-spaced wall times need not match the frequency-based breaks
    if not breaks.freq.n == 1 and tz is None:
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "freq, periods", [("D", 100), ("2D12h", 40), ("5D", 20), ("25D", 4)]
)
def test_constructor_timedelta(self, closed, name, freq, periods):
    """Any valid 3-of-4 parameter combination yields the same timedelta index."""
    start, end = Timedelta("0 days"), Timedelta("100 days")
    breaks = timedelta_range(start=start, end=end, freq=freq)
    expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    # any three of start/end/periods/freq define the index;
    # start/end/periods uses linspace behavior (GH 20976)
    for kwargs in (
        {"start": start, "end": end, "freq": freq},
        {"start": start, "periods": periods, "freq": freq},
        {"end": end, "periods": periods, "freq": freq},
        {"start": start, "end": end, "periods": periods},
    ):
        result = interval_range(name=name, closed=closed, **kwargs)
        tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "start, end, freq, expected_endpoint",
    [
        (0, 10, 3, 9),
        (0, 10, 1.5, 9),
        (0.5, 10, 3, 9.5),
        (Timedelta("0D"), Timedelta("10D"), "2D4h", Timedelta("8D16h")),
        (
            Timestamp("2018-01-01"),
            Timestamp("2018-02-09"),
            "MS",
            Timestamp("2018-02-01"),
        ),
        (
            Timestamp("2018-01-01", tz="US/Eastern"),
            Timestamp("2018-01-20", tz="US/Eastern"),
            "5D12h",
            Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
        ),
    ],
)
def test_early_truncation(self, start, end, freq, expected_endpoint):
    """The index stops early when freq steps over ``end`` (no partial interval)."""
    result = interval_range(start=start, end=end, freq=freq)
    assert result.right[-1] == expected_endpoint
@pytest.mark.parametrize(
    "start, end, freq",
    [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
)
def test_no_invalid_float_truncation(self, start, end, freq):
    # GH 21161: fractional endpoints must not be truncated
    breaks = (
        [0.5, 1.5, 2.5, 3.5, 4.5] if freq is None else [0.5, 2.0, 3.5, 5.0, 6.5]
    )
    expected = IntervalIndex.from_breaks(breaks)
    result = interval_range(start=start, end=end, periods=4, freq=freq)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "start, mid, end",
    [
        (
            Timestamp("2018-03-10", tz="US/Eastern"),
            Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
            Timestamp("2018-03-12", tz="US/Eastern"),
        ),
        (
            Timestamp("2018-11-03", tz="US/Eastern"),
            Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
            Timestamp("2018-11-05", tz="US/Eastern"),
        ),
    ],
)
def test_linspace_dst_transition(self, start, mid, end):
    """GH 20976: linspace behavior (start/end/periods) accounts for the
    hour gained/lost across a DST transition, so the midpoint is offset
    by 30 minutes in wall-clock time."""
    # pin the resolution so breaks compare equal unit-wise
    start = start.as_unit("ns")
    mid = mid.as_unit("ns")
    end = end.as_unit("ns")
    result = interval_range(start=start, end=end, periods=2)
    expected = IntervalIndex.from_breaks([start, mid, end])
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("freq", [2, 2.0])
@pytest.mark.parametrize("end", [10, 10.0])
@pytest.mark.parametrize("start", [0, 0.0])
def test_float_subtype(self, start, end, freq):
    """The subtype is float64 if any of start/end/freq is a float, even
    when every resulting endpoint could safely be cast to an integer."""
    # defined from start/end/freq
    index = interval_range(start=start, end=end, freq=freq)
    result = index.dtype.subtype
    expected = "int64" if is_integer(start + end + freq) else "float64"
    assert result == expected
    # defined from start/periods/freq: end does not participate
    index = interval_range(start=start, periods=5, freq=freq)
    result = index.dtype.subtype
    expected = "int64" if is_integer(start + freq) else "float64"
    assert result == expected
    # defined from end/periods/freq: start does not participate
    index = interval_range(end=end, periods=5, freq=freq)
    result = index.dtype.subtype
    expected = "int64" if is_integer(end + freq) else "float64"
    assert result == expected
    # GH 20976: linspace behavior defined from start/end/periods
    index = interval_range(start=start, end=end, periods=5)
    result = index.dtype.subtype
    expected = "int64" if is_integer(start + end) else "float64"
    assert result == expected
def test_interval_range_fractional_period(self):
    """A float ``periods`` is deprecated and truncated toward zero."""
    expected = interval_range(start=0, periods=10)
    msg = "Non-integer 'periods' in pd.date_range, .* pd.interval_range"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = interval_range(start=0, periods=10.5)
    tm.assert_index_equal(result, expected)
def test_constructor_coverage(self):
    """Equivalent start/end/freq representations produce identical indexes.

    Covers pydatetime/np.datetime64 endpoints and the spelling variants of
    a 1-day frequency for both timestamp and timedelta ranges.
    """
    # equivalent timestamp-like start/end
    start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
    expected = interval_range(start=start, end=end)
    result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime())
    tm.assert_index_equal(result, expected)
    result = interval_range(start=start.asm8, end=end.asm8)
    tm.assert_index_equal(result, expected)
    # equivalent freq spellings with timestamp endpoints
    equiv_freq = [
        "D",
        Day(),
        Timedelta(days=1),
        timedelta(days=1),
        DateOffset(days=1),
    ]
    for freq in equiv_freq:
        result = interval_range(start=start, end=end, freq=freq)
        tm.assert_index_equal(result, expected)
    # equivalent timedelta-like start/end
    start, end = Timedelta(days=1), Timedelta(days=10)
    expected = interval_range(start=start, end=end)
    result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta())
    tm.assert_index_equal(result, expected)
    result = interval_range(start=start.asm8, end=end.asm8)
    tm.assert_index_equal(result, expected)
    # equivalent freq spellings with timedelta endpoints
    # (DateOffset is not valid here)
    equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)]
    for freq in equiv_freq:
        result = interval_range(start=start, end=end, freq=freq)
        tm.assert_index_equal(result, expected)
def test_errors(self):
    """interval_range input validation raises with the documented messages."""
    # not enough params
    msg = (
        "Of the four parameters: start, end, periods, and freq, "
        "exactly three must be specified"
    )
    for kwargs in ({"start": 0}, {"end": 5}, {"periods": 2}, {}):
        with pytest.raises(ValueError, match=msg):
            interval_range(**kwargs)

    # too many params
    with pytest.raises(ValueError, match=msg):
        interval_range(start=0, end=5, periods=6, freq=1.5)

    # mixed units
    msg = "start, end, freq need to be type compatible"
    incompatible = [
        {"start": 0, "end": Timestamp("20130101"), "freq": 2},
        {"start": 0, "end": Timedelta("1 day"), "freq": 2},
        {"start": 0, "end": 10, "freq": "D"},
        {"start": Timestamp("20130101"), "end": 10, "freq": "D"},
        {"start": Timestamp("20130101"), "end": Timedelta("1 day"), "freq": "D"},
        {"start": Timestamp("20130101"), "end": Timestamp("20130110"), "freq": 2},
        {"start": Timedelta("1 day"), "end": 10, "freq": "D"},
        {"start": Timedelta("1 day"), "end": Timestamp("20130110"), "freq": "D"},
        {"start": Timedelta("1 day"), "end": Timedelta("10 days"), "freq": 2},
    ]
    for kwargs in incompatible:
        with pytest.raises(TypeError, match=msg):
            interval_range(**kwargs)

    # invalid periods
    msg = "periods must be a number, got foo"
    with pytest.raises(TypeError, match=msg):
        interval_range(start=0, periods="foo")

    # invalid start
    msg = "start must be numeric or datetime-like, got foo"
    with pytest.raises(ValueError, match=msg):
        interval_range(start="foo", periods=10)

    # invalid end
    msg = r"end must be numeric or datetime-like, got \(0, 1\]"
    with pytest.raises(ValueError, match=msg):
        interval_range(end=Interval(0, 1), periods=10)

    # invalid freq for datetime-like
    msg = "freq must be numeric or convertible to DateOffset, got foo"
    for kwargs in (
        {"start": 0, "end": 10},
        {"start": Timestamp("20130101"), "periods": 10},
        {"end": Timedelta("1 day"), "periods": 10},
    ):
        with pytest.raises(ValueError, match=msg):
            interval_range(freq="foo", **kwargs)

    # mixed tz
    start = Timestamp("2017-01-01", tz="US/Eastern")
    end = Timestamp("2017-01-07", tz="US/Pacific")
    msg = "Start and end cannot both be tz-aware with different timezones"
    with pytest.raises(TypeError, match=msg):
        interval_range(start=start, end=end)
def test_float_freq(self):
# GH 54477
result = interval_range(0, 1, freq=0.1)
expected = IntervalIndex.from_breaks([0 + 0.1 * n for n in range(11)])
tm.assert_index_equal(result, expected)
result = interval_range(0, 1, freq=0.6)
expected = IntervalIndex.from_breaks([0, 0.6])
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,208 @@
from itertools import permutations
import numpy as np
import pytest
from pandas._libs.interval import IntervalTree
from pandas.compat import IS64
import pandas._testing as tm
def skipif_32bit(param):
    """
    Wrap *param* with a marker that skips it on 32-bit systems.

    Specifically used here to skip leaf_size parameters related to GH 23440.
    """
    mark = pytest.mark.skipif(not IS64, reason="GH 23440: int type mismatch on 32bit")
    return pytest.param(param, marks=mark)
@pytest.fixture(params=["int64", "float64", "uint64"])
def dtype(request):
    """Parametrized dtype string used to build IntervalTree inputs."""
    return request.param
@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    IntervalTree ``leaf_size`` parameter; to be used together with the
    ``tree`` fixture (small sizes are skipped on 32-bit, see GH 23440).
    """
    return request.param
@pytest.fixture(
    params=[
        np.arange(5, dtype="int64"),
        np.arange(5, dtype="uint64"),
        np.arange(5, dtype="float64"),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
    ]
)
def tree(request, leaf_size):
    """IntervalTree over intervals [left, left + 2) for several dtypes."""
    left_edges = request.param
    return IntervalTree(left_edges, left_edges + 2, leaf_size=leaf_size)
class TestIntervalTree:
    def test_get_indexer(self, tree):
        res = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        tm.assert_numpy_array_equal(res, np.array([0, 4, -1], dtype="intp"))

        # a query that hits several intervals has no unique answer
        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        # targets outside the tree dtype's range must miss, not wrap around
        tree = IntervalTree(
            np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype)
        )
        res = tree.get_indexer(np.array([target_value], dtype=target_dtype))
        tm.assert_numpy_array_equal(res, np.array([-1], dtype="intp"))

    def test_get_indexer_non_unique(self, tree):
        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        tm.assert_numpy_array_equal(indexer[:1], np.array([0], dtype="intp"))
        tm.assert_numpy_array_equal(
            np.sort(indexer[1:3]), np.array([0, 1], dtype="intp")
        )
        tm.assert_numpy_array_equal(np.sort(indexer[3:]), np.array([-1], dtype="intp"))
        tm.assert_numpy_array_equal(missing, np.array([2], dtype="intp"))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        tree = IntervalTree(
            np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype)
        )
        target = np.array([target_value], dtype=target_dtype)
        indexer, missing = tree.get_indexer_non_unique(target)

        tm.assert_numpy_array_equal(indexer, np.array([-1], dtype="intp"))
        tm.assert_numpy_array_equal(missing, np.array([0], dtype="intp"))

    def test_duplicates(self, dtype):
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)

        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        tm.assert_numpy_array_equal(np.sort(indexer), np.array([0, 1, 2], dtype="intp"))
        tm.assert_numpy_array_equal(missing, np.array([], dtype="intp"))

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        x = np.arange(1000, dtype="float64")
        found = x.astype("intp")
        not_found = (-1 * np.ones(1000)).astype("intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        # endpoints are hits only when the matching side is closed
        tm.assert_numpy_array_equal(
            found if tree.closed_left else not_found, tree.get_indexer(x + 0.0)
        )
        tm.assert_numpy_array_equal(
            found if tree.closed_right else not_found, tree.get_indexer(x + 0.5)
        )

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping(self, closed, order, left, right, expected):
        # GH 23309: the overlap check must be order-independent
        tree = IntervalTree(left[order], right[order], closed=closed)
        assert tree.is_overlapping is expected

    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        left, right = np.arange(3, dtype="int64"), np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)
        assert tree.is_overlapping is (closed == "both")

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        # GH 23309: empty / single-interval / all-NA trees never overlap
        tree = IntervalTree(left, right, closed=closed)
        assert tree.is_overlapping is False

    @pytest.mark.skipif(not IS64, reason="GH 23440")
    def test_construction_overflow(self):
        # GH 25485
        left = np.arange(101, dtype="int64")
        right = [np.iinfo(np.int64).max] * 101
        tree = IntervalTree(left, right)

        # pivot should be average of left/right medians
        assert tree.root.pivot == (50 + np.iinfo(np.int64).max) / 2

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            ([-np.inf, 1.0], [1.0, 2.0], 0.0),
            ([-np.inf, -2.0], [-2.0, -1.0], -2.0),
            ([-2.0, -1.0], [-1.0, np.inf], 0.0),
            ([1.0, 2.0], [2.0, np.inf], 2.0),
        ],
    )
    def test_inf_bound_infinite_recursion(self, left, right, expected):
        # GH 46658: infinite bounds must not recurse forever when splitting
        tree = IntervalTree(left * 101, right * 101)
        assert tree.root.pivot == expected

View File

@ -0,0 +1,44 @@
import pytest
from pandas import (
IntervalIndex,
MultiIndex,
RangeIndex,
)
import pandas._testing as tm
@pytest.fixture
def range_index():
    """RangeIndex of length 3 used as the non-interval MultiIndex level."""
    return RangeIndex(3, name="range_index")
@pytest.fixture
def interval_index():
    """Overlapping IntervalIndex ((1.0, 2.0) overlaps (1.5, 2.5))."""
    return IntervalIndex.from_tuples(
        [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)], name="interval_index"
    )
def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index):
# GH-45661
multi_index = MultiIndex.from_product([interval_index, range_index])
result = multi_index.join(interval_index)
tm.assert_index_equal(result, multi_index)
def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index):
# GH-45661
multi_index = MultiIndex.from_product([interval_index, range_index])
result = interval_index.join(multi_index)
tm.assert_index_equal(result, multi_index)
def test_join_overlapping_interval_to_another_intervalindex(interval_index):
# GH-45661
flipped_interval_index = interval_index[::-1]
result = interval_index.join(flipped_interval_index)
tm.assert_index_equal(result, interval_index)

View File

@ -0,0 +1,13 @@
import pytest
from pandas import IntervalIndex
import pandas._testing as tm
class TestPickle:
    @pytest.mark.parametrize("closed", ["left", "right", "both"])
    def test_pickle_round_trip_closed(self, closed):
        # https://github.com/pandas-dev/pandas/issues/35658
        # the ``closed`` attribute must survive a pickle round trip
        original = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        unpickled = tm.round_trip_pickle(original)
        tm.assert_index_equal(unpickled, original)

View File

@ -0,0 +1,208 @@
import numpy as np
import pytest
from pandas import (
Index,
IntervalIndex,
Timestamp,
interval_range,
)
import pandas._testing as tm
def monotonic_index(start, end, dtype="int64", closed="right"):
    """IntervalIndex with unit-width breaks spanning [start, end)."""
    breaks = np.arange(start, end, dtype=dtype)
    return IntervalIndex.from_breaks(breaks, closed=closed)
def empty_index(dtype="int64", closed="right"):
    """Empty IntervalIndex with the requested subtype and closed side."""
    return IntervalIndex(np.array([], dtype=dtype), closed=closed)
class TestIntervalIndex:
    def test_union(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)
        exp = monotonic_index(0, 13, closed=closed)

        # union of reversed operands, in both operand orders
        for lhs, rhs in ((index, other), (other, index)):
            res = lhs[::-1].union(rhs, sort=sort)
            if sort in (None, True):
                tm.assert_index_equal(res, exp)
            else:
                tm.assert_index_equal(res.sort_values(), exp)

        # union with self (or a subset of self) is a no-op
        tm.assert_index_equal(index.union(index, sort=sort), index)
        tm.assert_index_equal(index.union(index[:1], sort=sort), index)

    def test_union_empty_result(self, closed, sort):
        # GH 19101: empty result, same dtype
        index = empty_index(dtype="int64", closed=closed)
        res = index.union(index, sort=sort)
        tm.assert_index_equal(res, index)

        # GH 19101: empty result, different numeric dtypes -> common dtype is f8
        other = empty_index(dtype="float64", closed=closed)
        res = index.union(other, sort=sort)
        exp = other
        tm.assert_index_equal(res, exp)

        other = index.union(index, sort=sort)
        tm.assert_index_equal(res, exp)

        other = empty_index(dtype="uint64", closed=closed)
        res = index.union(other, sort=sort)
        tm.assert_index_equal(res, exp)

        res = other.union(index, sort=sort)
        tm.assert_index_equal(res, exp)

    def test_intersection(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)
        exp = monotonic_index(5, 11, closed=closed)

        for lhs, rhs in ((index, other), (other, index)):
            res = lhs[::-1].intersection(rhs, sort=sort)
            if sort in (None, True):
                tm.assert_index_equal(res, exp)
            else:
                tm.assert_index_equal(res.sort_values(), exp)

        tm.assert_index_equal(index.intersection(index, sort=sort), index)

        # GH 26225: nested intervals
        index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)])
        other = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        exp = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        tm.assert_index_equal(index.intersection(other), exp)

        # GH 26225
        index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
        other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
        exp = IntervalIndex.from_tuples([(0, 2)])
        tm.assert_index_equal(index.intersection(other), exp)

        # GH 26225: duplicate nan element
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])
        exp = IntervalIndex([np.nan])
        tm.assert_index_equal(index.intersection(other), exp)

    def test_intersection_empty_result(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)

        # GH 19101: empty result, same dtype
        other = monotonic_index(300, 314, closed=closed)
        exp = empty_index(dtype="int64", closed=closed)
        tm.assert_index_equal(index.intersection(other, sort=sort), exp)

        # GH 19101: empty result, different numeric dtypes -> common dtype is float64
        other = monotonic_index(300, 314, dtype="float64", closed=closed)
        res = index.intersection(other, sort=sort)
        exp = other[:0]
        tm.assert_index_equal(res, exp)

        other = monotonic_index(300, 314, dtype="uint64", closed=closed)
        res = index.intersection(other, sort=sort)
        tm.assert_index_equal(res, exp)

    def test_intersection_duplicates(self):
        # GH#38743: duplicates are dropped from the intersection result
        index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
        other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        exp = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        tm.assert_index_equal(index.intersection(other), exp)

    def test_difference(self, closed, sort):
        index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
        res = index.difference(index[:1], sort=sort)
        exp = index[1:]
        if sort is None:
            exp = exp.sort_values()
        tm.assert_index_equal(res, exp)

        # GH 19101: empty result, same dtype
        exp = empty_index(dtype="int64", closed=closed)
        tm.assert_index_equal(index.difference(index, sort=sort), exp)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        tm.assert_index_equal(index.difference(other, sort=sort), exp)

    def test_symmetric_difference(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)
        res = index[1:].symmetric_difference(index[:-1], sort=sort)
        exp = IntervalIndex([index[0], index[-1]])
        if sort in (None, True):
            tm.assert_index_equal(res, exp)
        else:
            tm.assert_index_equal(res.sort_values(), exp)

        # GH 19101: empty result, same dtype
        res = index.symmetric_difference(index, sort=sort)
        exp = empty_index(dtype="int64", closed=closed)
        if sort in (None, True):
            tm.assert_index_equal(res, exp)
        else:
            tm.assert_index_equal(res.sort_values(), exp)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        res = index.symmetric_difference(other, sort=sort)
        tm.assert_index_equal(res, empty_index(dtype="float64", closed=closed))

    @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning")
    @pytest.mark.parametrize(
        "op_name", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_set_incompatible_types(self, closed, op_name, sort):
        index = monotonic_index(0, 11, closed=closed)
        set_op = getattr(index, op_name)

        # TODO: standardize return type of non-union setops type(self vs other)
        # non-IntervalIndex
        if op_name == "difference":
            exp = index
        else:
            exp = getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
        tm.assert_index_equal(set_op(Index([1, 2, 3]), sort=sort), exp)

        # mixed closed -> cast to object
        for other_closed in {"right", "left", "both", "neither"} - {closed}:
            other = monotonic_index(0, 11, closed=other_closed)
            exp = getattr(index.astype(object), op_name)(other, sort=sort)
            if op_name == "difference":
                exp = index
            tm.assert_index_equal(set_op(other, sort=sort), exp)

        # GH 19016: incompatible dtypes -> cast to object
        other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
        exp = getattr(index.astype(object), op_name)(other, sort=sort)
        if op_name == "difference":
            exp = index
        tm.assert_index_equal(set_op(other, sort=sort), exp)

View File

@ -0,0 +1,27 @@
import numpy as np
import pytest
from pandas import (
Index,
MultiIndex,
)
# Note: identical the "multi" entry in the top-level "index" fixture
@pytest.fixture
def idx():
    # A MultiIndex exercising the general functionality of this object;
    # identical to the "multi" entry in the top-level "index" fixture.
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])
    return MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])],
        names=["first", "second"],
        verify_integrity=False,
    )

View File

@ -0,0 +1,263 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
date_range,
period_range,
)
import pandas._testing as tm
def test_infer_objects(idx):
    # infer_objects is not implemented for MultiIndex; error points to to_frame
    with pytest.raises(NotImplementedError, match="to_frame"):
        idx.infer_objects()
def test_shift(idx):
    # GH8083: the base-class shift is only defined for datetime-like indexes
    msg = (
        "This method is only implemented for DatetimeIndex, PeriodIndex and "
        "TimedeltaIndex; Got type MultiIndex"
    )
    for args in ((1,), (1, 2)):
        with pytest.raises(NotImplementedError, match=msg):
            idx.shift(*args)
def test_groupby(idx):
groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
labels = idx.tolist()
exp = {1: labels[:3], 2: labels[3:]}
tm.assert_dict_equal(groups, exp)
# GH5620
groups = idx.groupby(idx)
exp = {key: [key] for key in idx}
tm.assert_dict_equal(groups, exp)
def test_truncate_multiindex():
    # GH 34564: truncate must preserve MultiIndex level names
    index = MultiIndex(
        levels=[Index(list(range(4))), Index(list(range(2)))],
        codes=[np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])],
        names=["L1", "L2"],
    )

    res = index.truncate(before=1)
    # NOTE(review): levels are ints here, so the "foo" check is vacuous;
    # kept as written upstream
    assert "foo" not in res.levels[0]
    assert 1 in res.levels[0]
    assert index.names == res.names

    res = index.truncate(after=1)
    assert 2 not in res.levels[0]
    assert 1 in res.levels[0]
    assert index.names == res.names

    res = index.truncate(before=1, after=2)
    assert len(res.levels[0]) == 2
    assert index.names == res.names

    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
# TODO: reshape
def test_reorder_levels(idx):
    # asking for more levels than exist must raise cleanly
    with pytest.raises(IndexError, match="^Too many levels"):
        idx.reorder_levels([2, 1, 0])
def test_numpy_repeat():
    # np.repeat dispatches to Index.repeat and repeats each tuple
    reps = 2
    numbers = [1, 2, 3]
    names = np.array(["foo", "bar"])

    m = MultiIndex.from_product([numbers, names], names=names)
    exp = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
    tm.assert_index_equal(np.repeat(m, reps), exp)

    # the ``axis`` keyword is rejected by Index.repeat
    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.repeat(m, reps, axis=1)
def test_append_mixed_dtypes():
# GH 13660
dti = date_range("2011-01-01", freq="ME", periods=3)
dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern")
pi = period_range("2011-01", freq="M", periods=3)
mi = MultiIndex.from_arrays(
[[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
)
assert mi.nlevels == 6
res = mi.append(mi)
exp = MultiIndex.from_arrays(
[
[1, 2, 3, 1, 2, 3],
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
["a", "b", "c", "a", "b", "c"],
dti.append(dti),
dti_tz.append(dti_tz),
pi.append(pi),
]
)
tm.assert_index_equal(res, exp)
other = MultiIndex.from_arrays(
[
["x", "y", "z"],
["x", "y", "z"],
["x", "y", "z"],
["x", "y", "z"],
["x", "y", "z"],
["x", "y", "z"],
]
)
res = mi.append(other)
exp = MultiIndex.from_arrays(
[
[1, 2, 3, "x", "y", "z"],
[1.1, np.nan, 3.3, "x", "y", "z"],
["a", "b", "c", "x", "y", "z"],
dti.append(Index(["x", "y", "z"])),
dti_tz.append(Index(["x", "y", "z"])),
pi.append(Index(["x", "y", "z"])),
]
)
tm.assert_index_equal(res, exp)
def test_iter(idx):
result = list(idx)
expected = [
("foo", "one"),
("foo", "two"),
("bar", "one"),
("baz", "two"),
("qux", "one"),
("qux", "two"),
]
assert result == expected
def test_sub(idx):
    first = idx

    # - now raises (previously was set op difference)
    sub_msg = "cannot perform __sub__ with this index type: MultiIndex"
    for lhs, rhs in ((first, idx[-3:]), (idx[-3:], first), (idx[-3:], first.tolist())):
        with pytest.raises(TypeError, match=sub_msg):
            lhs - rhs

    rsub_msg = "cannot perform __rsub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=rsub_msg):
        first.tolist() - idx[-3:]
def test_map(idx):
# callable
index = idx
result = index.map(lambda x: x)
tm.assert_index_equal(result, index)
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, idx: {i: e for e, i in zip(values, idx)},
        lambda values, idx: pd.Series(values, idx),
    ],
)
def test_map_dictlike(idx, mapper):
    identity = mapper(idx.values, idx)

    # we don't infer to uint64 dtype for a dict
    if idx.dtype == np.uint64 and isinstance(identity, dict):
        exp = idx.astype("int64")
    else:
        exp = idx
    tm.assert_index_equal(idx.map(identity), exp)

    # empty mappable
    exp = Index([np.nan] * len(idx))
    tm.assert_index_equal(idx.map(mapper(exp, idx)), exp)
@pytest.mark.parametrize(
    "func",
    [
        np.exp,
        np.exp2,
        np.expm1,
        np.log,
        np.log2,
        np.log10,
        np.log1p,
        np.sqrt,
        np.sin,
        np.cos,
        np.tan,
        np.arcsin,
        np.arccos,
        np.arctan,
        np.sinh,
        np.cosh,
        np.tanh,
        np.arcsinh,
        np.arccosh,
        np.arctanh,
        np.deg2rad,
        np.rad2deg,
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
    # test ufuncs of numpy. see:
    # https://numpy.org/doc/stable/reference/ufuncs.html
    # math ufuncs cannot operate on object-dtype tuples
    msg = (
        "loop of ufunc does not support argument 0 of type tuple which "
        f"has no callable {func.__name__} method"
    )
    with pytest.raises(TypeError, match=msg):
        func(idx)
@pytest.mark.parametrize(
    "func",
    [np.isfinite, np.isinf, np.isnan, np.signbit],
    ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
    # these ufuncs have no loop for object-dtype tuple input at all
    msg = (
        f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
        "could not be safely coerced to any supported types according to "
        "the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=msg):
        func(idx)

View File

@ -0,0 +1,30 @@
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas._testing as tm
def test_astype(idx):
    # astype("O") copies levels/codes and keeps the names
    exp = idx.copy()
    converted = idx.astype("O")
    tm.assert_copy(converted.levels, exp.levels)
    tm.assert_copy(converted.codes, exp.codes)
    assert converted.names == list(exp.names)

    # numeric target dtypes are rejected
    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(np.dtype(int))
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
    # GH 18630: 2D Categorical conversion is not implemented
    msg = "> 1 ndim Categorical are not supported at this time"
    with pytest.raises(NotImplementedError, match=msg):
        idx.astype(CategoricalDtype(ordered=ordered))

    if ordered is False:
        # dtype='category' defaults to ordered=False, so only test once
        with pytest.raises(NotImplementedError, match=msg):
            idx.astype("category")

View File

@ -0,0 +1,122 @@
import numpy as np
import pytest
import pandas as pd
from pandas import MultiIndex
import pandas._testing as tm
def test_numeric_compat(idx):
    # arithmetic operators are undefined for MultiIndex, in both orders
    with pytest.raises(TypeError, match="cannot perform __mul__"):
        idx * 1
    with pytest.raises(TypeError, match="cannot perform __rmul__"):
        1 * idx

    div_err = "cannot perform __truediv__"
    with pytest.raises(TypeError, match=div_err):
        idx / 1
    div_err = div_err.replace(" __", " __r")
    with pytest.raises(TypeError, match=div_err):
        1 / idx

    with pytest.raises(TypeError, match="cannot perform __floordiv__"):
        idx // 1
    with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
        1 // idx
@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
def test_logical_compat(idx, method):
    # logical reductions/inversion are undefined for MultiIndex
    msg = f"cannot perform {method}"
    with pytest.raises(TypeError, match=msg):
        getattr(idx, method)()
def test_inplace_mutation_resets_values():
    # set_levels/set_codes must not corrupt the cached ``_values`` of the
    # original object; statement order below is significant.
    lvls1 = [["a", "b", "c"], [4]]
    lvls2 = [[1, 2, 3], ["a"]]
    codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=lvls1, codes=codes)
    mi2 = MultiIndex(levels=lvls2, codes=codes)

    # instantiating MultiIndex should not access/cache _.values
    assert "_values" not in mi1._cache
    assert "_values" not in mi2._cache

    vals1 = mi1.values.copy()
    vals2 = mi2.values.copy()

    # accessing .values should cache ._values
    assert mi1._values is mi1._cache["_values"]
    assert mi1.values is mi1._cache["_values"]
    assert isinstance(mi1._cache["_values"], np.ndarray)

    # level setting works and returns the new values
    new_vals = mi1.set_levels(lvls2).values
    tm.assert_almost_equal(vals2, new_vals)

    # doesn't drop _values from _cache [implementation detail]
    tm.assert_almost_equal(mi1._cache["_values"], vals1)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, vals1)

    # label setting works too
    codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(1, "a")] * 6

    # must be 1d array of tuples
    assert exp_values.shape == (6,)

    new_mi = mi2.set_codes(codes2)
    assert "_values" not in new_mi._cache
    new_values = new_mi.values
    assert "_values" in new_mi._cache

    # shouldn't change cache of the source object
    tm.assert_almost_equal(mi2._cache["_values"], vals2)

    # should have correct values
    tm.assert_almost_equal(exp_values, new_values)
def test_boxable_categorical_values():
    # Categorical levels of Timestamps must be boxed back to Timestamp
    # tuples when materialized via .values
    hours = pd.date_range("2012-01-01", periods=3, freq="h")
    cat = pd.Categorical(hours)

    res = MultiIndex.from_product([["a", "b", "c"], cat]).values
    exp = pd.Series(
        [(letter, ts) for letter in ["a", "b", "c"] for ts in hours]
    ).values
    tm.assert_numpy_array_equal(res, exp)

    # same boxing through a DataFrame holding the Categorical
    res = pd.DataFrame({"a": ["a", "b", "c"], "b": cat, "c": np.array(cat)}).values
    exp = pd.DataFrame(
        {"a": ["a", "b", "c"], "b": list(hours), "c": list(hours)}
    ).values
    tm.assert_numpy_array_equal(res, exp)

View File

@ -0,0 +1,860 @@
from datetime import (
date,
datetime,
)
import itertools
import numpy as np
import pytest
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
def test_constructor_single_level():
    # a single-level MultiIndex stays a MultiIndex, not a flat Index
    res = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
    )
    assert isinstance(res, MultiIndex)
    tm.assert_index_equal(
        res.levels[0], Index(["foo", "bar", "baz", "qux"], name="first")
    )
    assert res.names == ["first"]
def test_constructor_no_levels():
    # both levels and codes are required, and must be non-empty
    msg = "non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=[], codes=[])

    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex(levels=[])
    with pytest.raises(TypeError, match=msg):
        MultiIndex(codes=[])
def test_constructor_nonhashable_names():
    # GH 20527: level names must be hashable
    msg = r"MultiIndex\.name must be a hashable type"
    with pytest.raises(TypeError, match=msg):
        MultiIndex(
            levels=[[1, 2], ["one", "two"]],
            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
            names=(["foo"], ["bar"]),
        )

    # With .rename()
    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    renamed = [["fooo"], ["barr"]]
    with pytest.raises(TypeError, match=msg):
        mi.rename(names=renamed)

    # With .set_names()
    with pytest.raises(TypeError, match=msg):
        mi.set_names(names=renamed)
def test_constructor_mismatched_codes_levels(idx):
    # levels and codes lists must have matching lengths
    msg = "Length of levels and codes must be the same"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(
            levels=["a"], codes=[np.array([1]), np.array([2]), np.array([3])]
        )

    length_error = (
        r"On level 0, code max \(3\) >= length of level \(1\)\. "
        "NOTE: this index is in an inconsistent state"
    )
    label_error = r"Unequal code lengths: \[4, 2\]"
    code_value_error = r"On level 0, code value \(-2\) < -1"

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=length_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
    with pytest.raises(ValueError, match=label_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])

    # external API
    with pytest.raises(ValueError, match=length_error):
        idx.copy().set_levels([["a"], ["b"]])
    with pytest.raises(ValueError, match=label_error):
        idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])

    # set_codes with verify_integrity=False bypasses the checks entirely
    idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)

    # code value smaller than -1
    with pytest.raises(ValueError, match=code_value_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
def test_na_levels():
    # GH26408: codes pointing at missing level values (NaN, NaT, None)
    # are re-assigned to -1
    res = MultiIndex(
        levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]]
    )
    exp = MultiIndex(
        levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]]
    )
    tm.assert_index_equal(res, exp)

    res = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]]
    )
    exp = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]]
    )
    tm.assert_index_equal(res, exp)

    # verify set_levels and set_codes apply the same re-assignment
    res = MultiIndex(
        levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]
    ).set_levels([[np.nan, "s", pd.NaT, 128, None]])
    tm.assert_index_equal(res, exp)

    res = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]]
    ).set_codes([[0, -1, 1, 2, 3, 4]])
    tm.assert_index_equal(res, exp)
def test_copy_in_constructor():
    """With copy=True the MultiIndex is insulated from later mutation of the input arrays."""
    levels = np.array(["a", "b", "c"])
    codes = np.array([1, 1, 2, 0, 0, 1, 1])
    val = codes[0]
    mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
    assert mi.codes[0][0] == val
    # mutating the source arrays must not leak into the constructed index
    codes[0] = 15
    assert mi.codes[0][0] == val
    val = levels[0]
    levels[0] = "PANDA"
    assert mi.levels[0][0] == val
# ----------------------------------------------------------------------------
# from_arrays
# ----------------------------------------------------------------------------
def test_from_arrays(idx):
    """from_arrays round-trips the materialized level values of an existing MultiIndex."""
    # rebuild the per-row values for each level from levels/codes
    arrays = [
        np.asarray(lev).take(level_codes)
        for lev, level_codes in zip(idx.levels, idx.codes)
    ]
    # list of arrays as input
    result = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(result, idx)
    # infer correctly
    result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
    assert result.levels[0].equals(Index([Timestamp("20130101")]))
    assert result.levels[1].equals(Index(["a", "b"]))
def test_from_arrays_iterator(idx):
    """from_arrays accepts an iterator of arrays; non-iterables raise TypeError (GH 18434)."""
    # GH 18434
    arrays = [
        np.asarray(lev).take(level_codes)
        for lev, level_codes in zip(idx.levels, idx.codes)
    ]
    # iterator as input
    result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
    tm.assert_index_equal(result, idx)
    # invalid iterator input
    msg = "Input must be a list / sequence of array-likes."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(0)
def test_from_arrays_tuples(idx):
    """from_arrays also accepts a tuple of tuples instead of a list of arrays."""
    arrays = tuple(
        tuple(np.asarray(lev).take(level_codes))
        for lev, level_codes in zip(idx.levels, idx.codes)
    )
    # tuple of tuples as input
    result = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(result, idx)
@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="h", periods=3),
        ),
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
        ),
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="h", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    """Datetime-like index levels keep their dtype whether passed as Index or Series."""
    result = MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)
    # wrapping in Series must give an identical result
    result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)
    tm.assert_index_equal(result, result2)
def test_from_arrays_index_datetimelike_mixed():
    """Mixed tz-aware/naive datetime, timedelta and period levels each keep their dtype."""
    idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
    idx2 = date_range("2015-01-01 10:00", freq="h", periods=3)
    idx3 = pd.timedelta_range("1 days", freq="D", periods=3)
    idx4 = pd.period_range("2011-01-01", freq="D", periods=3)
    result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)
    tm.assert_index_equal(result.get_level_values(2), idx3)
    tm.assert_index_equal(result.get_level_values(3), idx4)
    # Series-wrapped inputs behave identically
    result2 = MultiIndex.from_arrays(
        [Series(idx1), Series(idx2), Series(idx3), Series(idx4)]
    )
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)
    tm.assert_index_equal(result2.get_level_values(2), idx3)
    tm.assert_index_equal(result2.get_level_values(3), idx4)
    tm.assert_index_equal(result, result2)
def test_from_arrays_index_series_categorical():
    """Categorical inputs (Index, Series or values) preserve categories and orderedness (GH13743)."""
    # GH13743
    idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False)
    idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True)
    result = MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)
    result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)
    # raw Categorical values also work
    result3 = MultiIndex.from_arrays([idx1.values, idx2.values])
    tm.assert_index_equal(result3.get_level_values(0), idx1)
    tm.assert_index_equal(result3.get_level_values(1), idx2)
def test_from_arrays_empty():
    """Zero levels raise; one or more empty arrays produce an empty MultiIndex with names."""
    # 0 levels
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_arrays(arrays=[])
    # 1 level
    result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(result, MultiIndex)
    expected = Index([], name="A")
    tm.assert_index_equal(result.levels[0], expected)
    assert result.names == ["A"]
    # N levels
    for N in [2, 3]:
        arrays = [[]] * N
        names = list("ABC")[:N]
        result = MultiIndex.from_arrays(arrays=arrays, names=names)
        expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names)
        tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        # NOTE(review): "a" already appears above — this duplicate just re-runs the same case
        "a",
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """Scalars, strings and mixed scalar/array sequences all raise TypeError."""
    msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
@pytest.mark.parametrize(
    "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
    """Arrays of unequal length raise ValueError (gh-13599)."""
    # see gh-13599
    msg = "^all arrays must be same length$"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_arrays([idx1, idx2])
def test_from_arrays_respects_none_names():
    """An explicit names=None overrides the names of the input Series (GH27292)."""
    # GH27292
    left = Series([1, 2, 3], name="foo")
    right = Series(["a", "b", "c"], name="bar")
    observed = MultiIndex.from_arrays([left, right], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None
    )
    tm.assert_index_equal(observed, expected)
# ----------------------------------------------------------------------------
# from_tuples
# ----------------------------------------------------------------------------
def test_from_tuples():
    """from_tuples rejects an empty list and accepts a tuple of tuples with names."""
    msg = "Cannot infer number of levels from empty list"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_tuples([])
    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )
    # input tuples
    result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
    tm.assert_index_equal(result, expected)
def test_from_tuples_iterator():
    """from_tuples accepts an iterator of tuples; non-iterables raise TypeError (GH 18434)."""
    # GH 18434
    # input iterator for tuples
    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )
    result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"])
    tm.assert_index_equal(result, expected)
    # input non-iterables
    msg = "Input must be a list / sequence of tuple-likes."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_tuples(0)
def test_from_tuples_empty():
    """from_tuples([]) with names matches from_arrays with empty arrays (GH 16777)."""
    # GH 16777
    expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    observed = MultiIndex.from_tuples([], names=["a", "b"])
    tm.assert_index_equal(observed, expected)
def test_from_tuples_index_values(idx):
    """Round-tripping an existing MultiIndex through from_tuples preserves its values."""
    result = MultiIndex.from_tuples(idx)
    assert (result.values == idx.values).all()
def test_tuples_with_name_string():
    """A scalar `name` on tuple input is invalid — a MultiIndex needs list-like names (GH 15110/14848)."""
    # GH 15110 and GH 14848
    li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    msg = "Names should be list-like for a MultiIndex"
    with pytest.raises(ValueError, match=msg):
        Index(li, name="abc")
    with pytest.raises(ValueError, match=msg):
        Index(li, name="a")
def test_from_tuples_with_tuple_label():
    """A tuple may itself be a label inside a level without being unpacked (GH 15457)."""
    # GH 15457
    expected = pd.DataFrame(
        [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
    ).set_index(["a", "b"])
    idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    result = pd.DataFrame([2, 3], columns=["c"], index=idx)
    tm.assert_frame_equal(expected, result)
# ----------------------------------------------------------------------------
# from_product
# ----------------------------------------------------------------------------
def test_from_product_empty_zero_levels():
    """from_product with no iterables at all raises ValueError."""
    # 0 levels
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_product([])
def test_from_product_empty_one_level():
    """A single empty iterable yields an empty level that still carries its name."""
    mi = MultiIndex.from_product([[]], names=["A"])
    tm.assert_index_equal(mi.levels[0], Index([], name="A"))
    assert mi.names == ["A"]
@pytest.mark.parametrize(
    "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
    """If either factor is empty the product is empty, but levels/names are kept."""
    names = ["A", "B"]
    result = MultiIndex.from_product([first, second], names=names)
    expected = MultiIndex(levels=[first, second], codes=[[], []], names=names)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    """Empty factors in a three-level product keep the non-empty level intact (GH12258)."""
    # GH12258
    names = ["A", "B", "C"]
    lvl2 = list(range(N))
    result = MultiIndex.from_product([[], lvl2, []], names=names)
    expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names)
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
    "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
    """Scalars and sequences containing scalars raise TypeError in from_product."""
    msg = r"Input must be a list / sequence of iterables|Input must be list-like"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(iterables=invalid_input)
def test_from_product_datetimeindex():
    """The materialized .values of an int x DatetimeIndex product are object tuples with Timestamps."""
    dt_index = date_range("2000-01-01", periods=2)
    mi = MultiIndex.from_product([[1, 2], dt_index])
    # build the expected object-dtype array of tuples
    etalon = construct_1d_object_array_from_listlike(
        [
            (1, Timestamp("2000-01-01")),
            (1, Timestamp("2000-01-02")),
            (2, Timestamp("2000-01-01")),
            (2, Timestamp("2000-01-02")),
        ]
    )
    tm.assert_numpy_array_equal(mi.values, etalon)
def test_from_product_rangeindex():
    """A RangeIndex factor survives factorization and is stored as a RangeIndex level."""
    # RangeIndex is preserved by factorize, so preserved in levels
    rng = Index(range(5))
    other = ["a", "b"]
    mi = MultiIndex.from_product([rng, other])
    # exact=True asserts the level is still RangeIndex, not a plain Index
    tm.assert_index_equal(mi._levels[0], rng, exact=True)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    """Categorical inputs to from_product keep categories/orderedness regardless of wrapper (GH13743)."""
    # GH13743
    first = ["foo", "bar"]
    idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered)
    # the categorical level is tiled once per element of `first`
    expected = pd.CategoricalIndex(
        list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered
    )
    result = MultiIndex.from_product([first, f(idx)])
    tm.assert_index_equal(result.get_level_values(1), expected)
def test_from_product():
    """from_product builds the full cartesian product in row-major order."""
    level_a = ["foo", "bar", "buz"]
    level_b = ["a", "b", "c"]
    names = ["first", "second"]
    # the expected index enumerates every (a, b) pair, first level varying slowest
    expected = MultiIndex.from_tuples(
        [(a, b) for a in level_a for b in level_b], names=names
    )
    result = MultiIndex.from_product([level_a, level_b], names=names)
    tm.assert_index_equal(result, expected)
def test_from_product_iterator():
    """from_product accepts an iterator of iterables; non-iterables raise TypeError (GH 18434)."""
    # GH 18434
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    tuples = [
        ("foo", "a"),
        ("foo", "b"),
        ("foo", "c"),
        ("bar", "a"),
        ("bar", "b"),
        ("bar", "c"),
        ("buz", "a"),
        ("buz", "b"),
        ("buz", "c"),
    ]
    expected = MultiIndex.from_tuples(tuples, names=names)
    # iterator as input
    result = MultiIndex.from_product(iter([first, second]), names=names)
    tm.assert_index_equal(result, expected)
    # Invalid non-iterable input
    msg = "Input must be a list / sequence of iterables."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(0)
@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            Series([1, 2, 3], name="foo"),
            Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    """When names= is omitted, level names are inferred from Series inputs (GH27292)."""
    # GH27292
    result = MultiIndex.from_product([a, b])
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=expected_names,
    )
    tm.assert_index_equal(result, expected)
def test_from_product_respects_none_names():
    """Explicit names=None suppresses name inference from Series inputs (GH27292)."""
    # GH27292
    left = Series([1, 2, 3], name="foo")
    right = Series(["a", "b"], name="bar")
    observed = MultiIndex.from_product([left, right], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=None,
    )
    tm.assert_index_equal(observed, expected)
def test_from_product_readonly():
    """A read-only ndarray input gives the same result as a writable one (GH#15286)."""
    # GH#15286
    arr = np.array(range(3))
    letters = ["a", "b"]
    expected = MultiIndex.from_product([arr, letters])
    # flipping the write flag must not change the outcome
    arr.setflags(write=False)
    result = MultiIndex.from_product([arr, letters])
    tm.assert_index_equal(result, expected)
def test_create_index_existing_name(idx):
    """Index(existing_index) inherits names unless a new name is passed explicitly (GH11193)."""
    # GH11193, when an existing index is passed, and a new name is not
    # specified, the new index should inherit the previous object name
    index = idx
    index.names = ["foo", "bar"]
    result = Index(index)
    expected = Index(
        Index(
            [
                ("foo", "one"),
                ("foo", "two"),
                ("bar", "one"),
                ("baz", "two"),
                ("qux", "one"),
                ("qux", "two"),
            ],
            dtype="object",
        )
    )
    tm.assert_index_equal(result, expected)
    # an explicit scalar name replaces the inherited names
    result = Index(index, name="A")
    expected = Index(
        Index(
            [
                ("foo", "one"),
                ("foo", "two"),
                ("bar", "one"),
                ("baz", "two"),
                ("qux", "one"),
                ("qux", "two"),
            ],
            dtype="object",
        ),
        name="A",
    )
    tm.assert_index_equal(result, expected)
# ----------------------------------------------------------------------------
# from_frame
# ----------------------------------------------------------------------------
def test_from_frame():
    """from_frame turns DataFrame columns into levels named after the columns (GH 22420)."""
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"]
    )
    expected = MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
    )
    result = MultiIndex.from_frame(df)
    tm.assert_index_equal(expected, result)
def test_from_frame_missing_values_multiIndex():
    """Nullable (masked/arrow-backed) dtypes keep their missing values through from_frame (GH 39984)."""
    # GH 39984
    pa = pytest.importorskip("pyarrow")
    df = pd.DataFrame(
        {
            "a": Series([1, 2, None], dtype="Int64"),
            "b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
        }
    )
    multi_indexed = MultiIndex.from_frame(df)
    expected = MultiIndex.from_arrays(
        [
            Series([1, 2, None]).astype("Int64"),
            pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
        ],
        names=["a", "b"],
    )
    tm.assert_index_equal(multi_indexed, expected)
@pytest.mark.parametrize(
    "non_frame",
    [
        Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    """Anything other than a DataFrame is rejected by from_frame (GH 22420)."""
    # GH 22420
    with pytest.raises(TypeError, match="Input must be a DataFrame"):
        MultiIndex.from_frame(non_frame)
def test_from_frame_dtype_fidelity():
    """Each column's dtype (tz-aware datetime, int, ordered categorical, object) survives from_frame (GH 22420)."""
    # GH 22420
    df = pd.DataFrame(
        {
            "dates": date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    original_dtypes = df.dtypes.to_dict()
    expected_mi = MultiIndex.from_arrays(
        [
            date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    mi = MultiIndex.from_frame(df)
    # compare per-level dtypes against the source frame's dtypes
    mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
    tm.assert_index_equal(expected_mi, mi)
    assert original_dtypes == mi_dtypes
@pytest.mark.parametrize(
    "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
)
def test_from_frame_valid_names(names_in, names_out):
    """names=None keeps the frame's (tuple) column labels; an explicit list replaces them (GH 22420)."""
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
    )
    mi = MultiIndex.from_frame(df, names=names_in)
    assert mi.names == names_out
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    """Scalar names and wrong-length name lists both raise ValueError (GH 22420)."""
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
    )
    with pytest.raises(ValueError, match=expected_error_msg):
        MultiIndex.from_frame(df, names=names)
def test_index_equal_empty_iterable():
    """An empty MultiIndex built directly equals one built via from_arrays (#16844)."""
    # #16844
    direct = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
    via_arrays = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(direct, via_arrays)
def test_raise_invalid_sortorder():
    """A sortorder larger than the actual lexsort depth of the codes raises ValueError (GH#28518)."""
    # Test that the MultiIndex constructor raise when a incorrect sortorder is given
    # GH#28518
    levels = [[0, 1], [0, 1, 2]]
    # Correct sortorder
    MultiIndex(
        levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
    )
    with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
        MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2
        )
    with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
        MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1
        )
def test_datetimeindex():
    """Datetime levels keep tz and dedupe; datetime-like scalars infer DatetimeIndex levels, date does not."""
    idx1 = pd.DatetimeIndex(
        ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo"
    )
    idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern")
    idx = MultiIndex.from_arrays([idx1, idx2])
    # level 0 is deduplicated but keeps its timezone
    expected1 = pd.DatetimeIndex(
        ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo"
    )
    tm.assert_index_equal(idx.levels[0], expected1)
    tm.assert_index_equal(idx.levels[1], idx2)
    # from datetime combos
    # GH 7888
    date1 = np.datetime64("today")
    date2 = datetime.today()
    date3 = Timestamp.today()
    for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]):
        index = MultiIndex.from_product([[d1], [d2]])
        assert isinstance(index.levels[0], pd.DatetimeIndex)
        assert isinstance(index.levels[1], pd.DatetimeIndex)
    # but NOT date objects, matching Index behavior
    date4 = date.today()
    index = MultiIndex.from_product([[date4], [date2]])
    assert not isinstance(index.levels[0], pd.DatetimeIndex)
    assert isinstance(index.levels[1], pd.DatetimeIndex)
def test_constructor_with_tz():
    """tz-aware DatetimeIndex levels keep both their names and timezones through from_arrays."""
    index = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    columns = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )
    result = MultiIndex.from_arrays([index, columns])
    assert result.names == ["dt1", "dt2"]
    tm.assert_index_equal(result.levels[0], index)
    tm.assert_index_equal(result.levels[1], columns)
    # Series-wrapped inputs behave identically
    result = MultiIndex.from_arrays([Series(index), Series(columns)])
    assert result.names == ["dt1", "dt2"]
    tm.assert_index_equal(result.levels[0], index)
    tm.assert_index_equal(result.levels[1], columns)
def test_multiindex_inference_consistency():
    """date objects infer object dtype in MultiIndex levels, matching flat Index inference."""
    # check that inference behavior matches the base class
    v = date.today()
    arr = [v, v]
    idx = Index(arr)
    assert idx.dtype == object
    mi = MultiIndex.from_arrays([arr])
    lev = mi.levels[0]
    assert lev.dtype == object
    mi = MultiIndex.from_product([arr])
    lev = mi.levels[0]
    assert lev.dtype == object
    mi = MultiIndex.from_tuples([(x,) for x in arr])
    lev = mi.levels[0]
    assert lev.dtype == object
def test_dtype_representation(using_infer_string):
    """MultiIndex.dtypes is a Series indexed by the (tuple) level names (GH#46900)."""
    # GH#46900
    pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
    result = pmidx.dtypes
    # string level dtype depends on the future.infer_string option
    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
    expected = Series(
        ["int64", exp],
        index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
        dtype=object,
    )
    tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,201 @@
import numpy as np
import pytest
from pandas.compat.numpy import np_version_gt2
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
)
import pandas._testing as tm
def test_to_numpy(idx):
    """to_numpy() returns the same ndarray of tuples as .values."""
    result = idx.to_numpy()
    exp = idx.values
    tm.assert_numpy_array_equal(result, exp)
def test_array_interface(idx):
    """np.asarray gives cached object tuples; copy=True copies; copy=False is disallowed on NumPy>=2."""
    # https://github.com/pandas-dev/pandas/pull/60046
    result = np.asarray(idx)
    expected = np.empty((6,), dtype=object)
    expected[:] = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]
    tm.assert_numpy_array_equal(result, expected)
    # it always gives a copy by default, but the values are cached, so results
    # are still sharing memory
    result_copy1 = np.asarray(idx)
    result_copy2 = np.asarray(idx)
    assert np.may_share_memory(result_copy1, result_copy2)
    # with explicit copy=True, then it is an actual copy
    result_copy1 = np.array(idx, copy=True)
    result_copy2 = np.array(idx, copy=True)
    assert not np.may_share_memory(result_copy1, result_copy2)
    if not np_version_gt2:
        # copy=False semantics are only supported in NumPy>=2.
        return
    # for MultiIndex, copy=False is never allowed
    msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        np.array(idx, copy=False)
def test_to_frame():
    """to_frame: index= toggles the frame's index, name= renames columns, names/dtypes are kept."""
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(result, expected)
    # index=True (default) reuses the MultiIndex as the frame's index
    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
    index = MultiIndex.from_tuples(tuples, names=["first", "second"])
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)
    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)
    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)
    result = index.to_frame(name=["first", "second"])
    expected.index = index
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)
    # scalar name is rejected; wrong-length name list is rejected
    msg = "'name' must be a list / sequence of column names."
    with pytest.raises(TypeError, match=msg):
        index.to_frame(name="first")
    msg = "'name' should have same length as number of levels on index."
    with pytest.raises(ValueError, match=msg):
        index.to_frame(name=["first"])
    # Tests for datetime index
    index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    result = index.to_frame(index=False)
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)
    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)
    # See GH-22580
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)
    result = index.to_frame(name=["first", "second"])
    expected.index = index
    tm.assert_frame_equal(result, expected)
def test_to_frame_dtype_fidelity():
    """Level dtypes (tz-aware datetime, int, ordered categorical, object) survive to_frame (GH 22420)."""
    # GH 22420
    mi = MultiIndex.from_arrays(
        [
            pd.date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
    expected_df = DataFrame(
        {
            "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    df = mi.to_frame(index=False)
    df_dtypes = df.dtypes.to_dict()
    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df_dtypes
def test_to_frame_resulting_column_order():
    """to_frame emits columns in level order, labeled with the level names (GH 22420)."""
    # GH 22420
    names = ["z", 0, "a"]
    mi = MultiIndex.from_arrays(
        [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=names
    )
    assert mi.to_frame().columns.tolist() == names
def test_to_frame_duplicate_labels():
    """Duplicate level names raise in to_frame unless allow_duplicates=True (GH 45245)."""
    # GH 45245
    data = [(1, 2), (3, 4)]
    names = ["a", "a"]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
        index.to_frame()
    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=names)
    tm.assert_frame_equal(result, expected)
    # None and 0 collide because None defaults to the positional label 0
    names = [None, 0]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
        index.to_frame()
    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=[0, 0])
    tm.assert_frame_equal(result, expected)
def test_to_flat_index(idx):
    """to_flat_index returns a flat Index of tuples (tupleize_cols=False keeps it flat)."""
    expected = pd.Index(
        (
            ("foo", "one"),
            ("foo", "two"),
            ("bar", "one"),
            ("baz", "two"),
            ("qux", "one"),
            ("qux", "two"),
        ),
        tupleize_cols=False,
    )
    result = idx.to_flat_index()
    tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,96 @@
from copy import (
copy,
deepcopy,
)
import pytest
from pandas import MultiIndex
import pandas._testing as tm
def assert_multiindex_copied(copy, original):
    """Shared checks that `copy` is a proper (at least shallow) copy of `original`."""
    # Levels should be (at least, shallow copied)
    tm.assert_copy(copy.levels, original.levels)
    tm.assert_almost_equal(copy.codes, original.codes)
    # Labels doesn't matter which way copied
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes
    # Names doesn't matter which way copied
    assert copy.names == original.names
    assert copy.names is not original.names
    # Sort order should be copied
    assert copy.sortorder == original.sortorder
def test_copy(idx):
    """MultiIndex.copy() satisfies the shared copy invariants."""
    i_copy = idx.copy()
    assert_multiindex_copied(i_copy, idx)
def test_shallow_copy(idx):
    """The internal _view() shallow copy satisfies the shared copy invariants."""
    i_copy = idx._view()
    assert_multiindex_copied(i_copy, idx)
def test_view(idx):
    """MultiIndex.view() satisfies the shared copy invariants."""
    i_view = idx.view()
    assert_multiindex_copied(i_view, idx)
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """copy.copy and copy.deepcopy both return a distinct but equal MultiIndex."""
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = func(idx)
    assert idx_copy is not idx
    assert idx_copy.equals(idx)
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """copy(deep=...) yields an equal MultiIndex for both deep and shallow copies."""
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(deep=deep)
    assert idx_copy.equals(idx)
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """Keyword arguments like `names` are honored by copy() regardless of `deep` (gh-12309)."""
    # gh-12309: Check that the "name" argument as well other kwargs are honored
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(**{kwarg: value, "deep": deep})
    assert getattr(idx_copy, kwarg) == value
def test_copy_deep_false_retains_id():
    """copy(deep=False) shares the _id cache object with the source index (GH#47878)."""
    # GH#47878
    original = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    shallow = original.copy(deep=False)
    assert shallow._id is original._id

View File

@ -0,0 +1,190 @@
import numpy as np
import pytest
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_drop(idx):
    """drop handles tuples, partial (level-0) labels, mixed lists, missing keys and errors='ignore'."""
    dropped = idx.drop([("foo", "two"), ("qux", "one")])
    index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
    dropped2 = idx.drop(index)
    expected = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
    tm.assert_index_equal(dropped2, expected)
    # partial labels drop every row matching level 0
    dropped = idx.drop(["bar"])
    expected = idx[[0, 1, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)
    dropped = idx.drop("foo")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)
    # missing labels raise KeyError
    index = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(index)
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(["foo", "two"])
    # partially correct argument
    mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(mixed_index)
    # error='ignore'
    dropped = idx.drop(index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)
    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
    dropped = idx.drop(["foo", "two"], errors="ignore")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)
    # mixed partial / full drop
    dropped = idx.drop(["foo", ("qux", "one")])
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
    # mixed partial / full drop / error='ignore'
    mixed_index = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(mixed_index)
    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
def test_droplevel_with_names(idx):
    """droplevel keeps the remaining level names and accepts a level name instead of a position."""
    index = idx[idx.get_loc("foo")]
    dropped = index.droplevel(0)
    assert dropped.name == "second"
    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )
    dropped = index.droplevel(0)
    assert dropped.names == ("two", "three")
    # dropping by name is equivalent to dropping by position
    dropped = index.droplevel("two")
    expected = index.droplevel(1)
    assert dropped.equals(expected)
def test_droplevel_list():
    """droplevel accepts lists of names; empty list is a no-op; dropping all levels or unknown names raises."""
    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )
    dropped = index[:2].droplevel(["three", "one"])
    expected = index[:2].droplevel(2).droplevel(0)
    assert dropped.equals(expected)
    # empty list leaves the index unchanged
    dropped = index[:2].droplevel([])
    expected = index[:2]
    assert dropped.equals(expected)
    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        index[:2].droplevel(["one", "two", "three"])
    with pytest.raises(KeyError, match="'Level four not found'"):
        index[:2].droplevel(["one", "four"])
def test_drop_not_lexsorted():
    """drop gives the same result on a non-lexsorted index, modulo a PerformanceWarning (GH 12078)."""
    # GH 12078
    # define the lexsorted version of the multi-index
    tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
    lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
    assert lexsorted_mi._is_lexsorted()
    # and the not-lexsorted version
    df = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    df = df.pivot_table(index="a", columns=["b", "c"], values="d")
    df = df.reset_index()
    not_lexsorted_mi = df.columns
    assert not not_lexsorted_mi._is_lexsorted()
    # compare the results
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
def test_drop_with_nan_in_index(nulls_fixture):
    """Dropping a label absent from a level that holds a null raises KeyError (GH#18853)."""
    # GH#18853
    mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
    msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(pd.Timestamp("2001"), level="date")
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_drop_with_non_monotonic_duplicates():
    """drop removes every occurrence of a duplicated tuple even when not monotonic (GH#33494)."""
    # GH#33494
    mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
    result = mi.drop((1, 2))
    expected = MultiIndex.from_tuples([(2, 3)])
    tm.assert_index_equal(result, expected)
def test_single_level_drop_partially_missing_elements():
# GH 37820
mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])
msg = r"labels \[4\] not found in level"
with pytest.raises(KeyError, match=msg):
mi.drop(4, level=0)
with pytest.raises(KeyError, match=msg):
mi.drop([1, 4], level=0)
msg = r"labels \[nan\] not found in level"
with pytest.raises(KeyError, match=msg):
mi.drop([np.nan], level=0)
with pytest.raises(KeyError, match=msg):
mi.drop([np.nan, 1, 2, 3], level=0)
mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
msg = r"labels \['a'\] not found in level"
with pytest.raises(KeyError, match=msg):
mi.drop([np.nan, 1, "a"], level=0)
def test_droplevel_multiindex_one_level():
    # GH#37208 - droplevel([]) on a single-level MultiIndex collapses it to
    # a flat Index instead of keeping a degenerate MultiIndex.
    mi = MultiIndex.from_tuples([(2,)], names=("b",))
    flattened = mi.droplevel([])
    tm.assert_index_equal(flattened, Index([2], name="b"))

View File

@ -0,0 +1,363 @@
from itertools import product
import numpy as np
import pytest
from pandas._libs import (
hashtable,
index as libindex,
)
from pandas import (
NA,
DatetimeIndex,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture
def idx_dup():
    # compare tests/indexes/multi/conftest.py
    # Same levels as the shared ``idx`` fixture, but the codes repeat two
    # of the six entries, producing a non-unique index.
    return MultiIndex(
        levels=[Index(["foo", "bar", "baz", "qux"]), Index(["one", "two"])],
        codes=[np.array([0, 0, 1, 0, 1, 1]), np.array([0, 1, 0, 1, 0, 1])],
        names=["first", "second"],
        verify_integrity=False,
    )
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    # unique() keeps the first occurrence of each tuple and carries names.
    cases = [
        ([[1, 2, 1, 2], [1, 1, 1, 2]], [[1, 2, 2], [1, 1, 2]]),
        ([list("aaaa"), list("abab")], [list("aa"), list("ab")]),
        ([list("aaaa"), list("aaaa")], [["a"], ["a"]]),
    ]
    for arrays, unique_arrays in cases:
        mi = MultiIndex.from_arrays(arrays, names=names)
        exp = MultiIndex.from_arrays(unique_arrays, names=mi.names)
        tm.assert_index_equal(mi.unique(), exp)

    # GH #20568 - empty MI round-trips unchanged
    mi = MultiIndex.from_arrays([[], []], names=names)
    tm.assert_index_equal(mi, mi.unique())
def test_unique_datetimelike():
    # unique() must handle tz-aware datetime levels and NaT entries.
    naive = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    aware = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    result = MultiIndex.from_arrays([naive, aware]).unique()

    expected = MultiIndex.from_arrays(
        [
            DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"]),
            DatetimeIndex(
                ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
            ),
        ]
    )
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
# GH #17896 - with level= argument
result = idx.unique(level=level)
expected = idx.get_level_values(level).unique()
tm.assert_index_equal(result, expected)
# With already unique level
mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
tm.assert_index_equal(result, expected)
# With empty MI
mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
tm.assert_index_equal(result, expected)
def test_duplicate_multiindex_codes():
    # GH 17464
    # Duplicate values inside a single level must raise, both in the
    # constructor ...
    msg = r"Level values must be unique: \[[A', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])

    # ... and through set_levels on an existing index.
    mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]])
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    # GH18872, GH19029 - duplicate level names are allowed everywhere.
    mi = MultiIndex.from_product([[0, 1]] * 3, names=names)
    assert mi.names == names

    # Via .rename()
    mi = MultiIndex.from_product([[0, 1]] * 3).rename(names)
    assert mi.names == names

    # Via .rename(..., level=...), partly in place
    mi.rename(names[1], level=1, inplace=True)
    mi = mi.rename([names[0], names[2]], level=[0, 2])
    assert mi.names == names
def test_duplicate_meta_data():
    # GH 10115 - drop_duplicates preserves the names, whatever they are.
    base = MultiIndex(
        levels=[[0, 1], [0, 1, 2]],
        codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]],
    )
    variants = [
        base,
        base.set_names([None, None]),
        base.set_names([None, "Num"]),
        base.set_names(["Upper", "Num"]),
    ]
    for idx in variants:
        assert idx.has_duplicates
        assert idx.drop_duplicates().names == idx.names
def test_has_duplicates(idx, idx_dup):
# see fixtures
assert idx.is_unique is True
assert idx.has_duplicates is False
assert idx_dup.is_unique is False
assert idx_dup.has_duplicates is True
mi = MultiIndex(
levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
)
assert mi.is_unique is False
assert mi.has_duplicates is True
# single instance of NaN
mi_nan = MultiIndex(
levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
)
assert mi_nan.is_unique is True
assert mi_nan.has_duplicates is False
# multiple instances of NaN
mi_nan_dup = MultiIndex(
levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
)
assert mi_nan_dup.is_unique is False
assert mi_nan_dup.has_duplicates is True
def test_has_duplicates_from_tuples():
    # GH 9075 - wide tuples hash correctly; none of these rows repeat
    # (they differ only in the 4th and 8th positions).
    varying = [
        (5, 169), (7, 119), (9, 135), (13, 145), (14, 158), (16, 122),
        (17, 160), (18, 180), (20, 143), (21, 128), (22, 129), (25, 111),
        (28, 114), (29, 121), (31, 126), (32, 155), (33, 123), (12, 144),
    ]
    t = [("x", "out", "z", a, "y", "in", "z", b) for a, b in varying]
    mi = MultiIndex.from_tuples(t)
    assert not mi.has_duplicates
@pytest.mark.parametrize("nlevels", [4, 8])
@pytest.mark.parametrize("with_nulls", [True, False])
def test_has_duplicates_overflow(nlevels, with_nulls):
    """has_duplicates on indexes wide enough that packing the codes into a
    single int64 may overflow (4 levels fits; 8 levels can overflow)."""
    # handle int64 overflow if possible
    # no overflow with 4
    # overflow possible with 8
    codes = np.tile(np.arange(500), 2)
    level = np.arange(500)
    if with_nulls:  # inject some null values
        codes[500] = -1  # common nan value
        # each level gets its own copy so the per-level NaN below is distinct
        codes = [codes.copy() for i in range(nlevels)]
        for i in range(nlevels):
            codes[i][500 + i - nlevels // 2] = -1
        codes += [np.array([-1, 1]).repeat(500)]
    else:
        codes = [codes] * nlevels + [np.arange(2).repeat(500)]
    levels = [level] * nlevels + [[0, 1]]
    # no dups
    mi = MultiIndex(levels=levels, codes=codes)
    assert not mi.has_duplicates
    # with a dup
    if with_nulls:

        def f(a):
            # re-insert the first row mid-array to create one duplicate
            return np.insert(a, 1000, a[0])

        codes = list(map(f, codes))
        mi = MultiIndex(levels=levels, codes=codes)
    else:
        values = mi.values.tolist()
        mi = MultiIndex.from_tuples(values + [values[0]])
    assert mi.has_duplicates
@pytest.mark.parametrize(
"keep, expected",
[
("first", np.array([False, False, False, True, True, False])),
("last", np.array([False, True, True, False, False, False])),
(False, np.array([False, True, True, True, True, False])),
],
)
def test_duplicated(idx_dup, keep, expected):
result = idx_dup.duplicated(keep=keep)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.arm_slow
def test_duplicated_hashtable_impl(keep, monkeypatch):
    # GH 9125 - shrink the size cutoff to force the hashtable-backed code
    # path, then compare against the naive hashtable computation.
    n, k = 6, 10
    levels = [np.arange(n), [str(i) for i in range(n)], 1000 + np.arange(n)]
    codes = [np.random.default_rng(2).choice(n, k * n) for _ in levels]
    with monkeypatch.context() as patched:
        patched.setattr(libindex, "_SIZE_CUTOFF", 50)
        mi = MultiIndex(levels=levels, codes=codes)
        result = mi.duplicated(keep=keep)
        expected = hashtable.duplicated(mi.values, keep=keep)
    tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("val", [101, 102])
def test_duplicated_with_nan(val):
    # GH5873 - a NaN entry must not spuriously match a non-NaN one.
    mi = MultiIndex.from_arrays([[101, val], [3.5, np.nan]])
    assert not mi.has_duplicates
    expected = np.zeros(2, dtype="bool")
    tm.assert_numpy_array_equal(mi.duplicated(), expected)
@pytest.mark.parametrize("n", range(1, 6))
@pytest.mark.parametrize("m", range(1, 5))
def test_duplicated_with_nan_multi_shape(n, m):
    # GH5873
    # Every (code_a, code_b) combination, including the -1 (NaN) codes,
    # appears exactly once, so nothing may be flagged as duplicated.
    combos = list(product(range(-1, n), range(-1, m)))
    mi = MultiIndex(
        levels=[list("abcde")[:n], list("WXYZ")[:m]],
        codes=np.random.default_rng(2).permutation(combos).T,
    )
    assert len(mi) == (n + 1) * (m + 1)
    assert not mi.has_duplicates
    tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool"))
def test_duplicated_drop_duplicates():
    # GH#4060 - duplicated() and drop_duplicates() agree for every keep
    # policy ("first" is the default of both).
    idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    cases = [
        (
            "first",
            np.array([False, False, False, True, False, False], dtype=bool),
            ([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            "last",
            np.array([True, False, False, False, False, False]),
            ([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            False,
            np.array([True, False, False, True, False, False]),
            ([2, 3, 2, 3], [1, 1, 2, 2]),
        ),
    ]
    for keep, mask, kept_arrays in cases:
        duplicated = idx.duplicated(keep=keep)
        tm.assert_numpy_array_equal(duplicated, mask)
        assert duplicated.dtype == bool
        tm.assert_index_equal(
            idx.drop_duplicates(keep=keep), MultiIndex.from_arrays(kept_arrays)
        )
@pytest.mark.parametrize(
    "dtype",
    [
        np.complex64,
        np.complex128,
    ],
)
def test_duplicated_series_complex_numbers(dtype):
    # GH 17927 - equal complex values (and complex NaNs) dedupe correctly.
    values = [
        np.nan + np.nan * 1j,
        0,
        1j,
        1j,
        1,
        1 + 1j,
        1 + 2j,
        1 + 1j,
        np.nan,
        np.nan + np.nan * 1j,
    ]
    result = Series(values, dtype=dtype).duplicated()
    expected = Series(
        [False, False, False, True, False, False, False, True, False, True],
        dtype=bool,
    )
    tm.assert_series_equal(result, expected)
def test_midx_unique_ea_dtype():
    # GH#48335 - unique() keeps the Int64 extension dtype of the level and
    # collapses the repeated (NA, 3) row into one entry.
    midx = MultiIndex.from_arrays(
        [Series([1, 2, NA, NA], dtype="Int64"), np.array([1, 2, 3, 3])],
        names=["a", "b"],
    )
    expected = MultiIndex.from_arrays(
        [Series([1, 2, NA], dtype="Int64"), np.array([1, 2, 3])],
        names=["a", "b"],
    )
    tm.assert_index_equal(midx.unique(), expected)

View File

@ -0,0 +1,284 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_any_real_numeric_dtype
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
def test_equals(idx):
assert idx.equals(idx)
assert idx.equals(idx.copy())
assert idx.equals(idx.astype(object))
assert idx.equals(idx.to_flat_index())
assert idx.equals(idx.to_flat_index().astype("category"))
assert not idx.equals(list(idx))
assert not idx.equals(np.array(idx))
same_values = Index(idx, dtype=object)
assert idx.equals(same_values)
assert same_values.equals(idx)
if idx.nlevels == 1:
# do not test MultiIndex
assert not idx.equals(Series(idx))
def test_equals_op(idx):
# GH9947, GH10637
index_a = idx
n = len(index_a)
index_b = index_a[0:-1]
index_c = index_a[0:-1].append(index_a[-2:-1])
index_d = index_a[0:1]
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_b
expected1 = np.array([True] * n)
expected2 = np.array([True] * (n - 1) + [False])
tm.assert_numpy_array_equal(index_a == index_a, expected1)
tm.assert_numpy_array_equal(index_a == index_c, expected2)
# test comparisons with numpy arrays
array_a = np.array(index_a)
array_b = np.array(index_a[0:-1])
array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
array_d = np.array(index_a[0:1])
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_b
tm.assert_numpy_array_equal(index_a == array_a, expected1)
tm.assert_numpy_array_equal(index_a == array_c, expected2)
# test comparisons with Series
series_a = Series(array_a)
series_b = Series(array_b)
series_c = Series(array_c)
series_d = Series(array_d)
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_b
tm.assert_numpy_array_equal(index_a == series_a, expected1)
tm.assert_numpy_array_equal(index_a == series_c, expected2)
# cases where length is 1 for one of them
with pytest.raises(ValueError, match="Lengths must match"):
index_a == index_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
index_a == array_d
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
series_a == series_d
with pytest.raises(ValueError, match="Lengths must match"):
series_a == array_d
# comparing with a scalar should broadcast; note that we are excluding
# MultiIndex because in this case each item in the index is a tuple of
# length 2, and therefore is considered an array of length 2 in the
# comparison instead of a scalar
if not isinstance(index_a, MultiIndex):
expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
# assuming the 2nd to last item is unique in the data
item = index_a[-2]
tm.assert_numpy_array_equal(index_a == item, expected3)
tm.assert_series_equal(series_a == item, Series(expected3))
def test_compare_tuple():
    # GH#21517 - comparing a MultiIndex against one of its own tuples.
    mi = MultiIndex.from_product([[1, 2]] * 2)
    first = mi[0]

    eq_mask = np.array([True, False, False, False])
    all_false = np.array([False, False, False, False])

    tm.assert_numpy_array_equal(mi == first, eq_mask)
    tm.assert_numpy_array_equal(mi != first, ~eq_mask)
    # strict "less than" the first element is nowhere True; the other
    # orderings follow from the equality mask
    tm.assert_numpy_array_equal(mi < first, all_false)
    tm.assert_numpy_array_equal(mi <= first, eq_mask)
    tm.assert_numpy_array_equal(mi > first, ~eq_mask)
    tm.assert_numpy_array_equal(mi >= first, ~all_false)
def test_compare_tuple_strs():
    # GH#34180 - full-length tuples match elementwise; short tuples never do.
    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    tm.assert_numpy_array_equal(mi == ("c", "a"), np.array([False, False, True]))
    tm.assert_numpy_array_equal(mi == ("c",), np.array([False, False, False]))
def test_equals_multi(idx):
assert idx.equals(idx)
assert not idx.equals(idx.values)
assert idx.equals(Index(idx.values))
assert idx.equal_levels(idx)
assert not idx.equals(idx[:-1])
assert not idx.equals(idx[-1])
# different number of levels
index = MultiIndex(
levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
)
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
assert not index.equals(index2)
assert not index.equal_levels(index2)
# levels are different
major_axis = Index(list(range(4)))
minor_axis = Index(list(range(2)))
major_codes = np.array([0, 0, 1, 2, 2, 3])
minor_codes = np.array([0, 1, 0, 0, 1, 0])
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
assert not idx.equals(index)
assert not idx.equal_levels(index)
# some of the labels are different
major_axis = Index(["foo", "bar", "baz", "qux"])
minor_axis = Index(["one", "two"])
major_codes = np.array([0, 0, 2, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1])
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
assert not idx.equals(index)
def test_identical(idx):
mi = idx.copy()
mi2 = idx.copy()
assert mi.identical(mi2)
mi = mi.set_names(["new1", "new2"])
assert mi.equals(mi2)
assert not mi.identical(mi2)
mi2 = mi2.set_names(["new1", "new2"])
assert mi.identical(mi2)
mi4 = Index(mi.tolist(), tupleize_cols=False)
assert not mi.identical(mi4)
assert mi.equals(mi4)
def test_equals_operator(idx):
# GH9785
assert (idx == idx).all()
def test_equals_missing_values():
    # make sure take is not using -1: a length-1 slice is never equal to
    # the scalar tuple it contains
    mi = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    assert not mi[0:1].equals(mi[0])
    assert not mi[1:2].equals(mi[1])
def test_equals_missing_values_differently_sorted():
    # GH#38439 - NaN rows only match when they are in the same position.
    mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert not mi1.equals(MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)]))
    assert mi1.equals(MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)]))
def test_is_():
    # is_() tracks object identity through views but not through rebuilds.
    mi = MultiIndex.from_tuples(zip(range(10), range(10)))
    assert mi.is_(mi)
    assert mi.is_(mi.view())
    assert mi.is_(mi.view().view().view().view())

    view = mi.view()
    # names are metadata, they don't change id
    view.names = ["A", "B"]
    assert view.is_(mi)
    assert mi.is_(view)

    assert not mi.is_(mi.set_names(["C", "D"]))
    # levels are inherent properties, they change identity
    relabeled = view.set_levels([list(range(10)), list(range(10))])
    assert not relabeled.is_(view)
    # shouldn't change
    assert view.is_(mi)

    # GH 17464 - Remove duplicate MultiIndex levels
    relabeled_view = relabeled.view()
    relabeled_view = relabeled_view.set_levels([list(range(10)), list(range(10))])
    assert not relabeled_view.is_(relabeled)

    rebuilt = mi.view()
    rebuilt = rebuilt.set_levels(rebuilt.levels)
    assert not rebuilt.is_(mi)
def test_is_all_dates(idx):
assert not idx._is_all_dates
def test_is_numeric(idx):
# MultiIndex is never numeric
assert not is_any_real_numeric_dtype(idx)
def test_multiindex_compare():
    # GH 21149
    # Comparison operations for a MultiIndex with nlevels == 1 must behave
    # consistently with those for nlevels > 1.
    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test
    tm.assert_series_equal(Series(midx == midx), Series([True, True]))
    # Strict ordering against self is False everywhere
    tm.assert_series_equal(Series(midx > midx), Series([False, False]))
def test_equals_ea_int_regular_int():
    # GH#46026 - an Int64 (extension) level never equals a plain int64 one,
    # in either direction.
    ea_mi = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
    plain_mi = MultiIndex.from_arrays([[1, 2], [3, 4]])
    assert not ea_mi.equals(plain_mi)
    assert not plain_mi.equals(ea_mi)

View File

@ -0,0 +1,249 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Index,
MultiIndex,
)
import pandas._testing as tm
def test_format(idx):
msg = "MultiIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
idx.format()
idx[:0].format()
def test_format_integer_names():
index = MultiIndex(
levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
)
msg = "MultiIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
index.format(names=True)
def test_format_sparse_config(idx):
# GH1538
msg = "MultiIndex.format is deprecated"
with pd.option_context("display.multi_sparse", False):
with tm.assert_produces_warning(FutureWarning, match=msg):
result = idx.format()
assert result[1] == "foo two"
def test_format_sparse_display():
index = MultiIndex(
levels=[[0, 1], [0, 1], [0, 1], [0]],
codes=[
[0, 0, 0, 1, 1, 1],
[0, 0, 1, 0, 0, 1],
[0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0],
],
)
msg = "MultiIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = index.format()
assert result[3] == "1 0 0 0"
def test_repr_with_unicode_data():
    # non-ASCII level values must not be unicode-escaped in the repr
    with pd.option_context("display.encoding", "UTF-8"):
        data = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        index = pd.DataFrame(data).set_index(["a", "b"]).index
        assert "\\" not in repr(index)
def test_repr_roundtrip_raises():
    # the repr is informational only; eval'ing it raises in the constructor
    mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
    with pytest.raises(TypeError, match="Must pass both levels and codes"):
        eval(repr(mi))
def test_unicode_string_with_unicode():
    # str() must not raise on non-ASCII level values
    frame = pd.DataFrame({"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    str(frame.set_index(["a", "b"]).index)
def test_repr_max_seq_item_setting(idx):
# GH10182
idx = idx.repeat(50)
with pd.option_context("display.max_seq_items", None):
repr(idx)
assert "..." not in str(idx)
class TestRepr:
    """Golden-output tests for ``MultiIndex.__repr__`` rendering:
    truncation via ``display.max_seq_items``, column right-justification,
    per-tuple width capping, and non-ASCII handling."""

    def test_unicode_repr_issues(self):
        # smoke test: repr of levels / level values must not raise on
        # non-ASCII data
        levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
        codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
        index = MultiIndex(levels=levels, codes=codes)
        repr(index.levels)
        repr(index.get_level_values(1))

    def test_repr_max_seq_items_equal_to_n(self, idx):
        # display.max_seq_items == n: all 6 entries shown, no truncation
        with pd.option_context("display.max_seq_items", 6):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
('bar', 'one'),
('baz', 'two'),
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'])"""
            assert result == expected

    def test_repr(self, idx):
        # single-entry slice renders on one tuple line
        result = idx[:1].__repr__()
        expected = """\
MultiIndex([('foo', 'one')],
names=['first', 'second'])"""
        assert result == expected

        # full index, default options: everything shown
        result = idx.__repr__()
        expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
('bar', 'one'),
('baz', 'two'),
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'])"""
        assert result == expected

        # max_seq_items < len: middle elided with "..." and length shown
        with pd.option_context("display.max_seq_items", 5):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
('foo', 'two'),
...
('qux', 'one'),
('qux', 'two')],
names=['first', 'second'], length=6)"""
            assert result == expected

        # display.max_seq_items == 1
        with pd.option_context("display.max_seq_items", 1):
            result = idx.__repr__()
            expected = """\
MultiIndex([...
('qux', 'two')],
names=['first', ...], length=6)"""
            assert result == expected

    def test_rjust(self):
        # values of different widths within a level are right-justified
        n = 1000
        ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
        dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
        mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"])
        result = mi[:1].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
names=['a', 'b', 'dti'])"""
        assert result == expected

        result = mi[::500].__repr__()
        expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
( 'a', 9, '2000-01-01 00:08:20'),
('abc', 10, '2000-01-01 00:16:40'),
('abc', 10, '2000-01-01 00:25:00')],
names=['a', 'b', 'dti'])"""
        assert result == expected

        result = mi.__repr__()
        expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
( 'a', 9, '2000-01-01 00:00:01'),
( 'a', 9, '2000-01-01 00:00:02'),
( 'a', 9, '2000-01-01 00:00:03'),
( 'a', 9, '2000-01-01 00:00:04'),
( 'a', 9, '2000-01-01 00:00:05'),
( 'a', 9, '2000-01-01 00:00:06'),
( 'a', 9, '2000-01-01 00:00:07'),
( 'a', 9, '2000-01-01 00:00:08'),
( 'a', 9, '2000-01-01 00:00:09'),
...
('abc', 10, '2000-01-01 00:33:10'),
('abc', 10, '2000-01-01 00:33:11'),
('abc', 10, '2000-01-01 00:33:12'),
('abc', 10, '2000-01-01 00:33:13'),
('abc', 10, '2000-01-01 00:33:14'),
('abc', 10, '2000-01-01 00:33:15'),
('abc', 10, '2000-01-01 00:33:16'),
('abc', 10, '2000-01-01 00:33:17'),
('abc', 10, '2000-01-01 00:33:18'),
('abc', 10, '2000-01-01 00:33:19')],
names=['a', 'b', 'dti'], length=2000)"""
        assert result == expected

    def test_tuple_width(self):
        # wide tuples are truncated per-entry with a trailing "..."
        n = 1000
        ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
        dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
        levels = [ci, ci.codes + 9, dti, dti, dti]
        names = ["a", "b", "dti_1", "dti_2", "dti_3"]
        mi = MultiIndex.from_arrays(levels, names=names)
        result = mi[:1].__repr__()
        expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""  # noqa: E501
        assert result == expected

        result = mi[:10].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
        assert result == expected

        result = mi.__repr__()
        expected = """\
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
...
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
        assert result == expected

    def test_multiindex_long_element(self):
        # Non-regression test towards GH#52960
        data = MultiIndex.from_tuples([("c" * 62,)])
        expected = (
            "MultiIndex([('cccccccccccccccccccccccccccccccccccccccc"
            "cccccccccccccccccccccc',)],\n )"
        )
        assert str(data) == expected

View File

@ -0,0 +1,124 @@
import numpy as np
import pandas as pd
from pandas import (
CategoricalIndex,
Index,
MultiIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestGetLevelValues:
    def test_get_level_values_box_datetime64(self):
        # datetime64 level values come back boxed as Timestamp scalars
        dates = date_range("1/1/2000", periods=4)
        index = MultiIndex(
            levels=[dates, [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
        )
        first = index.get_level_values(0)[0]
        assert isinstance(first, Timestamp)
def test_get_level_values(idx):
result = idx.get_level_values(0)
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
tm.assert_index_equal(result, expected)
assert result.name == "first"
result = idx.get_level_values("first")
expected = idx.get_level_values(0)
tm.assert_index_equal(result, expected)
# GH 10460
index = MultiIndex(
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
)
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
tm.assert_index_equal(index.get_level_values(0), exp)
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
tm.assert_index_equal(index.get_level_values(1), exp)
def test_get_level_values_all_na():
    # GH#17924 - a level that is entirely NaN comes back float64; a mixed
    # level stays object
    index = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])
    tm.assert_index_equal(
        index.get_level_values(0),
        Index([np.nan, np.nan, np.nan], dtype=np.float64),
    )
    tm.assert_index_equal(
        index.get_level_values(1), Index(["a", np.nan, 1], dtype=object)
    )
def test_get_level_values_int_with_na():
    # GH#17924 - integer levels containing NaN round-trip (as float)
    for level_vals in ([1, np.nan, 2], [np.nan, np.nan, 2]):
        index = MultiIndex.from_arrays([["a", "b", "b"], level_vals])
        tm.assert_index_equal(index.get_level_values(1), Index(level_vals))
def test_get_level_values_na():
    # NaN-only, mixed, NaT-containing, and empty levels all round-trip
    # through get_level_values with the right dtype.
    index = MultiIndex.from_arrays([[np.nan, np.nan, np.nan], ["a", np.nan, 1]])
    tm.assert_index_equal(
        index.get_level_values(0), Index([np.nan, np.nan, np.nan])
    )
    tm.assert_index_equal(index.get_level_values(1), Index(["a", np.nan, 1]))

    index = MultiIndex.from_arrays(
        [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    )
    tm.assert_index_equal(
        index.get_level_values(1), pd.DatetimeIndex([0, 1, pd.NaT])
    )

    index = MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(index.get_level_values(0), Index([], dtype=object))
def test_get_level_values_when_periods():
    # GH33131. See also discussion in GH32669.
    # This test can probably be removed when PeriodIndex._engine is removed.
    from pandas import (
        Period,
        PeriodIndex,
    )

    idx = MultiIndex.from_arrays(
        [PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")]
    )
    rebuilt = MultiIndex.from_arrays(
        [idx._get_level_values(lvl) for lvl in range(idx.nlevels)]
    )
    assert all(level.is_monotonic_increasing for level in rebuilt.levels)
def test_values_loses_freq_of_underlying_index():
# GH#49054
idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BME"))
expected = idx.copy(deep=True)
idx2 = Index([1, 2, 3])
midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]])
midx.values
assert idx.freq is not None
tm.assert_index_equal(idx, expected)

View File

@ -0,0 +1,384 @@
import numpy as np
import pytest
from pandas.compat import PY311
from pandas.core.dtypes.dtypes import DatetimeTZDtype
import pandas as pd
from pandas import (
CategoricalIndex,
MultiIndex,
)
import pandas._testing as tm
def assert_matching(actual, expected, check_dtype=False):
    # Compare two sequences of level/code arrays elementwise while avoiding
    # any assumption about their internal representation (lists, ndarrays).
    assert len(actual) == len(expected)
    for act, exp in zip(actual, expected):
        tm.assert_numpy_array_equal(
            np.asarray(act), np.asarray(exp), check_dtype=check_dtype
        )
def test_get_level_number_integer(idx):
idx.names = [1, 0]
assert idx._get_level_number(1) == 0
assert idx._get_level_number(0) == 1
msg = "Too many levels: Index has only 2 levels, not 3"
with pytest.raises(IndexError, match=msg):
idx._get_level_number(2)
with pytest.raises(KeyError, match="Level fourth not found"):
idx._get_level_number("fourth")
def test_get_dtypes(using_infer_string):
# Test MultiIndex.dtypes (# Gh37062)
idx_multitype = MultiIndex.from_product(
[[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
names=["int", "string", "dt"],
)
exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
expected = pd.Series(
{
"int": np.dtype("int64"),
"string": exp,
"dt": DatetimeTZDtype(tz="utc"),
}
)
tm.assert_series_equal(expected, idx_multitype.dtypes)
def test_get_dtypes_no_level_name(using_infer_string):
# Test MultiIndex.dtypes (# GH38580 )
idx_multitype = MultiIndex.from_product(
[
[1, 2, 3],
["a", "b", "c"],
pd.date_range("20200101", periods=2, tz="UTC"),
],
)
exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
expected = pd.Series(
{
"level_0": np.dtype("int64"),
"level_1": exp,
"level_2": DatetimeTZDtype(tz="utc"),
}
)
tm.assert_series_equal(expected, idx_multitype.dtypes)
def test_get_dtypes_duplicate_level_names(using_infer_string):
# Test MultiIndex.dtypes with non-unique level names (# GH45174)
result = MultiIndex.from_product(
[
[1, 2, 3],
["a", "b", "c"],
pd.date_range("20200101", periods=2, tz="UTC"),
],
names=["A", "A", "A"],
).dtypes
exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
expected = pd.Series(
[np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")],
index=["A", "A", "A"],
)
tm.assert_series_equal(result, expected)
def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
with pytest.raises(IndexError, match="Too many levels"):
frame.index._get_level_number(2)
with pytest.raises(IndexError, match="not a valid level number"):
frame.index._get_level_number(-3)
def test_set_name_methods(idx):
    """set_names/rename on a two-level MultiIndex: copies by default, mutates with inplace."""
    # so long as these are synonyms, we don't need to test set_names
    index_names = ["first", "second"]
    assert idx.rename == idx.set_names
    new_names = [name + "SUFFIX" for name in index_names]
    ind = idx.set_names(new_names)
    # default call returns a renamed copy; the fixture index is untouched
    assert idx.names == index_names
    assert ind.names == new_names
    # passing more names than levels must raise
    msg = "Length of names must match number of levels in MultiIndex"
    with pytest.raises(ValueError, match=msg):
        ind.set_names(new_names + new_names)
    new_names2 = [name + "SUFFIX2" for name in new_names]
    # inplace=True mutates in place and returns None
    res = ind.set_names(new_names2, inplace=True)
    assert res is None
    assert ind.names == new_names2
    # set names for specific level (# GH7792)
    ind = idx.set_names(new_names[0], level=0)
    assert idx.names == index_names
    assert ind.names == [new_names[0], index_names[1]]
    res = ind.set_names(new_names2[0], level=0, inplace=True)
    assert res is None
    assert ind.names == [new_names2[0], index_names[1]]
    # set names for multiple levels
    ind = idx.set_names(new_names, level=[0, 1])
    assert idx.names == index_names
    assert ind.names == new_names
    res = ind.set_names(new_names2, level=[0, 1], inplace=True)
    assert res is None
    assert ind.names == new_names2
def test_set_levels_codes_directly(idx):
    # setting levels/codes directly raises AttributeError
    levels = idx.levels
    new_levels = [[lev + "a" for lev in level] for level in levels]
    codes = idx.codes
    major_codes, minor_codes = codes
    # build a valid-looking replacement so only the assignment itself can fail
    major_codes = [(x + 1) % 3 for x in major_codes]
    minor_codes = [(x + 1) % 1 for x in minor_codes]
    new_codes = [major_codes, minor_codes]
    msg = "Can't set attribute"
    with pytest.raises(AttributeError, match=msg):
        idx.levels = new_levels
    # Python 3.11 changed the wording of the property-setter error message
    msg = (
        "property 'codes' of 'MultiIndex' object has no setter"
        if PY311
        else "can't set attribute"
    )
    with pytest.raises(AttributeError, match=msg):
        idx.codes = new_codes
def test_set_levels(idx):
    """set_levels returns a new index and must never mutate the original,
    even when the requested change is invalid (GH 13754)."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    new_levels = [[lev + "a" for lev in level] for level in levels]
    # level changing [w/o mutation]
    ind2 = idx.set_levels(new_levels)
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)
    # level changing specific level [w/o mutation]
    ind2 = idx.set_levels(new_levels[0], level=0)
    assert_matching(ind2.levels, [new_levels[0], levels[1]])
    assert_matching(idx.levels, levels)
    ind2 = idx.set_levels(new_levels[1], level=1)
    assert_matching(ind2.levels, [levels[0], new_levels[1]])
    assert_matching(idx.levels, levels)
    # level changing multiple levels [w/o mutation]
    ind2 = idx.set_levels(new_levels, level=[0, 1])
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)
    # illegal level changing should not change levels
    # GH 13754
    original_index = idx.copy()
    with pytest.raises(ValueError, match="^On"):
        idx.set_levels(["c"], level=0)
    assert_matching(idx.levels, original_index.levels, check_dtype=True)
    with pytest.raises(ValueError, match="^On"):
        idx.set_codes([0, 1, 2, 3, 4, 5], level=0)
    assert_matching(idx.codes, original_index.codes, check_dtype=True)
    # scalar (non-list-like) inputs are rejected with TypeError
    with pytest.raises(TypeError, match="^Levels"):
        idx.set_levels("c", level=0)
    assert_matching(idx.levels, original_index.levels, check_dtype=True)
    with pytest.raises(TypeError, match="^Codes"):
        idx.set_codes(1, level=0)
    assert_matching(idx.codes, original_index.codes, check_dtype=True)
def test_set_codes(idx):
    """set_codes returns a new index and must never mutate the original."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    major_codes, minor_codes = codes
    # rotate the codes to produce a different-but-valid assignment
    major_codes = [(x + 1) % 3 for x in major_codes]
    minor_codes = [(x + 1) % 1 for x in minor_codes]
    new_codes = [major_codes, minor_codes]
    # changing codes w/o mutation
    ind2 = idx.set_codes(new_codes)
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)
    # codes changing specific level w/o mutation
    ind2 = idx.set_codes(new_codes[0], level=0)
    assert_matching(ind2.codes, [new_codes[0], codes[1]])
    assert_matching(idx.codes, codes)
    ind2 = idx.set_codes(new_codes[1], level=1)
    assert_matching(ind2.codes, [codes[0], new_codes[1]])
    assert_matching(idx.codes, codes)
    # codes changing multiple levels w/o mutation
    ind2 = idx.set_codes(new_codes, level=[0, 1])
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)
    # label changing for levels of different magnitude of categories
    ind = MultiIndex.from_tuples([(0, i) for i in range(130)])
    new_codes = range(129, -1, -1)
    expected = MultiIndex.from_tuples([(0, i) for i in new_codes])
    # [w/o mutation]
    result = ind.set_codes(codes=new_codes, level=1)
    assert result.equals(expected)
def test_set_levels_codes_names_bad_input(idx):
    """Invalid inputs to set_levels/set_codes/set_names raise with clear messages."""
    levels, codes = idx.levels, idx.codes
    names = idx.names
    # too few entries for a two-level index
    with pytest.raises(ValueError, match="Length of levels"):
        idx.set_levels([levels[0]])
    with pytest.raises(ValueError, match="Length of codes"):
        idx.set_codes([codes[0]])
    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names([names[0]])
    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0])
    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0])
    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list-like"):
        idx.set_names(names[0])
    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0], level=[0, 1])
    with pytest.raises(TypeError, match="list-like"):
        idx.set_levels(levels, level=0)
    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0], level=[0, 1])
    with pytest.raises(TypeError, match="list-like"):
        idx.set_codes(codes, level=0)
    # should have equal lengths
    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names(names[0], level=[0, 1])
    with pytest.raises(TypeError, match="Names must be a"):
        idx.set_names(names, level=0)
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    # GH 21149
    # set_names on a single-level MultiIndex must work in both the
    # copying and the in-place mode without raising
    expected = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
    mi = MultiIndex.from_product([[0, 1]])
    out = mi.set_names("first", level=0, inplace=inplace)
    # inplace=True mutates mi and returns None; otherwise out is the result
    tm.assert_index_equal(mi if inplace else out, expected)
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    # GH13854
    mi = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    cat_idx = CategoricalIndex(list("bac"), ordered=ordered)
    result = mi.set_levels(cat_idx, level=0)
    expected = MultiIndex(levels=[cat_idx, [0, 1, 2, 3]], codes=mi.codes)
    tm.assert_index_equal(result, expected)
    # the extracted level keeps the categories and orderedness
    tm.assert_index_equal(
        result.get_level_values(0),
        CategoricalIndex(
            list("bacb"), categories=cat_idx.categories, ordered=cat_idx.ordered
        ),
    )
def test_set_value_keeps_names():
    # motivating example from #3742
    name_level = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
    number_level = ["1", "2", "3"] * 2
    mi = MultiIndex.from_arrays([name_level, number_level], names=["Name", "Number"])
    df = pd.DataFrame(
        np.random.default_rng(2).standard_normal((6, 4)),
        columns=["one", "two", "three", "four"],
        index=mi,
    ).sort_index()
    assert df._is_copy is None
    assert df.index.names == ("Name", "Number")
    # setting (with enlargement) via .at must not clobber the index names
    df.at[("grethe", "4"), "one"] = 99.34
    assert df._is_copy is None
    assert df.index.names == ("Name", "Number")
def test_set_levels_with_iterable():
    # GH23273
    colors = ["black"] * 3
    index = MultiIndex.from_arrays([[1, 2, 3], colors], names=["size", "color"])
    # a lazy iterable (here: map) must be accepted, not just a list
    result = index.set_levels(map(int, ["3", "2", "1"]), level="size")
    expected = MultiIndex.from_arrays([[3, 2, 1], colors], names=["size", "color"])
    tm.assert_index_equal(result, expected)
def test_set_empty_level():
    # GH#48636
    # replacing the level of an empty index must keep the new level's dtype
    empty_mi = MultiIndex.from_arrays([[]], names=["A"])
    result = empty_mi.set_levels(pd.DatetimeIndex([]), level=0)
    expected = MultiIndex.from_arrays([pd.DatetimeIndex([])], names=["A"])
    tm.assert_index_equal(result, expected)
def test_set_levels_pos_args_removal():
    # https://github.com/pandas-dev/pandas/issues/41485
    # `level` may only be passed by keyword to set_levels/set_codes
    mi = MultiIndex.from_tuples([(1, "one"), (3, "one")], names=["foo", "bar"])
    with pytest.raises(TypeError, match="positional arguments"):
        mi.set_levels(["a", "b", "c"], 0)
    with pytest.raises(TypeError, match="positional arguments"):
        mi.set_codes([[0, 1], [1, 0]], 0)
def test_set_levels_categorical_keep_dtype():
# GH#52125
midx = MultiIndex.from_arrays([[5, 6]])
result = midx.set_levels(levels=pd.Categorical([1, 2]), level=0)
expected = MultiIndex.from_arrays([pd.Categorical([1, 2])])
tm.assert_index_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,289 @@
import re
import numpy as np
import pytest
from pandas._libs import index as libindex
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
import pandas as pd
from pandas import (
Index,
IntervalIndex,
MultiIndex,
RangeIndex,
)
import pandas._testing as tm
def test_labels_dtypes():
    # GH 8456
    # codes use the smallest integer dtype that fits the level size
    mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert mi.codes[0].dtype == "int8"
    assert mi.codes[1].dtype == "int8"
    for size, dtype in [(40, "int8"), (400, "int16"), (40000, "int32")]:
        assert MultiIndex.from_product([["a"], range(size)]).codes[1].dtype == dtype
    # codes are always non-negative for fully-observed levels
    mi = MultiIndex.from_product([["a"], range(1000)])
    assert (mi.codes[0] >= 0).all()
    assert (mi.codes[1] >= 0).all()
def test_values_boxed():
    tuples = [
        (1, pd.Timestamp("2000-01-01")),
        (2, pd.NaT),
        (3, pd.Timestamp("2000-01-03")),
        (1, pd.Timestamp("2000-01-04")),
        (2, pd.Timestamp("2000-01-02")),
        (3, pd.Timestamp("2000-01-03")),
    ]
    mi = MultiIndex.from_tuples(tuples)
    expected = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(mi.values, expected)
    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values)
def test_values_multiindex_datetimeindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(ints)
    aware = pd.DatetimeIndex(ints, tz="US/Central")
    idx = MultiIndex.from_arrays([naive, aware])
    # check the full index first, then a slice where n_lev > n_lab
    for sl in (slice(None), slice(2)):
        vals = idx[sl].values
        tm.assert_index_equal(pd.DatetimeIndex([t[0] for t in vals]), naive[sl])
        tm.assert_index_equal(pd.DatetimeIndex([t[1] for t in vals]), aware[sl])
def test_values_multiindex_periodindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")
    idx = MultiIndex.from_arrays([ints, pidx])
    # check the full index first, then a slice where n_lev > n_lab
    for sl in (slice(None), slice(2)):
        vals = idx[sl].values
        tm.assert_index_equal(
            Index([t[0] for t in vals]), Index(ints[sl], dtype=np.int64)
        )
        tm.assert_index_equal(pd.PeriodIndex([t[1] for t in vals]), pidx[sl])
def test_consistency():
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))
    # the fact that this constructs without error means it's consistent
    MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[np.arange(70000), np.repeat(range(10), 7000)],
    )
    # inconsistent (duplicated label combinations) -> not unique
    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[
            np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]),
            np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]),
        ],
    )
    assert index.is_unique is False
@pytest.mark.slow
def test_hash_collisions(monkeypatch):
    # non-smoke test that we don't get hash collisions
    # shrink the size cutoff so this small index exercises the code path
    # normally reserved for large indexes — presumably the hash-based
    # engine; TODO confirm against libindex
    size_cutoff = 50
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        index = MultiIndex.from_product(
            [np.arange(8), np.arange(8)], names=["one", "two"]
        )
        # every value must map back to its own position
        result = index.get_indexer(index.values)
        tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp"))
        # spot-check lookups at both ends of the index
        for i in [0, 1, len(index) - 2, len(index) - 1]:
            result = index.get_loc(index[i])
            assert result == i
def test_dims():
    # intentionally empty placeholder kept for parity with other index suites
    pass
def test_take_invalid_kwargs():
    vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
    idx = MultiIndex.from_product(vals, names=["str", "dt"])
    indices = [1, 2]
    # unknown keyword -> TypeError
    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)
    # numpy-compat keywords are rejected explicitly with ValueError
    for kwargs, msg in (
        ({"out": indices}, "the 'out' parameter is not supported"),
        ({"mode": "clip"}, "the 'mode' parameter is not supported"),
    ):
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, **kwargs)
def test_isna_behavior(idx):
# should not segfault GH5123
# NOTE: if MI representation changes, may make sense to allow
# isna(MI)
msg = "isna is not defined for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
pd.isna(idx)
def test_large_multiindex_error(monkeypatch):
    # GH12527
    # missing keys must raise KeyError both below and above the engine
    # size cutoff (the two sizes exercise different lookup paths)
    size_cutoff = 50
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        df_below_cutoff = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(size_cutoff - 1)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            df_below_cutoff.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            df_below_cutoff.loc[(3, 0), "dest"]
        df_above_cutoff = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(size_cutoff + 1)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            df_above_cutoff.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            df_above_cutoff.loc[(3, 0), "dest"]
def test_mi_hashtable_populated_attribute_error(monkeypatch):
    # GH 18165
    # with the size cutoff lowered so the large-index lookup path is used,
    # a bogus attribute access must still raise a normal AttributeError
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
    r = range(50)
    df = pd.DataFrame({"a": r, "b": r}, index=MultiIndex.from_arrays([r, r]))
    msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=msg):
        df["a"].foo()
def test_can_hold_identifiers(idx):
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is True
def test_metadata_immutable(idx):
levels, codes = idx.levels, idx.codes
# shouldn't be able to set at either the top level or base level
mutable_regex = re.compile("does not support mutable operations")
with pytest.raises(TypeError, match=mutable_regex):
levels[0] = levels[0]
with pytest.raises(TypeError, match=mutable_regex):
levels[0][0] = levels[0][0]
# ditto for labels
with pytest.raises(TypeError, match=mutable_regex):
codes[0] = codes[0]
with pytest.raises(ValueError, match="assignment destination is read-only"):
codes[0][0] = codes[0][0]
# and for names
names = idx.names
with pytest.raises(TypeError, match=mutable_regex):
names[0] = names[0]
def test_level_setting_resets_attributes():
    mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert mi.is_monotonic_increasing
    # reordering the second level breaks monotonicity; the cached flag
    # must be recomputed on the new index, not carried over
    mi = mi.set_levels([["A", "B"], [1, 3, 2]])
    assert not mi.is_monotonic_increasing
def test_rangeindex_fallback_coercion_bug():
    # GH 12893
    # concat of two stacked frames with RangeIndex-backed levels; the
    # int64 expectations below guard against coercion of the level dtype
    df1 = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df2 = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df = pd.concat(
        {"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)},
        axis=1,
    )
    df.index.names = ["fizz", "buzz"]
    expected = pd.DataFrame(
        {"df2": np.arange(100), "df1": np.arange(100)},
        index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
    )
    # check_like=True: column order is irrelevant here
    tm.assert_frame_equal(df, expected, check_like=True)
    # level values must come back as int64, not float
    result = df.index.get_level_values("fizz")
    expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10)
    tm.assert_index_equal(result, expected)
    result = df.index.get_level_values("buzz")
    expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz")
    tm.assert_index_equal(result, expected)
def test_memory_usage(idx):
result = idx.memory_usage()
if len(idx):
idx.get_loc(idx[0])
result2 = idx.memory_usage()
result3 = idx.memory_usage(deep=True)
# RangeIndex, IntervalIndex
# don't have engines
if not isinstance(idx, (RangeIndex, IntervalIndex)):
assert result2 > result
if idx.inferred_type == "object":
assert result3 > result2
else:
# we report 0 for no-length
assert result == 0
def test_nlevels(idx):
assert idx.nlevels == 2

View File

@ -0,0 +1,103 @@
import numpy as np
import pytest
from pandas import MultiIndex
import pandas._testing as tm
def test_isin_nan():
    mi = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    # nan matches nan no matter which float("nan") object is supplied
    for missing in (np.nan, float("nan")):
        tm.assert_numpy_array_equal(
            mi.isin([("bar", missing)]), np.array([False, True])
        )
def test_isin_missing(nulls_fixture):
# GH48905
mi1 = MultiIndex.from_tuples([(1, nulls_fixture)])
mi2 = MultiIndex.from_tuples([(1, 1), (1, 2)])
result = mi2.isin(mi1)
expected = np.array([False, False])
tm.assert_numpy_array_equal(result, expected)
def test_isin():
    values = [("foo", 2), ("bar", 3), ("quux", 4)]
    mi = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    tm.assert_numpy_array_equal(
        mi.isin(values), np.array([False, False, True, True])
    )
    # empty, return dtype bool
    empty = MultiIndex.from_arrays([[], []])
    result = empty.isin(values)
    assert len(result) == 0
    assert result.dtype == np.bool_
def test_isin_level_kwarg():
    """isin(level=...) accepts positive/negative level numbers and level names."""
    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    vals_0 = ["foo", "bar", "quux"]
    vals_1 = [2, 3, 10]
    expected = np.array([False, False, True, True])
    # positive and negative positional levels are equivalent
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))
    # out-of-range level numbers raise IndexError
    msg = "Too many levels: Index has only 2 levels, not 6"
    with pytest.raises(IndexError, match=msg):
        idx.isin(vals_0, level=5)
    msg = "Too many levels: Index has only 2 levels, -5 is not a valid level number"
    with pytest.raises(IndexError, match=msg):
        idx.isin(vals_0, level=-5)
    # non-integer or unknown level labels raise KeyError
    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        idx.isin(vals_0, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        idx.isin(vals_1, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        idx.isin(vals_1, level="A")
    # once names are set, levels can be referenced by name
    idx.names = ["A", "B"]
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A"))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B"))
    with pytest.raises(KeyError, match="'Level C not found'"):
        idx.isin(vals_1, level="C")
@pytest.mark.parametrize(
    "labels,expected,level",
    [
        ([("b", np.nan)], np.array([False, False, True]), None),
        ([np.nan, "a"], np.array([True, True, False]), 0),
        (["d", np.nan], np.array([False, True, True]), 1),
    ],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
    # GH 19132
    # nan in either the index or the lookup values must match correctly,
    # both for whole tuples (level=None) and single levels
    mi = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]])
    tm.assert_numpy_array_equal(mi.isin(labels, level=level), expected)
def test_isin_empty():
    # GH#51599
    # isin with no lookup values returns an all-False mask
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]])
    tm.assert_numpy_array_equal(mi.isin([]), np.array([False, False]))
def test_isin_generator():
    # GH#52568
    # a one-shot generator of tuples must be accepted as lookup values
    mi = MultiIndex.from_tuples([(1, 2)])
    result = mi.isin(tup for tup in [(1, 2)])
    tm.assert_numpy_array_equal(result, np.array([True]))

Some files were not shown because too many files have changed in this diff Show More