done
This commit is contained in:
@ -0,0 +1,27 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
# Note: identical to the "multi" entry in the top-level "index" fixture
|
||||
@pytest.fixture
def idx():
    """Return the two-level MultiIndex used to test general functionality.

    The levels are ("foo", "bar", "baz", "qux") and ("one", "two"), named
    "first" and "second"; the codes select six (major, minor) pairs.
    """
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])
    index_names = ["first", "second"]
    # Integrity verification is explicitly switched off for this construction.
    mi = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=index_names,
        verify_integrity=False,
    )
    return mi
|
@ -0,0 +1,263 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_infer_objects(idx):
    """``infer_objects`` on a MultiIndex raises NotImplementedError."""
    with pytest.raises(NotImplementedError, match="to_frame"):
        idx.infer_objects()
|
||||
|
||||
|
||||
def test_shift(idx):
    """``shift`` on a MultiIndex raises NotImplementedError."""
    # GH8083 test the base class for shift
    msg = (
        "This method is only implemented for DatetimeIndex, PeriodIndex and "
        "TimedeltaIndex; Got type MultiIndex"
    )
    with pytest.raises(NotImplementedError, match=msg):
        idx.shift(1)
    with pytest.raises(NotImplementedError, match=msg):
        idx.shift(1, 2)
|
||||
|
||||
|
||||
def test_groupby(idx):
    """``groupby`` maps each group key to the list of matching tuples."""
    groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
    labels = idx.tolist()
    exp = {1: labels[:3], 2: labels[3:]}
    tm.assert_dict_equal(groups, exp)

    # GH5620
    groups = idx.groupby(idx)
    exp = {key: [key] for key in idx}
    tm.assert_dict_equal(groups, exp)
|
||||
|
||||
|
||||
def test_truncate_multiindex():
    """``truncate`` trims the first level and keeps the level names."""
    # GH 34564 for MultiIndex level names check
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=["L1", "L2"],
    )

    result = index.truncate(before=1)
    # The first level holds the integers 0..3, so the label that must be
    # gone is 0; a string such as "foo" could never be present and would
    # make this assertion vacuous.
    assert 0 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)
|
||||
|
||||
|
||||
# TODO: reshape
|
||||
|
||||
|
||||
def test_reorder_levels(idx):
    """Asking for more levels than the index has raises IndexError."""
    # this blows up
    with pytest.raises(IndexError, match="^Too many levels"):
        idx.reorder_levels([2, 1, 0])
|
||||
|
||||
|
||||
def test_numpy_repeat():
    """``np.repeat`` repeats each entry; the ``axis`` argument is rejected."""
    reps = 2
    numbers = [1, 2, 3]
    # The same array serves as both the second level's values and the names.
    names = np.array(["foo", "bar"])

    m = MultiIndex.from_product([numbers, names], names=names)
    expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
    tm.assert_index_equal(np.repeat(m, reps), expected)

    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.repeat(m, reps, axis=1)
|
||||
|
||||
|
||||
def test_append_mixed_dtypes():
|
||||
# GH 13660
|
||||
dti = date_range("2011-01-01", freq="ME", periods=3)
|
||||
dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern")
|
||||
pi = period_range("2011-01", freq="M", periods=3)
|
||||
|
||||
mi = MultiIndex.from_arrays(
|
||||
[[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
|
||||
)
|
||||
assert mi.nlevels == 6
|
||||
|
||||
res = mi.append(mi)
|
||||
exp = MultiIndex.from_arrays(
|
||||
[
|
||||
[1, 2, 3, 1, 2, 3],
|
||||
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
|
||||
["a", "b", "c", "a", "b", "c"],
|
||||
dti.append(dti),
|
||||
dti_tz.append(dti_tz),
|
||||
pi.append(pi),
|
||||
]
|
||||
)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
other = MultiIndex.from_arrays(
|
||||
[
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
["x", "y", "z"],
|
||||
]
|
||||
)
|
||||
|
||||
res = mi.append(other)
|
||||
exp = MultiIndex.from_arrays(
|
||||
[
|
||||
[1, 2, 3, "x", "y", "z"],
|
||||
[1.1, np.nan, 3.3, "x", "y", "z"],
|
||||
["a", "b", "c", "x", "y", "z"],
|
||||
dti.append(Index(["x", "y", "z"])),
|
||||
dti_tz.append(Index(["x", "y", "z"])),
|
||||
pi.append(Index(["x", "y", "z"])),
|
||||
]
|
||||
)
|
||||
tm.assert_index_equal(res, exp)
|
||||
|
||||
|
||||
def test_iter(idx):
    """Iterating the fixture yields its (first, second) tuples in order."""
    result = list(idx)
    expected = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]
    assert result == expected
|
||||
|
||||
|
||||
def test_sub(idx):
    """Subtraction involving a MultiIndex raises TypeError."""
    first = idx

    # - now raises (previously was set op difference)
    msg = "cannot perform __sub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=msg):
        first - idx[-3:]
    with pytest.raises(TypeError, match=msg):
        idx[-3:] - first
    with pytest.raises(TypeError, match=msg):
        idx[-3:] - first.tolist()
    msg = "cannot perform __rsub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=msg):
        first.tolist() - idx[-3:]
|
||||
|
||||
|
||||
def test_map(idx):
    """Mapping with an identity callable returns an equal index."""
    # callable
    index = idx

    result = index.map(lambda x: x)
    tm.assert_index_equal(result, index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, index: dict(zip(index, values)),
        lambda values, index: pd.Series(values, index),
    ],
)
def test_map_dictlike(idx, mapper):
    """``map`` accepts a dict or a Series as the mapping."""
    identity = mapper(idx.values, idx)

    # we don't infer to uint64 dtype for a dict
    if idx.dtype == np.uint64 and isinstance(identity, dict):
        expected = idx.astype("int64")
    else:
        expected = idx

    result = idx.map(identity)
    tm.assert_index_equal(result, expected)

    # empty mappable
    expected = Index([np.nan] * len(idx))
    result = idx.map(mapper(expected, idx))
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        np.exp,
        np.exp2,
        np.expm1,
        np.log,
        np.log2,
        np.log10,
        np.log1p,
        np.sqrt,
        np.sin,
        np.cos,
        np.tan,
        np.arcsin,
        np.arccos,
        np.arctan,
        np.sinh,
        np.cosh,
        np.tanh,
        np.arcsinh,
        np.arccosh,
        np.arctanh,
        np.deg2rad,
        np.rad2deg,
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
    """Applying a numeric numpy ufunc to a MultiIndex raises TypeError."""
    # test ufuncs of numpy. see:
    # https://numpy.org/doc/stable/reference/ufuncs.html

    expected_exception = TypeError
    msg = (
        "loop of ufunc does not support argument 0 of type tuple which "
        f"has no callable {func.__name__} method"
    )
    with pytest.raises(expected_exception, match=msg):
        func(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [np.isfinite, np.isinf, np.isnan, np.signbit],
    ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
    """Applying a numpy type-checking ufunc to a MultiIndex raises TypeError."""
    msg = (
        f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
        "could not be safely coerced to any supported types according to "
        "the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=msg):
        func(idx)
|
@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_astype(idx):
    """``astype("O")`` copies levels, codes and names; int dtype raises."""
    expected = idx.copy()
    actual = idx.astype("O")
    tm.assert_copy(actual.levels, expected.levels)
    tm.assert_copy(actual.codes, expected.codes)
    assert actual.names == list(expected.names)

    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(np.dtype(int))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
    """Casting a MultiIndex to a categorical dtype raises NotImplementedError."""
    # GH 18630
    msg = "> 1 ndim Categorical are not supported at this time"
    with pytest.raises(NotImplementedError, match=msg):
        idx.astype(CategoricalDtype(ordered=ordered))

    if ordered is False:
        # dtype='category' defaults to ordered=False, so only test once
        with pytest.raises(NotImplementedError, match=msg):
            idx.astype("category")
|
@ -0,0 +1,122 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_numeric_compat(idx):
    """Arithmetic operators on a MultiIndex raise TypeError in both orders."""
    with pytest.raises(TypeError, match="cannot perform __mul__"):
        idx * 1

    with pytest.raises(TypeError, match="cannot perform __rmul__"):
        1 * idx

    with pytest.raises(TypeError, match="cannot perform __truediv__"):
        idx / 1

    # Reflected division reports the "r"-prefixed dunder name.
    with pytest.raises(TypeError, match="cannot perform __rtruediv__"):
        1 / idx

    with pytest.raises(TypeError, match="cannot perform __floordiv__"):
        idx // 1

    with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
        1 // idx
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
def test_logical_compat(idx, method):
    """Logical reductions and inversion on a MultiIndex raise TypeError."""
    msg = f"cannot perform {method}"

    with pytest.raises(TypeError, match=msg):
        getattr(idx, method)()
|
||||
|
||||
|
||||
def test_inplace_mutation_resets_values():
    """``set_levels`` / ``set_codes`` leave the source index's cache intact."""
    levels = [["a", "b", "c"], [4]]
    levels2 = [[1, 2, 3], ["a"]]
    codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=levels, codes=codes)
    mi2 = MultiIndex(levels=levels2, codes=codes)

    # instantiating MultiIndex should not access/cache _.values
    assert "_values" not in mi1._cache
    assert "_values" not in mi2._cache

    vals = mi1.values.copy()
    vals2 = mi2.values.copy()

    # accessing .values should cache ._values
    assert mi1._values is mi1._cache["_values"]
    assert mi1.values is mi1._cache["_values"]
    assert isinstance(mi1._cache["_values"], np.ndarray)

    # Make sure level setting works
    new_vals = mi1.set_levels(levels2).values
    tm.assert_almost_equal(vals2, new_vals)

    # Doesn't drop _values from _cache [implementation detail]
    tm.assert_almost_equal(mi1._cache["_values"], vals)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, vals)

    # Make sure label setting works too
    codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert exp_values.shape == (6,)

    new_mi = mi2.set_codes(codes2)
    assert "_values" not in new_mi._cache
    new_values = new_mi.values
    assert "_values" in new_mi._cache

    # Shouldn't change cache
    tm.assert_almost_equal(mi2._cache["_values"], vals2)

    # Should have correct values
    tm.assert_almost_equal(exp_values, new_values)
|
||||
|
||||
|
||||
def test_boxable_categorical_values():
    """``.values`` boxes categorical datetimes as Timestamp objects."""
    cat = pd.Categorical(pd.date_range("2012-01-01", periods=3, freq="h"))
    stamps = [
        pd.Timestamp("2012-01-01 00:00:00"),
        pd.Timestamp("2012-01-01 01:00:00"),
        pd.Timestamp("2012-01-01 02:00:00"),
    ]

    # MultiIndex: every letter paired with every timestamp, letters outermost.
    result = MultiIndex.from_product([["a", "b", "c"], cat]).values
    expected = pd.Series(
        [(letter, stamp) for letter in ["a", "b", "c"] for stamp in stamps]
    ).values
    tm.assert_numpy_array_equal(result, expected)

    # DataFrame: the categorical column and its ndarray form give equal values.
    result = pd.DataFrame({"a": ["a", "b", "c"], "b": cat, "c": np.array(cat)}).values
    expected = pd.DataFrame(
        {
            "a": ["a", "b", "c"],
            "b": list(stamps),
            "c": list(stamps),
        }
    ).values
    tm.assert_numpy_array_equal(result, expected)
|
@ -0,0 +1,860 @@
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
)
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_constructor_single_level():
    """A single level/codes pair still builds a MultiIndex."""
    result = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
    )
    assert isinstance(result, MultiIndex)
    expected = Index(["foo", "bar", "baz", "qux"], name="first")
    tm.assert_index_equal(result.levels[0], expected)
    assert result.names == ["first"]
|
||||
|
||||
|
||||
def test_constructor_no_levels():
    """Empty or missing levels/codes are rejected by the constructor."""
    msg = "non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=[], codes=[])

    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex(levels=[])
    with pytest.raises(TypeError, match=msg):
        MultiIndex(codes=[])
|
||||
|
||||
|
||||
def test_constructor_nonhashable_names():
    """Unhashable names raise TypeError in the constructor and in renames."""
    # GH 20527
    levels = [[1, 2], ["one", "two"]]
    codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
    names = (["foo"], ["bar"])
    msg = r"MultiIndex\.name must be a hashable type"
    with pytest.raises(TypeError, match=msg):
        MultiIndex(levels=levels, codes=codes, names=names)

    # With .rename()
    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    renamed = [["fooo"], ["barr"]]
    with pytest.raises(TypeError, match=msg):
        mi.rename(names=renamed)

    # With .set_names()
    with pytest.raises(TypeError, match=msg):
        mi.set_names(names=renamed)
|
||||
|
||||
|
||||
def test_constructor_mismatched_codes_levels(idx):
    """Inconsistent levels/codes raise ValueError unless verification is off."""
    codes = [np.array([1]), np.array([2]), np.array([3])]
    levels = ["a"]

    msg = "Length of levels and codes must be the same"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=levels, codes=codes)

    length_error = (
        r"On level 0, code max \(3\) >= length of level \(1\)\. "
        "NOTE: this index is in an inconsistent state"
    )
    label_error = r"Unequal code lengths: \[4, 2\]"
    code_value_error = r"On level 0, code value \(-2\) < -1"

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=length_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])

    with pytest.raises(ValueError, match=label_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])

    # external API
    with pytest.raises(ValueError, match=length_error):
        idx.copy().set_levels([["a"], ["b"]])

    with pytest.raises(ValueError, match=label_error):
        idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])

    # test set_codes with verify_integrity=False
    # the setting should not raise any value error
    idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)

    # code value smaller than -1
    with pytest.raises(ValueError, match=code_value_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])
|
||||
|
||||
|
||||
def test_na_levels():
    """Codes pointing at missing-value level entries are re-assigned to -1."""
    # GH26408
    # test if codes are re-assigned value -1 for levels
    # with missing values (NaN, NaT, None)
    result = MultiIndex(
        levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]]
    )
    expected = MultiIndex(
        levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]]
    )
    tm.assert_index_equal(result, expected)

    result = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]]
    )
    expected = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]]
    )
    tm.assert_index_equal(result, expected)

    # verify set_levels and set_codes
    result = MultiIndex(
        levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]
    ).set_levels([[np.nan, "s", pd.NaT, 128, None]])
    tm.assert_index_equal(result, expected)

    result = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]]
    ).set_codes([[0, -1, 1, 2, 3, 4]])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_copy_in_constructor():
    """With ``copy=True`` later edits to the input arrays do not leak in."""
    levels = np.array(["a", "b", "c"])
    codes = np.array([1, 1, 2, 0, 0, 1, 1])
    val = codes[0]
    mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
    assert mi.codes[0][0] == val
    # Mutate the source codes; the index must keep the original value.
    codes[0] = 15
    assert mi.codes[0][0] == val
    # Same check for the source levels.
    val = levels[0]
    levels[0] = "PANDA"
    assert mi.levels[0][0] == val
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_arrays
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_arrays(idx):
    """``from_arrays`` rebuilds the fixture from its per-level value arrays."""
    arrays = [
        np.asarray(lev).take(level_codes)
        for lev, level_codes in zip(idx.levels, idx.codes)
    ]

    # list of arrays as input
    result = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(result, idx)

    # infer correctly
    result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
    assert result.levels[0].equals(Index([Timestamp("20130101")]))
    assert result.levels[1].equals(Index(["a", "b"]))
|
||||
|
||||
|
||||
def test_from_arrays_iterator(idx):
    """``from_arrays`` accepts an iterator of arrays but rejects a scalar."""
    # GH 18434
    arrays = [
        np.asarray(lev).take(level_codes)
        for lev, level_codes in zip(idx.levels, idx.codes)
    ]

    # iterator as input
    result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
    tm.assert_index_equal(result, idx)

    # invalid iterator input
    msg = "Input must be a list / sequence of array-likes."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(0)
|
||||
|
||||
|
||||
def test_from_arrays_tuples(idx):
    """``from_arrays`` accepts a tuple of tuples."""
    arrays = tuple(
        tuple(np.asarray(lev).take(level_codes))
        for lev, level_codes in zip(idx.levels, idx.codes)
    )

    # tuple of tuples as input
    result = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(result, idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="h", periods=3),
        ),
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
        ),
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="h", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    """Index and Series inputs to ``from_arrays`` give equal results."""
    result = MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)

    result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)

    tm.assert_index_equal(result, result2)
|
||||
|
||||
|
||||
def test_from_arrays_index_datetimelike_mixed():
    """Four different datetime-like levels round-trip through ``from_arrays``."""
    idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
    idx2 = date_range("2015-01-01 10:00", freq="h", periods=3)
    idx3 = pd.timedelta_range("1 days", freq="D", periods=3)
    idx4 = pd.period_range("2011-01-01", freq="D", periods=3)

    result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)
    tm.assert_index_equal(result.get_level_values(2), idx3)
    tm.assert_index_equal(result.get_level_values(3), idx4)

    # The same inputs wrapped in Series must give the same index.
    result2 = MultiIndex.from_arrays(
        [Series(idx1), Series(idx2), Series(idx3), Series(idx4)]
    )
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)
    tm.assert_index_equal(result2.get_level_values(2), idx3)
    tm.assert_index_equal(result2.get_level_values(3), idx4)

    tm.assert_index_equal(result, result2)
|
||||
|
||||
|
||||
def test_from_arrays_index_series_categorical():
    """Categorical levels survive as CategoricalIndex, Series or raw values."""
    # GH13743
    idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False)
    idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True)

    result = MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)

    result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)

    result3 = MultiIndex.from_arrays([idx1.values, idx2.values])
    tm.assert_index_equal(result3.get_level_values(0), idx1)
    tm.assert_index_equal(result3.get_level_values(1), idx2)
|
||||
|
||||
|
||||
def test_from_arrays_empty():
    """``from_arrays`` with zero, one and several empty arrays."""
    # 0 levels
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(result, MultiIndex)
    expected = Index([], name="A")
    tm.assert_index_equal(result.levels[0], expected)
    assert result.names == ["A"]

    # N levels
    for nlevels in [2, 3]:
        arrays = [[]] * nlevels
        names = list("ABC")[:nlevels]
        result = MultiIndex.from_arrays(arrays=arrays, names=names)
        expected = MultiIndex(levels=[[]] * nlevels, codes=[[]] * nlevels, names=names)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        # "a" appears twice in this list; kept so the set of cases is unchanged.
        "a",
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """Inputs that are not a sequence of array-likes raise TypeError."""
    msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
    """Arrays of unequal length raise ValueError."""
    # see gh-13599
    msg = "^all arrays must be same length$"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_arrays([idx1, idx2])
|
||||
|
||||
|
||||
def test_from_arrays_respects_none_names():
    """An explicit ``names=None`` overrides the names of the input Series."""
    # GH27292
    a = Series([1, 2, 3], name="foo")
    b = Series(["a", "b", "c"], name="bar")

    result = MultiIndex.from_arrays([a, b], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None
    )

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_tuples
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_tuples():
    """``from_tuples`` builds from tuples; an unnamed empty list raises."""
    msg = "Cannot infer number of levels from empty list"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_tuples([])

    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )

    # input tuples
    result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_tuples_iterator():
    """``from_tuples`` accepts an iterator of tuples but rejects a scalar."""
    # GH 18434
    # input iterator for tuples
    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )

    result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"])
    tm.assert_index_equal(result, expected)

    # input non-iterables
    msg = "Input must be a list / sequence of tuple-likes."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_tuples(0)
|
||||
|
||||
|
||||
def test_from_tuples_empty():
    """An empty tuple list with names matches ``from_arrays`` of empty arrays."""
    # GH 16777
    result = MultiIndex.from_tuples([], names=["a", "b"])
    expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_tuples_index_values(idx):
    """Passing a MultiIndex to ``from_tuples`` preserves its values."""
    result = MultiIndex.from_tuples(idx)
    assert (result.values == idx.values).all()
|
||||
|
||||
|
||||
def test_tuples_with_name_string():
    """A plain string ``name`` with tuple data raises ValueError."""
    # GH 15110 and GH 14848

    li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    msg = "Names should be list-like for a MultiIndex"
    with pytest.raises(ValueError, match=msg):
        Index(li, name="abc")
    with pytest.raises(ValueError, match=msg):
        Index(li, name="a")
|
||||
|
||||
|
||||
def test_from_tuples_with_tuple_label():
    """A tuple can itself be a label inside one level."""
    # GH 15457
    expected = pd.DataFrame(
        [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
    ).set_index(["a", "b"])
    idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    result = pd.DataFrame([2, 3], columns=["c"], index=idx)
    tm.assert_frame_equal(expected, result)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_product
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_product_empty_zero_levels():
    """``from_product`` with no iterables raises ValueError."""
    # 0 levels
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_product([])
|
||||
|
||||
|
||||
def test_from_product_empty_one_level():
    """A single empty iterable gives one empty, named level."""
    result = MultiIndex.from_product([[]], names=["A"])
    expected = Index([], name="A")
    tm.assert_index_equal(result.levels[0], expected)
    assert result.names == ["A"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
    """A product with an empty factor keeps both levels but has no codes."""
    names = ["A", "B"]
    result = MultiIndex.from_product([first, second], names=names)
    expected = MultiIndex(levels=[first, second], codes=[[], []], names=names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    """An N-element middle level between two empty levels gives no rows."""
    # GH12258
    names = ["A", "B", "C"]
    lvl2 = list(range(N))
    result = MultiIndex.from_product([[], lvl2, []], names=names)
    expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
    """Inputs that are not a sequence of iterables raise TypeError."""
    msg = r"Input must be a list / sequence of iterables|Input must be list-like"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(iterables=invalid_input)
|
||||
|
||||
|
||||
def test_from_product_datetimeindex():
    """A DatetimeIndex factor yields Timestamp objects in ``.values``."""
    dt_index = date_range("2000-01-01", periods=2)
    mi = MultiIndex.from_product([[1, 2], dt_index])
    # Reference ("etalon") array: a 1-D object array of (int, Timestamp) tuples.
    etalon = construct_1d_object_array_from_listlike(
        [
            (1, Timestamp("2000-01-01")),
            (1, Timestamp("2000-01-02")),
            (2, Timestamp("2000-01-01")),
            (2, Timestamp("2000-01-02")),
        ]
    )
    tm.assert_numpy_array_equal(mi.values, etalon)
|
||||
|
||||
|
||||
def test_from_product_rangeindex():
    """A range-backed Index factor is kept exactly as the first level."""
    # RangeIndex is preserved by factorize, so preserved in levels
    rng = Index(range(5))
    other = ["a", "b"]
    mi = MultiIndex.from_product([rng, other])
    tm.assert_index_equal(mi._levels[0], rng, exact=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    """A categorical factor keeps its categories and ordering in the result."""
    # GH13743
    first = ["foo", "bar"]

    idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered)
    # The categorical values repeat once per element of ``first``.
    expected = pd.CategoricalIndex(
        list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered
    )

    result = MultiIndex.from_product([first, f(idx)])
    tm.assert_index_equal(result.get_level_values(1), expected)
|
||||
|
||||
|
||||
def test_from_product():
    """``from_product`` matches ``from_tuples`` on the explicit product."""
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    result = MultiIndex.from_product([first, second], names=names)

    tuples = [
        ("foo", "a"),
        ("foo", "b"),
        ("foo", "c"),
        ("bar", "a"),
        ("bar", "b"),
        ("bar", "c"),
        ("buz", "a"),
        ("buz", "b"),
        ("buz", "c"),
    ]
    expected = MultiIndex.from_tuples(tuples, names=names)

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_product_iterator():
    """``from_product`` accepts an iterator of iterables and rejects a scalar."""
    # GH 18434
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    tuples = [
        ("foo", "a"),
        ("foo", "b"),
        ("foo", "c"),
        ("bar", "a"),
        ("bar", "b"),
        ("bar", "c"),
        ("buz", "a"),
        ("buz", "b"),
        ("buz", "c"),
    ]
    expected = MultiIndex.from_tuples(tuples, names=names)

    # iterator as input
    result = MultiIndex.from_product(iter([first, second]), names=names)
    tm.assert_index_equal(result, expected)

    # Invalid non-iterable input
    msg = "Input must be a list / sequence of iterables."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            Series([1, 2, 3], name="foo"),
            Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    """Level names are taken from named inputs when ``names`` is not passed."""
    # GH27292
    result = MultiIndex.from_product([a, b])
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=expected_names,
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_product_respects_none_names():
    """An explicit ``names=None`` is honoured even when the inputs are named."""
    # GH27292
    a = Series([1, 2, 3], name="foo")
    b = Series(["a", "b"], name="bar")

    result = MultiIndex.from_product([a, b], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=None,
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_product_readonly():
    """A read-only ndarray gives the same result as its writable original."""
    # GH#15286 passing read-only array to from_product
    a = np.array(range(3))
    b = ["a", "b"]
    # Build the reference result while the array is still writable.
    expected = MultiIndex.from_product([a, b])

    a.setflags(write=False)
    result = MultiIndex.from_product([a, b])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_create_index_existing_name(idx):
|
||||
# GH11193, when an existing index is passed, and a new name is not
|
||||
# specified, the new index should inherit the previous object name
|
||||
index = idx
|
||||
index.names = ["foo", "bar"]
|
||||
result = Index(index)
|
||||
expected = Index(
|
||||
Index(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
],
|
||||
dtype="object",
|
||||
)
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = Index(index, name="A")
|
||||
expected = Index(
|
||||
Index(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
],
|
||||
dtype="object",
|
||||
),
|
||||
name="A",
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# from_frame
|
||||
# ----------------------------------------------------------------------------
|
||||
def test_from_frame():
    """``from_frame`` uses the rows as tuples and the columns as level names."""
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"]
    )
    expected = MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
    )
    result = MultiIndex.from_frame(df)
    # (result, expected) order keeps left/right in failure messages meaningful.
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_frame_missing_values_multiIndex():
    """``from_frame`` on masked columns with missing values matches ``from_arrays``."""
    # GH 39984
    pa = pytest.importorskip("pyarrow")

    df = pd.DataFrame(
        {
            "a": Series([1, 2, None], dtype="Int64"),
            "b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
        }
    )
    multi_indexed = MultiIndex.from_frame(df)
    expected = MultiIndex.from_arrays(
        [
            Series([1, 2, None]).astype("Int64"),
            pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
        ],
        names=["a", "b"],
    )
    tm.assert_index_equal(multi_indexed, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "non_frame",
    [
        Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    """``from_frame`` raises TypeError for anything that is not a DataFrame."""
    # GH 22420
    with pytest.raises(TypeError, match="Input must be a DataFrame"):
        MultiIndex.from_frame(non_frame)
|
||||
|
||||
|
||||
def test_from_frame_dtype_fidelity():
    """Each level of the resulting MultiIndex keeps its source column's dtype."""
    # GH 22420
    df = pd.DataFrame(
        {
            "dates": date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    original_dtypes = df.dtypes.to_dict()

    expected_mi = MultiIndex.from_arrays(
        [
            date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    mi = MultiIndex.from_frame(df)
    mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}

    tm.assert_index_equal(mi, expected_mi)
    assert original_dtypes == mi_dtypes
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names_in,names_out",
    [
        (None, [("L1", "x"), ("L2", "y")]),
        (["x", "y"], ["x", "y"]),
    ],
)
def test_from_frame_valid_names(names_in, names_out):
    """Names default to the column labels unless ``names`` overrides them."""
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
    )
    mi = MultiIndex.from_frame(df, names=names_in)
    assert mi.names == names_out
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    """A non-list-like or wrongly sized ``names`` raises ValueError."""
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
    )
    with pytest.raises(ValueError, match=expected_error_msg):
        MultiIndex.from_frame(df, names=names)
|
||||
|
||||
|
||||
def test_index_equal_empty_iterable():
    """Empty MultiIndexes from the constructor and ``from_arrays`` compare equal."""
    # #16844
    a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
    b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(a, b)
|
||||
|
||||
|
||||
def test_raise_invalid_sortorder():
    """A ``sortorder`` larger than the actual lexsort depth raises ValueError."""
    # Test that the MultiIndex constructor raises when an incorrect sortorder
    # is given
    # GH#28518

    levels = [[0, 1], [0, 1, 2]]

    # Correct sortorder
    MultiIndex(
        levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
    )

    with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
        MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2
        )

    with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
        MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1
        )
|
||||
|
||||
|
||||
def test_datetimeindex():
|
||||
idx1 = pd.DatetimeIndex(
|
||||
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo"
|
||||
)
|
||||
idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern")
|
||||
idx = MultiIndex.from_arrays([idx1, idx2])
|
||||
|
||||
expected1 = pd.DatetimeIndex(
|
||||
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo"
|
||||
)
|
||||
|
||||
tm.assert_index_equal(idx.levels[0], expected1)
|
||||
tm.assert_index_equal(idx.levels[1], idx2)
|
||||
|
||||
# from datetime combos
|
||||
# GH 7888
|
||||
date1 = np.datetime64("today")
|
||||
date2 = datetime.today()
|
||||
date3 = Timestamp.today()
|
||||
|
||||
for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]):
|
||||
index = MultiIndex.from_product([[d1], [d2]])
|
||||
assert isinstance(index.levels[0], pd.DatetimeIndex)
|
||||
assert isinstance(index.levels[1], pd.DatetimeIndex)
|
||||
|
||||
# but NOT date objects, matching Index behavior
|
||||
date4 = date.today()
|
||||
index = MultiIndex.from_product([[date4], [date2]])
|
||||
assert not isinstance(index.levels[0], pd.DatetimeIndex)
|
||||
assert isinstance(index.levels[1], pd.DatetimeIndex)
|
||||
|
||||
|
||||
def test_constructor_with_tz():
    """``from_arrays`` keeps names and tz-aware levels for Index and Series input."""
    index = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    columns = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    # The same expectations hold for the raw indexes and their Series wrappers.
    for arrays in ([index, columns], [Series(index), Series(columns)]):
        result = MultiIndex.from_arrays(arrays)

        assert result.names == ["dt1", "dt2"]
        tm.assert_index_equal(result.levels[0], index)
        tm.assert_index_equal(result.levels[1], columns)
|
||||
|
||||
|
||||
def test_multiindex_inference_consistency():
    """``date`` values stay object dtype in Index and in MultiIndex levels."""
    # check that inference behavior matches the base class

    v = date.today()

    arr = [v, v]

    idx = Index(arr)
    assert idx.dtype == object

    # Each MultiIndex constructor must infer the same dtype as Index does.
    constructed = [
        MultiIndex.from_arrays([arr]),
        MultiIndex.from_product([arr]),
        MultiIndex.from_tuples([(x,) for x in arr]),
    ]
    for mi in constructed:
        lev = mi.levels[0]
        assert lev.dtype == object
|
||||
|
||||
|
||||
def test_dtype_representation(using_infer_string):
|
||||
# GH#46900
|
||||
pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
|
||||
result = pmidx.dtypes
|
||||
exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
|
||||
expected = Series(
|
||||
["int64", exp],
|
||||
index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
|
||||
dtype=object,
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,201 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gt2
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_to_numpy(idx):
|
||||
result = idx.to_numpy()
|
||||
exp = idx.values
|
||||
tm.assert_numpy_array_equal(result, exp)
|
||||
|
||||
|
||||
def test_array_interface(idx):
|
||||
# https://github.com/pandas-dev/pandas/pull/60046
|
||||
result = np.asarray(idx)
|
||||
expected = np.empty((6,), dtype=object)
|
||||
expected[:] = [
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
]
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# it always gives a copy by default, but the values are cached, so results
|
||||
# are still sharing memory
|
||||
result_copy1 = np.asarray(idx)
|
||||
result_copy2 = np.asarray(idx)
|
||||
assert np.may_share_memory(result_copy1, result_copy2)
|
||||
|
||||
# with explicit copy=True, then it is an actual copy
|
||||
result_copy1 = np.array(idx, copy=True)
|
||||
result_copy2 = np.array(idx, copy=True)
|
||||
assert not np.may_share_memory(result_copy1, result_copy2)
|
||||
|
||||
if not np_version_gt2:
|
||||
# copy=False semantics are only supported in NumPy>=2.
|
||||
return
|
||||
|
||||
# for MultiIndex, copy=False is never allowed
|
||||
msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
np.array(idx, copy=False)
|
||||
|
||||
|
||||
def test_to_frame():
    """Exercise ``to_frame`` with and without ``index`` and ``name`` arguments."""
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
    names = ["first", "second"]

    # Unnamed levels: columns get the default integer labels.
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # Named levels: the level names become the column labels.
    index = MultiIndex.from_tuples(tuples, names=names)
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    expected.columns = names
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False, name=names)
    expected = DataFrame(tuples)
    expected.columns = names
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=names)
    expected.index = index
    tm.assert_frame_equal(result, expected)

    msg = "'name' must be a list / sequence of column names."
    with pytest.raises(TypeError, match=msg):
        index.to_frame(name="first")

    msg = "'name' should have same length as number of levels on index."
    with pytest.raises(ValueError, match=msg):
        index.to_frame(name=["first"])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    result = index.to_frame(index=False)
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    result = index.to_frame(index=False, name=names)
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=names)
    expected.index = index
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_frame_dtype_fidelity():
    """Each column produced by ``to_frame`` keeps its level's dtype."""
    # GH 22420
    mi = MultiIndex.from_arrays(
        [
            pd.date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}

    expected_df = DataFrame(
        {
            "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    df = mi.to_frame(index=False)
    df_dtypes = df.dtypes.to_dict()

    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df_dtypes
|
||||
|
||||
|
||||
def test_to_frame_resulting_column_order():
    """Columns follow the order of the level names, not a sorted order."""
    # GH 22420
    expected = ["z", 0, "a"]
    mi = MultiIndex.from_arrays(
        [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected
    )
    result = mi.to_frame().columns.tolist()
    assert result == expected
|
||||
|
||||
|
||||
def test_to_frame_duplicate_labels():
    """Duplicate column labels raise unless ``allow_duplicates=True``."""
    # GH 45245
    data = [(1, 2), (3, 4)]
    names = ["a", "a"]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
        index.to_frame()

    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=names)
    tm.assert_frame_equal(result, expected)

    # The expected frame below has columns [0, 0]: the unnamed first level
    # ends up labelled 0, the same as the second level's explicit name.
    names = [None, 0]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
        index.to_frame()

    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=[0, 0])
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_flat_index(idx):
|
||||
expected = pd.Index(
|
||||
(
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
),
|
||||
tupleize_cols=False,
|
||||
)
|
||||
result = idx.to_flat_index()
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,96 @@
|
||||
from copy import (
|
||||
copy,
|
||||
deepcopy,
|
||||
)
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` equals ``original`` without sharing its containers.

    Parameters
    ----------
    copy : MultiIndex
        The object produced by the copy operation under test.
    original : MultiIndex
        The index that was copied.
    """
    # Levels should be (at least) shallow copied
    tm.assert_copy(copy.levels, original.levels)

    # Codes must be equal; how they were copied does not matter, but the
    # container itself must be a different object.
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names must be equal; how they were copied does not matter.
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
|
||||
|
||||
|
||||
def test_copy(idx):
    """``MultiIndex.copy()`` passes the shared copied-index assertions."""
    i_copy = idx.copy()

    assert_multiindex_copied(i_copy, idx)
|
||||
|
||||
|
||||
def test_shallow_copy(idx):
    """The private ``_view()`` result passes the shared copied-index assertions."""
    i_copy = idx._view()

    assert_multiindex_copied(i_copy, idx)
|
||||
|
||||
|
||||
def test_view(idx):
    """``MultiIndex.view()`` passes the shared copied-index assertions."""
    i_view = idx.view()
    assert_multiindex_copied(i_view, idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    """``copy.copy`` and ``copy.deepcopy`` return a new but equal MultiIndex."""
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = func(idx)
    assert idx_copy is not idx
    assert idx_copy.equals(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    """``MultiIndex.copy(deep=...)`` returns an equal index for both settings."""
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(deep=deep)
    assert idx_copy.equals(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    """A keyword passed to ``copy`` is reflected on the returned index."""
    # gh-12309: Check that the "name" argument as well as other kwargs
    # are honored
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(**{kwarg: value, "deep": deep})
    assert getattr(idx_copy, kwarg) == value
|
||||
|
||||
|
||||
def test_copy_deep_false_retains_id():
    """A shallow copy shares the original's private ``_id`` object."""
    # GH#47878
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )

    res = idx.copy(deep=False)
    assert res._id is idx._id
|
@ -0,0 +1,190 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_drop(idx):
    """Exercise ``drop`` with full keys, partial keys and ``errors='ignore'``."""
    dropped = idx.drop([("foo", "two"), ("qux", "one")])

    index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
    dropped2 = idx.drop(index)

    expected = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
    tm.assert_index_equal(dropped2, expected)

    dropped = idx.drop(["bar"])
    expected = idx[[0, 1, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop("foo")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # ("bar", "two") is not in idx, so dropping it must raise.
    index = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(index)
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(["foo", "two"])

    # partially correct argument
    mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(mixed_index)

    # errors='ignore'
    dropped = idx.drop(index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(["foo", "two"], errors="ignore")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop
    dropped = idx.drop(["foo", ("qux", "one")])
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop / errors='ignore'
    mixed_index = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(mixed_index)
    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
|
||||
|
||||
|
||||
def test_droplevel_with_names(idx):
|
||||
index = idx[idx.get_loc("foo")]
|
||||
dropped = index.droplevel(0)
|
||||
assert dropped.name == "second"
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[Index(range(4)), Index(range(4)), Index(range(4))],
|
||||
codes=[
|
||||
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
|
||||
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
|
||||
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
|
||||
],
|
||||
names=["one", "two", "three"],
|
||||
)
|
||||
dropped = index.droplevel(0)
|
||||
assert dropped.names == ("two", "three")
|
||||
|
||||
dropped = index.droplevel("two")
|
||||
expected = index.droplevel(1)
|
||||
assert dropped.equals(expected)
|
||||
|
||||
|
||||
def test_droplevel_list():
    """``droplevel`` with a list of names, an empty list, and invalid lists."""
    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )

    dropped = index[:2].droplevel(["three", "one"])
    expected = index[:2].droplevel(2).droplevel(0)
    assert dropped.equals(expected)

    # An empty list of levels leaves the index unchanged.
    dropped = index[:2].droplevel([])
    expected = index[:2]
    assert dropped.equals(expected)

    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        index[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        index[:2].droplevel(["one", "four"])
|
||||
|
||||
|
||||
def test_drop_not_lexsorted():
    """``drop`` on a non-lexsorted index warns but matches the lexsorted result."""
    # GH 12078

    # define the lexsorted version of the multi-index
    tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
    lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
    assert lexsorted_mi._is_lexsorted()

    # and the not-lexsorted version
    df = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    df = df.pivot_table(index="a", columns=["b", "c"], values="d")
    df = df.reset_index()
    not_lexsorted_mi = df.columns
    assert not not_lexsorted_mi._is_lexsorted()

    # compare the results
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))
|
||||
|
||||
|
||||
def test_drop_with_nan_in_index(nulls_fixture):
    """Dropping an absent label from a level holding a null raises KeyError."""
    # GH#18853
    mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
    msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(pd.Timestamp("2001"), level="date")
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_drop_with_non_monotonic_duplicates():
    """Dropping a duplicated key removes every occurrence of it."""
    # GH#33494
    mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
    result = mi.drop((1, 2))
    expected = MultiIndex.from_tuples([(2, 3)])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_single_level_drop_partially_missing_elements():
    """``drop(level=...)`` raises KeyError naming the labels that are missing."""
    # GH 37820

    mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])
    msg = r"labels \[4\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(4, level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([1, 4], level=0)
    msg = r"labels \[nan\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan], level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, 2, 3], level=0)

    # With NaN present in the level, only the truly missing label is reported.
    mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
    msg = r"labels \['a'\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, "a"], level=0)
|
||||
|
||||
|
||||
def test_droplevel_multiindex_one_level():
    """``droplevel([])`` on a one-level MultiIndex returns a flat, named Index."""
    # GH#37208
    index = MultiIndex.from_tuples([(2,)], names=("b",))
    result = index.droplevel([])
    expected = Index([2], name="b")
    tm.assert_index_equal(result, expected)
|
@ -0,0 +1,363 @@
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import (
|
||||
hashtable,
|
||||
index as libindex,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def idx_dup():
    """Return a two-level MultiIndex whose codes contain a repeated entry."""
    # compare tests/indexes/multi/conftest.py
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])

    # Positions 1 and 3 both encode ("foo", "two"); positions 2 and 4 both
    # encode ("bar", "one").
    major_codes = np.array([0, 0, 1, 0, 1, 1])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])
    index_names = ["first", "second"]
    mi = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=index_names,
        verify_integrity=False,
    )
    return mi
|
||||
|
||||
|
||||
@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    """``unique`` drops repeated rows and keeps the level names."""
    mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names)

    res = mi.unique()
    exp = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
    tm.assert_index_equal(res, exp)

    mi = MultiIndex.from_arrays([list("aaaa"), list("abab")], names=names)
    res = mi.unique()
    exp = MultiIndex.from_arrays([list("aa"), list("ab")], names=mi.names)
    tm.assert_index_equal(res, exp)

    mi = MultiIndex.from_arrays([list("aaaa"), list("aaaa")], names=names)
    res = mi.unique()
    exp = MultiIndex.from_arrays([["a"], ["a"]], names=mi.names)
    tm.assert_index_equal(res, exp)

    # GH #20568 - empty MI
    mi = MultiIndex.from_arrays([[], []], names=names)
    res = mi.unique()
    tm.assert_index_equal(mi, res)
|
||||
|
||||
|
||||
def test_unique_datetimelike():
    """``unique`` on datetime levels compares full rows, including NaT entries."""
    idx1 = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    idx2 = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    result = MultiIndex.from_arrays([idx1, idx2]).unique()

    # Rows 0/1 and rows 2/3 are pairwise identical; the two NaT rows differ
    # in the second level, so both are kept.
    eidx1 = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"])
    eidx2 = DatetimeIndex(
        ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
    )
    exp = MultiIndex.from_arrays([eidx1, eidx2])
    tm.assert_index_equal(result, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
    """``unique(level=...)`` equals ``get_level_values(level).unique()``."""
    # GH #17896 - with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_duplicate_multiindex_codes():
    """Duplicate values inside a level raise ValueError."""
    # GH 17464
    # Make sure that a MultiIndex with duplicate levels throws a ValueError
    msg = r"Level values must be unique: \[[A', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        # The constructor is expected to raise, so the result is not bound.
        MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])

    # And that using set_levels with duplicate levels fails
    mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    """Duplicate level names are accepted at construction and by ``rename``."""
    # GH18872, GH19029
    mi = MultiIndex.from_product([[0, 1]] * 3, names=names)
    assert mi.names == names

    # With .rename()
    mi = MultiIndex.from_product([[0, 1]] * 3)
    mi = mi.rename(names)
    assert mi.names == names

    # With .rename(., level=)
    mi.rename(names[1], level=1, inplace=True)
    mi = mi.rename([names[0], names[2]], level=[0, 2])
    assert mi.names == names
|
||||
|
||||
|
||||
def test_duplicate_meta_data():
    """``drop_duplicates`` keeps the level names, whatever they are."""
    # GH 10115
    mi = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )

    # Same index with unnamed, partially named and fully named levels.
    variants = [
        mi,
        mi.set_names([None, None]),
        mi.set_names([None, "Num"]),
        mi.set_names(["Upper", "Num"]),
    ]
    for idx in variants:
        assert idx.has_duplicates
        assert idx.drop_duplicates().names == idx.names
|
||||
|
||||
|
||||
def test_has_duplicates(idx, idx_dup):
|
||||
# see fixtures
|
||||
assert idx.is_unique is True
|
||||
assert idx.has_duplicates is False
|
||||
assert idx_dup.is_unique is False
|
||||
assert idx_dup.has_duplicates is True
|
||||
|
||||
mi = MultiIndex(
|
||||
levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
|
||||
)
|
||||
assert mi.is_unique is False
|
||||
assert mi.has_duplicates is True
|
||||
|
||||
# single instance of NaN
|
||||
mi_nan = MultiIndex(
|
||||
levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
|
||||
)
|
||||
assert mi_nan.is_unique is True
|
||||
assert mi_nan.has_duplicates is False
|
||||
|
||||
# multiple instances of NaN
|
||||
mi_nan_dup = MultiIndex(
|
||||
levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
|
||||
)
|
||||
assert mi_nan_dup.is_unique is False
|
||||
assert mi_nan_dup.has_duplicates is True
|
||||
|
||||
|
||||
def test_has_duplicates_from_tuples():
    """Eight-level tuples that differ in two positions are not duplicates."""
    # GH 9075
    # Only the 4th and 8th entries vary between rows.
    varying = [
        (5, 169),
        (7, 119),
        (9, 135),
        (13, 145),
        (14, 158),
        (16, 122),
        (17, 160),
        (18, 180),
        (20, 143),
        (21, 128),
        (22, 129),
        (25, 111),
        (28, 114),
        (29, 121),
        (31, 126),
        (32, 155),
        (33, 123),
        (12, 144),
    ]
    rows = [("x", "out", "z", a, "y", "in", "z", b) for a, b in varying]

    mi = MultiIndex.from_tuples(rows)
    assert not mi.has_duplicates
|
||||
|
||||
|
||||
@pytest.mark.parametrize("nlevels", [4, 8])
@pytest.mark.parametrize("with_nulls", [True, False])
def test_has_duplicates_overflow(nlevels, with_nulls):
    """has_duplicates is correct for many-level indexes, with and without nulls."""
    # handle int64 overflow if possible
    # no overflow with 4
    # overflow possible with 8
    base_codes = np.tile(np.arange(500), 2)
    level_values = np.arange(500)

    if not with_nulls:
        code_arrays = [base_codes] * nlevels + [np.arange(2).repeat(500)]
    else:
        # inject some null values
        base_codes[500] = -1  # common nan value
        code_arrays = [base_codes.copy() for _ in range(nlevels)]
        for pos, arr in enumerate(code_arrays):
            arr[500 + pos - nlevels // 2] = -1

        code_arrays.append(np.array([-1, 1]).repeat(500))

    all_levels = [level_values] * nlevels + [[0, 1]]

    # no dups
    mi = MultiIndex(levels=all_levels, codes=code_arrays)
    assert not mi.has_duplicates

    # with a dup: the first row is repeated after the last one
    if with_nulls:
        code_arrays = [np.insert(arr, 1000, arr[0]) for arr in code_arrays]
        mi = MultiIndex(levels=all_levels, codes=code_arrays)
    else:
        rows = mi.values.tolist()
        mi = MultiIndex.from_tuples(rows + [rows[0]])

    assert mi.has_duplicates
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", np.array([False, False, False, True, True, False])),
        ("last", np.array([False, True, True, False, False, False])),
        (False, np.array([False, True, True, True, True, False])),
    ],
)
def test_duplicated(idx_dup, keep, expected):
    """duplicated() on the duplicated fixture yields the mask for each ``keep``."""
    mask = idx_dup.duplicated(keep=keep)
    tm.assert_numpy_array_equal(mask, expected)
|
||||
|
||||
|
||||
@pytest.mark.arm_slow
def test_duplicated_hashtable_impl(keep, monkeypatch):
    """MultiIndex.duplicated matches hashtable.duplicated on the raw values."""
    # GH 9125
    n_values, repeats = 6, 10
    level_list = [
        np.arange(n_values),
        list(map(str, range(n_values))),
        1000 + np.arange(n_values),
    ]
    # a fresh generator with the same seed is created for every level
    code_list = [
        np.random.default_rng(2).choice(n_values, repeats * n_values)
        for _ in level_list
    ]
    with monkeypatch.context() as patcher:
        # lower the engine size cutoff for the duration of the comparison
        patcher.setattr(libindex, "_SIZE_CUTOFF", 50)
        mi = MultiIndex(levels=level_list, codes=code_list)

        observed = mi.duplicated(keep=keep)
        reference = hashtable.duplicated(mi.values, keep=keep)
    tm.assert_numpy_array_equal(observed, reference)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [101, 102])
def test_duplicated_with_nan(val):
    """Rows differing only by NaN versus a value are not duplicates (GH5873)."""
    mi = MultiIndex.from_arrays([[101, val], [3.5, np.nan]])
    assert not mi.has_duplicates

    nothing_flagged = np.zeros(2, dtype="bool")
    tm.assert_numpy_array_equal(mi.duplicated(), nothing_flagged)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n", range(1, 6))
@pytest.mark.parametrize("m", range(1, 5))
def test_duplicated_with_nan_multi_shape(n, m):
    """Every code pair, including -1, appears once, so nothing is duplicated."""
    # GH5873
    # all possible unique combinations, including nan
    code_pairs = list(product(range(-1, n), range(-1, m)))
    shuffled = np.random.default_rng(2).permutation(code_pairs)
    mi = MultiIndex(
        levels=[list("abcde")[:n], list("WXYZ")[:m]],
        codes=shuffled.T,
    )
    assert len(mi) == (n + 1) * (m + 1)
    assert not mi.has_duplicates

    nothing_flagged = np.zeros(len(mi), dtype="bool")
    tm.assert_numpy_array_equal(mi.duplicated(), nothing_flagged)
|
||||
|
||||
|
||||
def test_duplicated_drop_duplicates():
    """duplicated() and drop_duplicates() agree for each ``keep`` mode."""
    # GH#4060
    idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    # (keyword arguments, expected mask, arrays of the expected remaining index)
    cases = [
        (
            {},
            [False, False, False, True, False, False],
            ([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": "last"},
            [True, False, False, False, False, False],
            ([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": False},
            [True, False, False, True, False, False],
            ([2, 3, 2, 3], [1, 1, 2, 2]),
        ),
    ]

    for kwargs, mask, remaining in cases:
        flagged = idx.duplicated(**kwargs)
        tm.assert_numpy_array_equal(flagged, np.array(mask, dtype=bool))
        assert flagged.dtype == bool

        kept = MultiIndex.from_arrays(remaining)
        tm.assert_index_equal(idx.drop_duplicates(**kwargs), kept)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype",
    [
        np.complex64,
        np.complex128,
    ],
)
def test_duplicated_series_complex_numbers(dtype):
    """Series.duplicated flags repeated complex values, NaN ones included."""
    # GH 17927
    complex_nan = np.nan + np.nan * 1j
    values = [
        complex_nan,
        0,
        1j,
        1j,
        1,
        1 + 1j,
        1 + 2j,
        1 + 1j,
        np.nan,
        complex_nan,
    ]
    flags = [False, False, False, True, False, False, False, True, False, True]

    result = Series(values, dtype=dtype).duplicated()
    expected = Series(flags, dtype=bool)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_midx_unique_ea_dtype():
    """unique() collapses repeated rows whose Int64 level holds NA (GH#48335)."""
    level_names = ["a", "b"]

    masked = Series([1, 2, NA, NA], dtype="Int64")
    plain = np.array([1, 2, 3, 3])
    result = MultiIndex.from_arrays([masked, plain], names=level_names).unique()

    expected = MultiIndex.from_arrays(
        [Series([1, 2, NA], dtype="Int64"), np.array([1, 2, 3])],
        names=level_names,
    )
    tm.assert_index_equal(result, expected)
|
@ -0,0 +1,284 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_any_real_numeric_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_equals(idx):
|
||||
assert idx.equals(idx)
|
||||
assert idx.equals(idx.copy())
|
||||
assert idx.equals(idx.astype(object))
|
||||
assert idx.equals(idx.to_flat_index())
|
||||
assert idx.equals(idx.to_flat_index().astype("category"))
|
||||
|
||||
assert not idx.equals(list(idx))
|
||||
assert not idx.equals(np.array(idx))
|
||||
|
||||
same_values = Index(idx, dtype=object)
|
||||
assert idx.equals(same_values)
|
||||
assert same_values.equals(idx)
|
||||
|
||||
if idx.nlevels == 1:
|
||||
# do not test MultiIndex
|
||||
assert not idx.equals(Series(idx))
|
||||
|
||||
|
||||
def test_equals_op(idx):
    """Element-wise ``==`` needs equal lengths for Index, ndarray and Series."""
    # GH9947, GH10637
    length_err = "Lengths must match"
    full = idx
    size = len(full)

    # last row dropped / last row replaced by the one before it / first row only
    shorter = full[0:-1]
    altered = full[0:-1].append(full[-2:-1])
    single = full[0:1]

    all_true = np.array([True] * size)
    last_false = np.array([True] * (size - 1) + [False])

    with pytest.raises(ValueError, match=length_err):
        full == shorter
    tm.assert_numpy_array_equal(full == full, all_true)
    tm.assert_numpy_array_equal(full == altered, last_false)

    # test comparisons with numpy arrays
    arr_full = np.array(full)
    arr_shorter = np.array(full[0:-1])
    arr_altered = np.array(full[0:-1].append(full[-2:-1]))
    arr_single = np.array(full[0:1])
    with pytest.raises(ValueError, match=length_err):
        full == arr_shorter
    tm.assert_numpy_array_equal(full == arr_full, all_true)
    tm.assert_numpy_array_equal(full == arr_altered, last_false)

    # test comparisons with Series
    ser_full = Series(arr_full)
    ser_shorter = Series(arr_shorter)
    ser_altered = Series(arr_altered)
    ser_single = Series(arr_single)
    with pytest.raises(ValueError, match=length_err):
        full == ser_shorter

    tm.assert_numpy_array_equal(full == ser_full, all_true)
    tm.assert_numpy_array_equal(full == ser_altered, last_false)

    # cases where length is 1 for one of them
    for one_row in (single, ser_single, arr_single):
        with pytest.raises(ValueError, match=length_err):
            full == one_row
    label_err = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=label_err):
        ser_full == ser_single
    with pytest.raises(ValueError, match=length_err):
        ser_full == arr_single

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if isinstance(full, MultiIndex):
        return

    only_second_to_last = np.array([False] * (len(full) - 2) + [True, False])
    # assuming the 2nd to last item is unique in the data
    item = full[-2]
    tm.assert_numpy_array_equal(full == item, only_second_to_last)
    tm.assert_series_equal(ser_full == item, Series(only_second_to_last))
|
||||
|
||||
|
||||
def test_compare_tuple():
    """Comparing a MultiIndex with one of its tuples is element-wise (GH#21517)."""
    mi = MultiIndex.from_product([[1, 2]] * 2)
    first = mi[0]

    none = np.array([False, False, False, False])
    only_first = np.array([True, False, False, False])

    tm.assert_numpy_array_equal(mi == first, only_first)
    tm.assert_numpy_array_equal(mi != first, ~only_first)

    # the first tuple is the smallest one
    tm.assert_numpy_array_equal(mi < first, none)
    tm.assert_numpy_array_equal(mi <= first, only_first)
    tm.assert_numpy_array_equal(mi > first, ~only_first)
    tm.assert_numpy_array_equal(mi >= first, ~none)
|
||||
|
||||
|
||||
def test_compare_tuple_strs():
    """``==`` against a tuple of strings matches whole rows only (GH#34180)."""
    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    # full-length tuple: only the identical row matches
    full_match = mi == ("c", "a")
    tm.assert_numpy_array_equal(full_match, np.array([False, False, True]))

    # shorter tuple: no row matches
    partial_match = mi == ("c",)
    tm.assert_numpy_array_equal(partial_match, np.array([False, False, False]))
|
||||
|
||||
|
||||
def test_equals_multi(idx):
|
||||
assert idx.equals(idx)
|
||||
assert not idx.equals(idx.values)
|
||||
assert idx.equals(Index(idx.values))
|
||||
|
||||
assert idx.equal_levels(idx)
|
||||
assert not idx.equals(idx[:-1])
|
||||
assert not idx.equals(idx[-1])
|
||||
|
||||
# different number of levels
|
||||
index = MultiIndex(
|
||||
levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
|
||||
codes=[
|
||||
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
|
||||
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
|
||||
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
|
||||
],
|
||||
)
|
||||
|
||||
index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
|
||||
assert not index.equals(index2)
|
||||
assert not index.equal_levels(index2)
|
||||
|
||||
# levels are different
|
||||
major_axis = Index(list(range(4)))
|
||||
minor_axis = Index(list(range(2)))
|
||||
|
||||
major_codes = np.array([0, 0, 1, 2, 2, 3])
|
||||
minor_codes = np.array([0, 1, 0, 0, 1, 0])
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
|
||||
)
|
||||
assert not idx.equals(index)
|
||||
assert not idx.equal_levels(index)
|
||||
|
||||
# some of the labels are different
|
||||
major_axis = Index(["foo", "bar", "baz", "qux"])
|
||||
minor_axis = Index(["one", "two"])
|
||||
|
||||
major_codes = np.array([0, 0, 2, 2, 3, 3])
|
||||
minor_codes = np.array([0, 1, 0, 1, 0, 1])
|
||||
|
||||
index = MultiIndex(
|
||||
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
|
||||
)
|
||||
assert not idx.equals(index)
|
||||
|
||||
|
||||
def test_identical(idx):
|
||||
mi = idx.copy()
|
||||
mi2 = idx.copy()
|
||||
assert mi.identical(mi2)
|
||||
|
||||
mi = mi.set_names(["new1", "new2"])
|
||||
assert mi.equals(mi2)
|
||||
assert not mi.identical(mi2)
|
||||
|
||||
mi2 = mi2.set_names(["new1", "new2"])
|
||||
assert mi.identical(mi2)
|
||||
|
||||
mi4 = Index(mi.tolist(), tupleize_cols=False)
|
||||
assert not mi.identical(mi4)
|
||||
assert mi.equals(mi4)
|
||||
|
||||
|
||||
def test_equals_operator(idx):
|
||||
# GH9785
|
||||
assert (idx == idx).all()
|
||||
|
||||
|
||||
def test_equals_missing_values():
    """A one-row slice never equals the bare tuple stored at that position."""
    # make sure take is not using -1
    mi = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])

    slice_vs_nat_row = mi[0:1].equals(mi[0])
    assert not slice_vs_nat_row

    slice_vs_ts_row = mi[1:2].equals(mi[1])
    assert not slice_vs_ts_row
|
||||
|
||||
|
||||
def test_equals_missing_values_differently_sorted():
    """Row order matters to equals() even when rows contain NaN (GH#38439)."""
    rows = [(81.0, np.nan), (np.nan, np.nan)]
    reference = MultiIndex.from_tuples(rows)

    swapped = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not reference.equals(swapped)

    same_order = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert reference.equals(same_order)
|
||||
|
||||
|
||||
def test_is_():
    """is_() survives views and in-place renames, not set_names/set_levels."""
    base = MultiIndex.from_tuples(zip(range(10), range(10)))
    assert base.is_(base)
    assert base.is_(base.view())
    assert base.is_(base.view().view().view().view())

    viewed = base.view()
    # names are metadata, they don't change id
    viewed.names = ["A", "B"]
    assert viewed.is_(base)
    assert base.is_(viewed)

    assert not base.is_(base.set_names(["C", "D"]))

    # levels are inherent properties, they change identity
    fresh_levels = [list(range(10)), list(range(10))]
    relevelled = viewed.set_levels(fresh_levels)
    assert not relevelled.is_(viewed)
    # shouldn't change
    assert viewed.is_(base)

    # GH 17464 - Remove duplicate MultiIndex levels
    relevelled_again = relevelled.view().set_levels(
        [list(range(10)), list(range(10))]
    )
    assert not relevelled_again.is_(relevelled)

    # even re-setting the existing levels yields a new identity
    same_levels = base.view()
    same_levels = same_levels.set_levels(same_levels.levels)
    assert not same_levels.is_(base)
|
||||
|
||||
|
||||
def test_is_all_dates(idx):
|
||||
assert not idx._is_all_dates
|
||||
|
||||
|
||||
def test_is_numeric(idx):
|
||||
# MultiIndex is never numeric
|
||||
assert not is_any_real_numeric_dtype(idx)
|
||||
|
||||
|
||||
def test_multiindex_compare():
    """Comparison operators work element-wise on a single-level MultiIndex."""
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1
    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    equal_to_self = Series(midx == midx)
    tm.assert_series_equal(equal_to_self, Series([True, True]))

    # Greater than comparison: MultiIndex object vs self
    greater_than_self = Series(midx > midx)
    tm.assert_series_equal(greater_than_self, Series([False, False]))
|
||||
|
||||
|
||||
def test_equals_ea_int_regular_int():
    """An Int64-backed level and a plain int level are not equal (GH#46026)."""
    masked = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
    plain = MultiIndex.from_arrays([[1, 2], [3, 4]])

    # the comparison is checked in both directions
    assert not masked.equals(plain)
    assert not plain.equals(masked)
|
@ -0,0 +1,249 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_format(idx):
|
||||
msg = "MultiIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
idx.format()
|
||||
idx[:0].format()
|
||||
|
||||
|
||||
def test_format_integer_names():
|
||||
index = MultiIndex(
|
||||
levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
|
||||
)
|
||||
msg = "MultiIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
index.format(names=True)
|
||||
|
||||
|
||||
def test_format_sparse_config(idx):
|
||||
# GH1538
|
||||
msg = "MultiIndex.format is deprecated"
|
||||
with pd.option_context("display.multi_sparse", False):
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = idx.format()
|
||||
assert result[1] == "foo two"
|
||||
|
||||
|
||||
def test_format_sparse_display():
|
||||
index = MultiIndex(
|
||||
levels=[[0, 1], [0, 1], [0, 1], [0]],
|
||||
codes=[
|
||||
[0, 0, 0, 1, 1, 1],
|
||||
[0, 0, 1, 0, 0, 1],
|
||||
[0, 1, 0, 0, 1, 0],
|
||||
[0, 0, 0, 0, 0, 0],
|
||||
],
|
||||
)
|
||||
msg = "MultiIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = index.format()
|
||||
assert result[3] == "1 0 0 0"
|
||||
|
||||
|
||||
def test_repr_with_unicode_data():
    """repr() of an index holding a non-ASCII label contains no backslash."""
    with pd.option_context("display.encoding", "UTF-8"):
        columns = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        frame = pd.DataFrame(columns)
        index = frame.set_index(["a", "b"]).index
        rendered = repr(index)
        assert "\\" not in rendered  # we don't want unicode-escaped
|
||||
|
||||
|
||||
def test_repr_roundtrip_raises():
    """Evaluating the repr of a MultiIndex raises TypeError."""
    mi = MultiIndex.from_product(
        [list("ab"), range(3)], names=["first", "second"]
    )
    rendered = repr(mi)
    expected_error = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=expected_error):
        # eval is applied only to the repr produced just above
        eval(rendered)
|
||||
|
||||
|
||||
def test_unicode_string_with_unicode():
    """str() of an index holding a non-ASCII label does not raise."""
    columns = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    frame = pd.DataFrame(columns)
    index = frame.set_index(["a", "b"]).index
    str(index)
|
||||
|
||||
|
||||
def test_repr_max_seq_item_setting(idx):
|
||||
# GH10182
|
||||
idx = idx.repeat(50)
|
||||
with pd.option_context("display.max_seq_items", None):
|
||||
repr(idx)
|
||||
assert "..." not in str(idx)
|
||||
|
||||
|
||||
class TestRepr:
|
||||
def test_unicode_repr_issues(self):
|
||||
levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
|
||||
codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
|
||||
index = MultiIndex(levels=levels, codes=codes)
|
||||
|
||||
repr(index.levels)
|
||||
repr(index.get_level_values(1))
|
||||
|
||||
def test_repr_max_seq_items_equal_to_n(self, idx):
|
||||
# display.max_seq_items == n
|
||||
with pd.option_context("display.max_seq_items", 6):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
('bar', 'one'),
|
||||
('baz', 'two'),
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
def test_repr(self, idx):
|
||||
result = idx[:1].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
('bar', 'one'),
|
||||
('baz', 'two'),
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'])"""
|
||||
assert result == expected
|
||||
|
||||
with pd.option_context("display.max_seq_items", 5):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('foo', 'one'),
|
||||
('foo', 'two'),
|
||||
...
|
||||
('qux', 'one'),
|
||||
('qux', 'two')],
|
||||
names=['first', 'second'], length=6)"""
|
||||
assert result == expected
|
||||
|
||||
# display.max_seq_items == 1
|
||||
with pd.option_context("display.max_seq_items", 1):
|
||||
result = idx.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([...
|
||||
('qux', 'two')],
|
||||
names=['first', ...], length=6)"""
|
||||
assert result == expected
|
||||
|
||||
def test_rjust(self):
|
||||
n = 1000
|
||||
ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
|
||||
dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
|
||||
mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"])
|
||||
result = mi[:1].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
|
||||
names=['a', 'b', 'dti'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi[::500].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
|
||||
( 'a', 9, '2000-01-01 00:08:20'),
|
||||
('abc', 10, '2000-01-01 00:16:40'),
|
||||
('abc', 10, '2000-01-01 00:25:00')],
|
||||
names=['a', 'b', 'dti'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00'),
|
||||
( 'a', 9, '2000-01-01 00:00:01'),
|
||||
( 'a', 9, '2000-01-01 00:00:02'),
|
||||
( 'a', 9, '2000-01-01 00:00:03'),
|
||||
( 'a', 9, '2000-01-01 00:00:04'),
|
||||
( 'a', 9, '2000-01-01 00:00:05'),
|
||||
( 'a', 9, '2000-01-01 00:00:06'),
|
||||
( 'a', 9, '2000-01-01 00:00:07'),
|
||||
( 'a', 9, '2000-01-01 00:00:08'),
|
||||
( 'a', 9, '2000-01-01 00:00:09'),
|
||||
...
|
||||
('abc', 10, '2000-01-01 00:33:10'),
|
||||
('abc', 10, '2000-01-01 00:33:11'),
|
||||
('abc', 10, '2000-01-01 00:33:12'),
|
||||
('abc', 10, '2000-01-01 00:33:13'),
|
||||
('abc', 10, '2000-01-01 00:33:14'),
|
||||
('abc', 10, '2000-01-01 00:33:15'),
|
||||
('abc', 10, '2000-01-01 00:33:16'),
|
||||
('abc', 10, '2000-01-01 00:33:17'),
|
||||
('abc', 10, '2000-01-01 00:33:18'),
|
||||
('abc', 10, '2000-01-01 00:33:19')],
|
||||
names=['a', 'b', 'dti'], length=2000)"""
|
||||
assert result == expected
|
||||
|
||||
def test_tuple_width(self):
|
||||
n = 1000
|
||||
ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
|
||||
dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
|
||||
levels = [ci, ci.codes + 9, dti, dti, dti]
|
||||
names = ["a", "b", "dti_1", "dti_2", "dti_3"]
|
||||
mi = MultiIndex.from_arrays(levels, names=names)
|
||||
result = mi[:1].__repr__()
|
||||
expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" # noqa: E501
|
||||
assert result == expected
|
||||
|
||||
result = mi[:10].__repr__()
|
||||
expected = """\
|
||||
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
|
||||
('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
|
||||
('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
|
||||
('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
|
||||
('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
|
||||
('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
|
||||
('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
|
||||
('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
|
||||
('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
|
||||
('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
|
||||
assert result == expected
|
||||
|
||||
result = mi.__repr__()
|
||||
expected = """\
|
||||
MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
|
||||
( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
|
||||
...
|
||||
('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
|
||||
('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
|
||||
('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
|
||||
('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
|
||||
('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
|
||||
('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
|
||||
('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
|
||||
('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
|
||||
('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
|
||||
('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
|
||||
names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
|
||||
assert result == expected
|
||||
|
||||
def test_multiindex_long_element(self):
|
||||
# Non-regression test towards GH#52960
|
||||
data = MultiIndex.from_tuples([("c" * 62,)])
|
||||
|
||||
expected = (
|
||||
"MultiIndex([('cccccccccccccccccccccccccccccccccccccccc"
|
||||
"cccccccccccccccccccccc',)],\n )"
|
||||
)
|
||||
assert str(data) == expected
|
@ -0,0 +1,124 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGetLevelValues:
    """Tests for the scalar type returned by ``get_level_values``."""

    def test_get_level_values_box_datetime64(self):
        """Elements of a datetime level come back as Timestamp objects."""
        index = MultiIndex(
            levels=[date_range("1/1/2000", periods=4), [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
        )

        first_value = index.get_level_values(0)[0]
        assert isinstance(first_value, Timestamp)
|
||||
|
||||
|
||||
def test_get_level_values(idx):
|
||||
result = idx.get_level_values(0)
|
||||
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.name == "first"
|
||||
|
||||
result = idx.get_level_values("first")
|
||||
expected = idx.get_level_values(0)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# GH 10460
|
||||
index = MultiIndex(
|
||||
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
|
||||
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
|
||||
)
|
||||
|
||||
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
|
||||
tm.assert_index_equal(index.get_level_values(0), exp)
|
||||
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
|
||||
tm.assert_index_equal(index.get_level_values(1), exp)
|
||||
|
||||
|
||||
def test_get_level_values_all_na():
    """An all-NaN level comes back as a float64 Index (GH#17924)."""
    # GH#17924 when level entirely consists of nan
    index = MultiIndex.from_arrays(
        [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    )

    all_nan = Index([np.nan, np.nan, np.nan], dtype=np.float64)
    tm.assert_index_equal(index.get_level_values(0), all_nan)

    mixed = Index(["a", np.nan, 1], dtype=object)
    tm.assert_index_equal(index.get_level_values(1), mixed)
|
||||
|
||||
|
||||
def test_get_level_values_int_with_na():
    """Integer levels containing NaN match an Index of the same values."""
    # GH#17924
    outer = ["a", "b", "b"]
    for inner in ([1, np.nan, 2], [np.nan, np.nan, 2]):
        index = MultiIndex.from_arrays([outer, inner])
        tm.assert_index_equal(index.get_level_values(1), Index(inner))
|
||||
|
||||
|
||||
def test_get_level_values_na():
    """get_level_values with NaN, NaT and empty levels."""
    with_nan = MultiIndex.from_arrays(
        [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    )
    tm.assert_index_equal(
        with_nan.get_level_values(0), Index([np.nan, np.nan, np.nan])
    )
    tm.assert_index_equal(with_nan.get_level_values(1), Index(["a", np.nan, 1]))

    # datetime level holding NaT
    with_nat = MultiIndex.from_arrays(
        [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    )
    tm.assert_index_equal(
        with_nat.get_level_values(1), pd.DatetimeIndex([0, 1, pd.NaT])
    )

    # empty levels
    empty = MultiIndex.from_arrays([[], []])
    tm.assert_index_equal(empty.get_level_values(0), Index([], dtype=object))
|
||||
|
||||
|
||||
def test_get_level_values_when_periods():
    """Levels rebuilt from a PeriodIndex level stay monotonic increasing."""
    # GH33131. See also discussion in GH32669.
    # This test can probably be removed when PeriodIndex._engine is removed.
    from pandas import (
        Period,
        PeriodIndex,
    )

    quarters = PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")
    original = MultiIndex.from_arrays([quarters])

    level_values = [
        original._get_level_values(level) for level in range(original.nlevels)
    ]
    rebuilt = MultiIndex.from_arrays(level_values)
    assert all(level.is_monotonic_increasing for level in rebuilt.levels)
|
||||
|
||||
|
||||
def test_values_loses_freq_of_underlying_index():
|
||||
# GH#49054
|
||||
idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BME"))
|
||||
expected = idx.copy(deep=True)
|
||||
idx2 = Index([1, 2, 3])
|
||||
midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]])
|
||||
midx.values
|
||||
assert idx.freq is not None
|
||||
tm.assert_index_equal(idx, expected)
|
@ -0,0 +1,384 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY311
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_matching(actual, expected, check_dtype=False):
    """Assert that two sequences of array-likes match element by element.

    Both sequences must have the same length; each pair is converted with
    ``np.asarray`` and compared with ``tm.assert_numpy_array_equal``, passing
    ``check_dtype`` through.
    """
    # avoid specifying internal representation
    # as much as possible
    assert len(actual) == len(expected)
    for left, right in zip(actual, expected):
        tm.assert_numpy_array_equal(
            np.asarray(left), np.asarray(right), check_dtype=check_dtype
        )
|
||||
|
||||
|
||||
def test_get_level_number_integer(idx):
    """Integer level names take part in ``_get_level_number`` lookups."""
    idx.names = [1, 0]

    # (level name, expected position) after the renaming above
    for level_name, position in ((1, 0), (0, 1)):
        assert idx._get_level_number(level_name) == position

    with pytest.raises(
        IndexError, match="Too many levels: Index has only 2 levels, not 3"
    ):
        idx._get_level_number(2)

    with pytest.raises(KeyError, match="Level fourth not found"):
        idx._get_level_number("fourth")
|
||||
|
||||
|
||||
def test_get_dtypes(using_infer_string):
    """``MultiIndex.dtypes`` is a Series keyed by level name."""
    # Test MultiIndex.dtypes (GH37062)
    level_values = [
        [1, 2, 3],
        ["a", "b", "c"],
        pd.date_range("20200101", periods=2, tz="UTC"),
    ]
    mixed = MultiIndex.from_product(level_values, names=["int", "string", "dt"])

    if using_infer_string:
        string_dtype = pd.StringDtype(na_value=np.nan)
    else:
        string_dtype = "object"

    dtype_by_level = {
        "int": np.dtype("int64"),
        "string": string_dtype,
        "dt": DatetimeTZDtype(tz="utc"),
    }
    tm.assert_series_equal(pd.Series(dtype_by_level), mixed.dtypes)
|
||||
|
||||
|
||||
def test_get_dtypes_no_level_name(using_infer_string):
    """Unnamed levels appear as ``level_<n>`` keys in ``MultiIndex.dtypes``."""
    # Test MultiIndex.dtypes (GH38580)
    ints = [1, 2, 3]
    letters = ["a", "b", "c"]
    stamps = pd.date_range("20200101", periods=2, tz="UTC")
    unnamed = MultiIndex.from_product([ints, letters, stamps])

    if using_infer_string:
        string_dtype = pd.StringDtype(na_value=np.nan)
    else:
        string_dtype = "object"

    dtype_by_level = {
        "level_0": np.dtype("int64"),
        "level_1": string_dtype,
        "level_2": DatetimeTZDtype(tz="utc"),
    }
    tm.assert_series_equal(pd.Series(dtype_by_level), unnamed.dtypes)
|
||||
|
||||
|
||||
def test_get_dtypes_duplicate_level_names(using_infer_string):
    """``MultiIndex.dtypes`` keeps one entry per level even if names repeat."""
    # Test MultiIndex.dtypes with non-unique level names (GH45174)
    level_values = [
        [1, 2, 3],
        ["a", "b", "c"],
        pd.date_range("20200101", periods=2, tz="UTC"),
    ]
    midx = MultiIndex.from_product(level_values, names=["A", "A", "A"])
    result = midx.dtypes

    if using_infer_string:
        string_dtype = pd.StringDtype(na_value=np.nan)
    else:
        string_dtype = "object"

    expected = pd.Series(
        [np.dtype("int64"), string_dtype, DatetimeTZDtype(tz="utc")],
        index=["A", "A", "A"],
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
    """Out-of-range level numbers raise IndexError in both directions."""
    index = multiindex_dataframe_random_data.index

    # (level number, message fragment expected in the IndexError)
    out_of_bounds = (
        (2, "Too many levels"),
        (-3, "not a valid level number"),
    )
    for level_number, fragment in out_of_bounds:
        with pytest.raises(IndexError, match=fragment):
            index._get_level_number(level_number)
|
||||
|
||||
|
||||
def test_set_name_methods(idx):
    """Check ``set_names`` for all levels, a single level and a level list."""
    # so long as these are synonyms, we don't need to test set_names
    assert idx.rename == idx.set_names

    original = ["first", "second"]
    suffixed = [f"{name}SUFFIX" for name in original]
    resuffixed = [f"{name}SUFFIX2" for name in suffixed]

    # Non-inplace call: a renamed index comes back, ``idx`` keeps its names.
    renamed = idx.set_names(suffixed)
    assert idx.names == original
    assert renamed.names == suffixed

    with pytest.raises(
        ValueError,
        match="Length of names must match number of levels in MultiIndex",
    ):
        renamed.set_names(suffixed + suffixed)

    # Inplace call: returns None and changes the receiver.
    outcome = renamed.set_names(resuffixed, inplace=True)
    assert outcome is None
    assert renamed.names == resuffixed

    # set names for specific level (GH7792)
    renamed = idx.set_names(suffixed[0], level=0)
    assert idx.names == original
    assert renamed.names == [suffixed[0], original[1]]

    outcome = renamed.set_names(resuffixed[0], level=0, inplace=True)
    assert outcome is None
    assert renamed.names == [resuffixed[0], original[1]]

    # set names for multiple levels
    renamed = idx.set_names(suffixed, level=[0, 1])
    assert idx.names == original
    assert renamed.names == suffixed

    outcome = renamed.set_names(resuffixed, level=[0, 1], inplace=True)
    assert outcome is None
    assert renamed.names == resuffixed
|
||||
|
||||
|
||||
def test_set_levels_codes_directly(idx):
    """Assigning to ``levels`` or ``codes`` directly raises AttributeError."""
    suffixed_levels = [[label + "a" for label in level] for level in idx.levels]

    outer_codes, inner_codes = idx.codes
    shifted_codes = [
        [(code + 1) % 3 for code in outer_codes],
        [(code + 1) % 1 for code in inner_codes],
    ]

    with pytest.raises(AttributeError, match="Can't set attribute"):
        idx.levels = suffixed_levels

    # The expected message for ``codes`` is selected by the PY311 flag.
    if PY311:
        codes_msg = "property 'codes' of 'MultiIndex' object has no setter"
    else:
        codes_msg = "can't set attribute"
    with pytest.raises(AttributeError, match=codes_msg):
        idx.codes = shifted_codes
|
||||
|
||||
|
||||
def test_set_levels(idx):
    """``set_levels`` returns a changed copy and never mutates ``idx``."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    old_levels = idx.levels
    suffixed = [[label + "a" for label in level] for level in old_levels]

    # all levels at once [w/o mutation]
    changed = idx.set_levels(suffixed)
    assert_matching(changed.levels, suffixed)
    assert_matching(idx.levels, old_levels)

    # one specific level at a time [w/o mutation]
    changed = idx.set_levels(suffixed[0], level=0)
    assert_matching(changed.levels, [suffixed[0], old_levels[1]])
    assert_matching(idx.levels, old_levels)

    changed = idx.set_levels(suffixed[1], level=1)
    assert_matching(changed.levels, [old_levels[0], suffixed[1]])
    assert_matching(idx.levels, old_levels)

    # several levels selected by a list [w/o mutation]
    changed = idx.set_levels(suffixed, level=[0, 1])
    assert_matching(changed.levels, suffixed)
    assert_matching(idx.levels, old_levels)

    # illegal level changing should not change levels
    # GH 13754
    snapshot = idx.copy()

    with pytest.raises(ValueError, match="^On"):
        idx.set_levels(["c"], level=0)
    assert_matching(idx.levels, snapshot.levels, check_dtype=True)

    with pytest.raises(ValueError, match="^On"):
        idx.set_codes([0, 1, 2, 3, 4, 5], level=0)
    assert_matching(idx.codes, snapshot.codes, check_dtype=True)

    with pytest.raises(TypeError, match="^Levels"):
        idx.set_levels("c", level=0)
    assert_matching(idx.levels, snapshot.levels, check_dtype=True)

    with pytest.raises(TypeError, match="^Codes"):
        idx.set_codes(1, level=0)
    assert_matching(idx.codes, snapshot.codes, check_dtype=True)
|
||||
|
||||
|
||||
def test_set_codes(idx):
    """``set_codes`` returns a changed copy and never mutates ``idx``."""
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    old_codes = idx.codes
    outer_codes, inner_codes = old_codes
    shifted = [
        [(code + 1) % 3 for code in outer_codes],
        [(code + 1) % 1 for code in inner_codes],
    ]

    # all codes at once w/o mutation
    changed = idx.set_codes(shifted)
    assert_matching(changed.codes, shifted)
    assert_matching(idx.codes, old_codes)

    # one specific level at a time w/o mutation
    changed = idx.set_codes(shifted[0], level=0)
    assert_matching(changed.codes, [shifted[0], old_codes[1]])
    assert_matching(idx.codes, old_codes)

    changed = idx.set_codes(shifted[1], level=1)
    assert_matching(changed.codes, [old_codes[0], shifted[1]])
    assert_matching(idx.codes, old_codes)

    # several levels selected by a list w/o mutation
    changed = idx.set_codes(shifted, level=[0, 1])
    assert_matching(changed.codes, shifted)
    assert_matching(idx.codes, old_codes)

    # label changing for levels of different magnitude of categories
    wide = MultiIndex.from_tuples([(0, i) for i in range(130)])
    reversed_codes = range(129, -1, -1)
    expected = MultiIndex.from_tuples([(0, i) for i in reversed_codes])

    # [w/o mutation]
    assert wide.set_codes(codes=reversed_codes, level=1).equals(expected)
|
||||
|
||||
|
||||
def test_set_levels_codes_names_bad_input(idx):
    """Malformed input to ``set_levels``/``set_codes``/``set_names`` raises."""
    levels, codes, names = idx.levels, idx.codes, idx.names

    # Each entry: (exception type, message fragment, call that must raise).
    # The calls are executed in the order listed.
    bad_calls = [
        # wrong number of entries
        (ValueError, "Length of levels", lambda: idx.set_levels([levels[0]])),
        (ValueError, "Length of codes", lambda: idx.set_codes([codes[0]])),
        (ValueError, "Length of names", lambda: idx.set_names([names[0]])),
        # shouldn't scalar data error, instead should demand list-like
        (TypeError, "list of lists-like", lambda: idx.set_levels(levels[0])),
        (TypeError, "list of lists-like", lambda: idx.set_codes(codes[0])),
        (TypeError, "list-like", lambda: idx.set_names(names[0])),
        # should have equal lengths
        (
            TypeError,
            "list of lists-like",
            lambda: idx.set_levels(levels[0], level=[0, 1]),
        ),
        (TypeError, "list-like", lambda: idx.set_levels(levels, level=0)),
        (
            TypeError,
            "list of lists-like",
            lambda: idx.set_codes(codes[0], level=[0, 1]),
        ),
        (TypeError, "list-like", lambda: idx.set_codes(codes, level=0)),
        (
            ValueError,
            "Length of names",
            lambda: idx.set_names(names[0], level=[0, 1]),
        ),
        (TypeError, "Names must be a", lambda: idx.set_names(names, level=0)),
    ]

    for exc_type, fragment, call in bad_calls:
        with pytest.raises(exc_type, match=fragment):
            call()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    """``set_names`` works on a single-level MultiIndex, in place or not."""
    # GH 21149
    single_level = MultiIndex.from_product([[0, 1]])
    returned = single_level.set_names("first", level=0, inplace=inplace)

    # For the in-place variant, inspect the receiver rather than the return.
    renamed = single_level if inplace else returned

    tm.assert_index_equal(
        renamed,
        MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"]),
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    """A CategoricalIndex can be installed as a level via ``set_levels``."""
    # GH13854
    base = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
    categories = CategoricalIndex(list("bac"), ordered=ordered)

    swapped = base.set_levels(categories, level=0)
    tm.assert_index_equal(
        swapped,
        MultiIndex(levels=[categories, [0, 1, 2, 3]], codes=base.codes),
    )

    # The level values pick up the categories and orderedness of the new level.
    expected_values = CategoricalIndex(
        list("bacb"),
        categories=categories.categories,
        ordered=categories.ordered,
    )
    tm.assert_index_equal(swapped.get_level_values(0), expected_values)
|
||||
|
||||
|
||||
def test_set_value_keeps_names():
    """Setting a value under a new key via ``.at`` keeps the index names."""
    # motivating example from #3742
    name_labels = ["hans"] * 3 + ["grethe"] * 3
    number_labels = ["1", "2", "3"] * 2
    row_index = MultiIndex.from_arrays(
        [name_labels, number_labels], names=["Name", "Number"]
    )
    values = np.random.default_rng(2).standard_normal((6, 4))
    frame = pd.DataFrame(
        values, columns=["one", "two", "three", "four"], index=row_index
    ).sort_index()

    expected_names = ("Name", "Number")
    assert frame._is_copy is None
    assert frame.index.names == expected_names

    # ("grethe", "4") is not among the labels built above.
    frame.at[("grethe", "4"), "one"] = 99.34

    assert frame._is_copy is None
    assert frame.index.names == expected_names
|
||||
|
||||
|
||||
def test_set_levels_with_iterable():
    """``set_levels`` accepts a lazy iterable (here a ``map`` object)."""
    # GH23273
    level_names = ["size", "color"]
    colors = ["black"] * 3
    index = MultiIndex.from_arrays([[1, 2, 3], colors], names=level_names)

    lazy_sizes = map(int, ["3", "2", "1"])
    result = index.set_levels(lazy_sizes, level="size")

    expected = MultiIndex.from_arrays([[3, 2, 1], colors], names=level_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_set_empty_level():
    """An empty level can be replaced with an empty DatetimeIndex."""
    # GH#48636
    empty = MultiIndex.from_arrays([[]], names=["A"])
    result = empty.set_levels(pd.DatetimeIndex([]), level=0)

    tm.assert_index_equal(
        result,
        MultiIndex.from_arrays([pd.DatetimeIndex([])], names=["A"]),
    )
|
||||
|
||||
|
||||
def test_set_levels_pos_args_removal():
    """Passing ``level`` positionally to ``set_levels``/``set_codes`` raises."""
    # https://github.com/pandas-dev/pandas/issues/41485
    pairs = [(1, "one"), (3, "one")]
    idx = MultiIndex.from_tuples(pairs, names=["foo", "bar"])
    fragment = "positional arguments"

    with pytest.raises(TypeError, match=fragment):
        idx.set_levels(["a", "b", "c"], 0)

    with pytest.raises(TypeError, match=fragment):
        idx.set_codes([[0, 1], [1, 0]], 0)
|
||||
|
||||
|
||||
def test_set_levels_categorical_keep_dtype():
|
||||
# GH#52125
|
||||
midx = MultiIndex.from_arrays([[5, 6]])
|
||||
result = midx.set_levels(levels=pd.Categorical([1, 2]), level=0)
|
||||
expected = MultiIndex.from_arrays([pd.Categorical([1, 2])])
|
||||
tm.assert_index_equal(result, expected)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,289 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_labels_dtypes():
    """Codes use int8/int16/int32 depending on the level size tested here."""
    # GH 8456
    pair_index = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    for level_codes in (pair_index.codes[0], pair_index.codes[1]):
        assert level_codes.dtype == "int8"

    # (size of the second level, expected dtype of its codes)
    for level_size, code_dtype in ((40, "int8"), (400, "int16"), (40000, "int32")):
        product = MultiIndex.from_product([["a"], range(level_size)])
        assert product.codes[1].dtype == code_dtype

    product = MultiIndex.from_product([["a"], range(1000)])
    for level_codes in (product.codes[0], product.codes[1]):
        assert (level_codes >= 0).all()
|
||||
|
||||
|
||||
def test_values_boxed():
    """``MultiIndex.values`` equals the object array of the source tuples."""
    stamped_rows = [
        (1, pd.Timestamp("2000-01-01")),
        (2, pd.NaT),
        (3, pd.Timestamp("2000-01-03")),
        (1, pd.Timestamp("2000-01-04")),
        (2, pd.Timestamp("2000-01-02")),
        (3, pd.Timestamp("2000-01-03")),
    ]
    midx = MultiIndex.from_tuples(stamped_rows)

    tm.assert_numpy_array_equal(
        midx.values, construct_1d_object_array_from_listlike(stamped_rows)
    )

    # Check that code branches for boxed values produce identical results
    values_then_slice = midx.values[:4]
    slice_then_values = midx[:4].values
    tm.assert_numpy_array_equal(values_then_slice, slice_then_values)
|
||||
|
||||
|
||||
def test_values_multiindex_datetimeindex():
    """Naive and tz-aware datetime levels round-trip through ``.values``."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(ints)
    aware = pd.DatetimeIndex(ints, tz="US/Central")
    idx = MultiIndex.from_arrays([naive, aware])

    # Second case: n_lev > n_lab (sliced index)
    cases = (
        (idx, naive, aware),
        (idx[:2], naive[:2], aware[:2]),
    )
    for midx, expected_outer, expected_inner in cases:
        rows = midx.values

        outer = pd.DatetimeIndex([row[0] for row in rows])
        tm.assert_index_equal(outer, expected_outer)

        inner = pd.DatetimeIndex([row[1] for row in rows])
        tm.assert_index_equal(inner, expected_inner)
|
||||
|
||||
|
||||
def test_values_multiindex_periodindex():
    """Integer and Period levels round-trip through ``.values``."""
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")
    idx = MultiIndex.from_arrays([ints, pidx])

    # Second case: n_lev > n_lab (sliced index)
    cases = (
        (idx, ints, pidx),
        (idx[:2], ints[:2], pidx[:2]),
    )
    for midx, expected_ints, expected_periods in cases:
        rows = midx.values

        outer = Index([row[0] for row in rows])
        tm.assert_index_equal(outer, Index(expected_ints, dtype=np.int64))

        inner = pd.PeriodIndex([row[1] for row in rows])
        tm.assert_index_equal(inner, expected_periods)
|
||||
|
||||
|
||||
def test_consistency():
    """Build a large index, then one with repeated entries that is not unique."""
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))

    # the fact that it works means it's consistent
    MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[np.arange(70000), np.repeat(range(10), 7000)],
    )

    # inconsistent: the code pair (1, 1) occurs twice below
    repeated = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[
            np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]),
            np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]),
        ],
    )

    assert repeated.is_unique is False
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_hash_collisions(monkeypatch):
    """Non-smoke test that we don't get hash collisions."""
    with monkeypatch.context() as patcher:
        # ``_SIZE_CUTOFF`` is lowered to 50 for the duration of the block.
        patcher.setattr(libindex, "_SIZE_CUTOFF", 50)

        grid = MultiIndex.from_product(
            [np.arange(8), np.arange(8)], names=["one", "two"]
        )
        n_entries = len(grid)

        positions = grid.get_indexer(grid.values)
        tm.assert_numpy_array_equal(positions, np.arange(n_entries, dtype="intp"))

        # Probe the first two and the last two entries individually.
        for position in (0, 1, n_entries - 2, n_entries - 1):
            assert grid.get_loc(grid[position]) == position
|
||||
|
||||
|
||||
def test_dims():
    """Placeholder: this test performs no checks."""
|
||||
|
||||
|
||||
def test_take_invalid_kwargs():
    """``MultiIndex.take`` rejects unknown and unsupported keyword arguments."""
    level_values = [
        ["A", "B"],
        [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")],
    ]
    idx = MultiIndex.from_product(level_values, names=["str", "dt"])
    indices = [1, 2]

    # Each entry: (exception type, message pattern, extra keyword arguments).
    rejected = (
        (TypeError, r"take\(\) got an unexpected keyword argument 'foo'", {"foo": 2}),
        (ValueError, "the 'out' parameter is not supported", {"out": indices}),
        (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
    )
    for exc_type, msg, extra in rejected:
        with pytest.raises(exc_type, match=msg):
            idx.take(indices, **extra)
|
||||
|
||||
|
||||
def test_isna_behavior(idx):
    """``pd.isna`` on a MultiIndex raises NotImplementedError."""
    # should not segfault GH5123
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    with pytest.raises(
        NotImplementedError, match="isna is not defined for MultiIndex"
    ):
        pd.isna(idx)
|
||||
|
||||
|
||||
def test_large_multiindex_error(monkeypatch):
    """Missing keys raise KeyError on both sides of the patched size cutoff."""
    # GH12527
    size_cutoff = 50
    with monkeypatch.context() as patcher:
        patcher.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)

        # First a frame just below the cutoff, then one just above it.
        for second_level_size in (size_cutoff - 1, size_cutoff + 1):
            frame = pd.DataFrame(
                1,
                index=MultiIndex.from_product([[1, 2], range(second_level_size)]),
                columns=["dest"],
            )
            with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
                frame.loc[(-1, 0), "dest"]
            with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
                frame.loc[(3, 0), "dest"]
|
||||
|
||||
|
||||
def test_mi_hashtable_populated_attribute_error(monkeypatch):
    """A missing Series attribute still raises a plain AttributeError."""
    # GH 18165
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
    fifty = range(50)
    frame = pd.DataFrame(
        {"a": fifty, "b": fifty}, index=MultiIndex.from_arrays([fifty, fifty])
    )

    with pytest.raises(
        AttributeError, match="'Series' object has no attribute 'foo'"
    ):
        frame["a"].foo()
|
||||
|
||||
|
||||
def test_can_hold_identifiers(idx):
    """``_can_hold_identifiers_and_holds_name`` is True for the first entry."""
    first_entry = idx[0]
    outcome = idx._can_hold_identifiers_and_holds_name(first_entry)
    assert outcome is True
|
||||
|
||||
|
||||
def test_metadata_immutable(idx):
    """Item assignment on levels, codes and names is rejected."""
    levels, codes = idx.levels, idx.codes
    frozen_msg = re.compile("does not support mutable operations")

    # shouldn't be able to set at either the top level or base level
    with pytest.raises(TypeError, match=frozen_msg):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=frozen_msg):
        levels[0][0] = levels[0][0]

    # ditto for codes; the element-level write raises ValueError instead
    with pytest.raises(TypeError, match=frozen_msg):
        codes[0] = codes[0]
    with pytest.raises(ValueError, match="assignment destination is read-only"):
        codes[0][0] = codes[0][0]

    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=frozen_msg):
        names[0] = names[0]
|
||||
|
||||
|
||||
def test_level_setting_resets_attributes():
    """Monotonicity is re-evaluated on the index returned by ``set_levels``."""
    before = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert before.is_monotonic_increasing

    after = before.set_levels([["A", "B"], [1, 3, 2]])
    # if this fails, probably didn't reset the cache correctly.
    assert not after.is_monotonic_increasing
|
||||
|
||||
|
||||
def test_rangeindex_fallback_coercion_bug():
|
||||
# GH 12893
|
||||
df1 = pd.DataFrame(np.arange(100).reshape((10, 10)))
|
||||
df2 = pd.DataFrame(np.arange(100).reshape((10, 10)))
|
||||
df = pd.concat(
|
||||
{"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)},
|
||||
axis=1,
|
||||
)
|
||||
df.index.names = ["fizz", "buzz"]
|
||||
|
||||
expected = pd.DataFrame(
|
||||
{"df2": np.arange(100), "df1": np.arange(100)},
|
||||
index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
|
||||
)
|
||||
tm.assert_frame_equal(df, expected, check_like=True)
|
||||
|
||||
result = df.index.get_level_values("fizz")
|
||||
expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = df.index.get_level_values("buzz")
|
||||
expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz")
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_memory_usage(idx):
    """Compare ``memory_usage`` before and after a ``get_loc`` lookup."""
    usage_before = idx.memory_usage()

    if not len(idx):
        # we report 0 for no-length
        assert usage_before == 0
        return

    # A lookup is performed between the two shallow measurements
    # (presumably this builds the lookup engine - not visible from here).
    idx.get_loc(idx[0])
    usage_after = idx.memory_usage()
    deep_usage_after = idx.memory_usage(deep=True)

    # RangeIndex, IntervalIndex
    # don't have engines
    if not isinstance(idx, (RangeIndex, IntervalIndex)):
        assert usage_after > usage_before

    if idx.inferred_type == "object":
        assert deep_usage_after > usage_after
|
||||
|
||||
|
||||
def test_nlevels(idx):
    """The ``idx`` fixture reports two levels."""
    level_count = idx.nlevels
    assert level_count == 2
|
@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_isin_nan():
    """A tuple containing NaN matches the NaN row, for either NaN spelling."""
    idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])

    for nan_value in (np.nan, float("nan")):
        mask = idx.isin([("bar", nan_value)])
        tm.assert_numpy_array_equal(mask, np.array([False, True]))
|
||||
|
||||
|
||||
def test_isin_missing(nulls_fixture):
    """Rows without missing values do not match a tuple holding a null."""
    # GH48905
    with_null = MultiIndex.from_tuples([(1, nulls_fixture)])
    without_null = MultiIndex.from_tuples([(1, 1), (1, 2)])

    mask = without_null.isin(with_null)
    tm.assert_numpy_array_equal(mask, np.array([False, False]))
|
||||
|
||||
|
||||
def test_isin():
    """``isin`` matches whole tuples and returns a bool array when empty."""
    candidates = [("foo", 2), ("bar", 3), ("quux", 4)]

    populated = MultiIndex.from_arrays(
        [["qux", "baz", "foo", "bar"], np.arange(4)]
    )
    tm.assert_numpy_array_equal(
        populated.isin(candidates), np.array([False, False, True, True])
    )

    # empty, return dtype bool
    empty_mask = MultiIndex.from_arrays([[], []]).isin(candidates)
    assert len(empty_mask) == 0
    assert empty_mask.dtype == np.bool_
|
||||
|
||||
|
||||
def test_isin_level_kwarg():
    """``isin(level=...)`` accepts positions and names and rejects bad levels."""
    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])

    vals_0 = ["foo", "bar", "quux"]
    vals_1 = [2, 3, 10]
    expected = np.array([False, False, True, True])

    # Positive and negative positions address the same levels.
    for values, level in ((vals_0, 0), (vals_0, -2), (vals_1, 1), (vals_1, -1)):
        tm.assert_numpy_array_equal(expected, idx.isin(values, level=level))

    # Out-of-range positions.
    with pytest.raises(
        IndexError, match="Too many levels: Index has only 2 levels, not 6"
    ):
        idx.isin(vals_0, level=5)
    with pytest.raises(
        IndexError,
        match="Too many levels: Index has only 2 levels, -5 is not a valid level number",
    ):
        idx.isin(vals_0, level=-5)

    # Floats and unknown strings are reported as missing level names.
    unknown_levels = (
        (vals_0, 1.0, r"'Level 1\.0 not found'"),
        (vals_1, -1.0, r"'Level -1\.0 not found'"),
        (vals_1, "A", "'Level A not found'"),
    )
    for values, level, pattern in unknown_levels:
        with pytest.raises(KeyError, match=pattern):
            idx.isin(values, level=level)

    # Once the levels are named, the names become valid.
    idx.names = ["A", "B"]
    for values, level in ((vals_0, "A"), (vals_1, "B")):
        tm.assert_numpy_array_equal(expected, idx.isin(values, level=level))

    with pytest.raises(KeyError, match="'Level C not found'"):
        idx.isin(vals_1, level="C")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "labels,expected,level",
    [
        ([("b", np.nan)], np.array([False, False, True]), None),
        ([np.nan, "a"], np.array([True, True, False]), 0),
        (["d", np.nan], np.array([False, True, True]), 1),
    ],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
    """``isin`` matches NaN labels, both as tuples and per level."""
    # GH 19132
    first_level = [np.nan, "a", "b"]
    second_level = ["c", "d", np.nan]
    midx = MultiIndex.from_arrays([first_level, second_level])

    mask = midx.isin(labels, level=level)
    tm.assert_numpy_array_equal(mask, expected)
|
||||
|
||||
|
||||
def test_isin_empty():
    """``isin`` with an empty list gives an all-False mask."""
    # GH#51599
    midx = MultiIndex.from_arrays([[1, 2], [3, 4]])
    mask = midx.isin([])
    tm.assert_numpy_array_equal(mask, np.array([False, False]))
|
||||
|
||||
|
||||
def test_isin_generator():
    """``isin`` accepts a generator of tuples."""
    # GH#52568
    midx = MultiIndex.from_tuples([(1, 2)])
    lazy_values = (pair for pair in [(1, 2)])
    mask = midx.isin(lazy_values)
    tm.assert_numpy_array_equal(mask, np.array([True]))
|
@ -0,0 +1,268 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Interval,
|
||||
MultiIndex,
|
||||
Series,
|
||||
StringDtype,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])]
)
def test_join_level(idx, other, join_type):
    """Join a flat Index against the ``second`` level of ``idx``."""
    join_kwargs = {"how": join_type, "level": "second", "return_indexers": True}
    join_index, lidx, ridx = other.join(idx, **join_kwargs)

    exp_level = other.join(idx.levels[1], how=join_type)
    assert join_index.levels[0].equals(idx.levels[0])
    assert join_index.levels[1].equals(exp_level)

    # pare down levels
    keep = np.array([entry[1] in exp_level for entry in idx], dtype=bool)
    exp_values = idx.values[keep]
    tm.assert_numpy_array_equal(join_index.values, exp_values)

    if join_type not in ("outer", "inner"):
        return

    # Flipped operands: the indexers come back in the opposite order.
    join_index2, ridx2, lidx2 = idx.join(other, **join_kwargs)

    assert join_index.equals(join_index2)
    tm.assert_numpy_array_equal(lidx, lidx2)
    tm.assert_numpy_array_equal(ridx, ridx2)
    tm.assert_numpy_array_equal(join_index2.values, exp_values)
|
||||
|
||||
|
||||
def test_join_level_corner_case(idx):
    """Level joins: Index-to-MultiIndex works, MultiIndex-to-itself raises."""
    # some corner cases
    flat = Index(["three", "one", "two"])
    joined = flat.join(idx, level="second")
    assert isinstance(joined, MultiIndex)

    with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"):
        idx.join(idx, level=1)
|
||||
|
||||
|
||||
def test_join_self(idx, join_type):
    """Joining ``idx`` with itself returns ``idx`` (sorted for outer joins)."""
    joined = idx.join(idx, how=join_type)
    expected = idx.sort_values() if join_type == "outer" else idx
    tm.assert_index_equal(joined, expected)
|
||||
|
||||
|
||||
def test_join_multi():
    """Join a MultiIndex with a flat Index that shares the level name ``b``."""
    # GH 10665
    midx = MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"])
    flat = Index([1, 2, 5], name="b")

    # inner
    inner_index = MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"])
    inner_multi_pos = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    inner_flat_pos = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)

    joined, multi_pos, flat_pos = midx.join(flat, how="inner", return_indexers=True)
    tm.assert_index_equal(joined, inner_index)
    tm.assert_numpy_array_equal(multi_pos, inner_multi_pos)
    tm.assert_numpy_array_equal(flat_pos, inner_flat_pos)

    # flip: the indexers are returned in the opposite order
    joined, flat_pos, multi_pos = flat.join(midx, how="inner", return_indexers=True)
    tm.assert_index_equal(joined, inner_index)
    tm.assert_numpy_array_equal(multi_pos, inner_multi_pos)
    tm.assert_numpy_array_equal(flat_pos, inner_flat_pos)

    # keep MultiIndex
    kept_flat_pos = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
    )

    joined, multi_pos, flat_pos = midx.join(flat, how="left", return_indexers=True)
    tm.assert_index_equal(joined, midx)
    assert multi_pos is None
    tm.assert_numpy_array_equal(flat_pos, kept_flat_pos)

    # flip
    joined, flat_pos, multi_pos = flat.join(midx, how="right", return_indexers=True)
    tm.assert_index_equal(joined, midx)
    assert multi_pos is None
    tm.assert_numpy_array_equal(flat_pos, kept_flat_pos)
|
||||
|
||||
|
||||
def test_join_multi_wrong_order():
    """Join two MultiIndexes whose level names are in swapped order."""
    # GH 25760
    # GH 28956
    level_values = [[1, 2], [3, 4]]
    left = MultiIndex.from_product(level_values, names=["a", "b"])
    right = MultiIndex.from_product(level_values, names=["b", "a"])

    joined, left_pos, right_pos = left.join(right, return_indexers=True)

    tm.assert_index_equal(left, joined)
    assert left_pos is None
    tm.assert_numpy_array_equal(
        right_pos, np.array([-1, -1, -1, -1], dtype=np.intp)
    )
|
||||
|
||||
|
||||
def test_join_multi_return_indexers():
    """With ``return_indexers=False`` the join returns only the index."""
    # GH 34074
    three_level = MultiIndex.from_product(
        [[1, 2], [3, 4], [5, 6]], names=["a", "b", "c"]
    )
    two_level = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])

    joined = three_level.join(two_level, return_indexers=False)
    tm.assert_index_equal(joined, three_level)
|
||||
|
||||
|
||||
def test_join_overlapping_interval_level():
    """Outer-join two MultiIndexes holding the same (num, Interval) pairs.

    The second index lists the pairs in a different order; the expected
    result has them in the order of the first index.  GH 44096.
    """

    def build(bounds):
        # ``bounds`` holds (num, left, right) triples, one per index entry.
        return MultiIndex.from_tuples(
            [(num, Interval(left, right)) for num, left, right in bounds],
            names=["num", "interval"],
        )

    idx_1 = build(
        [
            (1, 0.0, 1.0),
            (1, 1.0, 2.0),
            (1, 2.0, 5.0),
            (2, 0.0, 1.0),
            (2, 1.0, 3.0),  # interval limit is here at 3.0, not at 2.0
            (2, 3.0, 5.0),
        ]
    )
    idx_2 = build(
        [
            (1, 2.0, 5.0),
            (1, 0.0, 1.0),
            (1, 1.0, 2.0),
            (2, 3.0, 5.0),
            (2, 0.0, 1.0),
            (2, 1.0, 3.0),
        ]
    )
    expected = build(
        [
            (1, 0.0, 1.0),
            (1, 1.0, 2.0),
            (1, 2.0, 5.0),
            (2, 0.0, 1.0),
            (2, 1.0, 3.0),
            (2, 3.0, 5.0),
        ]
    )

    result = idx_1.join(idx_2, how="outer")

    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_join_midx_ea():
    """Inner-join Int64-backed MultiIndexes that share only the level name "a".

    GH#49277.
    """

    def int64(values):
        # Every level in this test uses the nullable "Int64" dtype.
        return Series(values, dtype="Int64")

    left = MultiIndex.from_arrays(
        [int64([1, 1, 3]), int64([1, 2, 3])],
        names=["a", "b"],
    )
    right = MultiIndex.from_arrays([int64([1]), int64([3])], names=["a", "c"])

    joined = left.join(right, how="inner")

    expected = MultiIndex.from_arrays(
        [int64([1, 1]), int64([1, 2]), int64([3, 3])],
        names=["a", "b", "c"],
    )
    tm.assert_index_equal(joined, expected)
|
||||
|
||||
|
||||
def test_join_midx_string():
    """Inner-join StringDtype-backed MultiIndexes sharing only the level "a".

    GH#49277.
    """

    def strings(values):
        # Every level in this test is backed by a fresh ``StringDtype()``.
        return Series(values, dtype=StringDtype())

    left = MultiIndex.from_arrays(
        [strings(["a", "a", "c"]), strings(["a", "b", "c"])],
        names=["a", "b"],
    )
    right = MultiIndex.from_arrays(
        [strings(["a"]), strings(["c"])],
        names=["a", "c"],
    )

    joined = left.join(right, how="inner")

    expected = MultiIndex.from_arrays(
        [strings(["a", "a"]), strings(["a", "b"]), strings(["c", "c"])],
        names=["a", "b", "c"],
    )
    tm.assert_index_equal(joined, expected)
|
||||
|
||||
|
||||
def test_join_multi_with_nan():
    """``DataFrame.join`` when the right frame's MultiIndex contains a NaN.

    GH29252.
    """
    level_names = ["id1", "id2"]
    left_index = MultiIndex.from_product([["A"], [1.0, 2.0]], names=level_names)
    right_index = MultiIndex.from_product([["A"], [np.nan, 2.0]], names=level_names)

    left = DataFrame(data={"col1": [1.1, 1.2]}, index=left_index)
    right = DataFrame(data={"col2": [2.1, 2.2]}, index=right_index)

    joined = left.join(right)

    # Only the ("A", 2.0) row has a partner on the right; the other "col2"
    # value is expected to be NaN.
    expected = DataFrame(
        data={"col1": [1.1, 1.2], "col2": [np.nan, 2.2]},
        index=MultiIndex.from_product([["A"], [1.0, 2.0]], names=level_names),
    )
    tm.assert_frame_equal(joined, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [0, 5])
def test_join_dtypes(any_numeric_ea_dtype, val):
    """Outer-join MultiIndexes whose first level uses a numeric EA dtype.

    GH#49830.
    """
    dtype = any_numeric_ea_dtype
    left = MultiIndex.from_arrays([Series([1, 2], dtype=dtype), [3, 4]])
    right = MultiIndex.from_arrays([Series([1, val, val], dtype=dtype), [3, 4, 4]])

    joined = left.join(right, how="outer")

    # The expected index is built unsorted and then sorted for comparison.
    unsorted_expected = MultiIndex.from_arrays(
        [Series([val, val, 1, 2], dtype=dtype), [4, 4, 3, 4]]
    )
    tm.assert_index_equal(joined, unsorted_expected.sort_values())
|
||||
|
||||
|
||||
def test_join_dtypes_all_nan(any_numeric_ea_dtype):
|
||||
# GH#49830
|
||||
midx = MultiIndex.from_arrays(
|
||||
[Series([1, 2], dtype=any_numeric_ea_dtype), [np.nan, np.nan]]
|
||||
)
|
||||
midx2 = MultiIndex.from_arrays(
|
||||
[Series([1, 0, 0], dtype=any_numeric_ea_dtype), [np.nan, np.nan, np.nan]]
|
||||
)
|
||||
result = midx.join(midx2, how="outer")
|
||||
expected = MultiIndex.from_arrays(
|
||||
[
|
||||
Series([0, 0, 1, 2], dtype=any_numeric_ea_dtype),
|
||||
[np.nan, np.nan, np.nan, np.nan],
|
||||
]
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_join_index_levels():
|
||||
# GH#53093
|
||||
midx = midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")])
|
||||
midx2 = MultiIndex.from_tuples([("a", "2019-01-31")])
|
||||
result = midx.join(midx2, how="outer")
|
||||
expected = MultiIndex.from_tuples(
|
||||
[("a", "2019-01-31"), ("a", "2019-02-01"), ("a", "2019-02-01")]
|
||||
)
|
||||
tm.assert_index_equal(result.levels[1], expected.levels[1])
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,46 @@
|
||||
from pandas import MultiIndex
|
||||
|
||||
|
||||
class TestIsLexsorted:
    """Checks of ``MultiIndex._is_lexsorted`` for different code layouts."""

    def test_is_lexsorted(self):
        levels = [[0, 1], [0, 1, 2]]

        # Codes sorted on both levels.
        fully_sorted = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
        )
        assert fully_sorted._is_lexsorted()

        # First level sorted, second level out of order within the last group.
        second_unsorted = MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
        )
        assert not second_unsorted._is_lexsorted()

        # First level itself out of order.
        first_unsorted = MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
        )
        assert not first_unsorted._is_lexsorted()
        assert first_unsorted._lexsort_depth == 0
|
||||
|
||||
|
||||
class TestLexsortDepth:
    """Checks of ``MultiIndex._lexsort_depth`` with an explicit ``sortorder``."""

    def test_lexsort_depth(self):
        # Test that _lexsort_depth returns the sortorder that was passed
        # to the MultiIndex constructor.
        # GH#28518
        levels = [[0, 1], [0, 1, 2]]

        # (codes, sortorder) pairs; the reported depth must equal sortorder.
        cases = [
            ([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], 2),
            ([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], 1),
            ([[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], 0),
        ]
        for codes, sortorder in cases:
            index = MultiIndex(levels=levels, codes=codes, sortorder=sortorder)
            assert index._lexsort_depth == sortorder
|
@ -0,0 +1,111 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_fillna(idx):
    """``fillna`` on a MultiIndex raises ``NotImplementedError`` (GH 11343)."""
    expected_message = "isna is not defined for MultiIndex"
    fill_value = idx[0]
    with pytest.raises(NotImplementedError, match=expected_message):
        idx.fillna(fill_value)
|
||||
|
||||
|
||||
def test_dropna():
    """``MultiIndex.dropna`` with ``how="any"`` (the default) and ``how="all"``."""
    # GH 6194
    idx = MultiIndex.from_arrays(
        [
            [1, np.nan, 3, np.nan, 5],
            [1, 2, np.nan, np.nan, 5],
            ["a", "b", "c", np.nan, "e"],
        ]
    )

    # Default and explicit how="any" give the same result.
    any_expected = MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
    for kwargs in ({}, {"how": "any"}):
        tm.assert_index_equal(idx.dropna(**kwargs), any_expected)

    all_expected = MultiIndex.from_arrays(
        [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
    )
    tm.assert_index_equal(idx.dropna(how="all"), all_expected)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        idx.dropna(how="xxx")

    # GH26408
    # test if missing values are dropped for multiindex constructed
    # from codes and values
    null_levels = [np.nan, None, pd.NaT, "128", 2]
    idx = MultiIndex(
        levels=[null_levels, [np.nan, None, pd.NaT, "128", 2]],
        codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
    )
    any_expected = MultiIndex.from_arrays([["128", 2], ["128", 2]])
    for kwargs in ({}, {"how": "any"}):
        tm.assert_index_equal(idx.dropna(**kwargs), any_expected)

    all_expected = MultiIndex.from_arrays(
        [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
    )
    tm.assert_index_equal(idx.dropna(how="all"), all_expected)
|
||||
|
||||
|
||||
def test_nulls(idx):
    """Smoke test: ``isna`` on a MultiIndex raises ``NotImplementedError``."""
    # this is really a smoke test for the methods
    # as these are adequately tested for function elsewhere
    expected_message = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=expected_message):
        idx.isna()
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="isna is not defined for MultiIndex")
def test_hasnans_isnans(idx):
    """Check ``_isnan`` / ``hasnans`` without and with a NaN entry.

    Marked xfail because isna is not defined for MultiIndex.
    """
    # GH 11343, added tests for hasnans / isnans
    without_nan = idx.copy()

    # case where the index does not include NaN
    no_nan_mask = np.zeros(len(without_nan), dtype=bool)
    tm.assert_numpy_array_equal(without_nan._isnan, no_nan_mask)
    assert without_nan.hasnans is False

    # Replace the second entry with NaN and rebuild an index of the same type.
    values = idx.copy().values
    values[1] = np.nan
    with_nan = type(idx)(values)

    one_nan_mask = np.zeros(len(with_nan), dtype=bool)
    one_nan_mask[1] = True
    tm.assert_numpy_array_equal(with_nan._isnan, one_nan_mask)
    assert with_nan.hasnans is True
|
||||
|
||||
|
||||
def test_nan_stays_float():
    """Missing values in an empty level stay NA through join and arithmetic.

    GH 7031.
    """
    empty_second_level = MultiIndex(
        levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1]
    )
    single_entry = MultiIndex(levels=[["C"], ["D"]], codes=[[0], [0]], names=[0, 1])

    joined = empty_second_level.join(single_entry, how="outer")
    assert pd.isna(empty_second_level.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(joined.get_level_values(1)[:-1]).all()

    left_frame = pd.DataFrame([[1, 2]], index=empty_second_level)
    right_frame = pd.DataFrame([[3, 4]], index=single_entry)
    difference = left_frame - right_frame
    assert pd.isna(left_frame.index.get_level_values(1)).all()
    # the following failed in 0.14.1
    assert pd.isna(difference.index.get_level_values(1)[:-1]).all()
|
||||
|
||||
|
||||
def test_tuples_have_na():
    """A code of -1 shows up as an NA element inside the tuple at that position."""
    first_codes = [1, 1, 1, 1, -1, 0, 0, 0]
    second_codes = [0, 1, 2, 3, 0, 1, 2, 3]
    index = MultiIndex(
        levels=[[1, 0], [0, 1, 2, 3]],
        codes=[first_codes, second_codes],
    )

    # Position 4 has code -1 on the first level.
    missing_pos = 4
    assert pd.isna(index[missing_pos][0])
    assert pd.isna(index.values[missing_pos][0])
|
@ -0,0 +1,188 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
def test_is_monotonic_increasing_lexsorted(lexsorted_two_level_string_multiindex):
|
||||
# string ordering
|
||||
mi = lexsorted_two_level_string_multiindex
|
||||
assert mi.is_monotonic_increasing is False
|
||||
assert Index(mi.values).is_monotonic_increasing is False
|
||||
assert mi._is_strictly_monotonic_increasing is False
|
||||
assert Index(mi.values)._is_strictly_monotonic_increasing is False
|
||||
|
||||
|
||||
def test_is_monotonic_increasing():
    """Check ``is_monotonic_increasing`` and ``_is_strictly_monotonic_increasing``.

    Each case is checked on the MultiIndex itself and, except for the
    mixed-level case, on the flat ``Index`` built from its tuple values.
    """
    i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"])
    assert i.is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    # Strict check on the flat Index, mirroring the cases below.
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # first level decreasing
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # second level decreasing
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # NaN in the first level
    i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]])
    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False
    assert Index(i.values).is_monotonic_increasing is False
    assert Index(i.values)._is_strictly_monotonic_increasing is False

    # string levels listed in ascending order
    i = MultiIndex(
        levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [1, 2, 3, 4],
            [
                "gb00b03mlx29",
                "lu0197800237",
                "nl0000289783",
                "nl0000289965",
                "nl0000301109",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_increasing is False
    assert i._is_strictly_monotonic_increasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_increasing is True
    assert Index(i.values).is_monotonic_increasing is True
    assert i._is_strictly_monotonic_increasing is True
    assert Index(i.values)._is_strictly_monotonic_increasing is True
|
||||
|
||||
|
||||
def test_is_monotonic_decreasing():
    """Check ``is_monotonic_decreasing`` and ``_is_strictly_monotonic_decreasing``.

    Each case is checked on the MultiIndex itself and, except for the
    mixed-level case, on the flat ``Index`` built from its tuple values.
    """
    i = MultiIndex.from_product(
        [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    # Strict check on the flat Index, mirroring the cases below.
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # first level increasing
    i = MultiIndex.from_product(
        [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # second level increasing
    i = MultiIndex.from_product(
        [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"]
    )
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # NaN in the first level
    i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]])
    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string ordering
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is False
    assert Index(i.values).is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False
    assert Index(i.values)._is_strictly_monotonic_decreasing is False

    # string levels listed in descending order
    i = MultiIndex(
        levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]],
        codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True

    # mixed levels, hits the TypeError
    i = MultiIndex(
        levels=[
            [4, 3, 2, 1],
            [
                "nl0000301109",
                "nl0000289965",
                "nl0000289783",
                "lu0197800237",
                "gb00b03mlx29",
            ],
        ],
        codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
        names=["household_id", "asset_id"],
    )

    assert i.is_monotonic_decreasing is False
    assert i._is_strictly_monotonic_decreasing is False

    # empty
    i = MultiIndex.from_arrays([[], []])
    assert i.is_monotonic_decreasing is True
    assert Index(i.values).is_monotonic_decreasing is True
    assert i._is_strictly_monotonic_decreasing is True
    assert Index(i.values)._is_strictly_monotonic_decreasing is True
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_increasing():
    """A repeated entry keeps the index monotonic but not strictly monotonic."""
    # The first two entries are identical: ("bar", "mom") twice.
    level_codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    with_duplicate = MultiIndex(
        levels=[["bar", "baz"], ["mom", "next"]], codes=level_codes
    )
    assert with_duplicate.is_monotonic_increasing is True
    assert with_duplicate._is_strictly_monotonic_increasing is False
|
||||
|
||||
|
||||
def test_is_strictly_monotonic_decreasing():
    """A repeated entry keeps the index monotonic but not strictly monotonic."""
    # The first two entries are identical: ("baz", "next") twice.
    level_codes = [[0, 0, 1, 1], [0, 0, 0, 1]]
    with_duplicate = MultiIndex(
        levels=[["baz", "bar"], ["next", "mom"]], codes=level_codes
    )
    assert with_duplicate.is_monotonic_decreasing is True
    assert with_duplicate._is_strictly_monotonic_decreasing is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"])
@pytest.mark.parametrize(
    "values",
    [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]],
)
def test_is_monotonic_with_nans(values, attr):
    """Neither monotonic property is True when the index contains a NaN.

    The NaN is tried at the first, middle and last position.  GH: 37220.
    """
    single_level = MultiIndex.from_tuples(values, names=["test"])
    is_monotonic = getattr(single_level, attr)
    assert is_monotonic is False
|
@ -0,0 +1,201 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import MultiIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def check_level_names(index, names):
    """Assert that the names of ``index.levels`` equal ``names``, in order."""
    actual_names = [level.name for level in index.levels]
    assert actual_names == list(names)
|
||||
|
||||
|
||||
def test_slice_keep_name():
    """Slicing a MultiIndex keeps its level names."""
    entries = [("a", "b"), (1, 2), ("c", "d")]
    full = MultiIndex.from_tuples(entries, names=["x", "y"])
    tail = full[1:]
    assert tail.names == full.names
|
||||
|
||||
|
||||
def test_index_name_retained():
    """Adding a row through ``.loc`` keeps the index name set by ``set_index``.

    GH9857.
    """
    frame = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]})
    frame = frame.set_index("z")
    # Enlarge the frame with a new index label.
    frame.loc[10] = [9, 10]

    expected = pd.DataFrame(
        {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]}
    ).set_index("z")
    tm.assert_frame_equal(frame, expected)
|
||||
|
||||
|
||||
def test_changing_names(idx):
    """Renaming an index affects that object only, not its views or copies."""
    original = ["first", "second"]
    renamed = ["firsta", "seconda"]
    assert [level.name for level in idx.levels] == original

    # Take the derived objects before renaming.
    view = idx.view()
    copy = idx.copy()
    shallow_copy = idx._view()

    # renaming the index is reflected in the level names of that object
    idx.names = [name + "a" for name in idx.names]
    check_level_names(idx, renamed)

    # and not on copies
    for derived in (view, copy, shallow_copy):
        check_level_names(derived, original)

    # and copies shouldn't change original
    shallow_copy.names = [name + "c" for name in shallow_copy.names]
    check_level_names(idx, renamed)
|
||||
|
||||
|
||||
def test_take_preserve_name(idx):
|
||||
taken = idx.take([3, 0, 1])
|
||||
assert taken.names == idx.names
|
||||
|
||||
|
||||
def test_copy_names():
    """``MultiIndex.copy`` honors ``names`` / ``name`` and validates them."""
    # Check that adding a "names" parameter to the copy is honored
    # GH14302
    original_names = ["MyName1", "MyName2"]
    replacement_names = ["NewName1", "NewName2"]
    multi_idx = MultiIndex.from_tuples([(1, 2), (3, 4)], names=original_names)

    plain_copy = multi_idx.copy()
    assert multi_idx.equals(plain_copy)
    assert multi_idx.names == original_names
    assert plain_copy.names == original_names

    # Both keywords rename the copy and leave the source untouched.
    for keyword in ("names", "name"):
        renamed = multi_idx.copy(**{keyword: replacement_names})
        assert multi_idx.equals(renamed)
        assert multi_idx.names == original_names
        assert renamed.names == replacement_names

    # gh-35592
    with pytest.raises(ValueError, match="Length of new names must be 2, got 1"):
        multi_idx.copy(names=["mario"])

    with pytest.raises(TypeError, match="MultiIndex.name must be a hashable type"):
        multi_idx.copy(names=[["mario"], ["luigi"]])
|
||||
|
||||
|
||||
def test_names(idx):
    """Reading names, rejecting wrong-length names, and renaming in place."""
    # names are assigned in setup
    assert idx.names == ["first", "second"]
    assert [level.name for level in idx.levels] == idx.names

    # setting bad names on existing
    index = idx
    for bad_names in (list(index.names) + ["third"], []):
        with pytest.raises(ValueError, match="^Length of names"):
            index.names = bad_names

    # initializing with bad names (should always be equivalent)
    major_axis, minor_axis = idx.levels
    major_codes, minor_codes = idx.codes
    for bad_names in (["first"], ["first", "second", "third"]):
        with pytest.raises(ValueError, match="^Length of names"):
            MultiIndex(
                levels=[major_axis, minor_axis],
                codes=[major_codes, minor_codes],
                names=bad_names,
            )

    # names assigned on the index are reflected in the names of its levels
    index.names = ["a", "b"]
    assert [level.name for level in index.levels] == ["a", "b"]
|
||||
|
||||
|
||||
def test_duplicate_level_names_access_raises(idx):
    """Looking up a level by a name shared by several levels raises.

    GH19029.
    """
    duplicated = "foo"
    idx.names = [duplicated, duplicated]
    with pytest.raises(ValueError, match="name foo occurs multiple times"):
        idx._get_level_number(duplicated)
|
||||
|
||||
|
||||
def test_get_names_from_levels():
    """Each level of a named MultiIndex carries the matching name."""
    expected_names = ["a", "b"]
    idx = MultiIndex.from_product([["a"], [1, 2]], names=expected_names)

    first_level, second_level = idx.levels[0], idx.levels[1]
    assert first_level.name == "a"
    assert second_level.name == "b"
|
||||
|
||||
|
||||
def test_setting_names_from_levels_raises():
    """Assigning ``.name`` on a level of a MultiIndex raises ``RuntimeError``."""
    idx = MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"])

    for level_number in (0, 1):
        with pytest.raises(RuntimeError, match="set_names"):
            idx.levels[level_number].name = "foo"

    # The same holds when the level is reused as the index of a Series.
    series = pd.Series(1, index=idx.levels[0])
    with pytest.raises(RuntimeError, match="set_names"):
        series.index.name = "bar"

    # The class-level flags themselves stay False.
    assert pd.Index._no_setting_name is False
    assert pd.RangeIndex._no_setting_name is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y", "z"]),
        ({"x": "z", "y": "x"}, ["z", "x", "z"]),
        ({"y": "z"}, ["x", "z", "x"]),
        ({}, ["x", "y", "x"]),
        ({"z": "a"}, ["x", "y", "x"]),
        ({"y": "z", "a": "b"}, ["x", "z", "x"]),
    ],
)
def test_name_mi_with_dict_like_duplicate_names(func, rename_dict, exp_names):
    """Dict-like renaming when two levels share the name "x" (GH#20421)."""
    arrays = [[1, 2], [3, 4], [5, 6]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y", "x"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays([[1, 2], [3, 4], [5, 6]], names=exp_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["rename", "set_names"])
@pytest.mark.parametrize(
    "rename_dict, exp_names",
    [
        ({"x": "z"}, ["z", "y"]),
        ({"x": "z", "y": "x"}, ["z", "x"]),
        ({"a": "z"}, ["x", "y"]),
        ({}, ["x", "y"]),
    ],
)
def test_name_mi_with_dict_like(func, rename_dict, exp_names):
    """Dict-like renaming of MultiIndex level names (GH#20421)."""
    arrays = [[1, 2], [3, 4]]
    mi = MultiIndex.from_arrays(arrays, names=["x", "y"])

    renamer = getattr(mi, func)
    result = renamer(rename_dict)

    expected = MultiIndex.from_arrays([[1, 2], [3, 4]], names=exp_names)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_index_name_with_dict_like_raising():
    """A flat ``Index`` rejects dict-like ``names`` in ``set_names`` (GH#20421)."""
    flat_index = pd.Index([1, 2])
    with pytest.raises(
        TypeError, match="Can only pass dict-like as `names` for MultiIndex."
    ):
        flat_index.set_names({"x": "z"})
|
||||
|
||||
|
||||
def test_multiindex_name_and_level_raising():
    """Passing ``level`` together with dict-like ``names`` raises (GH#20421)."""
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"])
    expected_message = "Can not pass level for dictlike `names`."
    with pytest.raises(TypeError, match=expected_message):
        mi.set_names(names={"x": "z"}, level={"x": "z"})
|
@ -0,0 +1,148 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Frame with one column ``c1`` holding 0..14.

    The index is the product of the 12-hourly timestamps from 2016-01-01
    through 2016-01-03 (five of them) with the labels "a", "b", "c", e.g.::

        2016-01-01 00:00:00  a   0
                             b   1
                             c   2
        2016-01-01 12:00:00  a   3
        ...
        2016-01-03 00:00:00  c  14
    """
    timestamps = date_range("2016-01-01", "2016-01-03", freq="12h")
    labels = ["a", "b", "c"]
    index = MultiIndex.from_product([timestamps, labels])
    return DataFrame({"c1": range(15)}, index=index)
|
||||
|
||||
|
||||
def test_partial_string_matching_single_index(df):
|
||||
# partial string matching on a single index
|
||||
for df_swap in [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]:
|
||||
df_swap = df_swap.sort_index()
|
||||
just_a = df_swap.loc["a"]
|
||||
result = just_a.loc["2016-01-01"]
|
||||
expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
|
||||
expected.index = expected.index.droplevel(1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_get_loc_partial_timestamp_multiindex(df):
|
||||
mi = df.index
|
||||
key = ("2016-01-01", "a")
|
||||
loc = mi.get_loc(key)
|
||||
|
||||
expected = np.zeros(len(mi), dtype=bool)
|
||||
expected[[0, 3]] = True
|
||||
tm.assert_numpy_array_equal(loc, expected)
|
||||
|
||||
key2 = ("2016-01-02", "a")
|
||||
loc2 = mi.get_loc(key2)
|
||||
expected2 = np.zeros(len(mi), dtype=bool)
|
||||
expected2[[6, 9]] = True
|
||||
tm.assert_numpy_array_equal(loc2, expected2)
|
||||
|
||||
key3 = ("2016-01", "a")
|
||||
loc3 = mi.get_loc(key3)
|
||||
expected3 = np.zeros(len(mi), dtype=bool)
|
||||
expected3[mi.get_level_values(1).get_loc("a")] = True
|
||||
tm.assert_numpy_array_equal(loc3, expected3)
|
||||
|
||||
key4 = ("2016", "a")
|
||||
loc4 = mi.get_loc(key4)
|
||||
expected4 = expected3
|
||||
tm.assert_numpy_array_equal(loc4, expected4)
|
||||
|
||||
# non-monotonic
|
||||
taker = np.arange(len(mi), dtype=np.intp)
|
||||
taker[::2] = taker[::-2]
|
||||
mi2 = mi.take(taker)
|
||||
loc5 = mi2.get_loc(key)
|
||||
expected5 = np.zeros(len(mi2), dtype=bool)
|
||||
expected5[[3, 14]] = True
|
||||
tm.assert_numpy_array_equal(loc5, expected5)
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex(df):
    """Partial date-string selection via ``.loc`` on a datetime MultiIndex level.

    Covers slices through ``IndexSlice``, plain string keys, tuple keys, and
    the failure when the datetime level is not the first level.  GH10331.
    """
    df_swap = df.swaplevel(0, 1).sort_index()
    SLC = IndexSlice

    # indexing with IndexSlice
    result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
    expected = df
    tm.assert_frame_equal(result, expected)

    # match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
    expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
    tm.assert_frame_equal(result, expected)

    # partial string match on year only
    result = df.loc["2016"]
    expected = df
    tm.assert_frame_equal(result, expected)

    # partial string match on date
    result = df.loc["2016-01-01"]
    expected = df.iloc[0:6]
    tm.assert_frame_equal(result, expected)

    # partial string match on date and hour, from middle
    result = df.loc["2016-01-02 12"]
    # hourly resolution, same as index.levels[0], so we are _not_ slicing on
    # that level, so that level gets dropped
    expected = df.iloc[9:12].droplevel(0)
    tm.assert_frame_equal(result, expected)

    # partial string match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-02"], :]
    expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
    tm.assert_frame_equal(result, expected)

    # tuple selector with partial string match on date
    # "2016-01-01" has daily resolution, so _is_ a slice on the first level.
    result = df.loc[("2016-01-01", "a"), :]
    expected = df.iloc[[0, 3]].droplevel(1)
    tm.assert_frame_equal(result, expected)

    # Slicing date on first level should break (of course) bc the DTI is the
    # second level on df_swap
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df_swap.loc["2016-01-01"]
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex_str_key_raises(df):
    """``df[date_string]`` raises ``KeyError`` on a frame with a MultiIndex."""
    # Even though this syntax works on a single index, this is somewhat
    # ambiguous and we don't want to extend this behavior forward to work
    # in multi-indexes. This would amount to selecting a scalar from a
    # column.
    date_key = "2016-01-01"
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df[date_key]
|
||||
|
||||
|
||||
def test_partial_string_timestamp_multiindex_daily_resolution(df):
|
||||
# GH12685 (partial string with daily resolution or below)
|
||||
result = df.loc[IndexSlice["2013-03":"2013-03", :], :]
|
||||
expected = df.iloc[118:180]
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,10 @@
|
||||
import pytest
|
||||
|
||||
from pandas import MultiIndex
|
||||
|
||||
|
||||
def test_pickle_compat_construction():
    """Constructing a ``MultiIndex`` without arguments raises ``TypeError``."""
    # this is testing for pickle compat
    # need an object to create with
    expected_message = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=expected_message):
        MultiIndex()
|
@ -0,0 +1,174 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_reindex(idx):
|
||||
result, indexer = idx.reindex(list(idx[:4]))
|
||||
assert isinstance(result, MultiIndex)
|
||||
assert result.names == ["first", "second"]
|
||||
assert [level.name for level in result.levels] == ["first", "second"]
|
||||
|
||||
result, indexer = idx.reindex(list(idx))
|
||||
assert isinstance(result, MultiIndex)
|
||||
assert indexer is None
|
||||
assert result.names == ["first", "second"]
|
||||
assert [level.name for level in result.levels] == ["first", "second"]
|
||||
|
||||
|
||||
def test_reindex_level(idx):
    """``reindex(level=...)`` agrees with the corresponding level join."""
    index = Index(["one"])

    # MultiIndex reindexed to the flat index, and the other way round.
    target, indexer = idx.reindex(index, level="second")
    target2, indexer2 = index.reindex(idx, level="second")

    right_join = idx.join(index, level="second", how="right")
    left_join = idx.join(index, level="second", how="left")

    assert target.equals(right_join)
    tm.assert_numpy_array_equal(indexer, np.array([0, 2, 4]), check_dtype=False)

    assert target2.equals(left_join)
    tm.assert_numpy_array_equal(
        indexer2, np.array([0, -1, 0, -1, 0, -1]), check_dtype=False
    )

    # A fill method cannot be combined with ``level``.
    with pytest.raises(TypeError, match="Fill method not supported"):
        idx.reindex(idx, method="pad", level="second")
|
||||
|
||||
|
||||
def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
    """GH6552: list/ndarray targets must not change the names of the result."""
    source = idx.copy()
    template = source.copy()
    source.names = template.names = [None, None]

    other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])

    # list & ndarray cases
    targets = [
        [],
        np.array([]),
        template.tolist(),
        template.values,
        other_dtype.tolist(),
        other_dtype.values,
    ]

    def names_after_reindex(target):
        return source.reindex(target)[0].names

    for target in targets:
        assert names_after_reindex(target) == [None, None]

    source.names = ["foo", "bar"]
    for target in targets:
        assert names_after_reindex(target) == ["foo", "bar"]
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
    """GH7774: level-wise reindex onto an empty list keeps the names."""
    level_names = ["foo", "bar"]
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]], names=level_names)
    for level in (0, 1):
        reindexed, _ = mi.reindex([], level=level)
        assert reindexed.names == ["foo", "bar"]
|
||||
|
||||
|
||||
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(
    using_infer_string,
):
    """GH7774: level-wise reindex onto an empty list keeps each level's dtype."""
    mi = MultiIndex.from_product([[0, 1], ["a", "b"]])

    int_level = mi.reindex([], level=0)[0].levels[0]
    assert int_level.dtype.type == np.int64

    str_level = mi.reindex([], level=1)[0].levels[1]
    expected_type = str if using_infer_string else np.object_
    assert str_level.dtype.type == expected_type

    # case with EA levels
    cat = pd.Categorical(["foo", "bar"])
    dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
    ea_mi = MultiIndex.from_product([cat, dti])

    cat_level = ea_mi.reindex([], level=0)[0].levels[0]
    assert cat_level.dtype == cat.dtype

    dti_level = ea_mi.reindex([], level=1)[0].levels[1]
    assert dti_level.dtype == dti.dtype
|
||||
|
||||
|
||||
def test_reindex_base(idx):
    """``get_indexer`` against itself is the identity; a bad ``method`` raises."""
    positions = idx.get_indexer(idx)
    identity = np.arange(idx.size, dtype=np.intp)
    tm.assert_numpy_array_equal(identity, positions)

    bad_method_msg = "Invalid fill method"
    with pytest.raises(ValueError, match=bad_method_msg):
        idx.get_indexer(idx, method="invalid")
|
||||
|
||||
|
||||
def test_reindex_non_unique():
    """Reindexing a Series whose MultiIndex has duplicates raises ValueError."""
    duplicated = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
    ser = pd.Series(np.arange(4), index=duplicated)
    unique_target = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

    with pytest.raises(ValueError, match="cannot handle a non-unique multi-index!"):
        ser.reindex(unique_target)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """GH41170: level reindex onto a label that matches nothing gives an empty result."""
    mi = MultiIndex.from_arrays(values)
    new_labels = np.array(["b"])

    reindexed, taken = mi.reindex(new_labels, level=0)

    want_index = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    tm.assert_index_equal(reindexed, want_index)

    # only emptiness is pinned here; the dtype is taken from the actual result
    want_indexer = np.array([], dtype=taken.dtype)
    tm.assert_numpy_array_equal(taken, want_indexer)
|
||||
|
||||
|
||||
def test_reindex_not_all_tuples():
    """Reindex onto a flat Index that mixes tuples with a plain scalar label."""
    tuple_keys = [("i", "i"), ("i", "j"), ("j", "i")]
    mixed_keys = tuple_keys + ["j"]

    mi = MultiIndex.from_tuples(tuple_keys)
    target = Index(mixed_keys)

    reindexed, positions = mi.reindex(target)

    tm.assert_index_equal(reindexed, target)
    # the trailing scalar "j" is not found in ``mi`` and maps to -1
    tm.assert_numpy_array_equal(positions, np.array([0, 1, 2, -1], dtype=np.intp))
|
||||
|
||||
|
||||
def test_reindex_limit_arg_with_multiindex():
    """GH21247: ``limit`` without a fill method is rejected on a MultiIndex."""
    ser = pd.Series(
        [0.02, 0.01, 0.012],
        index=MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")]),
    )

    target_keys = [
        (3, "A"), (3, "B"),
        (4, "A"), (4, "B"), (4, "C"),
        (5, "B"), (5, "C"),
        (6, "B"), (6, "C"),
    ]  # fmt: skip
    target = MultiIndex.from_tuples(target_keys)

    msg = "limit argument only valid if doing pad, backfill or nearest reindexing"
    with pytest.raises(ValueError, match=msg):
        ser.reindex(target, fill_value=0, limit=1)
|
||||
|
||||
|
||||
def test_reindex_with_none_in_nested_multiindex():
    """GH42883: ``reindex_like`` works when level values are tuples holding None."""
    first_key = (("a", None), 1)
    second_key = (("b", None), 2)

    frame = pd.DataFrame([1, 2], index=MultiIndex.from_tuples([first_key, second_key]))
    # same rows, opposite order
    swapped = pd.DataFrame(
        [2, 1], index=MultiIndex.from_tuples([second_key, first_key])
    )

    tm.assert_frame_equal(frame.reindex_like(swapped), swapped)
|
@ -0,0 +1,224 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_insert(idx):
    """Check ``MultiIndex.insert`` and ``.loc`` setting with enlargement."""
    level_names = ["first", "second"]

    # key contained in all levels
    inserted = idx.insert(0, ("bar", "two"))
    assert inserted.equal_levels(idx)
    assert inserted[0] == ("bar", "two")

    # key not contained in all levels: each level gains one label at the end
    new_key = ("abc", "three")
    inserted = idx.insert(0, new_key)
    assert inserted.names == ["first", "second"]
    for pos, (lev_name, label) in enumerate(zip(level_names, new_key)):
        grown_level = Index(list(idx.levels[pos]) + [label], name=lev_name)
        tm.assert_index_equal(inserted.levels[pos], grown_level)
    assert inserted[0] == ("abc", "three")

    # key wrong length
    with pytest.raises(
        ValueError, match="Item must have length equal to number of levels"
    ):
        idx.insert(0, ("foo2",))

    # rows that exist up front, then (index key, value) pairs added via .loc
    seed_rows = [["a", "b", 0], ["b", "d", 1]]
    additions = [
        (("b", "x"), 2),
        (("b", "a"), -1),
        (("b", "b"), 3),
        (("a", "x"), 4),
        (("a", "w"), 5),
        (("a", "a"), 6),
    ]

    left = pd.DataFrame(seed_rows, columns=["1st", "2nd", "3rd"])
    left.set_index(["1st", "2nd"], inplace=True)
    ts = left["3rd"].copy(deep=True)

    for key, value in additions:
        left.loc[key, "3rd"] = value

    for key, value in additions:
        ts.loc[key] = value

    right = pd.DataFrame(
        seed_rows + [[*key, value] for key, value in additions],
        columns=["1st", "2nd", "3rd"],
    )
    right.set_index(["1st", "2nd"], inplace=True)
    # FIXME data types changes to float because
    #       of intermediate nan insertion;
    tm.assert_frame_equal(left, right, check_dtype=False)
    tm.assert_series_equal(ts, right["3rd"])
|
||||
|
||||
|
||||
def test_insert2():
    """GH9250: setting new MultiIndex keys through ``.loc`` appends the rows."""
    existing_keys = [("test1", i) for i in range(5)] + [
        ("test2", i) for i in range(6)
    ]
    appended_keys = [("test", 17), ("test", 18)]

    left = pd.Series(np.linspace(0, 10, 11), MultiIndex.from_tuples(existing_keys))
    left.loc[("test", 17)] = 11
    left.loc[("test", 18)] = 12

    right = pd.Series(
        np.linspace(0, 12, 13),
        MultiIndex.from_tuples(existing_keys + appended_keys),
    )

    tm.assert_series_equal(left, right)
|
||||
|
||||
|
||||
def test_append(idx):
    """Pieces of an index appended back together reproduce the original."""
    # two pieces
    rebuilt = idx[:3].append(idx[3:])
    assert rebuilt.equals(idx)

    # one piece plus a list of the remaining pieces
    head, *tail = [idx[:1], idx[1:3], idx[3:]]
    rebuilt = head.append(tail)
    assert rebuilt.equals(idx)

    # empty
    rebuilt = idx.append([])
    assert rebuilt.equals(idx)
|
||||
|
||||
|
||||
def test_append_index():
    """Append flat and multi-level indexes to each other in every combination."""
    floats = [1.1, 1.2, 1.3]
    idx1 = Index(floats)
    idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
    idx3 = Index(["A", "B", "C"])

    midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
    midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])

    # see gh-7112
    tz = pytz.timezone("Asia/Tokyo")
    stamps = [tz.localize(datetime(2011, 1, day)) for day in (1, 2, 3)]
    expected_tuples = list(zip(floats, stamps))

    # flat + two-level
    result = idx1.append(midx_lv2)
    expected = Index(floats + expected_tuples)
    tm.assert_index_equal(result, expected)

    # two-level + flat
    result = midx_lv2.append(idx1)
    expected = Index(expected_tuples + floats)
    tm.assert_index_equal(result, expected)

    # two-level + two-level
    result = midx_lv2.append(midx_lv2)
    expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
    tm.assert_index_equal(result, expected)

    # two-level + three-level is checked against the same expectation as above
    result = midx_lv2.append(midx_lv3)
    tm.assert_index_equal(result, expected)

    # three-level + two-level: an object Index holding tuples of mixed length
    result = midx_lv3.append(midx_lv2)
    three_level_tuples = list(zip(floats, stamps, ["A", "B", "C"]))
    expected = Index._simple_new(
        np.array(three_level_tuples + expected_tuples, dtype=object),
        None,
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)])
def test_append_names_match(name, exp):
    """GH#48288: after ``append`` a level name survives only if both sides agree."""
    left = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_arrays([[3], [5]], names=["a", name])

    appended = left.append(right)

    want = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=["a", exp])
    tm.assert_index_equal(appended, want)
|
||||
|
||||
|
||||
def test_append_names_dont_match():
    """GH#48288: appending indexes with entirely different names yields no names."""
    left = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    right = MultiIndex.from_arrays([[3], [5]], names=["x", "y"])

    appended = left.append(right)

    want = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=None)
    tm.assert_index_equal(appended, want)
|
||||
|
||||
|
||||
def test_append_overlapping_interval_levels():
    """GH 54934: append MultiIndexes whose interval levels overlap each other."""
    ivl1 = pd.IntervalIndex.from_breaks([0.0, 1.0, 2.0])
    ivl2 = pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5])

    appended = MultiIndex.from_product([ivl1, ivl1]).append(
        MultiIndex.from_product([ivl2, ivl2])
    )

    iv = pd.Interval  # shorthand for the literal table below
    want = MultiIndex.from_tuples(
        [
            (iv(0.0, 1.0), iv(0.0, 1.0)),
            (iv(0.0, 1.0), iv(1.0, 2.0)),
            (iv(1.0, 2.0), iv(0.0, 1.0)),
            (iv(1.0, 2.0), iv(1.0, 2.0)),
            (iv(0.5, 1.5), iv(0.5, 1.5)),
            (iv(0.5, 1.5), iv(1.5, 2.5)),
            (iv(1.5, 2.5), iv(0.5, 1.5)),
            (iv(1.5, 2.5), iv(1.5, 2.5)),
        ]
    )
    tm.assert_index_equal(appended, want)
|
||||
|
||||
|
||||
def test_repeat():
    """``MultiIndex.repeat`` must match a product built with a repeated inner level."""
    reps = 2
    numbers = [1, 2, 3]
    # this array serves both as the second level's values and as the names
    names = np.array(["foo", "bar"])

    repeated = MultiIndex.from_product([numbers, names], names=names).repeat(reps)

    want = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
    tm.assert_index_equal(repeated, want)
|
||||
|
||||
|
||||
def test_insert_base(idx):
    """Putting the 0th element back in front of ``idx[1:4]`` gives ``idx[0:4]``."""
    without_first = idx[1:4]

    # test 0th element
    restored = without_first.insert(0, idx[0])
    assert idx[0:4].equals(restored)
|
||||
|
||||
|
||||
def test_delete_base(idx):
    """Check ``MultiIndex.delete`` at the first, last and an out-of-range position.

    For each valid position the result must equal the matching slice of
    ``idx`` and carry the same level names. The level names are compared via
    ``names``: the previous ``result.name == expected.name`` comparison never
    looked at the level names, so it could not catch them being dropped.
    """
    # drop the first element
    expected = idx[1:]
    result = idx.delete(0)
    assert result.equals(expected)
    assert result.names == expected.names

    # drop the last element
    expected = idx[:-1]
    result = idx.delete(-1)
    assert result.equals(expected)
    assert result.names == expected.names

    # one past the end is rejected; the message hard-codes the fixture length
    msg = "index 6 is out of bounds for axis 0 with size 6"
    with pytest.raises(IndexError, match=msg):
        idx.delete(len(idx))
|
@ -0,0 +1,772 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import (
|
||||
is_float_dtype,
|
||||
is_unsigned_integer_dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("case", [0.5, "xxx"])
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_set_ops_error_cases(idx, case, sort, method):
    """Every set operation rejects a non-iterable ``other`` with a TypeError."""
    set_op = getattr(idx, method)

    # non-iterable input
    with pytest.raises(TypeError, match="Input must be Index or array-like"):
        set_op(case, sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_intersection_base(idx, sort, klass):
    """Intersect with a superset given as MultiIndex, ndarray, Series or list."""
    first = idx[2::-1]  # first 3 elements reversed
    superset = idx[:5]
    second = superset if klass is MultiIndex else klass(superset.values)

    intersect = first.intersection(second, sort=sort)

    # ``first`` is a subset of ``second``: expect it back, sorted for sort=None
    expected = first.sort_values() if sort is None else first
    tm.assert_index_equal(intersect, expected)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        first.intersection([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
@pytest.mark.arm_slow
@pytest.mark.parametrize("klass", [MultiIndex, np.array, Series, list])
def test_union_base(idx, sort, klass):
    """Union with a subset given as MultiIndex, ndarray, Series or list."""
    first = idx[::-1]
    subset = idx[:5]
    second = subset if klass is MultiIndex else klass(subset.values)

    union = first.union(second, sort=sort)

    # ``second`` adds nothing new: expect ``first`` back, sorted for sort=None
    expected = first.sort_values() if sort is None else first
    tm.assert_index_equal(union, expected)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        first.union([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_difference_base(idx, sort):
    """Removing the tail of ``idx`` leaves its head, for every accepted ``other``."""
    tail = idx[4:]
    head = idx[:4].sort_values() if sort is None else idx[:4]

    diff = idx.difference(tail, sort=sort)
    assert diff.equals(head)
    tm.assert_index_equal(diff, head)

    # GH 10149
    for klass in [np.array, Series, list]:
        diff = idx.difference(klass(tail.values), sort=sort)
        tm.assert_index_equal(diff, head)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        idx.difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_symmetric_difference(idx, sort):
    """Two slices offset by one differ exactly in the first and last element."""
    first = idx[1:]
    second = idx[:-1]

    ends = idx[[-1, 0]]
    answer = ends.sort_values() if sort is None else ends

    tm.assert_index_equal(first.symmetric_difference(second, sort=sort), answer)

    # GH 10149
    for klass in [np.array, Series, list]:
        other = klass(second.values)
        tm.assert_index_equal(first.symmetric_difference(other, sort=sort), answer)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        first.symmetric_difference([1, 2, 3], sort=sort)
|
||||
|
||||
|
||||
def test_multiindex_symmetric_difference():
    """GH 13490: names are kept when both sides agree and dropped otherwise."""
    mi = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"])

    # same names on both sides
    same_names = mi.symmetric_difference(mi)
    assert same_names.names == mi.names

    # same values, different names
    renamed = mi.copy().rename(["A", "B"])
    mixed_names = mi.symmetric_difference(renamed)
    assert mixed_names.names == [None, None]
|
||||
|
||||
|
||||
def test_empty(idx):
    """GH 15270: ``empty`` is False for the fixture and True for a zero-length slice."""
    zero_length = idx[:0]
    assert not idx.empty
    assert zero_length.empty
|
||||
|
||||
|
||||
def test_difference(idx, sort):
    """Exercise ``MultiIndex.difference`` across ordinary and corner cases.

    Covers a proper subset as ``other``, the three ways of getting an empty
    result, mismatched names, array/list inputs and an invalid ``other``.
    ``sort`` comes from a fixture; the ``sort is None`` branch expects the
    result in sorted order.
    """
    first = idx
    result = first.difference(idx[-3:], sort=sort)
    vals = idx[:-3].values

    if sort is None:
        vals = sorted(vals)

    expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names)

    assert isinstance(result, MultiIndex)
    assert result.equals(expected)
    assert result.names == idx.names
    tm.assert_index_equal(result, expected)

    # empty difference: reflexive
    result = idx.difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: superset
    result = idx[-3:].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # empty difference: degenerate
    result = idx[:0].difference(idx, sort=sort)
    expected = idx[:0]
    assert result.equals(expected)
    assert result.names == idx.names

    # names not the same
    chunklet = idx[-3:]
    chunklet.names = ["foo", "baz"]
    result = first.difference(chunklet, sort=sort)
    assert result.names == (None, None)

    # empty, but non-equal
    result = idx.difference(idx.sortlevel(1)[0], sort=sort)
    assert len(result) == 0

    # a plain ndarray of tuples (not a MultiIndex) is accepted as ``other``
    result = first.difference(first.values, sort=sort)
    assert result.equals(first[:0])

    # name from empty array
    result = first.difference([], sort=sort)
    assert first.equals(result)
    assert first.names == result.names

    # name from non-empty array
    result = first.difference([("foo", "one")], sort=sort)
    expected = MultiIndex.from_tuples(
        [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")]
    )
    expected.names = first.names
    assert first.names == result.names
    # ``expected`` is written in sorted order, so sort the result before
    # comparing; that makes the check independent of the ``sort`` argument.
    tm.assert_index_equal(result.sort_values(), expected)

    msg = "other must be a MultiIndex or a list of tuples"
    with pytest.raises(TypeError, match=msg):
        first.difference([1, 2, 3, 4, 5], sort=sort)
|
||||
|
||||
|
||||
def test_difference_sort_special():
    """GH-24959: difference with an empty list under the default ``sort``."""
    unsorted_mi = MultiIndex.from_product([[1, 0], ["a", "b"]])

    # sort=None, the default
    diff = unsorted_mi.difference([])

    # the original, unsorted order is what this test expects back
    tm.assert_index_equal(diff, unsorted_mi)
|
||||
|
||||
|
||||
def test_difference_sort_special_true():
    """Difference with an empty list and ``sort=True`` returns a sorted index."""
    unsorted_mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    sorted_mi = MultiIndex.from_product([[0, 1], ["a", "b"]])

    diff = unsorted_mi.difference([], sort=True)

    tm.assert_index_equal(diff, sorted_mi)
|
||||
|
||||
|
||||
def test_difference_sort_incomparable():
    """GH-24959: incomparable level values warn under sort=None, not sort=False."""
    stamp = pd.Timestamp("2000")
    mi = MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    other = MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # sort=None, the default
    warn_msg = "sort order is undefined for incomparable objects"
    with tm.assert_produces_warning(RuntimeWarning, match=warn_msg):
        diff_default = mi.difference(other)
    tm.assert_index_equal(diff_default, mi)

    # sort=False
    diff_unsorted = mi.difference(other, sort=False)
    tm.assert_index_equal(diff_unsorted, mi)
|
||||
|
||||
|
||||
def test_difference_sort_incomparable_true():
    """``sort=True`` with incomparable level values raises a TypeError."""
    stamp = pd.Timestamp("2000")
    mi = MultiIndex.from_product([[1, stamp, 2], ["a", "b"]])
    other = MultiIndex.from_product([[3, stamp, 4], ["c", "d"]])

    # TODO: this is raising in constructing a Categorical when calling
    #  algos.safe_sort. Should we catch and re-raise with a better message?
    expected_msg = (
        "'values' is not ordered, please explicitly specify the categories order "
    )
    with pytest.raises(TypeError, match=expected_msg):
        mi.difference(other, sort=True)
|
||||
|
||||
|
||||
def test_union(idx, sort):
    """Union of overlapping pieces, of self, of an empty slice and of raw tuples."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]

    combined = reversed_head.union(tail, sort=sort)

    if sort not in (None, False):
        tm.assert_index_equal(combined, idx)
    else:
        # order is not pinned here, so compare sorted copies
        tm.assert_index_equal(combined.sort_values(), idx.sort_values())

    # corner case, pass self or empty thing:
    with_self = idx.union(idx, sort=sort)
    tm.assert_index_equal(with_self, idx)

    with_empty = idx.union(idx[:0], sort=sort)
    tm.assert_index_equal(with_empty, idx)

    # ``other`` given as an ndarray of tuples
    raw_tuples = idx.values
    from_tuples = idx[:4].union(raw_tuples[4:], sort=sort)
    if sort is not None:
        assert from_tuples.equals(idx)
    else:
        tm.assert_index_equal(from_tuples.sort_values(), idx.sort_values())
|
||||
|
||||
|
||||
def test_union_with_regular_index(idx, using_infer_string):
    """Union between a flat string Index and a MultiIndex, in both directions."""
    flat = Index(["A", "B", "C"])

    flat_first = flat.union(idx)
    assert ("foo", "one") in flat_first
    assert "B" in flat_first

    if using_infer_string:
        with pytest.raises(NotImplementedError, match="Can only union"):
            idx.union(flat)
        return

    warn_msg = "The values in the array are unorderable"
    with tm.assert_produces_warning(RuntimeWarning, match=warn_msg):
        mi_first = idx.union(flat)
    # This is more consistent now, if sorting fails then we don't sort at all
    #  in the MultiIndex case.
    assert not flat_first.equals(mi_first)
|
||||
|
||||
|
||||
def test_intersection(idx, sort):
    """Intersection of overlapping pieces, of self, of disjoint pieces, of tuples."""
    reversed_head = idx[:5][::-1]
    tail = idx[3:]
    overlap = idx[3:5]

    common = reversed_head.intersection(tail, sort=sort)

    if sort not in (None, True):
        # order is not pinned here, so sort before comparing
        tm.assert_index_equal(common.sort_values(), overlap)
    else:
        tm.assert_index_equal(common, overlap)

    # corner case, pass self
    with_self = idx.intersection(idx, sort=sort)
    tm.assert_index_equal(with_self, idx)

    # empty intersection: disjoint
    disjoint = idx[:2].intersection(idx[2:], sort=sort)
    assert disjoint.equals(idx[:0])

    # ``other`` given as an ndarray of tuples; the default ``sort`` is used here
    raw_tuples = idx.values
    from_tuples = idx.intersection(raw_tuples)
    assert from_tuples.equals(idx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_setop_with_categorical(idx, sort, method):
    """A categorical flat index as ``other`` gives the MultiIndex result, unnamed."""
    categorical = idx.to_flat_index().astype("category")
    unnamed = [None] * idx.nlevels

    # (categorical other, equivalent MultiIndex other): full length, then first 5
    pairs = [(categorical, idx), (categorical[:5], idx[:5])]
    for cat_other, mi_other in pairs:
        result = getattr(idx, method)(cat_other, sort=sort)
        expected = getattr(idx, method)(mi_other, sort=sort).rename(unnamed)
        tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_intersection_non_object(idx, sort):
    """Intersect with an integer Index, an empty ndarray and a non-empty ndarray."""
    int_index = Index(range(3), name="foo")
    as_array = np.asarray(int_index)

    # named flat Index: empty result, names dropped
    result = idx.intersection(int_index, sort=sort)
    expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None)
    tm.assert_index_equal(result, expected, exact=True)

    # if we pass a length-0 ndarray (i.e. no name, we retain our idx.name)
    result = idx.intersection(as_array[:0], sort=sort)
    expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names)
    tm.assert_index_equal(result, expected, exact=True)

    with pytest.raises(
        TypeError, match="other must be a MultiIndex or a list of tuples"
    ):
        # With non-zero length non-index, we try and fail to convert to tuples
        idx.intersection(as_array, sort=sort)
|
||||
|
||||
|
||||
def test_intersect_equal_sort():
    """GH-24959: self-intersection keeps the order for sort=False and sort=None."""
    unsorted_mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    for sort in (False, None):
        same = unsorted_mi.intersection(unsorted_mi, sort=sort)
        tm.assert_index_equal(same, unsorted_mi)
|
||||
|
||||
|
||||
def test_intersect_equal_sort_true():
    """Self-intersection with ``sort=True`` returns a sorted index."""
    unsorted_mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    sorted_mi = MultiIndex.from_product([[0, 1], ["a", "b"]])

    same = unsorted_mi.intersection(unsorted_mi, sort=True)

    tm.assert_index_equal(same, sorted_mi)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_empty(slice_):
    """Union with itself or with an empty slice leaves the order untouched."""
    # https://github.com/pandas-dev/pandas/issues/24959
    unsorted_mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    other = unsorted_mi[slice_]

    # default, sort=None
    forward = unsorted_mi.union(other)
    tm.assert_index_equal(forward, unsorted_mi)
    backward = other.union(unsorted_mi)
    tm.assert_index_equal(backward, unsorted_mi)

    # sort=False
    no_sort = unsorted_mi.union(other, sort=False)
    tm.assert_index_equal(no_sort, unsorted_mi)
|
||||
|
||||
|
||||
def test_union_sort_other_empty_sort():
    """Union with an empty slice and ``sort=True`` returns a sorted index."""
    unsorted_mi = MultiIndex.from_product([[1, 0], ["a", "b"]])
    sorted_mi = MultiIndex.from_product([[0, 1], ["a", "b"]])

    combined = unsorted_mi.union(unsorted_mi[:0], sort=True)

    tm.assert_index_equal(combined, sorted_mi)
|
||||
|
||||
|
||||
def test_union_sort_other_incomparable():
    """Union over incomparable level values warns for sort=None only."""
    # https://github.com/pandas-dev/pandas/issues/24959
    mixed = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_row = mixed[:1]

    # default, sort=None
    with tm.assert_produces_warning(RuntimeWarning):
        with_default = mixed.union(first_row)
    tm.assert_index_equal(with_default, mixed)

    # sort=False
    without_sort = mixed.union(first_row, sort=False)
    tm.assert_index_equal(without_sort, mixed)
|
||||
|
||||
|
||||
def test_union_sort_other_incomparable_sort():
    """``sort=True`` over incomparable level values raises a TypeError."""
    mixed = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]])
    first_row = mixed[:1]

    expected_msg = "'<' not supported between instances of 'Timestamp' and 'int'"
    with pytest.raises(TypeError, match=expected_msg):
        mixed.union(first_row, sort=True)
|
||||
|
||||
|
||||
def test_union_non_object_dtype_raises():
    """GH#32646: union with one of its own levels raises NotImplementedError."""
    # GH#32646 raise NotImplementedError instead of less-informative error
    mi = MultiIndex.from_product([["a", "b"], [1, 2]])
    second_level = mi.levels[1]

    expected_msg = "Can only union MultiIndex with MultiIndex or Index of tuples"
    with pytest.raises(NotImplementedError, match=expected_msg):
        mi.union(second_level)
|
||||
|
||||
|
||||
def test_union_empty_self_different_names():
    """GH#38423: an empty unnamed MultiIndex unioned with a named one has no names."""
    empty_mi = MultiIndex.from_arrays([[]])
    named_mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])

    combined = empty_mi.union(named_mi)

    want = MultiIndex.from_arrays([[1, 2], [3, 4]])
    tm.assert_index_equal(combined, want)
|
||||
|
||||
|
||||
def test_union_multiindex_empty_rangeindex():
    """GH#41234: union with an empty RangeIndex returns the MultiIndex values."""
    mi = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"])
    empty_range = pd.RangeIndex(0)

    # names are deliberately left out of both comparisons
    tm.assert_index_equal(mi, mi.union(empty_range), check_names=False)
    tm.assert_index_equal(mi, empty_range.union(mi), check_names=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_setops_sort_validation(method):
    """An invalid ``sort`` value raises ValueError; ``sort=True`` is accepted."""
    left = MultiIndex.from_product([["a", "b"], [1, 2]])
    right = MultiIndex.from_product([["b", "c"], [1, 2]])
    set_op = getattr(left, method)

    with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
        set_op(right, sort=2)

    # sort=True is supported as of GH#?
    set_op(right, sort=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [pd.NA, 100])
def test_difference_keep_ea_dtypes(any_numeric_ea_dtype, val):
    """GH#48606: ``difference`` keeps the extension dtype of a level."""

    def ea(values):
        # Series of the extension dtype under test
        return Series(values, dtype=any_numeric_ea_dtype)

    midx = MultiIndex.from_arrays([ea([1, 2]), [2, 1]], names=["a", None])
    midx2 = MultiIndex.from_arrays([ea([1, 2, val]), [1, 1, 3]])

    # non-empty result
    result = midx.difference(midx2)
    expected = MultiIndex.from_arrays([ea([1]), [2]])
    tm.assert_index_equal(result, expected)

    # empty result: same entries on both sides, only the order differs
    result = midx.difference(midx.sort_values(ascending=False))
    expected = MultiIndex.from_arrays(
        [ea([]), Series([], dtype=np.int64)],
        names=["a", None],
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [pd.NA, 5])
def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val):
    """GH#48607: ``symmetric_difference`` keeps the extension dtype of a level."""

    def ea(values):
        # Series of the extension dtype under test
        return Series(values, dtype=any_numeric_ea_dtype)

    midx = MultiIndex.from_arrays([ea([1, 2]), [2, 1]], names=["a", None])
    midx2 = MultiIndex.from_arrays([ea([1, 2, val]), [1, 1, 3]])

    result = midx.symmetric_difference(midx2)

    expected = MultiIndex.from_arrays([ea([1, 1, val]), [1, 2, 3]])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("tuples", "exp_tuples"),
    [
        ([("val1", "test1")], [("val1", "test1")]),
        ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]),
        (
            [("val2", "test2"), ("val1", "test1")],
            [("val2", "test2"), ("val1", "test1")],
        ),
    ],
)
def test_intersect_with_duplicates(tuples, exp_tuples):
    """GH#36915: intersection against a MultiIndex that contains duplicates."""
    level_names = ["first", "second"]
    left = MultiIndex.from_tuples(tuples, names=level_names)
    right = MultiIndex.from_tuples(
        [("val1", "test1"), ("val1", "test1"), ("val2", "test2")],
        names=["first", "second"],
    )

    common = left.intersection(right)

    want = MultiIndex.from_tuples(exp_tuples, names=["first", "second"])
    tm.assert_index_equal(common, want)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, names, expected",
    [
        ((1,), None, [None, None]),
        ((1,), ["a"], [None, None]),
        ((1,), ["b"], [None, None]),
        ((1, 2), ["c", "d"], [None, None]),
        ((1, 2), ["b", "a"], [None, None]),
        ((1, 2, 3), ["a", "b", "c"], [None, None]),
        ((1, 2), ["a", "c"], ["a", None]),
        ((1, 2), ["c", "b"], [None, "b"]),
        ((1, 2), ["a", "b"], ["a", "b"]),
        ((1, 2), [None, "b"], [None, "b"]),
    ],
)
def test_maybe_match_names(data, names, expected):
    """GH#38323: ``_maybe_match_names`` keeps a name only where both sides agree."""
    reference = MultiIndex.from_tuples([], names=["a", "b"])
    candidate = MultiIndex.from_tuples([data], names=names)

    matched = reference._maybe_match_names(candidate)

    assert matched == expected
|
||||
|
||||
|
||||
def test_intersection_equal_different_names():
    """GH#30302: equal values, partly different names; the shared name is kept."""
    arrays = [[1, 2], [3, 4]]
    left = MultiIndex.from_arrays(arrays, names=["c", "b"])
    right = MultiIndex.from_arrays(arrays, names=["a", "b"])

    common = left.intersection(right)

    want = MultiIndex.from_arrays([[1, 2], [3, 4]], names=[None, "b"])
    tm.assert_index_equal(common, want)
|
||||
|
||||
|
||||
def test_intersection_different_names():
    """GH#38323: a named index intersected with an unnamed one has no names."""
    named = MultiIndex.from_arrays([[1], [3]], names=["c", "b"])
    unnamed = MultiIndex.from_arrays([[1], [3]])

    common = named.intersection(unnamed)

    tm.assert_index_equal(common, unnamed)
|
||||
|
||||
|
||||
def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
    """GH#38623: entries holding a missing value are matched by ``intersection``."""
    null = nulls_fixture
    left = MultiIndex.from_arrays([[3, null, 4, null], [1, 2, 4, 2]])
    right = MultiIndex.from_arrays([[3, null, 3], [1, 2, 4]])

    common = left.intersection(right)

    want = MultiIndex.from_arrays([[3, null], [1, 2]])
    tm.assert_index_equal(common, want)
|
||||
|
||||
|
||||
def test_union_with_missing_values_on_both_sides(nulls_fixture):
|
||||
# GH#38623
|
||||
mi1 = MultiIndex.from_arrays([[1, nulls_fixture]])
|
||||
mi2 = MultiIndex.from_arrays([[1, nulls_fixture, 3]])
|
||||
result = mi1.union(mi2)
|
||||
expected = MultiIndex.from_arrays([[1, 3, nulls_fixture]])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
@pytest.mark.parametrize("sort", [None, False])
def test_union_nan_got_duplicated(dtype, sort):
    # GH#38977, GH#49010
    # NaN is present in the first level of both operands and must not be
    # repeated in the union.
    smaller = MultiIndex.from_arrays([pd.array([1.0, np.nan], dtype=dtype), [2, 3]])
    larger = MultiIndex.from_arrays(
        [pd.array([1.0, np.nan, 3.0], dtype=dtype), [2, 3, 4]]
    )
    result = smaller.union(larger, sort=sort)

    # sort=None expects the NaN row last; sort=False expects ``larger`` itself
    expected = (
        MultiIndex.from_arrays(
            [pd.array([1.0, 3.0, np.nan], dtype=dtype), [2, 4, 3]]
        )
        if sort is None
        else larger
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [4, 1])
|
||||
def test_union_keep_ea_dtype(any_numeric_ea_dtype, val):
|
||||
# GH#48505
|
||||
|
||||
arr1 = Series([val, 2], dtype=any_numeric_ea_dtype)
|
||||
arr2 = Series([2, 1], dtype=any_numeric_ea_dtype)
|
||||
midx = MultiIndex.from_arrays([arr1, [1, 2]], names=["a", None])
|
||||
midx2 = MultiIndex.from_arrays([arr2, [2, 1]])
|
||||
result = midx.union(midx2)
|
||||
if val == 4:
|
||||
expected = MultiIndex.from_arrays(
|
||||
[Series([1, 2, 4], dtype=any_numeric_ea_dtype), [1, 2, 1]]
|
||||
)
|
||||
else:
|
||||
expected = MultiIndex.from_arrays(
|
||||
[Series([1, 2], dtype=any_numeric_ea_dtype), [1, 2]]
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dupe_val", [3, pd.NA])
|
||||
def test_union_with_duplicates_keep_ea_dtype(dupe_val, any_numeric_ea_dtype):
|
||||
# GH48900
|
||||
mi1 = MultiIndex.from_arrays(
|
||||
[
|
||||
Series([1, dupe_val, 2], dtype=any_numeric_ea_dtype),
|
||||
Series([1, dupe_val, 2], dtype=any_numeric_ea_dtype),
|
||||
]
|
||||
)
|
||||
mi2 = MultiIndex.from_arrays(
|
||||
[
|
||||
Series([2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype),
|
||||
Series([2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype),
|
||||
]
|
||||
)
|
||||
result = mi1.union(mi2)
|
||||
expected = MultiIndex.from_arrays(
|
||||
[
|
||||
Series([1, 2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype),
|
||||
Series([1, 2, dupe_val, dupe_val], dtype=any_numeric_ea_dtype),
|
||||
]
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
||||
def test_union_duplicates(index, request):
|
||||
# GH#38977
|
||||
if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
|
||||
pytest.skip(f"No duplicates in an empty {type(index).__name__}")
|
||||
|
||||
values = index.unique().values.tolist()
|
||||
mi1 = MultiIndex.from_arrays([values, [1] * len(values)])
|
||||
mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)])
|
||||
result = mi2.union(mi1)
|
||||
expected = mi2.sort_values()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
if (
|
||||
is_unsigned_integer_dtype(mi2.levels[0])
|
||||
and (mi2.get_level_values(0) < 2**63).all()
|
||||
):
|
||||
# GH#47294 - union uses lib.fast_zip, converting data to Python integers
|
||||
# and loses type information. Result is then unsigned only when values are
|
||||
# sufficiently large to require unsigned dtype. This happens only if other
|
||||
# has dups or one of both have missing values
|
||||
expected = expected.set_levels(
|
||||
[expected.levels[0].astype(np.int64), expected.levels[1]]
|
||||
)
|
||||
elif is_float_dtype(mi2.levels[0]):
|
||||
# mi2 has duplicates witch is a different path than above, Fix that path
|
||||
# to use correct float dtype?
|
||||
expected = expected.set_levels(
|
||||
[expected.levels[0].astype(float), expected.levels[1]]
|
||||
)
|
||||
|
||||
result = mi1.union(mi2)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_union_keep_dtype_precision(any_real_numeric_dtype):
|
||||
# GH#48498
|
||||
arr1 = Series([4, 1, 1], dtype=any_real_numeric_dtype)
|
||||
arr2 = Series([1, 4], dtype=any_real_numeric_dtype)
|
||||
midx = MultiIndex.from_arrays([arr1, [2, 1, 1]], names=["a", None])
|
||||
midx2 = MultiIndex.from_arrays([arr2, [1, 2]], names=["a", None])
|
||||
|
||||
result = midx.union(midx2)
|
||||
expected = MultiIndex.from_arrays(
|
||||
([Series([1, 1, 4], dtype=any_real_numeric_dtype), [1, 1, 2]]),
|
||||
names=["a", None],
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_union_keep_ea_dtype_with_na(any_numeric_ea_dtype):
|
||||
# GH#48498
|
||||
arr1 = Series([4, pd.NA], dtype=any_numeric_ea_dtype)
|
||||
arr2 = Series([1, pd.NA], dtype=any_numeric_ea_dtype)
|
||||
midx = MultiIndex.from_arrays([arr1, [2, 1]], names=["a", None])
|
||||
midx2 = MultiIndex.from_arrays([arr2, [1, 2]])
|
||||
result = midx.union(midx2)
|
||||
expected = MultiIndex.from_arrays(
|
||||
[Series([1, 4, pd.NA, pd.NA], dtype=any_numeric_ea_dtype), [1, 2, 1, 2]]
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "levels1, levels2, codes1, codes2, names",
    [
        (
            [["a", "b", "c"], [0, ""]],
            [["c", "d", "b"], [""]],
            [[0, 1, 2], [1, 1, 1]],
            [[0, 1, 2], [0, 0, 0]],
            ["name1", "name2"],
        ),
    ],
)
def test_intersection_lexsort_depth(levels1, levels2, codes1, codes2, names):
    # GH#25169
    # Checks the lexsort depth reported by the intersection of two indexes
    # built directly from levels and codes.
    left = MultiIndex(levels=levels1, codes=codes1, names=names)
    right = MultiIndex(levels=levels2, codes=codes2, names=names)
    assert left.intersection(right)._lexsort_depth == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a",
    [pd.Categorical(["a", "b"], categories=["a", "b"]), ["a", "b"]],
)
@pytest.mark.parametrize(
    "b",
    [
        pd.Categorical(["a", "b"], categories=["b", "a"], ordered=True),
        pd.Categorical(["a", "b"], categories=["b", "a"]),
    ],
)
def test_intersection_with_non_lex_sorted_categories(a, b):
    # GH#49974
    other = ["1", "2"]
    by = ["x", "y"]

    left_frame = DataFrame({"x": a, "y": other})
    right_frame = DataFrame({"x": b, "y": other})

    expected = MultiIndex.from_arrays([a, other], names=["x", "y"])

    # Every unsorted/sorted pairing of the two frames; all four intersections
    # are computed before any of them is checked.
    results = [
        MultiIndex.from_frame(left_frame).intersection(
            MultiIndex.from_frame(right_frame.sort_values(by))
        ),
        MultiIndex.from_frame(left_frame).intersection(
            MultiIndex.from_frame(right_frame)
        ),
        MultiIndex.from_frame(left_frame.sort_values(by)).intersection(
            MultiIndex.from_frame(right_frame)
        ),
        MultiIndex.from_frame(left_frame.sort_values(by)).intersection(
            MultiIndex.from_frame(right_frame.sort_values(by))
        ),
    ]

    for result in results:
        tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [pd.NA, 100])
|
||||
def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):
|
||||
# GH#48604
|
||||
midx = MultiIndex.from_arrays(
|
||||
[Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
|
||||
)
|
||||
midx2 = MultiIndex.from_arrays(
|
||||
[Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
|
||||
)
|
||||
result = midx.intersection(midx2)
|
||||
expected = MultiIndex.from_arrays([Series([2], dtype=any_numeric_ea_dtype), [1]])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_union_with_na_when_constructing_dataframe():
    # GH43222
    # Two Series whose MultiIndexes both contain an all-missing key are stacked
    # into a DataFrame; the missing key must line up as one column.
    na_only_index = MultiIndex.from_arrays(
        [Series([None], dtype="str"), Series([None], dtype="str")]
    )
    series1 = Series((1,), index=na_only_index)
    series2 = Series(
        (10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b")))
    )

    result = DataFrame([series1, series2])

    expected = DataFrame(
        {(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]}
    )
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,349 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import (
|
||||
PerformanceWarning,
|
||||
UnsortedIndexError,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.frozen import FrozenList
|
||||
|
||||
|
||||
def test_sortlevel(idx):
|
||||
tuples = list(idx)
|
||||
np.random.default_rng(2).shuffle(tuples)
|
||||
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(0)
|
||||
expected = MultiIndex.from_tuples(sorted(tuples))
|
||||
assert sorted_idx.equals(expected)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(0, ascending=False)
|
||||
assert sorted_idx.equals(expected[::-1])
|
||||
|
||||
sorted_idx, _ = index.sortlevel(1)
|
||||
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
|
||||
expected = MultiIndex.from_tuples(by1)
|
||||
assert sorted_idx.equals(expected)
|
||||
|
||||
sorted_idx, _ = index.sortlevel(1, ascending=False)
|
||||
assert sorted_idx.equals(expected[::-1])
|
||||
|
||||
|
||||
def test_sortlevel_not_sort_remaining():
    # Rows tie on "A"; with sort_remaining=False the result must equal the
    # input even though level "C" is not in ascending order.
    index = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
    result, _ = index.sortlevel("A", sort_remaining=False)
    assert result.equals(index)
|
||||
|
||||
|
||||
def test_sortlevel_deterministic():
    # Fixed, hand-ordered tuples: sortlevel output is compared against
    # Python's own ordering of the same tuples.
    tuples = [
        ("bar", "one"),
        ("foo", "two"),
        ("qux", "two"),
        ("foo", "one"),
        ("baz", "two"),
        ("qux", "one"),
    ]
    index = MultiIndex.from_tuples(tuples)

    # level 0: plain tuple ordering
    by_level0 = MultiIndex.from_tuples(sorted(tuples))
    ascending, _ = index.sortlevel(0)
    assert ascending.equals(by_level0)

    descending, _ = index.sortlevel(0, ascending=False)
    assert descending.equals(by_level0[::-1])

    # level 1: second element first, first element as tie-breaker
    ascending, _ = index.sortlevel(1)
    by_level1 = MultiIndex.from_tuples(
        sorted(tuples, key=lambda tup: (tup[1], tup[0]))
    )
    assert ascending.equals(by_level1)

    descending, _ = index.sortlevel(1, ascending=False)
    assert descending.equals(by_level1[::-1])
|
||||
|
||||
|
||||
def test_sortlevel_na_position():
|
||||
# GH#51612
|
||||
midx = MultiIndex.from_tuples([(1, np.nan), (1, 1)])
|
||||
result = midx.sortlevel(level=[0, 1], na_position="last")[0]
|
||||
expected = MultiIndex.from_tuples([(1, 1), (1, np.nan)])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_argsort(idx):
|
||||
result = np.argsort(idx)
|
||||
expected = idx.argsort()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# these are the only two types that perform
|
||||
# pandas compatibility input validation - the
|
||||
# rest already perform separate (or no) such
|
||||
# validation via their 'values' attribute as
|
||||
# defined in pandas.core.indexes/base.py - they
|
||||
# cannot be changed at the moment due to
|
||||
# backwards compatibility concerns
|
||||
if isinstance(type(idx), (CategoricalIndex, RangeIndex)):
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, axis=1)
|
||||
|
||||
msg = "the 'kind' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, kind="mergesort")
|
||||
|
||||
msg = "the 'order' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(idx, order=("a", "b"))
|
||||
|
||||
|
||||
def test_unsortedindex():
    # GH 11897
    unsorted_mi = MultiIndex.from_tuples(
        [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
        names=["one", "two"],
    )
    rows = [[i, 10 * i] for i in range(6)]
    df = DataFrame(rows, index=unsorted_mi, columns=["one", "two"])

    # GH 16734: not sorted, but no real slicing
    tm.assert_series_equal(df.loc(axis=0)["z", "a"], df.iloc[0])

    msg = (
        "MultiIndex slicing requires the index to be lexsorted: "
        r"slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc(axis=0)["z", slice("a")]

    # after sorting in place, slicing on the first level works
    df.sort_index(inplace=True)
    assert len(df.loc(axis=0)["z", :]) == 2

    with pytest.raises(KeyError, match="'q'"):
        df.loc(axis=0)["q", :]
|
||||
|
||||
|
||||
def test_unsortedindex_doc_examples():
    # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex
    columns = {
        "jim": [0, 0, 1, 1],
        "joe": ["x", "x", "z", "y"],
        "jolie": np.random.default_rng(2).random(4),
    }
    dfm = DataFrame(columns).set_index(["jim", "joe"])

    # full-key lookup on the unsorted index warns about performance
    with tm.assert_produces_warning(PerformanceWarning):
        dfm.loc[(1, "z")]

    # slicing over both levels needs a deeper lexsort than the index has
    msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)"
    with pytest.raises(UnsortedIndexError, match=msg):
        dfm.loc[(0, "y"):(1, "z")]

    unsorted_index = dfm.index
    assert not unsorted_index._is_lexsorted()
    assert unsorted_index._lexsort_depth == 1

    # sort it
    dfm = dfm.sort_index()
    dfm.loc[(1, "z")]
    dfm.loc[(0, "y"):(1, "z")]

    sorted_index = dfm.index
    assert sorted_index._is_lexsorted()
    assert sorted_index._lexsort_depth == 2
|
||||
|
||||
|
||||
def test_reconstruct_sort():
    # starts off lexsorted & monotonic
    sorted_mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert sorted_mi.is_monotonic_increasing
    rebuilt = sorted_mi._sort_levels_monotonic()
    assert rebuilt.is_monotonic_increasing
    # the very same object is expected back in this case
    assert sorted_mi is rebuilt

    assert sorted_mi.equals(rebuilt)
    assert Index(sorted_mi.values).equals(Index(rebuilt.values))

    # cannot convert to lexsorted
    shuffled_mi = MultiIndex.from_tuples(
        [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")],
        names=["one", "two"],
    )
    assert not shuffled_mi.is_monotonic_increasing
    rebuilt = shuffled_mi._sort_levels_monotonic()
    assert not rebuilt.is_monotonic_increasing
    assert shuffled_mi.equals(rebuilt)
    assert Index(shuffled_mi.values).equals(Index(rebuilt.values))

    # cannot convert to lexsorted
    coded_mi = MultiIndex(
        levels=[["b", "d", "a"], [1, 2, 3]],
        codes=[[0, 1, 0, 2], [2, 0, 0, 1]],
        names=["col1", "col2"],
    )
    assert not coded_mi.is_monotonic_increasing
    rebuilt = coded_mi._sort_levels_monotonic()
    assert not rebuilt.is_monotonic_increasing
    assert coded_mi.equals(rebuilt)
    assert Index(coded_mi.values).equals(Index(rebuilt.values))
|
||||
|
||||
|
||||
def test_reconstruct_remove_unused():
    # xref to GH 2770
    names = ["first", "second"]
    frame = DataFrame(
        [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]],
        columns=["first", "second", "third"],
    )
    indexed = frame.set_index(names, drop=False)
    filtered_index = indexed[indexed["first"] != "deleteMe"].index

    # removed levels are there
    with_unused = MultiIndex(
        levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]],
        codes=[[1, 2], [1, 2]],
        names=names,
    )
    tm.assert_index_equal(filtered_index, with_unused)

    # after pruning only the referenced level values remain
    pruned_expected = MultiIndex(
        levels=[["keepMe", "keepMeToo"], [2, 3]],
        codes=[[0, 1], [0, 1]],
        names=names,
    )
    pruned = filtered_index.remove_unused_levels()
    tm.assert_index_equal(pruned, pruned_expected)

    # idempotent
    pruned_again = pruned.remove_unused_levels()
    tm.assert_index_equal(pruned_again, pruned_expected)
    assert pruned_again.is_(pruned)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")]
)
def test_remove_unused_levels_large(first_type, second_type):
    # GH16556

    # because tests should be deterministic (and this test in particular
    # checks that levels are removed, which is not the case for every
    # random input):
    rng = np.random.default_rng(10)  # seed is arbitrary value that works

    size = 1 << 16
    # the three draws happen in this order: first, second, third
    first = rng.integers(0, 1 << 13, size).astype(first_type)
    second = rng.integers(0, 1 << 10, size).astype(second_type)
    third = rng.random(size)
    frame = DataFrame({"first": first, "second": second, "third": third})

    grouped = frame.groupby(["first", "second"]).sum()
    kept = grouped[grouped.third < 0.1]
    original_index = kept.index

    result = original_index.remove_unused_levels()
    # both levels must shrink while the index stays equal to the original
    assert len(result.levels[0]) < len(original_index.levels[0])
    assert len(result.levels[1]) < len(original_index.levels[1])
    assert result.equals(original_index)

    expected = kept.reset_index().set_index(["first", "second"]).index
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]])
@pytest.mark.parametrize(
    "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]]
)
def test_remove_unused_nan(level0, level1):
    # GH 18417
    # code -1 marks missing entries in the first level
    codes = [[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]
    mi = MultiIndex(levels=[level0, level1], codes=codes)

    result = mi.remove_unused_levels()
    tm.assert_index_equal(result, mi)
    for pruned_level in (result.levels[0], result.levels[1]):
        assert "unused" not in pruned_level
|
||||
|
||||
|
||||
def test_argsort(idx):
|
||||
result = idx.argsort()
|
||||
expected = idx.values.argsort()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_remove_unused_levels_with_nan():
    # GH 37510
    index = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"])
    index = index.set_levels(["a", np.nan], level="id1")
    pruned_levels = index.remove_unused_levels().levels

    expected = FrozenList([["a", np.nan], [4]])
    # the comparison is made on the string form of both objects
    assert str(pruned_levels) == str(expected)
|
||||
|
||||
|
||||
def test_sort_values_nan():
    # GH48495, GH48626
    # code -1 in the second level marks missing entries
    levels = [["A", "B", "C"], ["D"]]
    unsorted = MultiIndex(levels=levels, codes=[[1, 0, 2], [-1, -1, 0]])
    expected = MultiIndex(levels=levels, codes=[[0, 1, 2], [-1, -1, 0]])

    tm.assert_index_equal(unsorted.sort_values(), expected)
|
||||
|
||||
|
||||
def test_sort_values_incomparable():
    # GH48495
    # The first level mixes an int with a Timestamp; sorting is expected to
    # raise the comparison TypeError.
    mixed_level = [1, Timestamp("2000-01-01")]
    mi = MultiIndex.from_arrays([mixed_level, [3, 4]])

    match = "'<' not supported between instances of 'Timestamp' and 'int'"
    with pytest.raises(TypeError, match=match):
        mi.sort_values()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na_position", ["first", "last"])
|
||||
@pytest.mark.parametrize("dtype", ["float64", "Int64", "Float64"])
|
||||
def test_sort_values_with_na_na_position(dtype, na_position):
|
||||
# 51612
|
||||
arrays = [
|
||||
Series([1, 1, 2], dtype=dtype),
|
||||
Series([1, None, 3], dtype=dtype),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
result = index.sort_values(na_position=na_position)
|
||||
if na_position == "first":
|
||||
arrays = [
|
||||
Series([1, 1, 2], dtype=dtype),
|
||||
Series([None, 1, 3], dtype=dtype),
|
||||
]
|
||||
else:
|
||||
arrays = [
|
||||
Series([1, 1, 2], dtype=dtype),
|
||||
Series([1, None, 3], dtype=dtype),
|
||||
]
|
||||
expected = MultiIndex.from_arrays(arrays)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_sort_unnecessary_warning():
|
||||
# GH#55386
|
||||
midx = MultiIndex.from_tuples([(1.5, 2), (3.5, 3), (0, 1)])
|
||||
midx = midx.set_levels([2.5, np.nan, 1], level=0)
|
||||
result = midx.sort_values()
|
||||
expected = MultiIndex.from_tuples([(1, 3), (2.5, 1), (np.nan, 2)])
|
||||
tm.assert_index_equal(result, expected)
|
@ -0,0 +1,78 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_take(idx):
|
||||
indexer = [4, 3, 0, 2]
|
||||
result = idx.take(indexer)
|
||||
expected = idx[indexer]
|
||||
assert result.equals(expected)
|
||||
|
||||
# GH 10791
|
||||
msg = "'MultiIndex' object has no attribute 'freq'"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
idx.freq
|
||||
|
||||
|
||||
def test_take_invalid_kwargs(idx):
|
||||
indices = [1, 2]
|
||||
|
||||
msg = r"take\(\) got an unexpected keyword argument 'foo'"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
idx.take(indices, foo=2)
|
||||
|
||||
msg = "the 'out' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(indices, out=indices)
|
||||
|
||||
msg = "the 'mode' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.take(indices, mode="clip")
|
||||
|
||||
|
||||
def test_take_fill_value():
    # GH 12631
    day1 = pd.Timestamp("2011-01-01")
    day2 = pd.Timestamp("2011-01-02")
    names = ["str", "dt"]
    idx = pd.MultiIndex.from_product([["A", "B"], [day1, day2]], names=names)

    # no fill_value: -1 selects the last row
    result = idx.take(np.array([1, 0, -1]))
    expected = pd.MultiIndex.from_tuples(
        [("A", day2), ("A", day1), ("B", day2)], names=names
    )
    tm.assert_index_equal(result, expected)

    # fill_value
    result = idx.take(np.array([1, 0, -1]), fill_value=True)
    expected = pd.MultiIndex.from_tuples(
        [("A", day2), ("A", day1), (np.nan, pd.NaT)], names=names
    )
    tm.assert_index_equal(result, expected)

    # allow_fill=False
    result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
    expected = pd.MultiIndex.from_tuples(
        [("A", day2), ("A", day1), ("B", day2)], names=names
    )
    tm.assert_index_equal(result, expected)

    # with a fill_value, positions below -1 are rejected
    msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1"
    for bad_position in (-2, -5):
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, bad_position]), fill_value=True)

    msg = "index -5 is out of bounds for( axis 0 with)? size 4"
    with pytest.raises(IndexError, match=msg):
        idx.take(np.array([1, -5]))
|
Reference in New Issue
Block a user