done
This commit is contained in:
@ -0,0 +1,426 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
Note: we do not bother with base.BaseIndexTests because NumpyExtensionArray
|
||||
will never be held in an Index.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import NumpyEADtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_object_dtype
|
||||
from pandas.core.arrays.numpy_ import NumpyExtensionArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
orig_assert_attr_equal = tm.assert_attr_equal
|
||||
|
||||
|
||||
def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
|
||||
"""
|
||||
patch tm.assert_attr_equal so NumpyEADtype("object") is closed enough to
|
||||
np.dtype("object")
|
||||
"""
|
||||
if attr == "dtype":
|
||||
lattr = getattr(left, "dtype", None)
|
||||
rattr = getattr(right, "dtype", None)
|
||||
if isinstance(lattr, NumpyEADtype) and not isinstance(rattr, NumpyEADtype):
|
||||
left = left.astype(lattr.numpy_dtype)
|
||||
elif isinstance(rattr, NumpyEADtype) and not isinstance(lattr, NumpyEADtype):
|
||||
right = right.astype(rattr.numpy_dtype)
|
||||
|
||||
orig_assert_attr_equal(attr, left, right, obj)
|
||||
|
||||
|
||||
@pytest.fixture(params=["float", "object"])
|
||||
def dtype(request):
|
||||
return NumpyEADtype(np.dtype(request.param))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def allow_in_pandas(monkeypatch):
|
||||
"""
|
||||
A monkeypatch to tells pandas to let us in.
|
||||
|
||||
By default, passing a NumpyExtensionArray to an index / series / frame
|
||||
constructor will unbox that NumpyExtensionArray to an ndarray, and treat
|
||||
it as a non-EA column. We don't want people using EAs without
|
||||
reason.
|
||||
|
||||
The mechanism for this is a check against ABCNumpyExtensionArray
|
||||
in each constructor.
|
||||
|
||||
But, for testing, we need to allow them in pandas. So we patch
|
||||
the _typ of NumpyExtensionArray, so that we evade the ABCNumpyExtensionArray
|
||||
check.
|
||||
"""
|
||||
with monkeypatch.context() as m:
|
||||
m.setattr(NumpyExtensionArray, "_typ", "extension")
|
||||
m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data(allow_in_pandas, dtype):
|
||||
if dtype.numpy_dtype == "object":
|
||||
return pd.Series([(i,) for i in range(100)]).array
|
||||
return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_missing(allow_in_pandas, dtype):
|
||||
if dtype.numpy_dtype == "object":
|
||||
return NumpyExtensionArray(np.array([np.nan, (1,)], dtype=object))
|
||||
return NumpyExtensionArray(np.array([np.nan, 1.0]))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def na_cmp():
|
||||
def cmp(a, b):
|
||||
return np.isnan(a) and np.isnan(b)
|
||||
|
||||
return cmp
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_for_sorting(allow_in_pandas, dtype):
|
||||
"""Length-3 array with a known sort order.
|
||||
|
||||
This should be three items [B, C, A] with
|
||||
A < B < C
|
||||
"""
|
||||
if dtype.numpy_dtype == "object":
|
||||
# Use an empty tuple for first element, then remove,
|
||||
# to disable np.array's shape inference.
|
||||
return NumpyExtensionArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:])
|
||||
return NumpyExtensionArray(np.array([1, 2, 0]))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_missing_for_sorting(allow_in_pandas, dtype):
|
||||
"""Length-3 array with a known sort order.
|
||||
|
||||
This should be three items [B, NA, A] with
|
||||
A < B and NA missing.
|
||||
"""
|
||||
if dtype.numpy_dtype == "object":
|
||||
return NumpyExtensionArray(np.array([(1,), np.nan, (0,)], dtype=object))
|
||||
return NumpyExtensionArray(np.array([1, np.nan, 0]))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_for_grouping(allow_in_pandas, dtype):
|
||||
"""Data for factorization, grouping, and unique tests.
|
||||
|
||||
Expected to be like [B, B, NA, NA, A, A, B, C]
|
||||
|
||||
Where A < B < C and NA is missing
|
||||
"""
|
||||
if dtype.numpy_dtype == "object":
|
||||
a, b, c = (1,), (2,), (3,)
|
||||
else:
|
||||
a, b, c = np.arange(3)
|
||||
return NumpyExtensionArray(
|
||||
np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype)
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data_for_twos(dtype):
|
||||
if dtype.kind == "O":
|
||||
pytest.skip(f"{dtype} is not a numeric dtype")
|
||||
arr = np.ones(100) * 2
|
||||
return NumpyExtensionArray._from_sequence(arr, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def skip_numpy_object(dtype, request):
|
||||
"""
|
||||
Tests for NumpyExtensionArray with nested data. Users typically won't create
|
||||
these objects via `pd.array`, but they can show up through `.array`
|
||||
on a Series with nested data. Many of the base tests fail, as they aren't
|
||||
appropriate for nested data.
|
||||
|
||||
This fixture allows these tests to be skipped when used as a usefixtures
|
||||
marker to either an individual test or a test class.
|
||||
"""
|
||||
if dtype == "object":
|
||||
mark = pytest.mark.xfail(reason="Fails for object dtype")
|
||||
request.applymarker(mark)
|
||||
|
||||
|
||||
skip_nested = pytest.mark.usefixtures("skip_numpy_object")
|
||||
|
||||
|
||||
class TestNumpyExtensionArray(base.ExtensionTests):
|
||||
@pytest.mark.skip(reason="We don't register our dtype")
|
||||
# We don't want to register. This test should probably be split in two.
|
||||
def test_from_dtype(self, data):
|
||||
pass
|
||||
|
||||
@skip_nested
|
||||
def test_series_constructor_scalar_with_index(self, data, dtype):
|
||||
# ValueError: Length of passed values is 1, index implies 3.
|
||||
super().test_series_constructor_scalar_with_index(data, dtype)
|
||||
|
||||
def test_check_dtype(self, data, request, using_infer_string):
|
||||
if data.dtype.numpy_dtype == "object":
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"NumpyExtensionArray expectedly clashes with a "
|
||||
f"NumPy name: {data.dtype.numpy_dtype}"
|
||||
)
|
||||
)
|
||||
super().test_check_dtype(data)
|
||||
|
||||
def test_is_not_object_type(self, dtype, request):
|
||||
if dtype.numpy_dtype == "object":
|
||||
# Different from BaseDtypeTests.test_is_not_object_type
|
||||
# because NumpyEADtype(object) is an object type
|
||||
assert is_object_dtype(dtype)
|
||||
else:
|
||||
super().test_is_not_object_type(dtype)
|
||||
|
||||
@skip_nested
|
||||
def test_getitem_scalar(self, data):
|
||||
# AssertionError
|
||||
super().test_getitem_scalar(data)
|
||||
|
||||
@skip_nested
|
||||
def test_shift_fill_value(self, data):
|
||||
# np.array shape inference. Shift implementation fails.
|
||||
super().test_shift_fill_value(data)
|
||||
|
||||
@skip_nested
|
||||
def test_fillna_copy_frame(self, data_missing):
|
||||
# The "scalar" for this array isn't a scalar.
|
||||
super().test_fillna_copy_frame(data_missing)
|
||||
|
||||
@skip_nested
|
||||
def test_fillna_copy_series(self, data_missing):
|
||||
# The "scalar" for this array isn't a scalar.
|
||||
super().test_fillna_copy_series(data_missing)
|
||||
|
||||
@skip_nested
|
||||
def test_searchsorted(self, data_for_sorting, as_series):
|
||||
# TODO: NumpyExtensionArray.searchsorted calls ndarray.searchsorted which
|
||||
# isn't quite what we want in nested data cases. Instead we need to
|
||||
# adapt something like libindex._bin_search.
|
||||
super().test_searchsorted(data_for_sorting, as_series)
|
||||
|
||||
@pytest.mark.xfail(reason="NumpyExtensionArray.diff may fail on dtype")
|
||||
def test_diff(self, data, periods):
|
||||
return super().test_diff(data, periods)
|
||||
|
||||
def test_insert(self, data, request):
|
||||
if data.dtype.numpy_dtype == object:
|
||||
mark = pytest.mark.xfail(reason="Dimension mismatch in np.concatenate")
|
||||
request.applymarker(mark)
|
||||
|
||||
super().test_insert(data)
|
||||
|
||||
@skip_nested
|
||||
def test_insert_invalid(self, data, invalid_scalar):
|
||||
# NumpyExtensionArray[object] can hold anything, so skip
|
||||
super().test_insert_invalid(data, invalid_scalar)
|
||||
|
||||
divmod_exc = None
|
||||
series_scalar_exc = None
|
||||
frame_scalar_exc = None
|
||||
series_array_exc = None
|
||||
|
||||
def test_divmod(self, data):
|
||||
divmod_exc = None
|
||||
if data.dtype.kind == "O":
|
||||
divmod_exc = TypeError
|
||||
self.divmod_exc = divmod_exc
|
||||
super().test_divmod(data)
|
||||
|
||||
def test_divmod_series_array(self, data):
|
||||
ser = pd.Series(data)
|
||||
exc = None
|
||||
if data.dtype.kind == "O":
|
||||
exc = TypeError
|
||||
self.divmod_exc = exc
|
||||
self._check_divmod_op(ser, divmod, data)
|
||||
|
||||
def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request):
|
||||
opname = all_arithmetic_operators
|
||||
series_scalar_exc = None
|
||||
if data.dtype.numpy_dtype == object:
|
||||
if opname in ["__mul__", "__rmul__"]:
|
||||
mark = pytest.mark.xfail(
|
||||
reason="the Series.combine step raises but not the Series method."
|
||||
)
|
||||
request.node.add_marker(mark)
|
||||
series_scalar_exc = TypeError
|
||||
self.series_scalar_exc = series_scalar_exc
|
||||
super().test_arith_series_with_scalar(data, all_arithmetic_operators)
|
||||
|
||||
def test_arith_series_with_array(self, data, all_arithmetic_operators):
|
||||
opname = all_arithmetic_operators
|
||||
series_array_exc = None
|
||||
if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]:
|
||||
series_array_exc = TypeError
|
||||
self.series_array_exc = series_array_exc
|
||||
super().test_arith_series_with_array(data, all_arithmetic_operators)
|
||||
|
||||
def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
|
||||
opname = all_arithmetic_operators
|
||||
frame_scalar_exc = None
|
||||
if data.dtype.numpy_dtype == object:
|
||||
if opname in ["__mul__", "__rmul__"]:
|
||||
mark = pytest.mark.xfail(
|
||||
reason="the Series.combine step raises but not the Series method."
|
||||
)
|
||||
request.node.add_marker(mark)
|
||||
frame_scalar_exc = TypeError
|
||||
self.frame_scalar_exc = frame_scalar_exc
|
||||
super().test_arith_frame_with_scalar(data, all_arithmetic_operators)
|
||||
|
||||
def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
|
||||
if ser.dtype.kind == "O":
|
||||
return op_name in ["sum", "min", "max", "any", "all"]
|
||||
return True
|
||||
|
||||
def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
|
||||
res_op = getattr(ser, op_name)
|
||||
# avoid coercing int -> float. Just cast to the actual numpy type.
|
||||
# error: Item "ExtensionDtype" of "dtype[Any] | ExtensionDtype" has
|
||||
# no attribute "numpy_dtype"
|
||||
cmp_dtype = ser.dtype.numpy_dtype # type: ignore[union-attr]
|
||||
alt = ser.astype(cmp_dtype)
|
||||
exp_op = getattr(alt, op_name)
|
||||
if op_name == "count":
|
||||
result = res_op()
|
||||
expected = exp_op()
|
||||
else:
|
||||
result = res_op(skipna=skipna)
|
||||
expected = exp_op(skipna=skipna)
|
||||
tm.assert_almost_equal(result, expected)
|
||||
|
||||
@pytest.mark.skip("TODO: tests not written yet")
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
|
||||
pass
|
||||
|
||||
@skip_nested
|
||||
def test_fillna_series(self, data_missing):
|
||||
# Non-scalar "scalar" values.
|
||||
super().test_fillna_series(data_missing)
|
||||
|
||||
@skip_nested
|
||||
def test_fillna_frame(self, data_missing):
|
||||
# Non-scalar "scalar" values.
|
||||
super().test_fillna_frame(data_missing)
|
||||
|
||||
@skip_nested
|
||||
def test_setitem_invalid(self, data, invalid_scalar):
|
||||
# object dtype can hold anything, so doesn't raise
|
||||
super().test_setitem_invalid(data, invalid_scalar)
|
||||
|
||||
@skip_nested
|
||||
def test_setitem_sequence_broadcasts(self, data, box_in_series):
|
||||
# ValueError: cannot set using a list-like indexer with a different
|
||||
# length than the value
|
||||
super().test_setitem_sequence_broadcasts(data, box_in_series)
|
||||
|
||||
@skip_nested
|
||||
@pytest.mark.parametrize("setter", ["loc", None])
|
||||
def test_setitem_mask_broadcast(self, data, setter):
|
||||
# ValueError: cannot set using a list-like indexer with a different
|
||||
# length than the value
|
||||
super().test_setitem_mask_broadcast(data, setter)
|
||||
|
||||
@skip_nested
|
||||
def test_setitem_scalar_key_sequence_raise(self, data):
|
||||
# Failed: DID NOT RAISE <class 'ValueError'>
|
||||
super().test_setitem_scalar_key_sequence_raise(data)
|
||||
|
||||
# TODO: there is some issue with NumpyExtensionArray, therefore,
|
||||
# skip the setitem test for now, and fix it later (GH 31446)
|
||||
|
||||
@skip_nested
|
||||
@pytest.mark.parametrize(
|
||||
"mask",
|
||||
[
|
||||
np.array([True, True, True, False, False]),
|
||||
pd.array([True, True, True, False, False], dtype="boolean"),
|
||||
],
|
||||
ids=["numpy-array", "boolean-array"],
|
||||
)
|
||||
def test_setitem_mask(self, data, mask, box_in_series):
|
||||
super().test_setitem_mask(data, mask, box_in_series)
|
||||
|
||||
@skip_nested
|
||||
@pytest.mark.parametrize(
|
||||
"idx",
|
||||
[[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
|
||||
ids=["list", "integer-array", "numpy-array"],
|
||||
)
|
||||
def test_setitem_integer_array(self, data, idx, box_in_series):
|
||||
super().test_setitem_integer_array(data, idx, box_in_series)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx, box_in_series",
|
||||
[
|
||||
([0, 1, 2, pd.NA], False),
|
||||
pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail),
|
||||
(pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
|
||||
(pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
|
||||
],
|
||||
ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
|
||||
)
|
||||
def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
|
||||
super().test_setitem_integer_with_missing_raises(data, idx, box_in_series)
|
||||
|
||||
@skip_nested
|
||||
def test_setitem_slice(self, data, box_in_series):
|
||||
super().test_setitem_slice(data, box_in_series)
|
||||
|
||||
@skip_nested
|
||||
def test_setitem_loc_iloc_slice(self, data):
|
||||
super().test_setitem_loc_iloc_slice(data)
|
||||
|
||||
def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
|
||||
# https://github.com/pandas-dev/pandas/issues/32395
|
||||
df = expected = pd.DataFrame({"data": pd.Series(data)})
|
||||
result = pd.DataFrame(index=df.index)
|
||||
|
||||
# because result has object dtype, the attempt to do setting inplace
|
||||
# is successful, and object dtype is retained
|
||||
key = full_indexer(df)
|
||||
result.loc[key, "data"] = df["data"]
|
||||
|
||||
# base class method has expected = df; NumpyExtensionArray behaves oddly because
|
||||
# we patch _typ for these tests.
|
||||
if data.dtype.numpy_dtype != object:
|
||||
if not isinstance(key, slice) or key != slice(None):
|
||||
expected = pd.DataFrame({"data": data.to_numpy()})
|
||||
tm.assert_frame_equal(result, expected, check_column_type=False)
|
||||
|
||||
@pytest.mark.xfail(reason="NumpyEADtype is unpacked")
|
||||
def test_index_from_listlike_with_dtype(self, data):
|
||||
super().test_index_from_listlike_with_dtype(data)
|
||||
|
||||
@skip_nested
|
||||
@pytest.mark.parametrize("engine", ["c", "python"])
|
||||
def test_EA_types(self, engine, data, request):
|
||||
super().test_EA_types(engine, data, request)
|
||||
|
||||
|
||||
class Test2DCompat(base.NDArrayBacked2DTests):
|
||||
pass
|
Reference in New Issue
Block a user