done
This commit is contained in:
@ -0,0 +1,6 @@
|
||||
from pandas.tests.extension.array_with_attr.array import (
|
||||
FloatAttrArray,
|
||||
FloatAttrDtype,
|
||||
)
|
||||
|
||||
__all__ = ["FloatAttrArray", "FloatAttrDtype"]
|
@ -0,0 +1,89 @@
|
||||
"""
|
||||
Test extension array that has custom attribute information (not stored on the dtype).
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import numbers
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import type_t
|
||||
|
||||
|
||||
class FloatAttrDtype(ExtensionDtype):
    """
    Dtype paired with ``FloatAttrArray``.

    Scalars are Python floats and NaN is used as the missing-value marker.
    """

    name = "float_attr"
    type = float
    na_value = np.nan

    @classmethod
    def construct_array_type(cls) -> type_t[FloatAttrArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        array_cls = FloatAttrArray
        return array_cls
|
||||
|
||||
|
||||
class FloatAttrArray(ExtensionArray):
    """
    Minimal float64-backed extension array that carries an extra ``attr``.

    ``attr`` is stored on the array instance (not on the dtype) and is passed
    along by ``__getitem__`` (non-scalar), ``take``, ``copy`` and
    ``_concat_same_type``.
    """

    dtype = FloatAttrDtype()
    __array_priority__ = 1000

    def __init__(self, values, attr=None) -> None:
        """
        Parameters
        ----------
        values : np.ndarray
            Must be a float64 ndarray; it is stored as-is (no copy).
        attr : object, optional
            Arbitrary payload kept on the instance.

        Raises
        ------
        TypeError
            If ``values`` is not a float64 numpy array.
        """
        # Both failure modes share one message, so check them in one guard.
        if not isinstance(values, np.ndarray) or values.dtype != "float64":
            raise TypeError("Need to pass a numpy array of float64 dtype as values")
        self.data = values
        self.attr = attr

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """Build an array (with ``attr=None``) from a sequence of scalars."""
        if copy:
            data = np.array(scalars, dtype="float64", copy=copy)
        else:
            data = np.asarray(scalars, dtype="float64")
        return cls(data)

    def __getitem__(self, item):
        """Return a scalar for an integer key, otherwise a new array."""
        if isinstance(item, numbers.Integral):
            return self.data[item]
        # slice, list-like, mask
        item = pd.api.indexers.check_array_indexer(self, item)
        return type(self)(self.data[item], self.attr)

    def __len__(self) -> int:
        return len(self.data)

    def isna(self):
        """Boolean ndarray marking the NaN entries."""
        return np.isnan(self.data)

    def take(self, indexer, allow_fill=False, fill_value=None):
        """Take elements by position, keeping ``attr`` on the result."""
        from pandas.api.extensions import take

        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(
            self.data, indexer, fill_value=fill_value, allow_fill=allow_fill
        )
        return type(self)(result, self.attr)

    def copy(self):
        """Return a new array with copied data and the same ``attr``."""
        return type(self)(self.data.copy(), self.attr)

    @classmethod
    def _concat_same_type(cls, to_concat):
        """
        Concatenate arrays; the result takes ``attr`` from the first one.

        An empty ``to_concat`` yields an empty array with ``attr=None``.
        """
        if not len(to_concat):
            # np.concatenate raises ValueError on an empty sequence, which
            # made the original "attr = None" fallback unreachable.
            return cls(np.array([], dtype="float64"), None)
        data = np.concatenate([x.data for x in to_concat])
        return cls(data, to_concat[0].attr)
|
@ -0,0 +1,33 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.extension.array_with_attr import FloatAttrArray
|
||||
|
||||
|
||||
def test_concat_with_all_na():
    """
    The column array's ``attr`` must survive merge/concat of all-NA data.
    """
    # https://github.com/pandas-dev/pandas/pull/47762
    # ensure that attribute of the column array is preserved (when it gets
    # preserved in reindexing the array) during merge/concat
    nan_values = np.array([np.nan, np.nan], dtype="float64")
    arr = FloatAttrArray(nan_values, attr="test")

    # merge where both keys match
    df1 = pd.DataFrame({"col": arr, "key": [0, 1]})
    df2 = pd.DataFrame({"key": [0, 1], "col2": [1, 2]})
    result = pd.merge(df1, df2, on="key")
    expected = pd.DataFrame({"col": arr, "key": [0, 1], "col2": [1, 2]})
    tm.assert_frame_equal(result, expected)
    assert result["col"].array.attr == "test"

    # merge where only key 0 matches
    df1 = pd.DataFrame({"col": arr, "key": [0, 1]})
    df2 = pd.DataFrame({"key": [0, 2], "col2": [1, 2]})
    result = pd.merge(df1, df2, on="key")
    first_only = arr.take([0])
    expected = pd.DataFrame({"col": first_only, "key": [0], "col2": [1]})
    tm.assert_frame_equal(result, expected)
    assert result["col"].array.attr == "test"

    # column-wise concat aligned on the union of keys
    frames = [df1.set_index("key"), df2.set_index("key")]
    result = pd.concat(frames, axis=1)
    expected_columns = {
        "col": arr.take([0, 1, -1]),
        "col2": [1, np.nan, 2],
        "key": [0, 1, 2],
    }
    expected = pd.DataFrame(expected_columns).set_index("key")
    tm.assert_frame_equal(result, expected)
    assert result["col"].array.attr == "test"
|
@ -0,0 +1,131 @@
|
||||
"""
|
||||
Base test suite for extension arrays.
|
||||
|
||||
These tests are intended for third-party libraries to subclass to validate
|
||||
that their extension arrays and dtypes satisfy the interface. Moving or
|
||||
renaming the tests should not be done lightly.
|
||||
|
||||
Libraries are expected to implement a few pytest fixtures to provide data
|
||||
for the tests. The fixtures may be located in either
|
||||
|
||||
* The same module as your test class.
|
||||
* A ``conftest.py`` in the same directory as your test class.
|
||||
|
||||
The full list of fixtures may be found in the ``conftest.py`` next to this
|
||||
file.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import pytest
|
||||
from pandas.tests.extension.base import BaseDtypeTests
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dtype():
|
||||
return MyDtype()
|
||||
|
||||
|
||||
class TestMyDtype(BaseDtypeTests):
|
||||
pass
|
||||
|
||||
|
||||
Your class ``TestMyDtype`` will inherit all the tests defined on
|
||||
``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype``
|
||||
wherever the test requires it. You're free to implement additional tests.
|
||||
|
||||
"""
|
||||
from pandas.tests.extension.base.accumulate import BaseAccumulateTests
|
||||
from pandas.tests.extension.base.casting import BaseCastingTests
|
||||
from pandas.tests.extension.base.constructors import BaseConstructorsTests
|
||||
from pandas.tests.extension.base.dim2 import ( # noqa: F401
|
||||
Dim2CompatTests,
|
||||
NDArrayBacked2DTests,
|
||||
)
|
||||
from pandas.tests.extension.base.dtype import BaseDtypeTests
|
||||
from pandas.tests.extension.base.getitem import BaseGetitemTests
|
||||
from pandas.tests.extension.base.groupby import BaseGroupbyTests
|
||||
from pandas.tests.extension.base.index import BaseIndexTests
|
||||
from pandas.tests.extension.base.interface import BaseInterfaceTests
|
||||
from pandas.tests.extension.base.io import BaseParsingTests
|
||||
from pandas.tests.extension.base.methods import BaseMethodsTests
|
||||
from pandas.tests.extension.base.missing import BaseMissingTests
|
||||
from pandas.tests.extension.base.ops import ( # noqa: F401
|
||||
BaseArithmeticOpsTests,
|
||||
BaseComparisonOpsTests,
|
||||
BaseOpsUtil,
|
||||
BaseUnaryOpsTests,
|
||||
)
|
||||
from pandas.tests.extension.base.printing import BasePrintingTests
|
||||
from pandas.tests.extension.base.reduce import BaseReduceTests
|
||||
from pandas.tests.extension.base.reshaping import BaseReshapingTests
|
||||
from pandas.tests.extension.base.setitem import BaseSetitemTests
|
||||
|
||||
|
||||
# One test class that you can inherit as an alternative to inheriting all the
|
||||
# test classes above.
|
||||
# Note 1) this excludes Dim2CompatTests and NDArrayBacked2DTests.
|
||||
# Note 2) this uses BaseReduceTests and _not_ BaseBooleanReduceTests,
|
||||
# BaseNoReduceTests, or BaseNumericReduceTests
|
||||
class ExtensionTests(
    BaseAccumulateTests,
    BaseCastingTests,
    BaseConstructorsTests,
    BaseDtypeTests,
    BaseGetitemTests,
    BaseGroupbyTests,
    BaseIndexTests,
    BaseInterfaceTests,
    BaseParsingTests,
    BaseMethodsTests,
    BaseMissingTests,
    BaseArithmeticOpsTests,
    BaseComparisonOpsTests,
    BaseUnaryOpsTests,
    BasePrintingTests,
    BaseReduceTests,
    BaseReshapingTests,
    BaseSetitemTests,
    Dim2CompatTests,
):
    """Single test class combining all of the base test classes listed above."""
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """
    Lazily resolve the deprecated reduce test classes.

    Emits a ``FutureWarning`` and returns the class from the ``reduce``
    submodule for the three deprecated names; any other name raises
    ``AttributeError``.
    """
    import warnings

    deprecated_names = (
        "BaseNoReduceTests",
        "BaseNumericReduceTests",
        "BaseBooleanReduceTests",
    )
    if name not in deprecated_names:
        raise AttributeError(
            f"module 'pandas.tests.extension.base' has no attribute '{name}'"
        )

    warnings.warn(
        f"{name} is deprecated and will be removed in a "
        "future version. Use BaseReduceTests and override "
        "`_supports_reduction` instead.",
        FutureWarning,
    )
    # Import only after warning, as the original branches did.
    import pandas.tests.extension.base.reduce as reduce_module

    return getattr(reduce_module, name)
|
@ -0,0 +1,40 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class BaseAccumulateTests:
    """
    Accumulation specific tests. Generally these only
    make sense for numeric/boolean operations.
    """

    def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
        # Hook for subclass authors: report whether ``op_name`` is expected
        # to work for this dtype. The default answer is "no".
        return False

    def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
        """Compare the accumulation on ``ser`` against a float64/object copy."""
        try:
            reference = ser.astype("float64")
        except (TypeError, ValueError):
            # e.g. Period can't be cast to float64 (TypeError)
            #  String can't be cast to float64 (ValueError)
            reference = ser.astype(object)

        result = getattr(ser, op_name)(skipna=skipna)
        expected = getattr(reference, op_name)(skipna=skipna)
        tm.assert_series_equal(result, expected, check_dtype=False)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_accumulate_series(self, data, all_numeric_accumulations, skipna):
        op_name = all_numeric_accumulations
        ser = pd.Series(data)

        if not self._supports_accumulation(ser, op_name):
            with pytest.raises((NotImplementedError, TypeError)):
                # TODO: require TypeError for things that will _never_ work?
                getattr(ser, op_name)(skipna=skipna)
            return

        self.check_accumulate(ser, op_name, skipna)
|
@ -0,0 +1,2 @@
|
||||
class BaseExtensionTests:
    """Empty placeholder class; it defines no tests of its own."""
|
@ -0,0 +1,87 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.internals.blocks import NumpyBlock
|
||||
|
||||
|
||||
class BaseCastingTests:
    """Casting to and from ExtensionDtypes"""

    def test_astype_object_series(self, all_data):
        result = pd.Series(all_data, name="A").astype(object)
        object_dtype = np.dtype(object)
        assert result.dtype == object_dtype

        mgr = result._mgr
        if hasattr(mgr, "blocks"):
            blk = mgr.blocks[0]
            assert isinstance(blk, NumpyBlock)
            assert blk.is_object
        assert isinstance(mgr.array, np.ndarray)
        assert mgr.array.dtype == object_dtype

    def test_astype_object_frame(self, all_data):
        df = pd.DataFrame({"A": all_data})
        result = df.astype(object)

        mgr = result._mgr
        if hasattr(mgr, "blocks"):
            blk = mgr.blocks[0]
            assert isinstance(blk, NumpyBlock), type(blk)
            assert blk.is_object
        first_array = mgr.arrays[0]
        assert isinstance(first_array, np.ndarray)
        assert first_array.dtype == np.dtype(object)

        # check that we can compare the dtypes
        same_dtype = result.dtypes == df.dtypes
        assert not same_dtype.any()

    def test_tolist(self, data):
        as_list = pd.Series(data).tolist()
        assert as_list == list(data)

    def test_astype_str(self, data):
        head = data[:2]
        result = pd.Series(head).astype(str)
        as_strings = [str(x) for x in head]
        expected = pd.Series(as_strings, dtype=str)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "nullable_string_dtype",
        [
            "string[python]",
            pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
        ],
    )
    def test_astype_string(self, data, nullable_string_dtype):
        # GH-33465, GH#45326 as of 2.0 we decode bytes instead of calling str(obj)
        head = data[:5]
        result = pd.Series(head).astype(nullable_string_dtype)
        as_text = [x.decode() if isinstance(x, bytes) else str(x) for x in head]
        expected = pd.Series(as_text, dtype=nullable_string_dtype)
        tm.assert_series_equal(result, expected)

    def test_to_numpy(self, data):
        expected = np.asarray(data)

        # directly on the array, then through a Series
        tm.assert_equal(data.to_numpy(), expected)
        tm.assert_equal(pd.Series(data).to_numpy(), expected)

    def test_astype_empty_dataframe(self, dtype):
        # https://github.com/pandas-dev/pandas/issues/33113
        empty = pd.DataFrame()
        tm.assert_frame_equal(empty.astype(dtype), empty)

    @pytest.mark.parametrize("copy", [True, False])
    def test_astype_own_type(self, data, copy):
        # ensure that astype returns the original object for equal dtype and copy=False
        # https://github.com/pandas-dev/pandas/issues/28488
        result = data.astype(data.dtype, copy=copy)
        is_same_object = result is data
        assert is_same_object is (not copy)
        tm.assert_extension_array_equal(result, data)
|
@ -0,0 +1,142 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.extensions import ExtensionArray
|
||||
from pandas.core.internals.blocks import EABackedBlock
|
||||
|
||||
|
||||
class BaseConstructorsTests:
    """Tests for constructing arrays, Series and DataFrames from the dtype."""

    def test_from_sequence_from_cls(self, data):
        roundtripped = type(data)._from_sequence(data, dtype=data.dtype)
        tm.assert_extension_array_equal(roundtripped, data)

        # same round trip on an empty slice
        empty = data[:0]
        roundtripped = type(empty)._from_sequence(empty, dtype=empty.dtype)
        tm.assert_extension_array_equal(roundtripped, empty)

    def test_array_from_scalars(self, data):
        scalars = [data[i] for i in range(3)]
        built = data._from_sequence(scalars, dtype=data.dtype)
        assert isinstance(built, type(data))

    def test_series_constructor(self, data):
        ser = pd.Series(data, copy=False)
        assert ser.dtype == data.dtype
        assert len(ser) == len(data)
        if hasattr(ser._mgr, "blocks"):
            assert isinstance(ser._mgr.blocks[0], EABackedBlock)
        assert ser._mgr.array is data

        # Series[EA] is unboxed / boxed correctly
        rewrapped = pd.Series(ser)
        assert rewrapped.dtype == data.dtype
        if hasattr(ser._mgr, "blocks"):
            assert isinstance(rewrapped._mgr.blocks[0], EABackedBlock)

    def test_series_constructor_no_data_with_index(self, dtype, na_value):
        index = [1, 2, 3]
        result = pd.Series(index=index, dtype=dtype)
        expected = pd.Series([na_value] * 3, index=index, dtype=dtype)
        tm.assert_series_equal(result, expected)

        # GH 33559 - empty index
        result = pd.Series(index=[], dtype=dtype)
        empty_index = pd.Index([], dtype="object")
        expected = pd.Series([], index=empty_index, dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
        index = [1, 2, 3]
        result = pd.Series(na_value, index=index, dtype=dtype)
        expected = pd.Series([na_value] * 3, index=index, dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_series_constructor_scalar_with_index(self, data, dtype):
        scalar = data[0]

        index = [1, 2, 3]
        result = pd.Series(scalar, index=index, dtype=dtype)
        expected = pd.Series([scalar] * 3, index=index, dtype=dtype)
        tm.assert_series_equal(result, expected)

        index = ["foo"]
        result = pd.Series(scalar, index=index, dtype=dtype)
        expected = pd.Series([scalar], index=index, dtype=dtype)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("from_series", [True, False])
    def test_dataframe_constructor_from_dict(self, data, from_series):
        if from_series:
            data = pd.Series(data)
        frame = pd.DataFrame({"A": data})
        assert frame.dtypes["A"] == data.dtype
        assert frame.shape == (len(data), 1)
        mgr = frame._mgr
        if hasattr(mgr, "blocks"):
            assert isinstance(mgr.blocks[0], EABackedBlock)
        assert isinstance(mgr.arrays[0], ExtensionArray)

    def test_dataframe_from_series(self, data):
        frame = pd.DataFrame(pd.Series(data))
        assert frame.dtypes[0] == data.dtype
        assert frame.shape == (len(data), 1)
        mgr = frame._mgr
        if hasattr(mgr, "blocks"):
            assert isinstance(mgr.blocks[0], EABackedBlock)
        assert isinstance(mgr.arrays[0], ExtensionArray)

    def test_series_given_mismatched_index_raises(self, data):
        msg = r"Length of values \(3\) does not match length of index \(5\)"
        too_long_index = [0, 1, 2, 3, 4]
        with pytest.raises(ValueError, match=msg):
            pd.Series(data[:3], index=too_long_index)

    def test_from_dtype(self, data):
        # construct from our dtype & string dtype
        dtype = data.dtype

        expected = pd.Series(data)
        result = pd.Series(list(data), dtype=dtype)
        tm.assert_series_equal(result, expected)

        result = pd.Series(list(data), dtype=str(dtype))
        tm.assert_series_equal(result, expected)

        # gh-30280

        expected = pd.DataFrame(data).astype(dtype)
        result = pd.DataFrame(list(data), dtype=dtype)
        tm.assert_frame_equal(result, expected)

        result = pd.DataFrame(list(data), dtype=str(dtype))
        tm.assert_frame_equal(result, expected)

    def test_pandas_array(self, data):
        # pd.array(extension_array) should be idempotent...
        tm.assert_extension_array_equal(pd.array(data), data)

    def test_pandas_array_dtype(self, data):
        # ... but specifying dtype will override idempotency
        result = pd.array(data, dtype=np.dtype(object))
        as_objects = np.asarray(data, dtype=object)
        expected = pd.arrays.NumpyExtensionArray(as_objects)
        tm.assert_equal(result, expected)

    def test_construct_empty_dataframe(self, dtype):
        # GH 33623
        result = pd.DataFrame(columns=["a"], dtype=dtype)
        empty_column = pd.array([], dtype=dtype)
        expected = pd.DataFrame({"a": empty_column}, index=pd.RangeIndex(0))
        tm.assert_frame_equal(result, expected)

    def test_empty(self, dtype):
        cls = dtype.construct_array_type()

        def check(arr):
            assert isinstance(arr, cls)
            assert arr.dtype == dtype
            assert arr.shape == (4,)

        check(cls._empty((4,), dtype=dtype))

        # GH#19600 method on ExtensionDtype
        check(dtype.empty((4,)))
        check(dtype.empty(4))
|
345
lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py
Normal file
345
lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py
Normal file
@ -0,0 +1,345 @@
|
||||
"""
|
||||
Tests for 2D compatibility.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.missing import is_matching_na
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_bool_dtype,
|
||||
is_integer_dtype,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
|
||||
|
||||
|
||||
class Dim2CompatTests:
    """Tests of 2D behaviour for extension arrays that support reshaping."""

    # Note: these are ONLY for ExtensionArray subclasses that support 2D arrays.
    #  i.e. not for pyarrow-backed EAs.

    @pytest.fixture(autouse=True)
    def skip_if_doesnt_support_2d(self, dtype, request):
        """Skip the tests defined on this class when the dtype is not 2D-capable."""
        if not dtype._supports_2d:
            node = request.node
            # In cases where we are mixed in to ExtensionTests, we only want to
            #  skip tests that are defined in Dim2CompatTests
            test_func = node._obj
            if test_func.__qualname__.startswith("Dim2CompatTests"):
                # TODO: is there a less hacky way of checking this?
                pytest.skip(f"{dtype} does not support 2D.")

    def test_transpose(self, data):
        """``.T`` reverses the shape of a non-square 2D array."""
        arr2d = data.repeat(2).reshape(-1, 2)
        shape = arr2d.shape
        assert shape[0] != shape[-1]  # otherwise the rest of the test is useless

        assert arr2d.T.shape == shape[::-1]

    def test_frame_from_2d_array(self, data):
        """A DataFrame built from a 2D array matches one built column by column."""
        arr2d = data.repeat(2).reshape(-1, 2)

        df = pd.DataFrame(arr2d)
        expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]})
        tm.assert_frame_equal(df, expected)

    def test_swapaxes(self, data):
        """``swapaxes(0, 1)`` equals the transpose."""
        arr2d = data.repeat(2).reshape(-1, 2)

        result = arr2d.swapaxes(0, 1)
        expected = arr2d.T
        tm.assert_extension_array_equal(result, expected)

    def test_delete_2d(self, data):
        """``delete`` removes a row (axis=0) or a column (axis=1)."""
        arr2d = data.repeat(3).reshape(-1, 3)

        # axis = 0
        result = arr2d.delete(1, axis=0)
        expected = data.delete(1).repeat(3).reshape(-1, 3)
        tm.assert_extension_array_equal(result, expected)

        # axis = 1
        result = arr2d.delete(1, axis=1)
        expected = data.repeat(2).reshape(-1, 2)
        tm.assert_extension_array_equal(result, expected)

    def test_take_2d(self, data):
        """``take`` along axis 0 agrees with the 1D ``take`` reshaped."""
        arr2d = data.reshape(-1, 1)

        result = arr2d.take([0, 0, -1], axis=0)

        expected = data.take([0, 0, -1]).reshape(-1, 1)
        tm.assert_extension_array_equal(result, expected)

    def test_repr_2d(self, data):
        """The class name appears exactly once in the repr of a 2D array."""
        # this could fail in a corner case where an element contained the name
        res = repr(data.reshape(1, -1))
        assert res.count(f"<{type(data).__name__}") == 1

        res = repr(data.reshape(-1, 1))
        assert res.count(f"<{type(data).__name__}") == 1

    def test_reshape(self, data):
        """``reshape`` accepts positional or tuple shapes and rejects bad sizes."""
        arr2d = data.reshape(-1, 1)
        assert arr2d.shape == (data.size, 1)
        assert len(arr2d) == len(data)

        arr2d = data.reshape((-1, 1))
        assert arr2d.shape == (data.size, 1)
        assert len(arr2d) == len(data)

        # a shape holding twice as many elements must be rejected
        with pytest.raises(ValueError):
            data.reshape((data.size, 2))
        with pytest.raises(ValueError):
            data.reshape(data.size, 2)

    def test_getitem_2d(self, data):
        """Row, slice and column indexing on a single-row 2D array."""
        arr2d = data.reshape(1, -1)

        result = arr2d[0]
        tm.assert_extension_array_equal(result, data)

        # only one row exists, so rows 1 and -2 are out of bounds
        with pytest.raises(IndexError):
            arr2d[1]

        with pytest.raises(IndexError):
            arr2d[-2]

        result = arr2d[:]
        tm.assert_extension_array_equal(result, arr2d)

        result = arr2d[:, :]
        tm.assert_extension_array_equal(result, arr2d)

        result = arr2d[:, 0]
        expected = data[[0]]
        tm.assert_extension_array_equal(result, expected)

        # dimension-expanding getitem on 1D
        result = data[:, np.newaxis]
        tm.assert_extension_array_equal(result, arr2d.T)

    def test_iter_2d(self, data):
        """Iterating a 2D array yields 1D arrays of the same type and dtype."""
        arr2d = data.reshape(1, -1)

        objs = list(iter(arr2d))
        assert len(objs) == arr2d.shape[0]

        for obj in objs:
            assert isinstance(obj, type(data))
            assert obj.dtype == data.dtype
            assert obj.ndim == 1
            assert len(obj) == arr2d.shape[1]

    def test_tolist_2d(self, data):
        """``tolist`` on a 2D array returns a list of lists."""
        arr2d = data.reshape(1, -1)

        result = arr2d.tolist()
        expected = [data.tolist()]

        assert isinstance(result, list)
        assert all(isinstance(x, list) for x in result)

        assert result == expected

    def test_concat_2d(self, data):
        """``_concat_same_type`` works along axis 0 and 1 and rejects axis 2."""
        left = type(data)._concat_same_type([data, data]).reshape(-1, 2)
        right = left.copy()

        # axis=0
        result = left._concat_same_type([left, right], axis=0)
        expected = data._concat_same_type([data] * 4).reshape(-1, 2)
        tm.assert_extension_array_equal(result, expected)

        # axis=1
        result = left._concat_same_type([left, right], axis=1)
        assert result.shape == (len(data), 4)
        tm.assert_extension_array_equal(result[:, :2], left)
        tm.assert_extension_array_equal(result[:, 2:], right)

        # axis > 1 -> invalid
        msg = "axis 2 is out of bounds for array of dimension 2"
        with pytest.raises(ValueError, match=msg):
            left._concat_same_type([left, right], axis=2)

    @pytest.mark.parametrize("method", ["backfill", "pad"])
    def test_fillna_2d_method(self, data_missing, method):
        """2D ``_pad_or_backfill`` matches the 1D result repeated per column."""
        # pad_or_backfill is always along axis=0
        arr = data_missing.repeat(2).reshape(2, 2)
        assert arr[0].isna().all()
        assert not arr[1].isna().any()

        result = arr._pad_or_backfill(method=method, limit=None)

        expected = data_missing._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
        tm.assert_extension_array_equal(result, expected)

        # Reverse so that backfill is not a no-op.
        arr2 = arr[::-1]
        assert not arr2[0].isna().any()
        assert arr2[1].isna().all()

        result2 = arr2._pad_or_backfill(method=method, limit=None)

        expected2 = (
            data_missing[::-1]._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
        )
        tm.assert_extension_array_equal(result2, expected2)

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    def test_reductions_2d_axis_none(self, data, method):
        """A 2D reduction with ``axis=None`` matches the 1D reduction (or its error)."""
        arr2d = data.reshape(1, -1)

        err_expected = None
        err_result = None
        try:
            expected = getattr(data, method)()
        except Exception as err:
            # if the 1D reduction is invalid, the 2D reduction should be as well
            err_expected = err
            try:
                result = getattr(arr2d, method)(axis=None)
            except Exception as err2:
                err_result = err2

        else:
            result = getattr(arr2d, method)(axis=None)

        if err_result is not None or err_expected is not None:
            # at least one side raised: both must raise the same exception type
            assert type(err_result) == type(err_expected)
            return

        assert is_matching_na(result, expected) or result == expected

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    @pytest.mark.parametrize("min_count", [0, 1])
    def test_reductions_2d_axis0(self, data, method, min_count):
        """Reducing a single-row 2D array along axis 0 gives per-column results."""
        if min_count == 1 and method not in ["sum", "prod"]:
            pytest.skip(f"min_count not relevant for {method}")

        arr2d = data.reshape(1, -1)

        kwargs = {}
        if method in ["std", "var"]:
            # pass ddof=0 so we get all-zero std instead of all-NA std
            kwargs["ddof"] = 0
        elif method in ["prod", "sum"]:
            kwargs["min_count"] = min_count

        try:
            result = getattr(arr2d, method)(axis=0, **kwargs)
        except Exception as err:
            # the 2D reduction failed: the 1D one must fail with the same type
            try:
                getattr(data, method)()
            except Exception as err2:
                assert type(err) == type(err2)
                return
            else:
                raise AssertionError("Both reductions should raise or neither")

        def get_reduction_result_dtype(dtype):
            # windows and 32bit builds will in some cases have int32/uint32
            #  where other builds will have int64/uint64.
            if dtype.itemsize == 8:
                return dtype
            elif dtype.kind in "ib":
                return NUMPY_INT_TO_DTYPE[np.dtype(int)]
            else:
                # i.e. dtype.kind == "u"
                return NUMPY_INT_TO_DTYPE[np.dtype("uint")]

        if method in ["sum", "prod"]:
            # std and var are not dtype-preserving
            expected = data
            if data.dtype.kind in "iub":
                dtype = get_reduction_result_dtype(data.dtype)
                expected = data.astype(dtype)
                assert dtype == expected.dtype

            if min_count == 0:
                # with min_count=0 missing entries are expected to come back
                # as 1 for prod and 0 for sum
                fill_value = 1 if method == "prod" else 0
                expected = expected.fillna(fill_value)

            tm.assert_extension_array_equal(result, expected)
        elif method == "median":
            # reducing over the single row is expected to return the values unchanged
            expected = data
            tm.assert_extension_array_equal(result, expected)
        elif method in ["mean", "std", "var"]:
            if is_integer_dtype(data) or is_bool_dtype(data):
                data = data.astype("Float64")
            if method == "mean":
                tm.assert_extension_array_equal(result, data)
            else:
                # ddof=0 over a single row: expected spread is zero (data - data)
                tm.assert_extension_array_equal(result, data - data)

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    def test_reductions_2d_axis1(self, data, method):
        """Reducing a single-row 2D array along axis 1 matches the 1D scalar."""
        arr2d = data.reshape(1, -1)

        try:
            result = getattr(arr2d, method)(axis=1)
        except Exception as err:
            # the 2D reduction failed: the 1D one must fail with the same type
            try:
                getattr(data, method)()
            except Exception as err2:
                assert type(err) == type(err2)
                return
            else:
                raise AssertionError("Both reductions should raise or neither")

        # not necessarily type/dtype-preserving, so weaker assertions
        assert result.shape == (1,)
        expected_scalar = getattr(data, method)()
        res = result[0]
        assert is_matching_na(res, expected_scalar) or res == expected_scalar
|
||||
|
||||
|
||||
class NDArrayBacked2DTests(Dim2CompatTests):
    """More specific tests for NDArrayBackedExtensionArray subclasses."""

    def test_copy_order(self, data):
        # We should be matching numpy semantics for the "order" keyword in 'copy'
        arr2d = data.repeat(2).reshape(-1, 2)
        assert arr2d._ndarray.flags["C_CONTIGUOUS"]

        # default order
        assert arr2d.copy()._ndarray.flags["C_CONTIGUOUS"]
        assert arr2d[::2, ::2].copy()._ndarray.flags["C_CONTIGUOUS"]

        # explicit Fortran order
        flags = arr2d.copy("F")._ndarray.flags
        assert not flags["C_CONTIGUOUS"]
        assert flags["F_CONTIGUOUS"]

        # "K" keeps the layout of the source
        assert arr2d.copy("K")._ndarray.flags["C_CONTIGUOUS"]

        flags = arr2d.T.copy("K")._ndarray.flags
        assert not flags["C_CONTIGUOUS"]
        assert flags["F_CONTIGUOUS"]

        # order not accepted by numpy
        msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)"
        with pytest.raises(ValueError, match=msg):
            arr2d.copy("Q")

        # neither contiguity
        arr_nc = arr2d[::2]
        source_flags = arr_nc._ndarray.flags
        assert not source_flags["C_CONTIGUOUS"]
        assert not source_flags["F_CONTIGUOUS"]

        flags = arr_nc.copy()._ndarray.flags
        assert flags["C_CONTIGUOUS"]
        assert not flags["F_CONTIGUOUS"]

        flags = arr_nc.copy("C")._ndarray.flags
        assert flags["C_CONTIGUOUS"]
        assert not flags["F_CONTIGUOUS"]

        flags = arr_nc.copy("F")._ndarray.flags
        assert not flags["C_CONTIGUOUS"]
        assert flags["F_CONTIGUOUS"]

        flags = arr_nc.copy("K")._ndarray.flags
        assert flags["C_CONTIGUOUS"]
        assert not flags["F_CONTIGUOUS"]
|
@ -0,0 +1,123 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import (
|
||||
infer_dtype,
|
||||
is_object_dtype,
|
||||
is_string_dtype,
|
||||
)
|
||||
|
||||
|
||||
class BaseDtypeTests:
    """Base class for ExtensionDtype classes"""

    def test_name(self, dtype):
        """``dtype.name`` is a ``str``."""
        name = dtype.name
        assert isinstance(name, str)

    def test_kind(self, dtype):
        """``dtype.kind`` is one of the accepted single-character codes."""
        assert dtype.kind in frozenset("biufcmMOSUV")

    def test_is_dtype_from_name(self, dtype):
        """``is_dtype`` called on the class accepts the dtype's own name."""
        dtype_cls = type(dtype)
        assert dtype_cls.is_dtype(dtype.name) is True

    def test_is_dtype_unboxes_dtype(self, data, dtype):
        """``is_dtype`` accepts an array carrying this dtype."""
        unboxed = dtype.is_dtype(data)
        assert unboxed is True

    def test_is_dtype_from_self(self, dtype):
        """``is_dtype`` called on the class accepts a dtype instance."""
        dtype_cls = type(dtype)
        assert dtype_cls.is_dtype(dtype) is True

    def test_is_dtype_other_input(self, dtype):
        """``is_dtype`` rejects a plain list of ints."""
        plain_list = [1, 2, 3]
        assert dtype.is_dtype(plain_list) is False

    def test_is_not_string_type(self, dtype):
        """``is_string_dtype`` is falsy for the dtype."""
        stringy = is_string_dtype(dtype)
        assert not stringy

    def test_is_not_object_type(self, dtype):
        """``is_object_dtype`` is falsy for the dtype."""
        objecty = is_object_dtype(dtype)
        assert not objecty

    def test_eq_with_str(self, dtype):
        """The dtype equals its name and differs from a suffixed name."""
        own_name = dtype.name
        assert dtype == own_name
        assert dtype != own_name + "-suffix"

    def test_eq_with_numpy_object(self, dtype):
        """The dtype is not equal to numpy's object dtype."""
        numpy_object = np.dtype("object")
        assert dtype != numpy_object

    def test_eq_with_self(self, dtype):
        """The dtype equals itself and differs from an arbitrary object."""
        assert dtype == dtype
        assert dtype != object()

    def test_array_type(self, data, dtype):
        """``construct_array_type`` returns exactly the class of ``data``."""
        array_cls = dtype.construct_array_type()
        assert array_cls is type(data)

    def test_check_dtype(self, data):
        """Comparing ``DataFrame.dtypes`` with ``str(dtype)`` flags EA columns only."""
        dtype = data.dtype
        dtype_str = str(dtype)

        # check equivalency for using .dtypes
        frame = pd.DataFrame(
            {
                "A": pd.Series(data, dtype=dtype),
                "B": data,
                "C": pd.Series(["foo"] * len(data), dtype=object),
                "D": 1,
            }
        )
        expected = pd.Series([True, True, False, False], index=list("ABCD"))

        result = frame.dtypes == dtype_str
        assert np.dtype("int64") != "Int64"
        tm.assert_series_equal(result, expected)

        # same comparison after stringifying every column dtype
        result = frame.dtypes.apply(str) == dtype_str
        tm.assert_series_equal(result, expected)

    def test_hashable(self, dtype):
        """Hashing the dtype does not raise."""
        hash(dtype)  # no error

    def test_str(self, dtype):
        """``str(dtype)`` equals ``dtype.name``."""
        rendered = str(dtype)
        assert rendered == dtype.name

    def test_eq(self, dtype):
        """The dtype equals its name and differs from an unrelated string."""
        assert dtype == dtype.name
        assert dtype != "anonther_type"

    def test_construct_from_string_own_name(self, dtype):
        """``construct_from_string`` round-trips the name via instance and class."""
        dtype_cls = type(dtype)
        # the second entry checks it is OK as a classmethod
        for constructor in (
            dtype.construct_from_string,
            dtype_cls.construct_from_string,
        ):
            built = constructor(dtype.name)
            assert type(built) is dtype_cls

    def test_construct_from_string_another_type_raises(self, dtype):
        """An unknown name raises ``TypeError`` naming the dtype class."""
        dtype_cls = type(dtype)
        msg = f"Cannot construct a '{dtype_cls.__name__}' from 'another_type'"
        with pytest.raises(TypeError, match=msg):
            dtype_cls.construct_from_string("another_type")

    def test_construct_from_string_wrong_type_raises(self, dtype):
        """A non-string argument raises ``TypeError``."""
        msg = "'construct_from_string' expects a string, got <class 'int'>"
        with pytest.raises(TypeError, match=msg):
            type(dtype).construct_from_string(0)

    def test_get_common_dtype(self, dtype):
        """The common dtype of a one-element list is that dtype."""
        # in practice we will not typically call this with a 1-length list
        # (we shortcut to just use that dtype as the common dtype), but
        # still testing as good practice to have this working (and it is the
        # only case we can test in general)
        common = dtype._get_common_dtype([dtype])
        assert common == dtype

    @pytest.mark.parametrize("skipna", [True, False])
    def test_infer_dtype(self, data, data_missing, skipna):
        """``infer_dtype`` returns a string for both fixtures."""
        # only testing that this works without raising an error
        for values in (data, data_missing):
            inferred = infer_dtype(values, skipna=skipna)
            assert isinstance(inferred, str)
|
@ -0,0 +1,469 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class BaseGetitemTests:
    """Tests for ExtensionArray.__getitem__."""

    def test_iloc_series(self, data):
        """``Series.iloc`` with a slice and with a list both match ``data[:4]``."""
        ser = pd.Series(data)
        result = ser.iloc[:4]
        expected = pd.Series(data[:4])
        tm.assert_series_equal(result, expected)

        result = ser.iloc[[0, 1, 2, 3]]
        tm.assert_series_equal(result, expected)

    def test_iloc_frame(self, data):
        """``DataFrame.iloc`` with slice/sequence rows and list/scalar columns."""
        df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.iloc[:4, [0]]
        tm.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.iloc[[0, 1, 2, 3], [0]]
        tm.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name="A")

        # slice -> series
        result = df.iloc[:4, 0]
        tm.assert_series_equal(result, expected)

        # sequence -> series
        # (a list row indexer here; previously this repeated the slice case)
        result = df.iloc[[0, 1, 2, 3], 0]
        tm.assert_series_equal(result, expected)

        # GH#32959 slice columns with step
        result = df.iloc[:, ::2]
        tm.assert_frame_equal(result, df[["A"]])
        result = df[["B", "A"]].iloc[:, ::2]
        tm.assert_frame_equal(result, df[["B"]])

    def test_iloc_frame_single_block(self, data):
        """Column slices of a single-column frame via ``iloc``."""
        # GH#32959 null slice along index, slice along columns with single-block
        df = pd.DataFrame({"A": data})

        result = df.iloc[:, :]
        tm.assert_frame_equal(result, df)

        result = df.iloc[:, :1]
        tm.assert_frame_equal(result, df)

        result = df.iloc[:, :2]
        tm.assert_frame_equal(result, df)

        result = df.iloc[:, ::2]
        tm.assert_frame_equal(result, df)

        # a slice starting past the only column selects no columns
        result = df.iloc[:, 1:2]
        tm.assert_frame_equal(result, df.iloc[:, :0])

        result = df.iloc[:, -1:]
        tm.assert_frame_equal(result, df)

    def test_loc_series(self, data):
        """``Series.loc`` with a label slice and a label list match ``data[:4]``."""
        ser = pd.Series(data)
        # label slices include the end label, hence :3 <-> data[:4]
        result = ser.loc[:3]
        expected = pd.Series(data[:4])
        tm.assert_series_equal(result, expected)

        result = ser.loc[[0, 1, 2, 3]]
        tm.assert_series_equal(result, expected)

    def test_loc_frame(self, data):
        """``DataFrame.loc`` with slice/sequence rows and list/scalar columns."""
        df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")})
        expected = pd.DataFrame({"A": data[:4]})

        # slice -> frame
        result = df.loc[:3, ["A"]]
        tm.assert_frame_equal(result, expected)

        # sequence -> frame
        result = df.loc[[0, 1, 2, 3], ["A"]]
        tm.assert_frame_equal(result, expected)

        expected = pd.Series(data[:4], name="A")

        # slice -> series
        result = df.loc[:3, "A"]
        tm.assert_series_equal(result, expected)

        # sequence -> series
        # (a list row indexer here; previously this repeated the slice case)
        result = df.loc[[0, 1, 2, 3], "A"]
        tm.assert_series_equal(result, expected)

    def test_loc_iloc_frame_single_dtype(self, data):
        """Selecting one row of a single-column frame returns a Series."""
        # GH#27110 bug in ExtensionBlock.iget caused df.iloc[n] to incorrectly
        # return a scalar
        df = pd.DataFrame({"A": data})
        expected = pd.Series([data[2]], index=["A"], name=2, dtype=data.dtype)

        result = df.loc[2]
        tm.assert_series_equal(result, expected)

        expected = pd.Series(
            [data[-1]], index=["A"], name=len(data) - 1, dtype=data.dtype
        )
        result = df.iloc[-1]
        tm.assert_series_equal(result, expected)

    def test_getitem_scalar(self, data):
        """Integer indexing returns an instance of ``data.dtype.type``."""
        result = data[0]
        assert isinstance(result, data.dtype.type)

        result = pd.Series(data)[0]
        assert isinstance(result, data.dtype.type)

    def test_getitem_invalid(self, data):
        """Invalid keys and out-of-range integers raise ``IndexError``."""
        # TODO: box over scalar, [scalar], (scalar,)?

        msg = (
            r"only integers, slices \(`:`\), ellipsis \(`...`\), numpy.newaxis "
            r"\(`None`\) and integer or boolean arrays are valid indices"
        )
        with pytest.raises(IndexError, match=msg):
            data["foo"]
        with pytest.raises(IndexError, match=msg):
            data[2.5]

        ub = len(data)
        # alternatives cover the messages of the different array implementations
        msg = "|".join(
            [
                "list index out of range",  # json
                "index out of bounds",  # pyarrow
                "Out of bounds access",  # Sparse
                f"loc must be an integer between -{ub} and {ub}",  # Sparse
                f"index {ub+1} is out of bounds for axis 0 with size {ub}",
                f"index -{ub+1} is out of bounds for axis 0 with size {ub}",
            ]
        )
        with pytest.raises(IndexError, match=msg):
            data[ub + 1]
        with pytest.raises(IndexError, match=msg):
            data[-ub - 1]

    def test_getitem_scalar_na(self, data_missing, na_cmp, na_value):
        """The first element of ``data_missing`` compares as NA via ``na_cmp``."""
        result = data_missing[0]
        assert na_cmp(result, na_value)

    def test_getitem_empty(self, data):
        """Indexing with an empty list gives an empty array of the same type."""
        # Indexing with empty list
        result = data[[]]
        assert len(result) == 0
        assert isinstance(result, type(data))

        expected = data[np.array([], dtype="int64")]
        tm.assert_extension_array_equal(result, expected)

    def test_getitem_mask(self, data):
        """Boolean numpy masks select from the raw array and from a Series."""
        # Empty mask, raw array
        mask = np.zeros(len(data), dtype=bool)
        result = data[mask]
        assert len(result) == 0
        assert isinstance(result, type(data))

        # Empty mask, in series
        mask = np.zeros(len(data), dtype=bool)
        result = pd.Series(data)[mask]
        assert len(result) == 0
        assert result.dtype == data.dtype

        # non-empty mask, raw array
        mask[0] = True
        result = data[mask]
        assert len(result) == 1
        assert isinstance(result, type(data))

        # non-empty mask, in series
        result = pd.Series(data)[mask]
        assert len(result) == 1
        assert result.dtype == data.dtype

    def test_getitem_mask_raises(self, data):
        """A boolean mask of the wrong length raises ``IndexError``."""
        mask = np.array([True, False])
        msg = f"Boolean index has wrong length: 2 instead of {len(data)}"
        with pytest.raises(IndexError, match=msg):
            data[mask]

        # same check with a pandas "boolean" extension array as the mask
        mask = pd.array(mask, dtype="boolean")
        with pytest.raises(IndexError, match=msg):
            data[mask]

    def test_getitem_boolean_array_mask(self, data):
        """A pandas "boolean" array works as a mask on the array and a Series."""
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        result = data[mask]
        assert len(result) == 0
        assert isinstance(result, type(data))

        result = pd.Series(data)[mask]
        assert len(result) == 0
        assert result.dtype == data.dtype

        mask[:5] = True
        expected = data.take([0, 1, 2, 3, 4])
        result = data[mask]
        tm.assert_extension_array_equal(result, expected)

        expected = pd.Series(expected)
        result = pd.Series(data)[mask]
        tm.assert_series_equal(result, expected)

    def test_getitem_boolean_na_treated_as_false(self, data):
        """NA entries in a "boolean" mask select the same as ``False``."""
        # https://github.com/pandas-dev/pandas/issues/31503
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        mask[:2] = pd.NA
        mask[2:4] = True

        result = data[mask]
        expected = data[mask.fillna(False)]

        tm.assert_extension_array_equal(result, expected)

        s = pd.Series(data)

        result = s[mask]
        expected = s[mask.fillna(False)]

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_getitem_integer_array(self, data, idx):
        """Integer list-likes index like ``take([0, 1, 2])``."""
        result = data[idx]
        assert len(result) == 3
        assert isinstance(result, type(data))
        expected = data.take([0, 1, 2])
        tm.assert_extension_array_equal(result, expected)

        expected = pd.Series(expected)
        result = pd.Series(data)[idx]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")],
        ids=["list", "integer-array"],
    )
    def test_getitem_integer_with_missing_raises(self, data, idx):
        """An integer indexer containing NA raises ``ValueError``."""
        msg = "Cannot index with an integer indexer containing NA values"
        with pytest.raises(ValueError, match=msg):
            data[idx]

    @pytest.mark.xfail(
        reason="Tries label-based and raises KeyError; "
        "in some cases raises when calling np.asarray"
    )
    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")],
        ids=["list", "integer-array"],
    )
    def test_getitem_series_integer_with_missing_raises(self, data, idx):
        """Series counterpart of the NA-indexer check (currently xfailed)."""
        msg = "Cannot index with an integer indexer containing NA values"
        # TODO: this raises KeyError about labels not found (it tries label-based)

        ser = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
        with pytest.raises(ValueError, match=msg):
            ser[idx]

    def test_getitem_slice(self, data):
        """Slicing returns an array of the same type, even for 0 or 1 elements."""
        # getitem[slice] should return an array
        result = data[slice(0)]  # empty
        assert isinstance(result, type(data))

        result = data[slice(1)]  # scalar
        assert isinstance(result, type(data))

    def test_getitem_ellipsis_and_slice(self, data):
        """An Ellipsis combined with a slice behaves like the slice alone."""
        # GH#40353 this is called from slice_block_rows
        result = data[..., :]
        tm.assert_extension_array_equal(result, data)

        result = data[:, ...]
        tm.assert_extension_array_equal(result, data)

        result = data[..., :3]
        tm.assert_extension_array_equal(result, data[:3])

        result = data[:3, ...]
        tm.assert_extension_array_equal(result, data[:3])

        result = data[..., ::2]
        tm.assert_extension_array_equal(result, data[::2])

        result = data[::2, ...]
        tm.assert_extension_array_equal(result, data[::2])

    def test_get(self, data):
        """``Series.get`` with labels, lists, slices and missing keys."""
        # GH 20882
        s = pd.Series(data, index=[2 * i for i in range(len(data))])
        assert s.get(4) == s.iloc[2]

        result = s.get([4, 6])
        expected = s.iloc[[2, 3]]
        tm.assert_series_equal(result, expected)

        result = s.get(slice(2))
        expected = s.iloc[[0, 1]]
        tm.assert_series_equal(result, expected)

        # labels absent from the index give None rather than raising
        assert s.get(-1) is None
        assert s.get(s.index.max() + 1) is None

        s = pd.Series(data[:6], index=list("abcdef"))
        assert s.get("c") == s.iloc[2]

        result = s.get(slice("b", "d"))
        expected = s.iloc[[1, 2, 3]]
        tm.assert_series_equal(result, expected)

        result = s.get("Z")
        assert result is None

        # integer keys on a string index fall back to positions, with a warning
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert s.get(4) == s.iloc[4]
            assert s.get(-1) == s.iloc[-1]
            assert s.get(len(s)) is None

        # GH 21257
        s = pd.Series(data)
        with tm.assert_produces_warning(None):
            # GH#45324 make sure we aren't giving a spurious FutureWarning
            s2 = s[::2]
            assert s2.get(1) is None

    def test_take_sequence(self, data):
        """List indexing on a Series picks the matching elements of ``data``."""
        result = pd.Series(data)[[0, 1, 3]]
        assert result.iloc[0] == data[0]
        assert result.iloc[1] == data[1]
        assert result.iloc[2] == data[3]

    def test_take(self, data, na_value, na_cmp):
        """``take`` with and without ``allow_fill``, plus out-of-bounds error."""
        result = data.take([0, -1])
        assert result.dtype == data.dtype
        assert result[0] == data[0]
        assert result[1] == data[-1]

        # with allow_fill=True, -1 marks a position to fill instead of "last"
        result = data.take([0, -1], allow_fill=True, fill_value=na_value)
        assert result[0] == data[0]
        assert na_cmp(result[1], na_value)

        with pytest.raises(IndexError, match="out of bounds"):
            data.take([len(data) + 1])

    def test_take_empty(self, data, na_value, na_cmp):
        """``take`` on an empty array: filling works, real positions raise."""
        empty = data[:0]

        result = empty.take([-1], allow_fill=True)
        assert na_cmp(result[0], na_value)

        msg = "cannot do a non-empty take from an empty axes|out of bounds"

        with pytest.raises(IndexError, match=msg):
            empty.take([-1])

        with pytest.raises(IndexError, match="cannot do a non-empty take"):
            empty.take([0, 1])

    def test_take_negative(self, data):
        """Negative positions in ``take`` count from the end."""
        # https://github.com/pandas-dev/pandas/issues/20640
        n = len(data)
        result = data.take([0, -n, n - 1, -1])
        expected = data.take([0, 0, n - 1, n - 1])
        tm.assert_extension_array_equal(result, expected)

    def test_take_non_na_fill_value(self, data_missing):
        """``take`` with ``allow_fill`` honours a non-NA ``fill_value``."""
        fill_value = data_missing[1]  # valid
        na = data_missing[0]

        arr = data_missing._from_sequence(
            [na, fill_value, na], dtype=data_missing.dtype
        )
        result = arr.take([-1, 1], fill_value=fill_value, allow_fill=True)
        expected = arr.take([1, 1])
        tm.assert_extension_array_equal(result, expected)

    def test_take_pandas_style_negative_raises(self, data, na_value):
        """With ``allow_fill=True`` an index below -1 raises ``ValueError``."""
        # match="" accepts any message; only the exception type is checked
        with pytest.raises(ValueError, match=""):
            data.take([0, -2], fill_value=na_value, allow_fill=True)

    @pytest.mark.parametrize("allow_fill", [True, False])
    def test_take_out_of_bounds_raises(self, data, allow_fill):
        """An out-of-range position raises ``IndexError`` in both fill modes."""
        arr = data[:3]

        with pytest.raises(IndexError, match="out of bounds|out-of-bounds"):
            arr.take(np.asarray([0, 3]), allow_fill=allow_fill)

    def test_take_series(self, data):
        """``Series.take`` keeps the original index labels of the taken rows."""
        s = pd.Series(data)
        result = s.take([0, -1])
        expected = pd.Series(
            data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype),
            index=[0, len(data) - 1],
        )
        tm.assert_series_equal(result, expected)

    def test_reindex(self, data, na_value):
        """``Series.reindex`` fills labels absent from the index with ``na_value``."""
        s = pd.Series(data)
        result = s.reindex([0, 1, 3])
        expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3])
        tm.assert_series_equal(result, expected)

        n = len(data)
        # -1 and n are labels that do not exist in the default index
        result = s.reindex([-1, 0, n])
        expected = pd.Series(
            data._from_sequence([na_value, data[0], na_value], dtype=s.dtype),
            index=[-1, 0, n],
        )
        tm.assert_series_equal(result, expected)

        result = s.reindex([n, n + 1])
        expected = pd.Series(
            data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1]
        )
        tm.assert_series_equal(result, expected)

    def test_reindex_non_na_fill_value(self, data_missing):
        """``Series.reindex`` honours a non-NA ``fill_value`` for new labels."""
        valid = data_missing[1]
        na = data_missing[0]

        arr = data_missing._from_sequence([na, valid], dtype=data_missing.dtype)
        ser = pd.Series(arr)
        result = ser.reindex([0, 1, 2], fill_value=valid)
        expected = pd.Series(
            data_missing._from_sequence([na, valid, valid], dtype=data_missing.dtype)
        )

        tm.assert_series_equal(result, expected)

    def test_loc_len1(self, data):
        """A length-1 list indexer with ``loc`` yields 1-dimensional results."""
        # see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim
        df = pd.DataFrame({"A": data})
        res = df.loc[[0], "A"]
        assert res.ndim == 1
        assert res._mgr.arrays[0].ndim == 1
        # only checked when the manager exposes a ``blocks`` attribute
        if hasattr(res._mgr, "blocks"):
            assert res._mgr._block.ndim == 1

    def test_item(self, data):
        """``Series.item`` returns the scalar for size 1 and raises otherwise."""
        # https://github.com/pandas-dev/pandas/pull/30175
        s = pd.Series(data)
        result = s[:1].item()
        assert result == data[0]

        msg = "can only convert an array of size 1 to a Python scalar"
        with pytest.raises(ValueError, match=msg):
            s[:0].item()

        with pytest.raises(ValueError, match=msg):
            s.item()
|
@ -0,0 +1,174 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_bool_dtype,
|
||||
is_numeric_dtype,
|
||||
is_object_dtype,
|
||||
is_string_dtype,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(
    "ignore:The default of observed=False is deprecated:FutureWarning"
)
class BaseGroupbyTests:
    """Groupby-specific tests."""

    def test_grouping_grouper(self, data_for_grouping):
        """The grouping vector is the column's own values (object or EA)."""
        df = pd.DataFrame(
            {
                "A": pd.Series(
                    ["B", "B", None, None, "A", "A", "B", "C"], dtype=object
                ),
                "B": data_for_grouping,
            }
        )
        gr1 = df.groupby("A")._grouper.groupings[0]
        gr2 = df.groupby("B")._grouper.groupings[0]

        tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values)
        tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping)

    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        """Group by the EA column and take the mean of an int column."""
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})

        is_bool = data_for_grouping.dtype._is_boolean
        if is_bool:
            # only 2 unique values, and the final entry has c==b
            # (see data_for_grouping docstring)
            df = df.iloc[:-1]

        result = df.groupby("B", as_index=as_index).A.mean()
        _, uniques = pd.factorize(data_for_grouping, sort=True)

        exp_vals = [3.0, 1.0, 4.0]
        if is_bool:
            exp_vals = exp_vals[:-1]
        if as_index:
            index = pd.Index(uniques, name="B")
            expected = pd.Series(exp_vals, index=index, name="A")
            tm.assert_series_equal(result, expected)
        else:
            expected = pd.DataFrame({"B": uniques, "A": exp_vals})
            tm.assert_frame_equal(result, expected)

    def test_groupby_agg_extension(self, data_for_grouping):
        """Three spellings of "first" aggregation on an EA column agree."""
        # GH#38980 groupby agg on extension type fails for non-numeric types
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})

        # rows 0, 2, 4, 7 are the first occurrence of each value in "A"
        expected = df.iloc[[0, 2, 4, 7]]
        expected = expected.set_index("A")

        result = df.groupby("A").agg({"B": "first"})
        tm.assert_frame_equal(result, expected)

        result = df.groupby("A").agg("first")
        tm.assert_frame_equal(result, expected)

        result = df.groupby("A").first()
        tm.assert_frame_equal(result, expected)

    def test_groupby_extension_no_sort(self, data_for_grouping):
        """With ``sort=False`` the groups follow unsorted factorize order."""
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})

        is_bool = data_for_grouping.dtype._is_boolean
        if is_bool:
            # only 2 unique values, and the final entry has c==b
            # (see data_for_grouping docstring)
            df = df.iloc[:-1]

        result = df.groupby("B", sort=False).A.mean()
        _, index = pd.factorize(data_for_grouping, sort=False)

        index = pd.Index(index, name="B")
        exp_vals = [1.0, 3.0, 4.0]
        if is_bool:
            exp_vals = exp_vals[:-1]
        expected = pd.Series(exp_vals, index=index, name="A")
        tm.assert_series_equal(result, expected)

    def test_groupby_extension_transform(self, data_for_grouping):
        """``transform(len)`` broadcasts each group's size back to its rows."""
        # computed once; the original evaluated this identical expression twice
        is_bool = data_for_grouping.dtype._is_boolean

        valid = data_for_grouping[~data_for_grouping.isna()]
        df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid})
        if is_bool:
            # only 2 unique values, and the final entry has c==b
            # (see data_for_grouping docstring)
            df = df.iloc[:-1]

        result = df.groupby("B").A.transform(len)
        expected = pd.Series([3, 3, 2, 2, 3, 1], name="A")
        if is_bool:
            expected = expected[:-1]

        tm.assert_series_equal(result, expected)

    def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
        """``apply`` runs on frame and column groupbys keyed by either column."""
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
        # the frame-level apply calls are expected to warn; the column-level
        # ones are called outside the warning check
        msg = "DataFrameGroupBy.apply operated on the grouping columns"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            df.groupby("B", group_keys=False, observed=False).apply(groupby_apply_op)
        df.groupby("B", group_keys=False, observed=False).A.apply(groupby_apply_op)
        with tm.assert_produces_warning(FutureWarning, match=msg):
            df.groupby("A", group_keys=False, observed=False).apply(groupby_apply_op)
        df.groupby("A", group_keys=False, observed=False).B.apply(groupby_apply_op)

    def test_groupby_apply_identity(self, data_for_grouping):
        """Applying ``lambda x: x.array`` gives each group's array per key."""
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
        result = df.groupby("A").B.apply(lambda x: x.array)
        expected = pd.Series(
            [
                df.B.iloc[[0, 1, 6]].array,
                df.B.iloc[[2, 3]].array,
                df.B.iloc[[4, 5]].array,
                df.B.iloc[[7]].array,
            ],
            index=pd.Index([1, 2, 3, 4], name="A"),
            name="B",
        )
        tm.assert_series_equal(result, expected)

    def test_in_numeric_groupby(self, data_for_grouping):
        """Check which columns survive ``groupby("A").sum()`` for this dtype."""
        df = pd.DataFrame(
            {
                "A": [1, 1, 2, 2, 3, 3, 1, 4],
                "B": data_for_grouping,
                "C": [1, 1, 1, 1, 1, 1, 1, 1],
            }
        )

        dtype = data_for_grouping.dtype
        if (
            is_numeric_dtype(dtype)
            or is_bool_dtype(dtype)
            or dtype.name == "decimal"
            or is_string_dtype(dtype)
            or is_object_dtype(dtype)
            or dtype.kind == "m"  # in particular duration[*][pyarrow]
        ):
            expected = pd.Index(["B", "C"])
            result = df.groupby("A").sum().columns
        else:
            expected = pd.Index(["C"])

            msg = "|".join(
                [
                    # period/datetime
                    "does not support sum operations",
                    # all others
                    re.escape(f"agg function failed [how->sum,dtype->{dtype}"),
                ]
            )
            # a plain sum must raise; numeric_only=True drops the EA column
            with pytest.raises(TypeError, match=msg):
                df.groupby("A").sum()
            result = df.groupby("A").sum(numeric_only=True).columns
        tm.assert_index_equal(result, expected)
|
@ -0,0 +1,19 @@
|
||||
"""
|
||||
Tests for Indexes backed by arbitrary ExtensionArrays.
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class BaseIndexTests:
    """Tests for Index object backed by an ExtensionArray"""

    def test_index_from_array(self, data):
        """An Index built from the array has the array's dtype."""
        built = pd.Index(data)
        assert data.dtype == built.dtype

    def test_index_from_listlike_with_dtype(self, data):
        """An explicit ``dtype`` is honoured for the array and for a plain list."""
        wanted = data.dtype
        for source in (data, list(data)):
            built = pd.Index(source, dtype=wanted)
            assert built.dtype == wanted
|
@ -0,0 +1,172 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gt2
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class BaseInterfaceTests:
|
||||
"""Tests that the basic interface is satisfied."""
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# Interface
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
def test_len(self, data):
|
||||
assert len(data) == 100
|
||||
|
||||
def test_size(self, data):
|
||||
assert data.size == 100
|
||||
|
||||
def test_ndim(self, data):
|
||||
assert data.ndim == 1
|
||||
|
||||
def test_can_hold_na_valid(self, data):
|
||||
# GH-20761
|
||||
assert data._can_hold_na is True
|
||||
|
||||
def test_contains(self, data, data_missing):
|
||||
# GH-37867
|
||||
# Tests for membership checks. Membership checks for nan-likes is tricky and
|
||||
# the settled on rule is: `nan_like in arr` is True if nan_like is
|
||||
# arr.dtype.na_value and arr.isna().any() is True. Else the check returns False.
|
||||
|
||||
na_value = data.dtype.na_value
|
||||
# ensure data without missing values
|
||||
data = data[~data.isna()]
|
||||
|
||||
# first elements are non-missing
|
||||
assert data[0] in data
|
||||
assert data_missing[0] in data_missing
|
||||
|
||||
# check the presence of na_value
|
||||
assert na_value in data_missing
|
||||
assert na_value not in data
|
||||
|
||||
# the data can never contain other nan-likes than na_value
|
||||
for na_value_obj in tm.NULL_OBJECTS:
|
||||
if na_value_obj is na_value or type(na_value_obj) == type(na_value):
|
||||
# type check for e.g. two instances of Decimal("NAN")
|
||||
continue
|
||||
assert na_value_obj not in data
|
||||
assert na_value_obj not in data_missing
|
||||
|
||||
def test_memory_usage(self, data):
|
||||
s = pd.Series(data)
|
||||
result = s.memory_usage(index=False)
|
||||
assert result == s.nbytes
|
||||
|
||||
def test_array_interface(self, data):
|
||||
result = np.array(data)
|
||||
assert result[0] == data[0]
|
||||
|
||||
result = np.array(data, dtype=object)
|
||||
expected = np.array(list(data), dtype=object)
|
||||
if expected.ndim > 1:
|
||||
# nested data, explicitly construct as 1D
|
||||
expected = construct_1d_object_array_from_listlike(list(data))
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_array_interface_copy(self, data):
|
||||
result_copy1 = np.array(data, copy=True)
|
||||
result_copy2 = np.array(data, copy=True)
|
||||
assert not np.may_share_memory(result_copy1, result_copy2)
|
||||
|
||||
if not np_version_gt2:
|
||||
# copy=False semantics are only supported in NumPy>=2.
|
||||
return
|
||||
|
||||
warning_raised = False
|
||||
msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter("always")
|
||||
result_nocopy1 = np.array(data, copy=False)
|
||||
assert len(w) <= 1
|
||||
if len(w):
|
||||
warning_raised = True
|
||||
assert msg in str(w[0].message)
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter("always")
|
||||
result_nocopy2 = np.array(data, copy=False)
|
||||
assert len(w) <= 1
|
||||
if len(w):
|
||||
warning_raised = True
|
||||
assert msg in str(w[0].message)
|
||||
|
||||
if not warning_raised:
|
||||
# If copy=False was given and did not raise, these must share the same data
|
||||
assert np.may_share_memory(result_nocopy1, result_nocopy2)
|
||||
|
||||
def test_is_extension_array_dtype(self, data):
|
||||
assert is_extension_array_dtype(data)
|
||||
assert is_extension_array_dtype(data.dtype)
|
||||
assert is_extension_array_dtype(pd.Series(data))
|
||||
assert isinstance(data.dtype, ExtensionDtype)
|
||||
|
||||
def test_no_values_attribute(self, data):
|
||||
# GH-20735: EA's with .values attribute give problems with internal
|
||||
# code, disallowing this for now until solved
|
||||
assert not hasattr(data, "values")
|
||||
assert not hasattr(data, "_values")
|
||||
|
||||
def test_is_numeric_honored(self, data):
|
||||
result = pd.Series(data)
|
||||
if hasattr(result._mgr, "blocks"):
|
||||
assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric
|
||||
|
||||
def test_isna_extension_array(self, data_missing):
|
||||
# If your `isna` returns an ExtensionArray, you must also implement
|
||||
# _reduce. At the *very* least, you must implement any and all
|
||||
na = data_missing.isna()
|
||||
if is_extension_array_dtype(na):
|
||||
assert na._reduce("any")
|
||||
assert na.any()
|
||||
|
||||
assert not na._reduce("all")
|
||||
assert not na.all()
|
||||
|
||||
assert na.dtype._is_boolean
|
||||
|
||||
def test_copy(self, data):
|
||||
# GH#27083 removing deep keyword from EA.copy
|
||||
assert data[0] != data[1]
|
||||
result = data.copy()
|
||||
|
||||
if data.dtype._is_immutable:
|
||||
pytest.skip(f"test_copy assumes mutability and {data.dtype} is immutable")
|
||||
|
||||
data[1] = data[0]
|
||||
assert result[1] != result[0]
|
||||
|
||||
def test_view(self, data):
|
||||
# view with no dtype should return a shallow copy, *not* the same
|
||||
# object
|
||||
assert data[1] != data[0]
|
||||
|
||||
result = data.view()
|
||||
assert result is not data
|
||||
assert type(result) == type(data)
|
||||
|
||||
if data.dtype._is_immutable:
|
||||
pytest.skip(f"test_view assumes mutability and {data.dtype} is immutable")
|
||||
|
||||
result[1] = result[0]
|
||||
assert data[1] == data[0]
|
||||
|
||||
# check specifically that the `dtype` kwarg is accepted
|
||||
data.view(dtype=None)
|
||||
|
||||
def test_tolist(self, data):
|
||||
result = data.tolist()
|
||||
expected = list(data)
|
||||
assert isinstance(result, list)
|
||||
assert result == expected
|
@ -0,0 +1,39 @@
|
||||
from io import StringIO
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
|
||||
|
||||
class BaseParsingTests:
    """CSV round-trip test for extension arrays."""

    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data, request):
        """Write ``data`` to CSV and read it back with the dtype given by name."""
        if isinstance(data.dtype, pd.CategoricalDtype):
            # in parsers.pyx _convert_with_dtype there is special-casing for
            # Categorical that pre-empts _from_sequence_of_strings
            pass
        elif isinstance(data.dtype, pd.core.dtypes.dtypes.NumpyEADtype):
            # These get unwrapped internally so are treated as numpy dtypes
            # in the parsers.pyx code
            pass
        elif (
            type(data)._from_sequence_of_strings.__func__
            is ExtensionArray._from_sequence_of_strings.__func__
        ):
            # i.e. the EA hasn't overridden _from_sequence_of_strings
            mark = pytest.mark.xfail(
                reason="_from_sequence_of_strings not implemented",
                raises=NotImplementedError,
            )
            request.node.add_marker(mark)

        df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
        csv_output = df.to_csv(index=False, na_rep=np.nan)
        result = pd.read_csv(
            StringIO(csv_output), dtype={"with_dtype": str(data.dtype)}, engine=engine
        )
        expected = df
        tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,720 @@
|
||||
import inspect
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._typing import Dtype
|
||||
|
||||
from pandas.core.dtypes.common import is_bool_dtype
|
||||
from pandas.core.dtypes.dtypes import NumpyEADtype
|
||||
from pandas.core.dtypes.missing import na_value_for_dtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.sorting import nargsort
|
||||
|
||||
|
||||
class BaseMethodsTests:
|
||||
"""Various Series and DataFrame methods."""
|
||||
|
||||
def test_hash_pandas_object(self, data):
|
||||
# _hash_pandas_object should return a uint64 ndarray of the same length
|
||||
# as the data
|
||||
from pandas.core.util.hashing import _default_hash_key
|
||||
|
||||
res = data._hash_pandas_object(
|
||||
encoding="utf-8", hash_key=_default_hash_key, categorize=False
|
||||
)
|
||||
assert res.dtype == np.uint64
|
||||
assert res.shape == data.shape
|
||||
|
||||
def test_value_counts_default_dropna(self, data):
|
||||
# make sure we have consistent default dropna kwarg
|
||||
if not hasattr(data, "value_counts"):
|
||||
pytest.skip(f"value_counts is not implemented for {type(data)}")
|
||||
sig = inspect.signature(data.value_counts)
|
||||
kwarg = sig.parameters["dropna"]
|
||||
assert kwarg.default is True
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
def test_value_counts(self, all_data, dropna):
    """Compare Series.value_counts on the data with the same call on pre-filtered data."""
    all_data = all_data[:10]
    if dropna:
        other = all_data[~all_data.isna()]
    else:
        other = all_data

    result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
    expected = pd.Series(other).value_counts(dropna=dropna).sort_index()

    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_value_counts_with_normalize(self, data):
|
||||
# GH 33172
|
||||
data = data[:10].unique()
|
||||
values = np.array(data[~data.isna()])
|
||||
ser = pd.Series(data, dtype=data.dtype)
|
||||
|
||||
result = ser.value_counts(normalize=True).sort_index()
|
||||
|
||||
if not isinstance(data, pd.Categorical):
|
||||
expected = pd.Series(
|
||||
[1 / len(values)] * len(values), index=result.index, name="proportion"
|
||||
)
|
||||
else:
|
||||
expected = pd.Series(0.0, index=result.index, name="proportion")
|
||||
expected[result > 0] = 1 / len(values)
|
||||
|
||||
if isinstance(data.dtype, pd.StringDtype) and data.dtype.na_value is np.nan:
|
||||
# TODO: avoid special-casing
|
||||
expected = expected.astype("float64")
|
||||
elif getattr(data.dtype, "storage", "") == "pyarrow" or isinstance(
|
||||
data.dtype, pd.ArrowDtype
|
||||
):
|
||||
# TODO: avoid special-casing
|
||||
expected = expected.astype("double[pyarrow]")
|
||||
elif na_value_for_dtype(data.dtype) is pd.NA:
|
||||
# TODO(GH#44692): avoid special-casing
|
||||
expected = expected.astype("Float64")
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_count(self, data_missing):
|
||||
df = pd.DataFrame({"A": data_missing})
|
||||
result = df.count(axis="columns")
|
||||
expected = pd.Series([0, 1])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_series_count(self, data_missing):
|
||||
# GH#26835
|
||||
ser = pd.Series(data_missing)
|
||||
result = ser.count()
|
||||
expected = 1
|
||||
assert result == expected
|
||||
|
||||
def test_apply_simple_series(self, data):
|
||||
result = pd.Series(data).apply(id)
|
||||
assert isinstance(result, pd.Series)
|
||||
|
||||
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map(self, data_missing, na_action):
    """Check mapping the identity function gives the same values as ``to_numpy()``."""
    result = data_missing.map(lambda x: x, na_action=na_action)
    expected = data_missing.to_numpy()
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_argsort(self, data_for_sorting):
|
||||
result = pd.Series(data_for_sorting).argsort()
|
||||
# argsort result gets passed to take, so should be np.intp
|
||||
expected = pd.Series(np.array([2, 0, 1], dtype=np.intp))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_argsort_missing_array(self, data_missing_for_sorting):
|
||||
result = data_missing_for_sorting.argsort()
|
||||
# argsort result gets passed to take, so should be np.intp
|
||||
expected = np.array([2, 0, 1], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_argsort_missing(self, data_missing_for_sorting):
|
||||
msg = "The behavior of Series.argsort in the presence of NA values"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = pd.Series(data_missing_for_sorting).argsort()
|
||||
expected = pd.Series(np.array([1, -1, 0], dtype=np.intp))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value):
|
||||
# GH 24382
|
||||
is_bool = data_for_sorting.dtype._is_boolean
|
||||
|
||||
exp_argmax = 1
|
||||
exp_argmax_repeated = 3
|
||||
if is_bool:
|
||||
# See data_for_sorting docstring
|
||||
exp_argmax = 0
|
||||
exp_argmax_repeated = 1
|
||||
|
||||
# data_for_sorting -> [B, C, A] with A < B < C
|
||||
assert data_for_sorting.argmax() == exp_argmax
|
||||
assert data_for_sorting.argmin() == 2
|
||||
|
||||
# with repeated values -> first occurrence
|
||||
data = data_for_sorting.take([2, 0, 0, 1, 1, 2])
|
||||
assert data.argmax() == exp_argmax_repeated
|
||||
assert data.argmin() == 0
|
||||
|
||||
# with missing values
|
||||
# data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
|
||||
assert data_missing_for_sorting.argmax() == 0
|
||||
assert data_missing_for_sorting.argmin() == 2
|
||||
|
||||
@pytest.mark.parametrize("method", ["argmax", "argmin"])
def test_argmin_argmax_empty_array(self, method, data):
    """Check ``argmin``/``argmax`` raise ValueError on an empty array."""
    # GH 24382
    err_msg = "attempt to get"
    with pytest.raises(ValueError, match=err_msg):
        getattr(data[:0], method)()
|
||||
|
||||
@pytest.mark.parametrize("method", ["argmax", "argmin"])
def test_argmin_argmax_all_na(self, method, data, na_value):
    """Check ``argmin``/``argmax`` raise ValueError on an all-NA array."""
    # all missing with skipna=True is the same as empty
    err_msg = "attempt to get"
    data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype)
    with pytest.raises(ValueError, match=err_msg):
        getattr(data_na, method)()
|
||||
|
||||
@pytest.mark.parametrize(
    "op_name, skipna, expected",
    [
        ("idxmax", True, 0),
        ("idxmin", True, 2),
        ("argmax", True, 0),
        ("argmin", True, 2),
        ("idxmax", False, np.nan),
        ("idxmin", False, np.nan),
        ("argmax", False, -1),
        ("argmin", False, -1),
    ],
)
def test_argreduce_series(
    self, data_missing_for_sorting, op_name, skipna, expected
):
    """Check Series idx/arg reductions, and the FutureWarning on the skipna=False cases."""
    # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
    warn = None
    msg = "The behavior of Series.argmax/argmin"
    if op_name.startswith("arg") and expected == -1:
        warn = FutureWarning
    if op_name.startswith("idx") and np.isnan(expected):
        warn = FutureWarning
        msg = f"The behavior of Series.{op_name}"
    ser = pd.Series(data_missing_for_sorting)
    with tm.assert_produces_warning(warn, match=msg):
        result = getattr(ser, op_name)(skipna=skipna)
    tm.assert_almost_equal(result, expected)
|
||||
|
||||
def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting):
    """Check array-level ``argmin``/``argmax`` raise NotImplementedError for skipna=False."""
    # GH#38733
    data = data_missing_for_sorting

    with pytest.raises(NotImplementedError, match=""):
        data.argmin(skipna=False)

    with pytest.raises(NotImplementedError, match=""):
        data.argmax(skipna=False)
|
||||
|
||||
@pytest.mark.parametrize(
    "na_position, expected",
    [
        ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
        ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
    ],
)
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
    """Check ``nargsort`` places the missing value according to ``na_position``."""
    # GH 25439
    result = nargsort(data_missing_for_sorting, na_position=na_position)
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
    """Check ``Series.sort_values`` ordering in both directions."""
    ser = pd.Series(data_for_sorting)
    result = ser.sort_values(ascending=ascending, key=sort_by_key)
    expected = ser.iloc[[2, 0, 1]]
    if not ascending:
        # GH 35922. Expect stable sort
        if ser.nunique() == 2:
            expected = ser.iloc[[0, 1, 2]]
        else:
            expected = ser.iloc[[1, 0, 2]]

    tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_missing(
    self, data_missing_for_sorting, ascending, sort_by_key
):
    """Check ``Series.sort_values`` puts the missing value last in both directions."""
    ser = pd.Series(data_missing_for_sorting)
    result = ser.sort_values(ascending=ascending, key=sort_by_key)
    if ascending:
        expected = ser.iloc[[2, 0, 1]]
    else:
        expected = ser.iloc[[0, 2, 1]]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_frame(self, data_for_sorting, ascending):
    """Check a two-column ``DataFrame.sort_values`` in the requested direction."""
    df = pd.DataFrame({"A": [1, 2, 1], "B": data_for_sorting})
    # Pass ``ascending`` through; previously it was ignored, so both
    # parametrizations exercised the ascending sort only.
    result = df.sort_values(["A", "B"], ascending=ascending)
    # data_for_sorting -> [B, C, A] with A < B < C, so the rows are
    # (1, B), (2, C), (1, A).
    if ascending:
        order = [2, 0, 1]
        col_a = [1, 1, 2]
    else:
        order = [1, 0, 2]
        col_a = [2, 1, 1]
    expected = pd.DataFrame(
        {"A": col_a, "B": data_for_sorting.take(order)}, index=order
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("keep", ["first", "last", False])
def test_duplicated(self, data, keep):
    """Check ``duplicated`` flags for each ``keep`` option on ``[x, y, x, y]``."""
    arr = data.take([0, 1, 0, 1])
    result = arr.duplicated(keep=keep)
    if keep == "first":
        expected = np.array([False, False, True, True])
    elif keep == "last":
        expected = np.array([True, True, False, False])
    else:
        expected = np.array([True, True, True, True])
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("box", [pd.Series, lambda x: x])
@pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
def test_unique(self, data, box, method):
    """Check ``unique`` collapses a repeated value and returns the array type."""
    duplicated = box(data._from_sequence([data[0], data[0]], dtype=data.dtype))

    result = method(duplicated)

    assert len(result) == 1
    assert isinstance(result, type(data))
    assert result[0] == duplicated[0]
|
||||
|
||||
def test_factorize(self, data_for_grouping):
|
||||
codes, uniques = pd.factorize(data_for_grouping, use_na_sentinel=True)
|
||||
|
||||
is_bool = data_for_grouping.dtype._is_boolean
|
||||
if is_bool:
|
||||
# only 2 unique values
|
||||
expected_codes = np.array([0, 0, -1, -1, 1, 1, 0, 0], dtype=np.intp)
|
||||
expected_uniques = data_for_grouping.take([0, 4])
|
||||
else:
|
||||
expected_codes = np.array([0, 0, -1, -1, 1, 1, 0, 2], dtype=np.intp)
|
||||
expected_uniques = data_for_grouping.take([0, 4, 7])
|
||||
|
||||
tm.assert_numpy_array_equal(codes, expected_codes)
|
||||
tm.assert_extension_array_equal(uniques, expected_uniques)
|
||||
|
||||
def test_factorize_equivalence(self, data_for_grouping):
|
||||
codes_1, uniques_1 = pd.factorize(data_for_grouping, use_na_sentinel=True)
|
||||
codes_2, uniques_2 = data_for_grouping.factorize(use_na_sentinel=True)
|
||||
|
||||
tm.assert_numpy_array_equal(codes_1, codes_2)
|
||||
tm.assert_extension_array_equal(uniques_1, uniques_2)
|
||||
assert len(uniques_1) == len(pd.unique(uniques_1))
|
||||
assert uniques_1.dtype == data_for_grouping.dtype
|
||||
|
||||
def test_factorize_empty(self, data):
|
||||
codes, uniques = pd.factorize(data[:0])
|
||||
expected_codes = np.array([], dtype=np.intp)
|
||||
expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype)
|
||||
|
||||
tm.assert_numpy_array_equal(codes, expected_codes)
|
||||
tm.assert_extension_array_equal(uniques, expected_uniques)
|
||||
|
||||
def test_fillna_copy_frame(self, data_missing):
|
||||
arr = data_missing.take([1, 1])
|
||||
df = pd.DataFrame({"A": arr})
|
||||
df_orig = df.copy()
|
||||
|
||||
filled_val = df.iloc[0, 0]
|
||||
result = df.fillna(filled_val)
|
||||
|
||||
result.iloc[0, 0] = filled_val
|
||||
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
def test_fillna_copy_series(self, data_missing):
|
||||
arr = data_missing.take([1, 1])
|
||||
ser = pd.Series(arr, copy=False)
|
||||
ser_orig = ser.copy()
|
||||
|
||||
filled_val = ser[0]
|
||||
result = ser.fillna(filled_val)
|
||||
result.iloc[0] = filled_val
|
||||
|
||||
tm.assert_series_equal(ser, ser_orig)
|
||||
|
||||
def test_fillna_length_mismatch(self, data_missing):
    """Check ``fillna`` with an array of the wrong length raises ValueError."""
    msg = "Length of 'value' does not match."
    with pytest.raises(ValueError, match=msg):
        data_missing.fillna(data_missing.take([1]))
|
||||
|
||||
# Subclasses can override if we expect e.g Sparse[bool], boolean, pyarrow[bool]
|
||||
_combine_le_expected_dtype: Dtype = NumpyEADtype("bool")
|
||||
|
||||
def test_combine_le(self, data_repeated):
|
||||
# GH 20825
|
||||
# Test that combine works when doing a <= (le) comparison
|
||||
orig_data1, orig_data2 = data_repeated(2)
|
||||
s1 = pd.Series(orig_data1)
|
||||
s2 = pd.Series(orig_data2)
|
||||
result = s1.combine(s2, lambda x1, x2: x1 <= x2)
|
||||
expected = pd.Series(
|
||||
pd.array(
|
||||
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
|
||||
dtype=self._combine_le_expected_dtype,
|
||||
)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
val = s1.iloc[0]
|
||||
result = s1.combine(val, lambda x1, x2: x1 <= x2)
|
||||
expected = pd.Series(
|
||||
pd.array(
|
||||
[a <= val for a in list(orig_data1)],
|
||||
dtype=self._combine_le_expected_dtype,
|
||||
)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_combine_add(self, data_repeated):
|
||||
# GH 20825
|
||||
orig_data1, orig_data2 = data_repeated(2)
|
||||
s1 = pd.Series(orig_data1)
|
||||
s2 = pd.Series(orig_data2)
|
||||
|
||||
# Check if the operation is supported pointwise for our scalars. If not,
|
||||
# we will expect Series.combine to raise as well.
|
||||
try:
|
||||
with np.errstate(over="ignore"):
|
||||
expected = pd.Series(
|
||||
orig_data1._from_sequence(
|
||||
[a + b for (a, b) in zip(list(orig_data1), list(orig_data2))]
|
||||
)
|
||||
)
|
||||
except TypeError:
|
||||
# If the operation is not supported pointwise for our scalars,
|
||||
# then Series.combine should also raise
|
||||
with pytest.raises(TypeError):
|
||||
s1.combine(s2, lambda x1, x2: x1 + x2)
|
||||
return
|
||||
|
||||
result = s1.combine(s2, lambda x1, x2: x1 + x2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
val = s1.iloc[0]
|
||||
result = s1.combine(val, lambda x1, x2: x1 + x2)
|
||||
expected = pd.Series(
|
||||
orig_data1._from_sequence([a + val for a in list(orig_data1)])
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_combine_first(self, data):
|
||||
# https://github.com/pandas-dev/pandas/issues/24147
|
||||
a = pd.Series(data[:3])
|
||||
b = pd.Series(data[2:5], index=[2, 3, 4])
|
||||
result = a.combine_first(b)
|
||||
expected = pd.Series(data[:5])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("frame", [True, False])
@pytest.mark.parametrize(
    "periods, indices",
    [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])],
)
def test_container_shift(self, data, frame, periods, indices):
    """Check Series/DataFrame ``shift`` matches ``take`` with ``allow_fill=True``."""
    # https://github.com/pandas-dev/pandas/issues/22386
    subset = data[:5]
    data = pd.Series(subset, name="A")
    expected = pd.Series(subset.take(indices, allow_fill=True), name="A")

    if frame:
        result = data.to_frame(name="A").assign(B=1).shift(periods)
        expected = pd.concat(
            [expected, pd.Series([1] * 5, name="B").shift(periods)], axis=1
        )
        compare = tm.assert_frame_equal
    else:
        result = data.shift(periods)
        compare = tm.assert_series_equal

    compare(result, expected)
|
||||
|
||||
def test_shift_0_periods(self, data):
|
||||
# GH#33856 shifting with periods=0 should return a copy, not same obj
|
||||
result = data.shift(0)
|
||||
assert data[0] != data[1] # otherwise below is invalid
|
||||
data[0] = data[1]
|
||||
assert result[0] != result[1] # i.e. not the same object/view
|
||||
|
||||
@pytest.mark.parametrize("periods", [1, -2])
def test_diff(self, data, periods):
    """Check Series/DataFrame ``diff`` equals ``op(data, data.shift(periods))``."""
    data = data[:5]
    # Boolean data is differenced with xor, everything else with subtraction.
    if is_bool_dtype(data.dtype):
        op = operator.xor
    else:
        op = operator.sub
    try:
        # does this array implement ops?
        op(data, data)
    except Exception:
        pytest.skip(f"{type(data)} does not support diff")
    s = pd.Series(data)
    result = s.diff(periods)
    expected = pd.Series(op(data, data.shift(periods)))
    tm.assert_series_equal(result, expected)

    df = pd.DataFrame({"A": data, "B": [1.0] * 5})
    result = df.diff(periods)
    if periods == 1:
        b = [np.nan, 0, 0, 0, 0]
    else:
        b = [0, 0, 0, np.nan, np.nan]
    expected = pd.DataFrame({"A": expected, "B": b})
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "periods, indices",
    [[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]],
)
def test_shift_non_empty_array(self, data, periods, indices):
    """Check array ``shift`` on two elements matches ``take`` with ``allow_fill=True``."""
    # https://github.com/pandas-dev/pandas/issues/23911
    subset = data[:2]
    result = subset.shift(periods)
    expected = subset.take(indices, allow_fill=True)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("periods", [-4, -1, 0, 1, 4])
def test_shift_empty_array(self, data, periods):
    """Check shifting an empty array returns an equal empty array."""
    # https://github.com/pandas-dev/pandas/issues/23911
    empty = data[:0]
    result = empty.shift(periods)
    expected = empty
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_shift_zero_copies(self, data):
|
||||
# GH#31502
|
||||
result = data.shift(0)
|
||||
assert result is not data
|
||||
|
||||
result = data[:0].shift(2)
|
||||
assert result is not data
|
||||
|
||||
def test_shift_fill_value(self, data):
|
||||
arr = data[:4]
|
||||
fill_value = data[0]
|
||||
result = arr.shift(1, fill_value=fill_value)
|
||||
expected = data.take([0, 0, 1, 2])
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = arr.shift(-2, fill_value=fill_value)
|
||||
expected = data.take([2, 3, 0, 0])
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_not_hashable(self, data):
    """Check ``hash(data)`` raises TypeError."""
    # We are in general mutable, so not hashable
    with pytest.raises(TypeError, match="unhashable type"):
        hash(data)
|
||||
|
||||
def test_hash_pandas_object_works(self, data, as_frame):
|
||||
# https://github.com/pandas-dev/pandas/issues/23066
|
||||
data = pd.Series(data)
|
||||
if as_frame:
|
||||
data = data.to_frame()
|
||||
a = pd.util.hash_pandas_object(data)
|
||||
b = pd.util.hash_pandas_object(data)
|
||||
tm.assert_equal(a, b)
|
||||
|
||||
def test_searchsorted(self, data_for_sorting, as_series):
|
||||
if data_for_sorting.dtype._is_boolean:
|
||||
return self._test_searchsorted_bool_dtypes(data_for_sorting, as_series)
|
||||
|
||||
b, c, a = data_for_sorting
|
||||
arr = data_for_sorting.take([2, 0, 1]) # to get [a, b, c]
|
||||
|
||||
if as_series:
|
||||
arr = pd.Series(arr)
|
||||
assert arr.searchsorted(a) == 0
|
||||
assert arr.searchsorted(a, side="right") == 1
|
||||
|
||||
assert arr.searchsorted(b) == 1
|
||||
assert arr.searchsorted(b, side="right") == 2
|
||||
|
||||
assert arr.searchsorted(c) == 2
|
||||
assert arr.searchsorted(c, side="right") == 3
|
||||
|
||||
result = arr.searchsorted(arr.take([0, 2]))
|
||||
expected = np.array([0, 2], dtype=np.intp)
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# sorter
|
||||
sorter = np.array([1, 2, 0])
|
||||
assert data_for_sorting.searchsorted(a, sorter=sorter) == 0
|
||||
|
||||
def _test_searchsorted_bool_dtypes(self, data_for_sorting, as_series):
    """Check ``searchsorted`` on a two-value boolean array built from the input's dtype."""
    # We call this from test_searchsorted in cases where we have a
    # boolean-like dtype. The non-bool test assumes we have more than 2
    # unique values.
    dtype = data_for_sorting.dtype
    data_for_sorting = pd.array([True, False], dtype=dtype)
    b, a = data_for_sorting
    # Pass the dtype explicitly so the sorted array cannot be inferred as a
    # different dtype than the one under test.
    arr = type(data_for_sorting)._from_sequence([a, b], dtype=dtype)

    if as_series:
        arr = pd.Series(arr)
    assert arr.searchsorted(a) == 0
    assert arr.searchsorted(a, side="right") == 1

    assert arr.searchsorted(b) == 1
    assert arr.searchsorted(b, side="right") == 2

    result = arr.searchsorted(arr.take([0, 1]))
    expected = np.array([0, 1], dtype=np.intp)

    tm.assert_numpy_array_equal(result, expected)

    # sorter
    sorter = np.array([1, 0])
    assert data_for_sorting.searchsorted(a, sorter=sorter) == 0
|
||||
|
||||
def test_where_series(self, data, na_value, as_frame):
|
||||
assert data[0] != data[1]
|
||||
cls = type(data)
|
||||
a, b = data[:2]
|
||||
|
||||
orig = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype))
|
||||
ser = orig.copy()
|
||||
cond = np.array([True, True, False, False])
|
||||
|
||||
if as_frame:
|
||||
ser = ser.to_frame(name="a")
|
||||
cond = cond.reshape(-1, 1)
|
||||
|
||||
result = ser.where(cond)
|
||||
expected = pd.Series(
|
||||
cls._from_sequence([a, a, na_value, na_value], dtype=data.dtype)
|
||||
)
|
||||
|
||||
if as_frame:
|
||||
expected = expected.to_frame(name="a")
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
ser.mask(~cond, inplace=True)
|
||||
tm.assert_equal(ser, expected)
|
||||
|
||||
# array other
|
||||
ser = orig.copy()
|
||||
if as_frame:
|
||||
ser = ser.to_frame(name="a")
|
||||
cond = np.array([True, False, True, True])
|
||||
other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
|
||||
if as_frame:
|
||||
other = pd.DataFrame({"a": other})
|
||||
cond = pd.DataFrame({"a": cond})
|
||||
result = ser.where(cond, other)
|
||||
expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype))
|
||||
if as_frame:
|
||||
expected = expected.to_frame(name="a")
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
ser.mask(~cond, other, inplace=True)
|
||||
tm.assert_equal(ser, expected)
|
||||
|
||||
@pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]])
def test_repeat(self, data, repeats, as_series, use_numpy):
    """Check ``repeat`` (method or ``np.repeat``) against a hand-built expansion."""
    arr = type(data)._from_sequence(data[:3], dtype=data.dtype)
    if as_series:
        arr = pd.Series(arr)

    result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats)

    # Normalise a scalar repeat count to one count per element.
    repeats = [repeats] * 3 if isinstance(repeats, int) else repeats
    expected = [x for x, n in zip(arr, repeats) for _ in range(n)]
    expected = type(data)._from_sequence(expected, dtype=data.dtype)
    if as_series:
        expected = pd.Series(expected, index=arr.index.repeat(repeats))

    tm.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "repeats, kwargs, error, msg",
    [
        (2, {"axis": 1}, ValueError, "axis"),
        (-1, {}, ValueError, "negative"),
        ([1, 2], {}, ValueError, "shape"),
        (2, {"foo": "bar"}, TypeError, "'foo'"),
    ],
)
def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy):
    """Check invalid ``repeat`` arguments raise the expected error."""
    with pytest.raises(error, match=msg):
        if use_numpy:
            np.repeat(data, repeats, **kwargs)
        else:
            data.repeat(repeats, **kwargs)
|
||||
|
||||
def test_delete(self, data):
|
||||
result = data.delete(0)
|
||||
expected = data[1:]
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = data.delete([1, 3])
|
||||
expected = data._concat_same_type([data[[0]], data[[2]], data[4:]])
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_insert(self, data):
|
||||
# insert at the beginning
|
||||
result = data[1:].insert(0, data[0])
|
||||
tm.assert_extension_array_equal(result, data)
|
||||
|
||||
result = data[1:].insert(-len(data[1:]), data[0])
|
||||
tm.assert_extension_array_equal(result, data)
|
||||
|
||||
# insert at the middle
|
||||
result = data[:-1].insert(4, data[-1])
|
||||
|
||||
taker = np.arange(len(data))
|
||||
taker[5:] = taker[4:-1]
|
||||
taker[4] = len(data) - 1
|
||||
expected = data.take(taker)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_insert_invalid(self, data, invalid_scalar):
    """Check inserting an invalid scalar raises TypeError or ValueError."""
    item = invalid_scalar

    with pytest.raises((TypeError, ValueError)):
        data.insert(0, item)

    with pytest.raises((TypeError, ValueError)):
        data.insert(4, item)

    with pytest.raises((TypeError, ValueError)):
        data.insert(len(data) - 1, item)
|
||||
|
||||
def test_insert_invalid_loc(self, data):
    """Check out-of-bounds locs raise IndexError and a float loc raises TypeError."""
    ub = len(data)

    with pytest.raises(IndexError):
        data.insert(ub + 1, data[0])

    with pytest.raises(IndexError):
        data.insert(-ub - 1, data[0])

    with pytest.raises(TypeError):
        # we expect TypeError here instead of IndexError to match np.insert
        data.insert(1.5, data[0])
|
||||
|
||||
@pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame])
def test_equals(self, data, na_value, as_series, box):
    """Check ``equals`` returns a real bool for equal, unequal and mismatched inputs."""
    data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype)
    data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype)

    data = tm.box_expected(data, box, transpose=False)
    data2 = tm.box_expected(data2, box, transpose=False)
    data_na = tm.box_expected(data_na, box, transpose=False)

    # we are asserting with `is True/False` explicitly, to test that the
    # result is an actual Python bool, and not something "truthy"

    assert data.equals(data) is True
    assert data.equals(data.copy()) is True

    # unequal other data
    assert data.equals(data2) is False
    assert data.equals(data_na) is False

    # different length
    assert data[:2].equals(data[:3]) is False

    # empty are equal
    assert data[:0].equals(data[:0]) is True

    # other types
    assert data.equals(None) is False
    assert data[[0]].equals(data[0]) is False
|
||||
|
||||
def test_equals_same_data_different_object(self, data):
|
||||
# https://github.com/pandas-dev/pandas/issues/34660
|
||||
assert pd.Series(data).equals(pd.Series(data))
|
@ -0,0 +1,190 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class BaseMissingTests:
|
||||
def test_isna(self, data_missing):
|
||||
expected = np.array([True, False])
|
||||
|
||||
result = pd.isna(data_missing)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = pd.Series(data_missing).isna()
|
||||
expected = pd.Series(expected)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH 21189
|
||||
result = pd.Series(data_missing).drop([0, 1]).isna()
|
||||
expected = pd.Series([], dtype=bool)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("na_func", ["isna", "notna"])
def test_isna_returns_copy(self, data_missing, na_func):
    """Check overwriting the ``isna``/``notna`` mask leaves the Series unchanged."""
    result = pd.Series(data_missing)
    expected = result.copy()
    mask = getattr(result, na_func)()
    if isinstance(mask.dtype, pd.SparseDtype):
        # TODO: GH 57739
        mask = np.array(mask)
        mask.flags.writeable = True

    mask[:] = True
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dropna_array(self, data_missing):
|
||||
result = data_missing.dropna()
|
||||
expected = data_missing[[1]]
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_dropna_series(self, data_missing):
|
||||
ser = pd.Series(data_missing)
|
||||
result = ser.dropna()
|
||||
expected = ser.iloc[[1]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dropna_frame(self, data_missing):
|
||||
df = pd.DataFrame({"A": data_missing}, columns=pd.Index(["A"], dtype=object))
|
||||
|
||||
# defaults
|
||||
result = df.dropna()
|
||||
expected = df.iloc[[1]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.dropna(axis="columns")
|
||||
expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([]))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# multiple
|
||||
df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]})
|
||||
result = df.dropna()
|
||||
expected = df.iloc[:0]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_fillna_scalar(self, data_missing):
|
||||
valid = data_missing[1]
|
||||
result = data_missing.fillna(valid)
|
||||
expected = data_missing.fillna(valid)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.filterwarnings(
|
||||
"ignore:Series.fillna with 'method' is deprecated:FutureWarning"
|
||||
)
|
||||
def test_fillna_limit_pad(self, data_missing):
|
||||
arr = data_missing.take([1, 0, 0, 0, 1])
|
||||
result = pd.Series(arr).ffill(limit=2)
|
||||
expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "limit_area, input_ilocs, expected_ilocs",
    [
        ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
        ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
        ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
        ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
        ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
        ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
        ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
        ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
    ],
)
def test_ffill_limit_area(
    self, data_missing, limit_area, input_ilocs, expected_ilocs
):
    """Forward fill restricted by ``limit_area`` matches the expected layout."""
    # GH#56616
    # Both layouts are built by taking positions out of ``data_missing``.
    filled = pd.Series(data_missing.take(input_ilocs)).ffill(limit_area=limit_area)
    wanted = pd.Series(data_missing.take(expected_ilocs))
    tm.assert_series_equal(filled, wanted)
|
||||
|
||||
@pytest.mark.filterwarnings(
|
||||
"ignore:Series.fillna with 'method' is deprecated:FutureWarning"
|
||||
)
|
||||
def test_fillna_limit_backfill(self, data_missing):
|
||||
arr = data_missing.take([1, 0, 0, 0, 1])
|
||||
result = pd.Series(arr).fillna(method="backfill", limit=2)
|
||||
expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_fillna_no_op_returns_copy(self, data):
|
||||
data = data[~data.isna()]
|
||||
|
||||
valid = data[0]
|
||||
result = data.fillna(valid)
|
||||
assert result is not data
|
||||
tm.assert_extension_array_equal(result, data)
|
||||
|
||||
result = data._pad_or_backfill(method="backfill")
|
||||
assert result is not data
|
||||
tm.assert_extension_array_equal(result, data)
|
||||
|
||||
def test_fillna_series(self, data_missing):
|
||||
fill_value = data_missing[1]
|
||||
ser = pd.Series(data_missing)
|
||||
|
||||
result = ser.fillna(fill_value)
|
||||
expected = pd.Series(
|
||||
data_missing._from_sequence(
|
||||
[fill_value, fill_value], dtype=data_missing.dtype
|
||||
)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Fill with a series
|
||||
result = ser.fillna(expected)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Fill with a series not affecting the missing values
|
||||
result = ser.fillna(ser)
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
def test_fillna_series_method(self, data_missing, fillna_method):
|
||||
fill_value = data_missing[1]
|
||||
|
||||
if fillna_method == "ffill":
|
||||
data_missing = data_missing[::-1]
|
||||
|
||||
result = getattr(pd.Series(data_missing), fillna_method)()
|
||||
expected = pd.Series(
|
||||
data_missing._from_sequence(
|
||||
[fill_value, fill_value], dtype=data_missing.dtype
|
||||
)
|
||||
)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_fillna_frame(self, data_missing):
|
||||
fill_value = data_missing[1]
|
||||
|
||||
result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value)
|
||||
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
"A": data_missing._from_sequence(
|
||||
[fill_value, fill_value], dtype=data_missing.dtype
|
||||
),
|
||||
"B": [1, 2],
|
||||
}
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_fillna_fill_other(self, data):
|
||||
result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0})
|
||||
|
||||
expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)})
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_use_inf_as_na_no_effect(self, data_missing):
|
||||
ser = pd.Series(data_missing)
|
||||
expected = ser.isna()
|
||||
msg = "use_inf_as_na option is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
with pd.option_context("mode.use_inf_as_na", True):
|
||||
result = ser.isna()
|
||||
tm.assert_series_equal(result, expected)
|
289
lib/python3.11/site-packages/pandas/tests/extension/base/ops.py
Normal file
289
lib/python3.11/site-packages/pandas/tests/extension/base/ops.py
Normal file
@ -0,0 +1,289 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import final
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_string_dtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core import ops
|
||||
|
||||
|
||||
class BaseOpsUtil:
    """
    Helpers for the operator test classes.

    The ``*_exc`` class attributes hold the exception an operation is expected
    to raise; ``None`` means the operation is expected to succeed.
    """

    series_scalar_exc: type[Exception] | None = TypeError
    frame_scalar_exc: type[Exception] | None = TypeError
    series_array_exc: type[Exception] | None = TypeError
    divmod_exc: type[Exception] | None = TypeError

    def _get_expected_exception(
        self, op_name: str, obj, other
    ) -> type[Exception] | tuple[type[Exception], ...] | None:
        # Find the Exception, if any we expect to raise calling
        # obj.__op_name__(other)

        # The self.obj_bar_exc pattern isn't great in part because it can depend
        # on op_name or dtypes, but we use it here for backward-compatibility.
        if op_name in ["__divmod__", "__rdivmod__"]:
            return self.divmod_exc
        if isinstance(obj, pd.Series):
            if isinstance(other, pd.Series):
                return self.series_array_exc
            return self.series_scalar_exc
        return self.frame_scalar_exc

    def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
        # In _check_op we check that the result of a pointwise operation
        # (found via _combine) matches the result of the vectorized
        # operation obj.__op_name__(other).
        # In some cases pandas dtype inference on the scalar result may not
        # give a matching dtype even if both operations are behaving "correctly".
        # In these cases, do extra required casting here.
        return pointwise_result

    def get_op_from_name(self, op_name: str):
        """Return the callable for ``op_name`` via ``tm.get_op_from_name``."""
        return tm.get_op_from_name(op_name)

    # Subclasses are not expected to need to override check_opname, _check_op,
    # _check_divmod_op, or _combine.
    # Ideally any relevant overriding can be done in _cast_pointwise_result,
    # get_op_from_name, and the specification of `exc`. If you find a use
    # case that still requires overriding _check_op or _combine, please let
    # us know at github.com/pandas-dev/pandas/issues
    @final
    def check_opname(self, ser: pd.Series, op_name: str, other):
        """Look up the expected exception and the operator, then run _check_op."""
        expected_exc = self._get_expected_exception(op_name, ser, other)
        operator_func = self.get_op_from_name(op_name)

        self._check_op(ser, operator_func, other, op_name, expected_exc)

    # see comment on check_opname
    @final
    def _combine(self, obj, other, op):
        """Apply ``op`` pointwise through ``Series.combine``."""
        if not isinstance(obj, pd.DataFrame):
            return obj.combine(other, op)
        # Only single-column frames are handled.
        if len(obj.columns) != 1:
            raise NotImplementedError
        return obj.iloc[:, 0].combine(other, op).to_frame()

    # see comment on check_opname
    @final
    def _check_op(
        self, ser: pd.Series, op, other, op_name: str, exc=NotImplementedError
    ):
        # Check that the Series/DataFrame arithmetic/comparison method matches
        # the pointwise result from _combine.

        if exc is not None:
            with pytest.raises(exc):
                op(ser, other)
            return

        result = op(ser, other)
        pointwise = self._combine(ser, other, op)
        expected = self._cast_pointwise_result(op_name, ser, other, pointwise)
        assert isinstance(result, type(ser))
        tm.assert_equal(result, expected)

    # see comment on check_opname
    @final
    def _check_divmod_op(self, ser: pd.Series, op, other):
        # check that divmod behavior matches behavior of floordiv+mod
        is_forward = op is divmod
        dunder = "__divmod__" if is_forward else "__rdivmod__"
        exc = self._get_expected_exception(dunder, ser, other)

        if exc is not None:
            # NOTE: this branch always calls the builtin ``divmod`` on
            # (ser, other), whichever ``op`` was passed in.
            with pytest.raises(exc):
                divmod(ser, other)
            return

        result_div, result_mod = op(ser, other)
        if is_forward:
            expected_div, expected_mod = ser // other, ser % other
        else:
            expected_div, expected_mod = other // ser, other % ser
        tm.assert_series_equal(result_div, expected_div)
        tm.assert_series_equal(result_mod, expected_mod)
|
||||
|
||||
|
||||
class BaseArithmeticOpsTests(BaseOpsUtil):
    """
    Arithmetic operator tests for Series and DataFrame.

    The class variables below name the exception each kind of operation is
    expected to raise. Subclasses that support a kind of operation should
    override the matching variable:

    * series_scalar_exc = TypeError
    * frame_scalar_exc = TypeError
    * series_array_exc = TypeError
    * divmod_exc = TypeError
    """

    series_scalar_exc: type[Exception] | None = TypeError
    frame_scalar_exc: type[Exception] | None = TypeError
    series_array_exc: type[Exception] | None = TypeError
    divmod_exc: type[Exception] | None = TypeError

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
        # series & scalar
        op_name = all_arithmetic_operators
        if op_name == "__rmod__":
            if is_string_dtype(data.dtype):
                pytest.skip("Skip testing Python string formatting")

        ser = pd.Series(data)
        scalar = ser.iloc[0]
        self.check_opname(ser, op_name, scalar)

    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
        # frame & scalar
        op_name = all_arithmetic_operators
        if op_name == "__rmod__":
            if is_string_dtype(data.dtype):
                pytest.skip("Skip testing Python string formatting")

        frame = pd.DataFrame({"A": data})
        self.check_opname(frame, op_name, data[0])

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        # ndarray & other series
        ser = pd.Series(data)
        # The other operand repeats the first element to the same length.
        repeated = pd.Series([ser.iloc[0]] * len(ser))
        self.check_opname(ser, all_arithmetic_operators, repeated)

    def test_divmod(self, data):
        """divmod and rdivmod between a Series and the scalar 1."""
        ser = pd.Series(data)
        self._check_divmod_op(ser, divmod, 1)
        self._check_divmod_op(1, ops.rdivmod, ser)

    def test_divmod_series_array(self, data, data_for_twos):
        """divmod with an array operand, rdivmod with an array and a Series."""
        ser = pd.Series(data)
        self._check_divmod_op(ser, divmod, data)

        for other in (data_for_twos, pd.Series(data_for_twos)):
            self._check_divmod_op(other, ops.rdivmod, ser)

    def test_add_series_with_extension_array(self, data):
        # Check adding an ExtensionArray to a Series of the same dtype matches
        # the behavior of adding the arrays directly and then wrapping in a
        # Series.

        ser = pd.Series(data)

        exc = self._get_expected_exception("__add__", ser, data)
        if exc is None:
            result = ser + data
            expected = pd.Series(data + data)
            tm.assert_series_equal(result, expected)
        else:
            with pytest.raises(exc):
                ser + data

    @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame, pd.Index])
    @pytest.mark.parametrize(
        "op_name",
        [
            name
            for name in tm.arithmetic_dunder_methods + tm.comparison_dunder_methods
            if not name.startswith("__r")
        ],
    )
    def test_direct_arith_with_ndframe_returns_not_implemented(
        self, data, box, op_name
    ):
        # EAs should return NotImplemented for ops with Series/DataFrame/Index
        # Pandas takes care of unboxing the series and calling the EA's op.
        boxed = box(data)

        # Arrays that do not define the dunder have nothing to check.
        if not hasattr(data, op_name):
            return
        method = getattr(data, op_name)
        assert method(boxed) is NotImplemented
|
||||
|
||||
|
||||
class BaseComparisonOpsTests(BaseOpsUtil):
    """Comparison operator tests for Series backed by an extension array."""

    def _compare_other(self, ser: pd.Series, data, op, other):
        """
        Compare the vectorized ``op(ser, other)`` with ``ser.combine``.

        For ``eq``/``ne`` the operation itself is called unguarded. For any
        other operator, an exception from the vectorized call means the
        pointwise call must raise the same exception type.
        """
        if op.__name__ in ["eq", "ne"]:
            # comparison should match point-wise comparisons
            result = op(ser, other)
        else:
            failure = None
            try:
                result = op(ser, other)
            except Exception as err:
                failure = err

            if failure is not None:
                with pytest.raises(type(failure)):
                    ser.combine(other, op)
                return
            # Didn't error, then should match pointwise behavior

        pointwise = ser.combine(other, op)
        expected = self._cast_pointwise_result(op.__name__, ser, other, pointwise)
        tm.assert_series_equal(result, expected)

    def test_compare_scalar(self, data, comparison_op):
        """Compare a Series against the scalar 0."""
        self._compare_other(pd.Series(data), data, comparison_op, 0)

    def test_compare_array(self, data, comparison_op):
        """Compare a Series against a same-dtype Series of its first element."""
        ser = pd.Series(data)
        repeated = pd.Series([data[0]] * len(data), dtype=data.dtype)
        self._compare_other(ser, data, comparison_op, repeated)
|
||||
|
||||
|
||||
class BaseUnaryOpsTests(BaseOpsUtil):
    """Unary operator tests: invert and the pos/neg/abs ufunc equivalence."""

    def test_invert(self, data):
        ser = pd.Series(data, name="name")
        try:
            # 10 is an arbitrary choice here, just avoid iterating over
            # the whole array to trim test runtime
            for scalar in data[:10]:
                ~scalar
        except TypeError:
            # scalars don't support invert -> we don't expect the vectorized
            # operation to succeed
            with pytest.raises(TypeError):
                ~ser
            with pytest.raises(TypeError):
                ~data
            return

        # Note we do not reuse the pointwise result to construct expected
        # because python semantics for negating bools are weird see GH#54569
        inverted = ~ser
        tm.assert_series_equal(inverted, pd.Series(~data, name="name"))

    @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
    def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
        # the dunder __pos__ works if and only if np.positive works,
        # same for __neg__/np.negative and __abs__/np.abs
        dunder_for = {
            np.positive: "__pos__",
            np.negative: "__neg__",
            np.abs: "__abs__",
        }
        dunder = getattr(data, dunder_for[ufunc])

        try:
            result = dunder()
        except Exception as err:
            # if __pos__ raised, then so should the ufunc
            with pytest.raises((type(err), TypeError)):
                ufunc(data)
        else:
            tm.assert_extension_array_equal(result, ufunc(data))
|
@ -0,0 +1,41 @@
|
||||
import io
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class BasePrintingTests:
    """Checks on how an extension array is rendered when printed."""

    @pytest.mark.parametrize("size", ["big", "small"])
    def test_array_repr(self, data, size):
        """The repr names the array class, its length and its dtype."""
        if size == "small":
            arr = data[:5]
        else:
            # Five copies of the data; the "big" repr must contain "...".
            arr = type(data)._concat_same_type([data] * 5)

        text = repr(arr)
        assert type(arr).__name__ in text
        assert f"Length: {len(arr)}" in text
        assert str(arr.dtype) in text
        if size == "big":
            assert "..." in text

    def test_array_repr_unicode(self, data):
        """``str`` of the array returns a ``str``."""
        assert isinstance(str(data), str)

    def test_series_repr(self, data):
        """The dtype name appears in the Series repr."""
        text = repr(pd.Series(data))
        assert data.dtype.name in text

    def test_dataframe_repr(self, data):
        """Building the DataFrame repr must not raise."""
        frame = pd.DataFrame({"A": data})
        repr(frame)

    def test_dtype_name_in_info(self, data):
        """The dtype name appears in the ``DataFrame.info`` output."""
        frame = pd.DataFrame({"A": data})
        with io.StringIO() as buf:
            frame.info(buf=buf)
            info_text = buf.getvalue()
        assert data.dtype.name in info_text
|
@ -0,0 +1,153 @@
|
||||
from typing import final
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_numeric_dtype
|
||||
|
||||
|
||||
class BaseReduceTests:
    """
    Reduction tests for extension arrays.

    Reductions generally only make sense for numeric/boolean data. Unless
    ``_supports_reduction`` is overridden, every reduction is treated as
    unsupported.
    """

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        # Specify if we expect this reduction to succeed.
        return False

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        # We perform the same operation on the np.float64 data and check
        # that the results match. Override if you need to cast to something
        # other than float64.
        res_op = getattr(ser, op_name)

        try:
            alt = ser.astype("float64")
        except (TypeError, ValueError):
            # e.g. Interval can't cast (TypeError), StringArray can't cast
            # (ValueError), so let's cast to object and do
            # the reduction pointwise
            alt = ser.astype(object)

        exp_op = getattr(alt, op_name)
        # "count" is called without arguments; everything else gets skipna.
        call_kwargs = {} if op_name == "count" else {"skipna": skipna}
        result = res_op(**call_kwargs)
        expected = exp_op(**call_kwargs)
        tm.assert_almost_equal(result, expected)

    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
        # Find the expected dtype when the given reduction is done on a DataFrame
        # column with this array. The default assumes float64-like behavior,
        # i.e. retains the dtype.
        return arr.dtype

    # We anticipate that authors should not need to override check_reduce_frame,
    # but should be able to do any necessary overriding in
    # _get_expected_reduction_dtype. If you have a use case where this
    # does not hold, please let us know at github.com/pandas-dev/pandas/issues.
    @final
    def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
        # Check that the 2D reduction done in a DataFrame reduction "looks like"
        # a wrapped version of the 1D reduction done by Series.
        arr = ser.array
        frame = pd.DataFrame({"a": arr})

        extra = {"ddof": 1} if op_name in ["var", "std"] else {}

        cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna)

        # The DataFrame method just calls arr._reduce with keepdims=True,
        # so this first check is perfunctory.
        from_array = arr._reduce(op_name, skipna=skipna, keepdims=True, **extra)
        from_frame = getattr(frame, op_name)(skipna=skipna, **extra).array
        tm.assert_extension_array_equal(from_array, from_frame)

        # Check that the 2D reduction looks like a wrapped version of the
        # 1D reduction
        if skipna or not ser.isna().any():
            exp_value = getattr(ser.dropna(), op_name)()
            expected = pd.array([exp_value], dtype=cmp_dtype)
        else:
            expected = pd.array([pd.NA], dtype=cmp_dtype)

        tm.assert_extension_array_equal(from_array, expected)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
        """Boolean reductions either match ``check_reduce`` or raise TypeError."""
        op_name = all_boolean_reductions
        ser = pd.Series(data)

        if self._supports_reduction(ser, op_name):
            self.check_reduce(ser, op_name, skipna)
            return

        # TODO: the message being checked here isn't actually checking anything
        msg = (
            "[Cc]annot perform|"
            "Categorical is not ordered for operation|"
            "does not support reduction|"
        )

        with pytest.raises(TypeError, match=msg):
            getattr(ser, op_name)(skipna=skipna)

    @pytest.mark.filterwarnings("ignore::RuntimeWarning")
    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
        """Numeric reductions either match ``check_reduce`` or raise TypeError."""
        op_name = all_numeric_reductions
        ser = pd.Series(data)

        if self._supports_reduction(ser, op_name):
            # min/max with empty produce numpy warnings
            self.check_reduce(ser, op_name, skipna)
            return

        # TODO: the message being checked here isn't actually checking anything
        msg = (
            "[Cc]annot perform|"
            "Categorical is not ordered for operation|"
            "does not support reduction|"
        )

        with pytest.raises(TypeError, match=msg):
            getattr(ser, op_name)(skipna=skipna)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
        """Run ``check_reduce_frame`` unless one of the skip conditions applies."""
        op_name = all_numeric_reductions
        ser = pd.Series(data)

        # The three skip checks run in this order; the first match wins.
        if not is_numeric_dtype(ser.dtype):
            pytest.skip(f"{ser.dtype} is not numeric dtype")
        elif op_name in ["count", "kurt", "sem"]:
            pytest.skip(f"{op_name} not an array method")
        elif not self._supports_reduction(ser, op_name):
            pytest.skip(f"Reduction {op_name} not supported for this dtype")

        self.check_reduce_frame(ser, op_name, skipna)
|
||||
|
||||
|
||||
# TODO(3.0): remove BaseNoReduceTests, BaseNumericReduceTests,
|
||||
# BaseBooleanReduceTests
|
||||
class BaseNoReduceTests(BaseReduceTests):
    """Reduction tests with nothing overridden from BaseReduceTests."""
|
||||
|
||||
|
||||
class BaseNumericReduceTests(BaseReduceTests):
    # For backward compatibility only, this only runs the numeric reductions
    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        # "any"/"all" are skipped here; every other reduction is supported.
        is_boolean_reduction = op_name in ["any", "all"]
        if is_boolean_reduction:
            pytest.skip("These are tested in BaseBooleanReduceTests")
        return True
|
||||
|
||||
|
||||
class BaseBooleanReduceTests(BaseReduceTests):
    # For backward compatibility only, this only runs the boolean reductions
    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        # Only "any"/"all" are supported here; anything else is skipped.
        is_boolean_reduction = op_name in ["any", "all"]
        if not is_boolean_reduction:
            pytest.skip("These are tested in BaseNumericReduceTests")
        return True
|
@ -0,0 +1,379 @@
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.extensions import ExtensionArray
|
||||
from pandas.core.internals.blocks import EABackedBlock
|
||||
|
||||
|
||||
class BaseReshapingTests:
|
||||
"""Tests for reshaping and concatenation."""
|
||||
|
||||
@pytest.mark.parametrize("in_frame", [True, False])
def test_concat(self, data, in_frame):
    """Concatenating an object with itself keeps the extension dtype."""
    ser = pd.Series(data)
    wrapped = pd.DataFrame(ser) if in_frame else ser
    result = pd.concat([wrapped, wrapped], ignore_index=True)

    assert len(result) == len(data) * 2

    # A frame exposes per-column dtypes, a Series a single dtype.
    dtype = result.dtypes[0] if in_frame else result.dtype
    assert dtype == data.dtype

    manager = result._mgr
    if hasattr(manager, "blocks"):
        assert isinstance(manager.blocks[0], EABackedBlock)
    assert isinstance(manager.arrays[0], ExtensionArray)
|
||||
|
||||
@pytest.mark.parametrize("in_frame", [True, False])
def test_concat_all_na_block(self, data_missing, in_frame):
    """Concatenate a block of position-1 values with a block of position-0 values."""
    pieces = [
        pd.Series(data_missing.take([1, 1]), index=[0, 1]),
        pd.Series(data_missing.take([0, 0]), index=[2, 3]),
    ]
    combined = data_missing.take([1, 1, 0, 0])

    if in_frame:
        frames = [pd.DataFrame({"a": piece}) for piece in pieces]
        result = pd.concat(frames)
        tm.assert_frame_equal(result, pd.DataFrame({"a": combined}))
    else:
        result = pd.concat(pieces)
        tm.assert_series_equal(result, pd.Series(combined))
|
||||
|
||||
def test_concat_mixed_dtypes(self, data):
|
||||
# https://github.com/pandas-dev/pandas/issues/20762
|
||||
df1 = pd.DataFrame({"A": data[:3]})
|
||||
df2 = pd.DataFrame({"A": [1, 2, 3]})
|
||||
df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category")
|
||||
dfs = [df1, df2, df3]
|
||||
|
||||
# dataframes
|
||||
result = pd.concat(dfs)
|
||||
expected = pd.concat([x.astype(object) for x in dfs])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# series
|
||||
result = pd.concat([x["A"] for x in dfs])
|
||||
expected = pd.concat([x["A"].astype(object) for x in dfs])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# simple test for just EA and one other
|
||||
result = pd.concat([df1, df2.astype(object)])
|
||||
expected = pd.concat([df1.astype("object"), df2.astype("object")])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = pd.concat([df1["A"], df2["A"].astype(object)])
|
||||
expected = pd.concat([df1["A"].astype("object"), df2["A"].astype("object")])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_concat_columns(self, data, na_value):
|
||||
df1 = pd.DataFrame({"A": data[:3]})
|
||||
df2 = pd.DataFrame({"B": [1, 2, 3]})
|
||||
|
||||
expected = pd.DataFrame({"A": data[:3], "B": [1, 2, 3]})
|
||||
result = pd.concat([df1, df2], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = pd.concat([df1["A"], df2["B"]], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# non-aligned
|
||||
df2 = pd.DataFrame({"B": [1, 2, 3]}, index=[1, 2, 3])
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
"A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype),
|
||||
"B": [np.nan, 1, 2, 3],
|
||||
}
|
||||
)
|
||||
|
||||
result = pd.concat([df1, df2], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = pd.concat([df1["A"], df2["B"]], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_extension_arrays_copy_false(self, data, na_value):
|
||||
# GH 20756
|
||||
df1 = pd.DataFrame({"A": data[:3]})
|
||||
df2 = pd.DataFrame({"B": data[3:7]})
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
"A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype),
|
||||
"B": data[3:7],
|
||||
}
|
||||
)
|
||||
result = pd.concat([df1, df2], axis=1, copy=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_concat_with_reindex(self, data):
|
||||
# GH-33027
|
||||
a = pd.DataFrame({"a": data[:5]})
|
||||
b = pd.DataFrame({"b": data[:5]})
|
||||
result = pd.concat([a, b], ignore_index=True)
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
"a": data.take(list(range(5)) + ([-1] * 5), allow_fill=True),
|
||||
"b": data.take(([-1] * 5) + list(range(5)), allow_fill=True),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_align(self, data, na_value):
|
||||
a = data[:3]
|
||||
b = data[2:5]
|
||||
r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3]))
|
||||
|
||||
# Assumes that the ctor can take a list of scalars of the type
|
||||
e1 = pd.Series(data._from_sequence(list(a) + [na_value], dtype=data.dtype))
|
||||
e2 = pd.Series(data._from_sequence([na_value] + list(b), dtype=data.dtype))
|
||||
tm.assert_series_equal(r1, e1)
|
||||
tm.assert_series_equal(r2, e2)
|
||||
|
||||
def test_align_frame(self, data, na_value):
|
||||
a = data[:3]
|
||||
b = data[2:5]
|
||||
r1, r2 = pd.DataFrame({"A": a}).align(pd.DataFrame({"A": b}, index=[1, 2, 3]))
|
||||
|
||||
# Assumes that the ctor can take a list of scalars of the type
|
||||
e1 = pd.DataFrame(
|
||||
{"A": data._from_sequence(list(a) + [na_value], dtype=data.dtype)}
|
||||
)
|
||||
e2 = pd.DataFrame(
|
||||
{"A": data._from_sequence([na_value] + list(b), dtype=data.dtype)}
|
||||
)
|
||||
tm.assert_frame_equal(r1, e1)
|
||||
tm.assert_frame_equal(r2, e2)
|
||||
|
||||
def test_align_series_frame(self, data, na_value):
|
||||
# https://github.com/pandas-dev/pandas/issues/20576
|
||||
ser = pd.Series(data, name="a")
|
||||
df = pd.DataFrame({"col": np.arange(len(ser) + 1)})
|
||||
r1, r2 = ser.align(df)
|
||||
|
||||
e1 = pd.Series(
|
||||
data._from_sequence(list(data) + [na_value], dtype=data.dtype),
|
||||
name=ser.name,
|
||||
)
|
||||
|
||||
tm.assert_series_equal(r1, e1)
|
||||
tm.assert_frame_equal(r2, df)
|
||||
|
||||
def test_set_frame_expand_regular_with_extension(self, data):
|
||||
df = pd.DataFrame({"A": [1] * len(data)})
|
||||
df["B"] = data
|
||||
expected = pd.DataFrame({"A": [1] * len(data), "B": data})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_set_frame_expand_extension_with_regular(self, data):
|
||||
df = pd.DataFrame({"A": data})
|
||||
df["B"] = [1] * len(data)
|
||||
expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_set_frame_overwrite_object(self, data):
|
||||
# https://github.com/pandas-dev/pandas/issues/20555
|
||||
df = pd.DataFrame({"A": [1] * len(data)}, dtype=object)
|
||||
df["A"] = data
|
||||
assert df.dtypes["A"] == data.dtype
|
||||
|
||||
def test_merge(self, data, na_value):
|
||||
# GH-20743
|
||||
df1 = pd.DataFrame({"ext": data[:3], "int1": [1, 2, 3], "key": [0, 1, 2]})
|
||||
df2 = pd.DataFrame({"int2": [1, 2, 3, 4], "key": [0, 0, 1, 3]})
|
||||
|
||||
res = pd.merge(df1, df2)
|
||||
exp = pd.DataFrame(
|
||||
{
|
||||
"int1": [1, 1, 2],
|
||||
"int2": [1, 2, 3],
|
||||
"key": [0, 0, 1],
|
||||
"ext": data._from_sequence(
|
||||
[data[0], data[0], data[1]], dtype=data.dtype
|
||||
),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]])
|
||||
|
||||
res = pd.merge(df1, df2, how="outer")
|
||||
exp = pd.DataFrame(
|
||||
{
|
||||
"int1": [1, 1, 2, 3, np.nan],
|
||||
"int2": [1, 2, 3, np.nan, 4],
|
||||
"key": [0, 0, 1, 2, 3],
|
||||
"ext": data._from_sequence(
|
||||
[data[0], data[0], data[1], data[2], na_value], dtype=data.dtype
|
||||
),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]])
|
||||
|
||||
def test_merge_on_extension_array(self, data):
|
||||
# GH 23020
|
||||
a, b = data[:2]
|
||||
key = type(data)._from_sequence([a, b], dtype=data.dtype)
|
||||
|
||||
df = pd.DataFrame({"key": key, "val": [1, 2]})
|
||||
result = pd.merge(df, df, on="key")
|
||||
expected = pd.DataFrame({"key": key, "val_x": [1, 2], "val_y": [1, 2]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# order
|
||||
result = pd.merge(df.iloc[[1, 0]], df, on="key")
|
||||
expected = expected.iloc[[1, 0]].reset_index(drop=True)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_merge_on_extension_array_duplicates(self, data):
|
||||
# GH 23020
|
||||
a, b = data[:2]
|
||||
key = type(data)._from_sequence([a, b, a], dtype=data.dtype)
|
||||
df1 = pd.DataFrame({"key": key, "val": [1, 2, 3]})
|
||||
df2 = pd.DataFrame({"key": key, "val": [1, 2, 3]})
|
||||
|
||||
result = pd.merge(df1, df2, on="key")
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
"key": key.take([0, 0, 1, 2, 2]),
|
||||
"val_x": [1, 1, 2, 3, 3],
|
||||
"val_y": [1, 3, 2, 1, 3],
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.filterwarnings(
    "ignore:The previous implementation of stack is deprecated"
)
@pytest.mark.parametrize(
    "columns",
    [
        ["A", "B"],
        pd.MultiIndex.from_tuples(
            [("A", "a"), ("A", "b")], names=["outer", "inner"]
        ),
    ],
)
@pytest.mark.parametrize("future_stack", [True, False])
def test_stack(self, data, columns, future_stack):
    """Stacking keeps the extension dtype and matches the object-dtype result."""
    frame = pd.DataFrame({"A": data[:5], "B": data[:5]})
    frame.columns = columns
    stacked = frame.stack(future_stack=future_stack)

    # we need a second astype(object), in case the constructor inferred
    # object -> specialized, as is done for period.
    expected = frame.astype(object).stack(future_stack=future_stack).astype(object)

    column_dtype = frame.iloc[:, 0].dtype
    if isinstance(expected, pd.Series):
        assert stacked.dtype == column_dtype
    else:
        assert all(stacked.dtypes == column_dtype)

    tm.assert_equal(stacked.astype(object), expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index",
|
||||
[
|
||||
# Two levels, uniform.
|
||||
pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]),
|
||||
# non-uniform
|
||||
pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]),
|
||||
# three levels, non-uniform
|
||||
pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]),
|
||||
pd.MultiIndex.from_tuples(
|
||||
[
|
||||
("A", "a", 1),
|
||||
("A", "b", 0),
|
||||
("A", "a", 0),
|
||||
("B", "a", 0),
|
||||
("B", "c", 1),
|
||||
]
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("obj", ["series", "frame"])
|
||||
def test_unstack(self, data, index, obj):
|
||||
data = data[: len(index)]
|
||||
if obj == "series":
|
||||
ser = pd.Series(data, index=index)
|
||||
else:
|
||||
ser = pd.DataFrame({"A": data, "B": data}, index=index)
|
||||
|
||||
n = index.nlevels
|
||||
levels = list(range(n))
|
||||
# [0, 1, 2]
|
||||
# [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
|
||||
combinations = itertools.chain.from_iterable(
|
||||
itertools.permutations(levels, i) for i in range(1, n)
|
||||
)
|
||||
|
||||
for level in combinations:
|
||||
result = ser.unstack(level=level)
|
||||
assert all(
|
||||
isinstance(result[col].array, type(data)) for col in result.columns
|
||||
)
|
||||
|
||||
if obj == "series":
|
||||
# We should get the same result with to_frame+unstack+droplevel
|
||||
df = ser.to_frame()
|
||||
|
||||
alt = df.unstack(level=level).droplevel(0, axis=1)
|
||||
tm.assert_frame_equal(result, alt)
|
||||
|
||||
obj_ser = ser.astype(object)
|
||||
|
||||
expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
|
||||
if obj == "series":
|
||||
assert (expected.dtypes == object).all()
|
||||
|
||||
result = result.astype(object)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ravel(self, data):
|
||||
# as long as EA is 1D-only, ravel is a no-op
|
||||
result = data.ravel()
|
||||
assert type(result) == type(data)
|
||||
|
||||
if data.dtype._is_immutable:
|
||||
pytest.skip(f"test_ravel assumes mutability and {data.dtype} is immutable")
|
||||
|
||||
# Check that we have a view, not a copy
|
||||
result[0] = result[1]
|
||||
assert data[0] == data[1]
|
||||
|
||||
def test_transpose(self, data):
|
||||
result = data.transpose()
|
||||
assert type(result) == type(data)
|
||||
|
||||
# check we get a new object
|
||||
assert result is not data
|
||||
|
||||
# If we ever _did_ support 2D, shape should be reversed
|
||||
assert result.shape == data.shape[::-1]
|
||||
|
||||
if data.dtype._is_immutable:
|
||||
pytest.skip(
|
||||
f"test_transpose assumes mutability and {data.dtype} is immutable"
|
||||
)
|
||||
|
||||
# Check that we have a view, not a copy
|
||||
result[0] = result[1]
|
||||
assert data[0] == data[1]
|
||||
|
||||
def test_transpose_frame(self, data):
|
||||
df = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=["a", "b", "c", "d"])
|
||||
result = df.T
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
"a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype),
|
||||
"b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype),
|
||||
"c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype),
|
||||
"d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype),
|
||||
},
|
||||
index=["A", "B"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(np.transpose(np.transpose(df)), df)
|
||||
tm.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]])
|
@ -0,0 +1,451 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class BaseSetitemTests:
    """
    Base tests for ``__setitem__`` (and ``__delitem__``) on extension arrays.

    The tests consume fixtures such as ``data``, ``dtype``, ``na_value``,
    ``box_in_series``, ``as_array`` and ``invalid_scalar`` that are not defined
    in this class and must be supplied by the test environment.
    """

    @pytest.fixture(
        params=[
            lambda x: x.index,
            lambda x: list(x.index),
            lambda x: slice(None),
            lambda x: slice(0, len(x)),
            lambda x: range(len(x)),
            lambda x: list(range(len(x))),
            lambda x: np.ones(len(x), dtype=bool),
        ],
        ids=[
            "index",
            "list[index]",
            "null_slice",
            "full_slice",
            "range",
            "list(range)",
            "mask",
        ],
    )
    def full_indexer(self, request):
        """
        Fixture for an indexer to pass to obj.loc to get/set the full length of the
        object.

        In some cases, assumes that obj.index is the default RangeIndex.
        """
        return request.param

    @pytest.fixture(autouse=True)
    def skip_if_immutable(self, dtype, request):
        """Skip the tests defined in this class when the dtype is immutable."""
        if dtype._is_immutable:
            node = request.node
            if node.name.split("[")[0] == "test_is_immutable":
                # This fixture is auto-used, but we want to not-skip
                # test_is_immutable.
                return

            # When BaseSetitemTests is mixed into ExtensionTests, we only
            # want this fixture to operate on the tests defined in this
            # class/file.
            defined_in = node.function.__qualname__.split(".")[0]
            if defined_in == "BaseSetitemTests":
                pytest.skip("__setitem__ test not applicable with immutable dtype")

    def test_is_immutable(self, data):
        """Immutable dtypes raise TypeError on setitem; mutable ones accept it."""
        if data.dtype._is_immutable:
            with pytest.raises(TypeError):
                data[0] = data[0]
        else:
            data[0] = data[1]
            assert data[0] == data[1]

    def test_setitem_scalar_series(self, data, box_in_series):
        """Set a single position from another element."""
        if box_in_series:
            data = pd.Series(data)
        data[0] = data[1]
        assert data[0] == data[1]

    def test_setitem_sequence(self, data, box_in_series):
        """Swap the first two elements through a list indexer."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()

        data[[0, 1]] = [data[1], data[0]]
        assert data[0] == original[1]
        assert data[1] == original[0]

    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        """A value whose length differs from the indexer raises and changes nothing."""
        ser = pd.Series(data)
        original = ser.copy()
        value = [data[0]]
        if as_array:
            value = data._from_sequence(value, dtype=data.dtype)

        xpr = "cannot set using a {} indexer with a different length"
        with pytest.raises(ValueError, match=xpr.format("list-like")):
            ser[[0, 1]] = value
        # Ensure no modifications made before the exception
        tm.assert_series_equal(ser, original)

        with pytest.raises(ValueError, match=xpr.format("slice")):
            ser[slice(3)] = value
        tm.assert_series_equal(ser, original)

    def test_setitem_empty_indexer(self, data, box_in_series):
        """Setting with an empty integer indexer leaves the data unchanged."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()
        data[np.array([], dtype=int)] = []
        tm.assert_equal(data, original)

    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        """A scalar value is broadcast over a list indexer."""
        if box_in_series:
            data = pd.Series(data)
        data[[0, 1]] = data[2]
        assert data[0] == data[2]
        assert data[1] == data[2]

    @pytest.mark.parametrize("setter", ["loc", "iloc"])
    def test_setitem_scalar(self, data, setter):
        """Set one element of a Series through ``.loc`` / ``.iloc``."""
        arr = pd.Series(data)
        setter = getattr(arr, setter)
        setter[0] = data[1]
        assert arr[0] == data[1]

    def test_setitem_loc_scalar_mixed(self, data):
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.loc[0, "B"] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_loc_scalar_single(self, data):
        df = pd.DataFrame({"B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        df = pd.DataFrame({"A": data, "B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_mixed(self, data):
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.iloc[0, 1] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_iloc_scalar_single(self, data):
        df = pd.DataFrame({"B": data})
        df.iloc[10, 0] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        df = pd.DataFrame({"A": data, "B": data})
        df.iloc[10, 1] = data[1]
        assert df.loc[10, "B"] == data[1]

    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
            pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array", "boolean-array-na"],
    )
    def test_setitem_mask(self, data, mask, box_in_series):
        """Set the first three of five elements through a boolean mask."""
        arr = data[:5].copy()
        expected = arr.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)
        arr[mask] = data[0]
        tm.assert_equal(expected, arr)

    def test_setitem_mask_raises(self, data, box_in_series):
        """A boolean mask of the wrong length raises IndexError."""
        # wrong length
        mask = np.array([True, False])

        if box_in_series:
            data = pd.Series(data)

        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

        mask = pd.array(mask, dtype="boolean")
        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
        """Set through a nullable boolean mask that also contains NA entries."""
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        mask[:3] = True
        mask[3:5] = pd.NA

        if box_in_series:
            data = pd.Series(data)

        data[mask] = data[0]

        assert (data[:3] == data[0]).all()

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        """Set the first three elements through an integer indexer."""
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])

        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[idx] = arr[0]
        tm.assert_equal(arr, expected)

    # NOTE(review): the case labelled "integer-array-True" passes
    # box_in_series=False, so it duplicates "integer-array-False". Presumably
    # True was intended; the value is left unchanged here because switching it
    # alters which code path is exercised -- confirm before changing.
    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param(
                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
            ),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
        ],
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        """An integer indexer containing NA raises ValueError."""
        arr = data.copy()

        # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
        # for list of labels with Series
        if box_in_series:
            arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])

        msg = "Cannot index with an integer indexer containing NA values"
        with pytest.raises(ValueError, match=msg):
            arr[idx] = arr[0]

    @pytest.mark.parametrize("as_callable", [True, False])
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_aligned(self, data, as_callable, setter):
        """Set two elements through a mask (or a callable returning it)."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if as_callable:
            mask2 = lambda x: mask
        else:
            mask2 = mask

        if setter:
            # loc
            target = getattr(ser, setter)
        else:
            # Series.__setitem__
            target = ser

        # Assign only through the parametrized target. A second, unconditional
        # ``ser[mask2] = ...`` used to follow and overwrote the result, so the
        # assertions never checked the ``loc`` path.
        target[mask2] = data[5:7]

        assert ser[0] == data[5]
        assert ser[1] == data[6]

    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        """A scalar value is broadcast over a boolean mask."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if setter:  # loc
            target = getattr(ser, setter)
        else:  # __setitem__
            target = ser

        target[mask] = data[10]
        assert ser[0] == data[10]
        assert ser[1] == data[10]

    def test_setitem_expand_columns(self, data):
        """Add a new column next to an extension column, then overwrite it."""
        df = pd.DataFrame({"A": data})
        result = df.copy()
        result["B"] = 1
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = 1
        tm.assert_frame_equal(result, expected)

        # overwrite with new type
        result["B"] = data
        expected = pd.DataFrame({"A": data, "B": data})
        tm.assert_frame_equal(result, expected)

    def test_setitem_expand_with_extension(self, data):
        """Add an extension column to a frame of plain integers."""
        df = pd.DataFrame({"A": [1] * len(data)})
        result = df.copy()
        result["B"] = data
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
        tm.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = data
        tm.assert_frame_equal(result, expected)

    def test_setitem_frame_invalid_length(self, data):
        """Assigning a column of the wrong length raises ValueError."""
        df = pd.DataFrame({"A": [1] * len(data)})
        xpr = (
            rf"Length of values \({len(data[:5])}\) "
            rf"does not match length of index \({len(df)}\)"
        )
        with pytest.raises(ValueError, match=xpr):
            df["B"] = data[:5]

    def test_setitem_tuple_index(self, data):
        """Set by a tuple label on a Series whose index holds tuples."""
        ser = pd.Series(data[:2], index=[(0, 0), (0, 1)])
        expected = pd.Series(data.take([1, 1]), index=ser.index)
        ser[(0, 0)] = data[1]
        tm.assert_series_equal(ser, expected)

    def test_setitem_slice(self, data, box_in_series):
        """Set the first three elements through a slice."""
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[:3] = data[0]
        tm.assert_equal(arr, expected)

    def test_setitem_loc_iloc_slice(self, data):
        """Set a slice through ``.iloc`` and the equivalent label slice via ``.loc``."""
        arr = data[:5].copy()
        s = pd.Series(arr, index=["a", "b", "c", "d", "e"])
        expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)

        result = s.copy()
        result.iloc[:3] = data[0]
        tm.assert_equal(result, expected)

        result = s.copy()
        result.loc[:"c"] = data[0]
        tm.assert_equal(result, expected)

    def test_setitem_slice_mismatch_length_raises(self, data):
        arr = data[:5]
        with pytest.raises(ValueError):
            arr[:1] = arr[:2]

    def test_setitem_slice_array(self, data):
        arr = data[:5].copy()
        arr[:5] = data[-5:]
        tm.assert_extension_array_equal(arr, data[-5:])

    def test_setitem_scalar_key_sequence_raise(self, data):
        arr = data[:5].copy()
        with pytest.raises(ValueError):
            arr[0] = arr[[0, 1]]

    def test_setitem_preserves_views(self, data):
        # GH#28150 setitem shouldn't swap the underlying data
        view1 = data.view()
        view2 = data[:]

        data[0] = data[1]
        assert view1[0] == data[1]
        assert view2[0] == data[1]

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
        # https://github.com/pandas-dev/pandas/issues/32395
        df = expected = pd.DataFrame({0: pd.Series(data)})
        result = pd.DataFrame(index=df.index)

        key = full_indexer(df)
        result.loc[key, 0] = df[0]

        tm.assert_frame_equal(result, expected)

    def test_setitem_with_expansion_row(self, data, na_value):
        """Append rows (a value, then the NA value) by setting new labels via ``.loc``."""
        df = pd.DataFrame({"data": data[:1]})

        df.loc[1, "data"] = data[1]
        expected = pd.DataFrame({"data": data[:2]})
        tm.assert_frame_equal(df, expected)

        # https://github.com/pandas-dev/pandas/issues/47284
        df.loc[2, "data"] = na_value
        expected = pd.DataFrame(
            {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)}
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_series(self, data, full_indexer):
        # https://github.com/pandas-dev/pandas/issues/32395
        ser = pd.Series(data, name="data")
        result = pd.Series(index=ser.index, dtype=object, name="data")

        # because result has object dtype, the attempt to do setting inplace
        # is successful, and object dtype is retained
        key = full_indexer(ser)
        result.loc[key] = ser

        expected = pd.Series(
            data.astype(object), index=ser.index, name="data", dtype=object
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_frame_2d_values(self, data):
        # GH#44514
        df = pd.DataFrame({"A": data})

        # Avoiding using_array_manager fixture
        # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
        using_copy_on_write = pd.options.mode.copy_on_write

        blk_data = df._mgr.arrays[0]

        orig = df.copy()

        df.iloc[:] = df.copy()
        tm.assert_frame_equal(df, orig)

        df.iloc[:-1] = df.iloc[:-1].copy()
        tm.assert_frame_equal(df, orig)

        df.iloc[:] = df.values
        tm.assert_frame_equal(df, orig)
        if not using_array_manager and not using_copy_on_write:
            # GH#33457 Check that this setting occurred in-place
            # FIXME(ArrayManager): this should work there too
            assert df._mgr.arrays[0] is blk_data

        df.iloc[:-1] = df.values[:-1]
        tm.assert_frame_equal(df, orig)

    def test_delitem_series(self, data):
        # GH#40763
        ser = pd.Series(data, name="data")

        # every position except 1
        taker = np.arange(len(ser))
        taker = np.delete(taker, 1)

        expected = ser[taker]
        del ser[1]
        tm.assert_series_equal(ser, expected)

    def test_setitem_invalid(self, data, invalid_scalar):
        """Setting a scalar the array cannot hold raises ValueError or TypeError."""
        msg = ""  # messages vary by subclass, so we do not test it
        with pytest.raises((ValueError, TypeError), match=msg):
            data[0] = invalid_scalar

        with pytest.raises((ValueError, TypeError), match=msg):
            data[:] = invalid_scalar

    def test_setitem_2d_values(self, data):
        # GH50085
        original = data.copy()
        df = pd.DataFrame({"a": data, "b": data})
        df.loc[[0, 1], :] = df.loc[[1, 0], :].values
        assert (df.loc[0, :] == original[1]).all()
        assert (df.loc[1, :] == original[0]).all()
|
230
lib/python3.11/site-packages/pandas/tests/extension/conftest.py
Normal file
230
lib/python3.11/site-packages/pandas/tests/extension/conftest.py
Normal file
@ -0,0 +1,230 @@
|
||||
import operator
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas._config.config import _get_option
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
options,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Fixture supplying the ExtensionDtype to validate; raises until overridden."""
    raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """
    Array of length 100 for the type under test; raises until overridden.

    Requirements on the first two elements:

    * data[0] and data[1] are both non-missing
    * data[0] and data[1] are different from each other
    """
    raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos(dtype):
    """
    Array of length 100 whose elements are all two; raises until overridden.

    An overriding fixture should call pytest.skip when the dtype does not
    support divmod.
    """
    # Object-dtypes may want to allow this, but for the most part only
    # numeric and timedelta-like dtypes will need to implement this.
    if not dtype._is_numeric and dtype.kind != "m":
        pytest.skip(f"{dtype} is not a numeric dtype")

    raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Array of length 2 laid out as [NA, Valid]; raises until overridden."""
    raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture returning the 'data' or the 'data_missing' fixture value."""
    by_name = {"data": data, "data_missing": data_missing}
    # Any other parameter falls through to None.
    return by_name.get(request.param)
|
||||
|
||||
|
||||
@pytest.fixture
def data_repeated(data):
    """
    Build a factory that produces the same dataset several times.

    Parameters
    ----------
    data : fixture implementing `data`

    Returns
    -------
    Callable[[int], Generator]:
        A callable taking a `count` argument and returning a generator
        that yields `data` exactly `count` times.
    """

    def gen(count):
        yield from (data for _ in range(count))

    return gen
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """
    Array of length 3 with a known sort order; raises until overridden.

    The three items should be [B, C, A] where
        A < B < C

    Boolean dtypes have only 2 values available, so for those
    set B=C=True
    """
    raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """
    Array of length 3 with a known sort order; raises until overridden.

    The three items should be [B, NA, A] where
        A < B and NA is missing.
    """
    raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """
    Binary operator used to compare NA values.

    The returned function takes two arguments and should return True
    when both of them are the (scalar) NA for the type under test.

    The default is ``operator.is_``
    """
    return operator.is_
|
||||
|
||||
|
||||
@pytest.fixture
def na_value(dtype):
    """
    Scalar missing value for the type under test, taken from dtype.na_value.

    TODO: can be removed in 3.x (see https://github.com/pandas-dev/pandas/pull/54930)
    """
    return dtype.na_value
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """
    Data for the factorization, grouping, and unique tests; raises until overridden.

    The expected layout is [B, B, NA, NA, A, A, B, C]

    with A < B < C and NA missing.

    For a dtype with _is_boolean = True (only 2 unique non-NA entries),
    set C=B.
    """
    raise NotImplementedError
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def box_in_series(request):
    """Boolean fixture: whether the data should be boxed in a Series."""
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        lambda x: 1,
        lambda x: [1] * len(x),
        lambda x: Series([1] * len(x)),
        lambda x: x,
    ],
    ids=["scalar", "list", "series", "object"],
)
def groupby_apply_op(request):
    """
    Parametrized fixture giving the functions used to test groupby.apply().
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_frame(request):
    """
    Boolean fixture for comparing a Series with its Series.to_frame() form.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_series(request):
    """
    Boolean fixture for comparing arr with Series(arr) in tests.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def use_numpy(request):
    """
    Boolean fixture for comparing an ExtensionDtype array with a numpy
    array in tests.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["ffill", "bfill"])
def fillna_method(request):
    """
    Parametrized fixture giving the method names 'ffill' and 'bfill' used
    when testing Series.fillna(method=<method>).
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def as_array(request):
    """
    Boolean fixture used when testing the ExtensionDtype _from_sequence method.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture
def invalid_scalar(data):
    """
    Scalar that *cannot* be stored in this ExtensionArray.

    The default (a bare ``object`` instance) should work for most
    subclasses, but is not guaranteed.

    When the array can hold any item (i.e. object dtype), use pytest.skip.
    """
    return object()
|
||||
|
||||
|
||||
@pytest.fixture
def using_copy_on_write() -> bool:
    """
    Fixture reporting whether Copy-on-Write is enabled with the block manager.
    """
    if options.mode.copy_on_write is not True:
        return False
    # Only consult the data manager option once Copy-on-Write is known to be on.
    return _get_option("mode.data_manager", silent=True) == "block"
|
@ -0,0 +1,6 @@
|
||||
from pandas.tests.extension.date.array import (
|
||||
DateArray,
|
||||
DateDtype,
|
||||
)
|
||||
|
||||
__all__ = ["DateArray", "DateDtype"]
|
@ -0,0 +1,188 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
cast,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.dtypes import register_extension_dtype
|
||||
|
||||
from pandas.api.extensions import (
|
||||
ExtensionArray,
|
||||
ExtensionDtype,
|
||||
)
|
||||
from pandas.api.types import pandas_dtype
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
from pandas._typing import (
|
||||
Dtype,
|
||||
PositionalIndexer,
|
||||
)
|
||||
|
||||
|
||||
@register_extension_dtype
class DateDtype(ExtensionDtype):
    """Extension dtype whose scalars are ``datetime.date`` objects."""

    @property
    def type(self):
        # scalar type of the elements
        return dt.date

    @property
    def name(self):
        return "DateDtype"

    @classmethod
    def construct_from_string(cls, string: str):
        """Return a new instance when `string` is the class name, else raise TypeError."""
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )

        if string != cls.__name__:
            raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
        return cls()

    @classmethod
    def construct_array_type(cls):
        return DateArray

    @property
    def na_value(self):
        # the smallest representable date stands in for "missing"
        return dt.date.min

    def __repr__(self) -> str:
        return self.name
|
||||
|
||||
|
||||
class DateArray(ExtensionArray):
    """
    Extension array of dates stored as three parallel integer arrays.

    ``_year``, ``_month`` and ``_day`` always have the same length; element
    ``i`` of the array is ``date(_year[i], _month[i], _day[i])``.
    ``datetime.date.min`` is used as the missing-value marker.
    """

    def __init__(
        self,
        dates: (
            dt.date
            | Sequence[dt.date]
            | tuple[np.ndarray, np.ndarray, np.ndarray]
            | np.ndarray
        ),
    ) -> None:
        """
        Build the array from one of the supported inputs.

        Parameters
        ----------
        dates : date, list of date, tuple of three ndarrays, or ndarray
            * a single ``datetime.date`` gives a length-1 array
            * a ``list`` of dates
            * a ``(year, month, day)`` triple of equal-length ndarrays
            * an ndarray of dtype "U10" holding "yyyy-mm-dd" strings

        Raises
        ------
        ValueError
            If a tuple is not a triple or its members differ in length.
        TypeError
            If a tuple member is not an ndarray, or the input kind is not
            supported.
        """
        if isinstance(dates, dt.date):
            self._year = np.array([dates.year])
            self._month = np.array([dates.month])
            # Fixed: this used to store ``dates.year`` as the day.
            self._day = np.array([dates.day])
            return

        ldates = len(dates)
        if isinstance(dates, list):
            # pre-allocate the arrays since we know the size before hand
            self._year = np.zeros(ldates, dtype=np.uint16)  # 65535 (0, 9999)
            self._month = np.zeros(ldates, dtype=np.uint8)  # 255 (1, 12)
            self._day = np.zeros(ldates, dtype=np.uint8)  # 255 (1, 31)
            # populate them
            for i, date in enumerate(dates):
                self._year[i] = date.year
                self._month[i] = date.month
                self._day[i] = date.day

        elif isinstance(dates, tuple):
            # only support triples
            if ldates != 3:
                raise ValueError("only triples are valid")
            # check if all elements have the same type
            if any(not isinstance(x, np.ndarray) for x in dates):
                raise TypeError("invalid type")
            ly, lm, ld = (len(cast(np.ndarray, d)) for d in dates)
            if not ly == lm == ld:
                raise ValueError(
                    f"tuple members must have the same length: {(ly, lm, ld)}"
                )
            self._year = dates[0].astype(np.uint16)
            self._month = dates[1].astype(np.uint8)
            self._day = dates[2].astype(np.uint8)

        elif isinstance(dates, np.ndarray) and dates.dtype == "U10":
            self._year = np.zeros(ldates, dtype=np.uint16)  # 65535 (0, 9999)
            self._month = np.zeros(ldates, dtype=np.uint8)  # 255 (1, 12)
            self._day = np.zeros(ldates, dtype=np.uint8)  # 255 (1, 31)

            # error: "object_" object is not iterable
            obj = np.char.split(dates, sep="-")
            for (i,), (y, m, d) in np.ndenumerate(obj):  # type: ignore[misc]
                self._year[i] = int(y)
                self._month[i] = int(m)
                self._day[i] = int(d)

        else:
            raise TypeError(f"{type(dates)} is not supported")

    @property
    def dtype(self) -> ExtensionDtype:
        return DateDtype()

    def astype(self, dtype, copy=True):
        """Cast to `dtype`; a DateDtype target returns this array or a copy of it."""
        dtype = pandas_dtype(dtype)

        if isinstance(dtype, DateDtype):
            data = self.copy() if copy else self
        else:
            data = self.to_numpy(dtype=dtype, copy=copy, na_value=dt.date.min)

        return data

    @property
    def nbytes(self) -> int:
        return self._year.nbytes + self._month.nbytes + self._day.nbytes

    def __len__(self) -> int:
        return len(self._year)  # all 3 arrays are enforced to have the same length

    def __getitem__(self, item: PositionalIndexer):
        """Return the date at integer position `item`; other indexers are rejected."""
        if isinstance(item, int):
            return dt.date(self._year[item], self._month[item], self._day[item])
        else:
            raise NotImplementedError("only ints are supported as indexes")

    def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
        """Store the date `value` at integer position `key`."""
        if not isinstance(key, int):
            raise NotImplementedError("only ints are supported as indexes")

        if not isinstance(value, dt.date):
            raise TypeError("you can only set datetime.date types")

        self._year[key] = value.year
        self._month[key] = value.month
        self._day[key] = value.day

    def __repr__(self) -> str:
        return f"DateArray{list(zip(self._year, self._month, self._day))}"

    def copy(self) -> DateArray:
        return DateArray((self._year.copy(), self._month.copy(), self._day.copy()))

    def isna(self) -> np.ndarray:
        """Return a boolean mask that is True where the element equals ``date.min``."""
        return np.logical_and(
            np.logical_and(
                self._year == dt.date.min.year, self._month == dt.date.min.month
            ),
            self._day == dt.date.min.day,
        )

    @classmethod
    def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
        """
        Build a DateArray from a DateArray or from an ndarray of date strings.

        A single ``datetime.date`` raises TypeError.
        """
        if isinstance(scalars, dt.date):
            raise TypeError
        elif isinstance(scalars, DateArray):
            if dtype is not None:
                return scalars.astype(dtype, copy=copy)
            if copy:
                return scalars.copy()
            # NOTE(review): __getitem__ only accepts ints, so this slice raises
            # NotImplementedError -- left as in the original.
            return scalars[:]
        elif isinstance(scalars, np.ndarray):
            scalars = scalars.astype("U10")  # 10 chars for yyyy-mm-dd
            return DateArray(scalars)
        # NOTE(review): any other input (e.g. a plain list) falls through and
        # returns None -- left as in the original.
|
@ -0,0 +1,8 @@
|
||||
from pandas.tests.extension.decimal.array import (
|
||||
DecimalArray,
|
||||
DecimalDtype,
|
||||
make_data,
|
||||
to_decimal,
|
||||
)
|
||||
|
||||
__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"]
|
@ -0,0 +1,311 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import decimal
|
||||
import numbers
|
||||
import sys
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
from pandas.core.dtypes.common import (
|
||||
is_dtype_equal,
|
||||
is_float,
|
||||
is_integer,
|
||||
pandas_dtype,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.extensions import (
|
||||
no_default,
|
||||
register_extension_dtype,
|
||||
)
|
||||
from pandas.api.types import (
|
||||
is_list_like,
|
||||
is_scalar,
|
||||
)
|
||||
from pandas.core import arraylike
|
||||
from pandas.core.algorithms import value_counts_internal as value_counts
|
||||
from pandas.core.arraylike import OpsMixin
|
||||
from pandas.core.arrays import (
|
||||
ExtensionArray,
|
||||
ExtensionScalarOpsMixin,
|
||||
)
|
||||
from pandas.core.indexers import check_array_indexer
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import type_t
|
||||
|
||||
|
||||
@register_extension_dtype
class DecimalDtype(ExtensionDtype):
    """
    Extension dtype whose scalars are ``decimal.Decimal`` objects.

    Its single piece of metadata is ``context``, a ``decimal.Context``.
    """

    name = "decimal"
    type = decimal.Decimal
    na_value = decimal.Decimal("NaN")
    _metadata = ("context",)

    def __init__(self, context=None) -> None:
        # A falsy ``context`` (in particular None) selects the current context.
        self.context = context if context else decimal.getcontext()

    def __repr__(self) -> str:
        return "DecimalDtype(context={})".format(self.context)

    @classmethod
    def construct_array_type(cls) -> type_t[DecimalArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        array_type = DecimalArray
        return array_type

    @property
    def _is_numeric(self) -> bool:
        # Decimal data is always reported as numeric.
        return True
|
||||
|
||||
|
||||
class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray):
    """
    Test extension array that stores ``decimal.Decimal`` scalars.

    The values live in a one-dimensional object ndarray (``_data``, also
    reachable as ``data`` and ``_items``). The dtype is a ``DecimalDtype``
    carrying a ``decimal.Context``.
    """

    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False, context=None) -> None:
        """
        Parameters
        ----------
        values : sequence
            ``decimal.Decimal`` objects; ints and floats are converted to
            ``Decimal`` and NaN becomes ``DecimalDtype.na_value``.
        dtype : DecimalDtype, optional
            Supplies the context when ``context`` is not given.
        copy : bool, default False
            Copy ``values`` when it is already an object ndarray; any other
            input is always materialised into a new array.
        context : decimal.Context, optional
            Context for the resulting dtype.

        Raises
        ------
        TypeError
            If an element is neither a number nor a ``decimal.Decimal``.
        """
        # np.asarray returns the very same object only for an object ndarray,
        # which keeps views (e.g. slices) sharing memory with their parent.
        # Every other input becomes a new array, so the coercion below never
        # writes into the caller's container.
        data = np.asarray(values, dtype=object)
        if copy and data is values:
            data = data.copy()

        for i, val in enumerate(data):
            if is_float(val) or is_integer(val):
                if np.isnan(val):
                    data[i] = DecimalDtype.na_value
                else:
                    # error: Argument 1 has incompatible type "float | int |
                    # integer[Any]"; expected "Decimal | float | str | tuple[int,
                    # Sequence[int], int]"
                    data[i] = DecimalDtype.type(val)  # type: ignore[arg-type]
            elif not isinstance(val, decimal.Decimal):
                raise TypeError("All values must be of type " + str(decimal.Decimal))

        # Honour the context carried by an explicit dtype so that copy(),
        # take() and friends do not silently fall back to the current context.
        if context is None and isinstance(dtype, DecimalDtype):
            context = dtype.context

        self._data = data
        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self.data = self._data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data
        self._dtype = DecimalDtype(context)

    @property
    def dtype(self):
        """The ``DecimalDtype`` created for this array."""
        return self._dtype

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """Construct an array from ``scalars``, forwarding ``dtype`` and ``copy``."""
        return cls(scalars, dtype=dtype, copy=copy)

    @classmethod
    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
        """Parse each string with ``decimal.Decimal`` and build an array."""
        return cls._from_sequence(
            [decimal.Decimal(x) for x in strings], dtype=dtype, copy=copy
        )

    @classmethod
    def _from_factorized(cls, values, original):
        """Rebuild an array from factorized ``values``, keeping ``original``'s dtype."""
        return cls(values, dtype=original.dtype)

    # Input types accepted by __array_ufunc__ (DecimalArray itself is added there).
    _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)

    def to_numpy(
        self,
        dtype=None,
        copy: bool = False,
        na_value: object = no_default,
        decimals=None,
    ) -> np.ndarray:
        """
        Convert to an ndarray, optionally rounding every element.

        ``copy`` and ``na_value`` are accepted but not used by this
        implementation. When ``decimals`` is given each element is passed
        through ``round(x, decimals)``.
        """
        result = np.asarray(self, dtype=dtype)
        if decimals is not None:
            result = np.asarray([round(x, decimals) for x in result])
        return result

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        """
        Apply a NumPy ufunc to the underlying object array.

        Returns ``NotImplemented`` when an input is not one of the handled
        types. Otherwise the call is offered to the pandas dispatch helpers
        before falling back to evaluating the ufunc on the raw data.
        """
        if not all(
            isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs
        ):
            return NotImplemented

        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            # e.g. test_array_ufunc_series_scalar_other
            return result

        if "out" in kwargs:
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if method == "reduce":
            # Keep the dispatch outcome in its own name: when it is
            # NotImplemented the ufunc result computed above must survive
            # to be returned below.
            reduced = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if reduced is not NotImplemented:
                return reduced

        def reconstruct(x):
            # Scalars pass through; array results are wrapped again.
            if isinstance(x, (decimal.Decimal, numbers.Number)):
                return x
            return type(self)._from_sequence(x, dtype=self.dtype)

        if ufunc.nout > 1:
            return tuple(reconstruct(x) for x in result)
        return reconstruct(result)

    def __getitem__(self, item):
        """Return a scalar for an integer ``item``, otherwise a new array."""
        if isinstance(item, numbers.Integral):
            return self._data[item]
        # array, slice.
        item = pd.api.indexers.check_array_indexer(self, item)
        return type(self)(self._data[item], dtype=self.dtype)

    def take(self, indexer, allow_fill=False, fill_value=None):
        """Take elements by position; the dtype's NA value is the default fill."""
        from pandas.api.extensions import take

        data = self._data
        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
        return self._from_sequence(result, dtype=self.dtype)

    def copy(self):
        """Return a new array backed by a copy of the data, with the same dtype."""
        return type(self)(self._data.copy(), dtype=self.dtype)

    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        Returns ``self`` when the dtype is equal and ``copy`` is false. A
        ``DecimalDtype`` target yields a ``DecimalArray`` with that dtype's
        context; anything else is delegated to the parent implementation.
        """
        if is_dtype_equal(dtype, self._dtype):
            if not copy:
                return self
        dtype = pandas_dtype(dtype)
        if isinstance(dtype, type(self.dtype)):
            return type(self)(self._data, copy=copy, context=dtype.context)

        return super().astype(dtype, copy=copy)

    def __setitem__(self, key, value) -> None:
        """
        Assign ``value`` at ``key`` after converting it to ``Decimal``.

        Raises
        ------
        ValueError
            If a list-like ``value`` is assigned to a scalar ``key``.
        """
        if is_list_like(value):
            if is_scalar(key):
                raise ValueError("setting an array element with a sequence.")
            value = [decimal.Decimal(v) for v in value]
        else:
            value = decimal.Decimal(value)

        key = check_array_indexer(self, key)
        self._data[key] = value

    def __len__(self) -> int:
        return len(self._data)

    def __contains__(self, item) -> bool | np.bool_:
        """Membership test; a NaN ``Decimal`` matches any missing element."""
        if not isinstance(item, decimal.Decimal):
            return False
        elif item.is_nan():
            return self.isna().any()
        else:
            return super().__contains__(item)

    @property
    def nbytes(self) -> int:
        """Size estimate: the length times ``sys.getsizeof`` of the first element."""
        n = len(self)
        if n:
            return n * sys.getsizeof(self[0])
        return 0

    def isna(self):
        """Return a boolean ndarray that is True where the element is NaN."""
        return np.array([x.is_nan() for x in self._data], dtype=bool)

    @property
    def _na_value(self):
        return decimal.Decimal("NaN")

    def _formatter(self, boxed=False):
        """Return the element formatter; boxed output is prefixed with ``Decimal: ``."""
        if boxed:
            return "Decimal: {}".format
        return repr

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate the data of several arrays, keeping the first one's dtype."""
        data = np.concatenate([x._data for x in to_concat])
        return cls(data, dtype=to_concat[0].dtype)

    def _reduce(
        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
    ):
        """
        Reduce with the ndarray method called ``name``.

        Raises
        ------
        NotImplementedError
            If the object ndarray has no attribute ``name``.
        """
        if skipna and self.isna().any():
            # If we don't have any NAs, we can ignore skipna
            other = self[~self.isna()]
            result = other._reduce(name, **kwargs)
        elif name == "sum" and len(self) == 0:
            # GH#29630 avoid returning int 0 or np.bool_(False) on old numpy
            result = decimal.Decimal(0)
        else:
            try:
                op = getattr(self.data, name)
            except AttributeError as err:
                raise NotImplementedError(
                    f"decimal does not support the {name} operation"
                ) from err
            result = op(axis=0)

        if keepdims:
            return type(self)([result])
        else:
            return result

    def _cmp_method(self, other, op):
        """Element-wise comparison returning a boolean ndarray."""

        # For use with OpsMixin
        def convert_values(param):
            if isinstance(param, ExtensionArray) or is_list_like(param):
                ovalues = param
            else:
                # Assume it's an object
                ovalues = [param] * len(self)
            return ovalues

        lvalues = self
        rvalues = convert_values(other)

        # If the operator is not defined for the underlying objects,
        # a TypeError should be raised
        res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]

        return np.asarray(res, dtype=bool)

    def value_counts(self, dropna: bool = True):
        """Count values by delegating to pandas' internal ``value_counts``."""
        return value_counts(self.to_numpy(), dropna=dropna)

    # We override fillna here to simulate a 3rd party EA that has done so. This
    #  lets us test the deprecation telling authors to implement _pad_or_backfill
    # Simulate a 3rd-party EA that has not yet updated to include a "copy"
    #  keyword in its fillna method.
    # error: Signature of "fillna" incompatible with supertype "ExtensionArray"
    def fillna(  # type: ignore[override]
        self,
        value=None,
        method=None,
        limit: int | None = None,
    ):
        return super().fillna(value=value, method=method, limit=limit, copy=True)
|
||||
|
||||
|
||||
def to_decimal(values, context=None):
    """Convert every item of ``values`` to ``Decimal`` and wrap them in a ``DecimalArray``."""
    converted = [decimal.Decimal(item) for item in values]
    return DecimalArray(converted, context=context)
|
||||
|
||||
|
||||
def make_data():
    """Return 100 ``Decimal`` values drawn from a generator seeded with 2."""
    rng = np.random.default_rng(2)
    samples = rng.random(100)
    return [decimal.Decimal(sample) for sample in samples]
|
||||
|
||||
|
||||
# Runs at import time, once the class body is complete.
# NOTE(review): presumably this installs the arithmetic dunder methods provided
# by ExtensionScalarOpsMixin -- confirm against the mixin.
DecimalArray._add_arithmetic_ops()
|
@ -0,0 +1,587 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import decimal
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gt2
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.extension import base
|
||||
from pandas.tests.extension.decimal.array import (
|
||||
DecimalArray,
|
||||
DecimalDtype,
|
||||
make_data,
|
||||
to_decimal,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a ``DecimalDtype`` created with its default arguments."""
    decimal_dtype = DecimalDtype()
    return decimal_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Provide a ``DecimalArray`` wrapping the values from ``make_data``."""
    values = make_data()
    return DecimalArray(values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos():
    """Provide a ``DecimalArray`` holding one hundred ``Decimal(2)`` values."""
    twos = [decimal.Decimal(2) for _ in range(100)]
    return DecimalArray(twos)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Provide a two-element array: a missing value followed by ``Decimal(1)``."""
    missing = decimal.Decimal("NaN")
    valid = decimal.Decimal(1)
    return DecimalArray([missing, valid])
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Provide the decimals 1, 2 and 0, in that order."""
    unsorted = [decimal.Decimal(text) for text in ("1", "2", "0")]
    return DecimalArray(unsorted)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Provide the decimals 1, NaN and 0, in that order."""
    unsorted = [decimal.Decimal(text) for text in ("1", "NaN", "0")]
    return DecimalArray(unsorted)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Provide a comparator that is truthy only when both arguments are NaN decimals."""

    def both_nan(x, y):
        return x.is_nan() and y.is_nan()

    return both_nan
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Provide eight values laid out as ``[b, b, na, na, a, a, b, c]`` with a < b < c."""
    low = decimal.Decimal("0.0")
    mid = decimal.Decimal("1.0")
    high = decimal.Decimal("2.0")
    missing = decimal.Decimal("NaN")
    layout = [mid, mid, missing, missing, low, low, mid, high]
    return DecimalArray(layout)
|
||||
|
||||
|
||||
class TestDecimalArray(base.ExtensionTests):
    """
    Run the shared extension-array test suite against ``DecimalArray``.

    The methods either configure the shared tests or override individual
    ones. Most overrides assert the deprecation warnings raised by the
    ``fillna`` paths before delegating to the parent test.
    """

    def _get_expected_exception(
        self, op_name: str, obj, other
    ) -> type[Exception] | tuple[type[Exception], ...] | None:
        # No arithmetic operation is expected to raise for decimals.
        return None

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        # Every reduction is reported as supported.
        return True

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        """Compare a Series reduction with the same reduction on the ndarray."""
        if op_name == "count":
            return super().check_reduce(ser, op_name, skipna)
        else:
            result = getattr(ser, op_name)(skipna=skipna)
            expected = getattr(np.asarray(ser), op_name)()
            tm.assert_almost_equal(result, expected)

    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
        # These reductions are expected to fail with NotImplementedError.
        if all_numeric_reductions in ["kurt", "skew", "sem", "median"]:
            mark = pytest.mark.xfail(raises=NotImplementedError)
            request.applymarker(mark)
        super().test_reduce_series_numeric(data, all_numeric_reductions, skipna)

    def test_reduce_frame(self, data, all_numeric_reductions, skipna, request):
        op_name = all_numeric_reductions
        if op_name in ["skew", "median"]:
            mark = pytest.mark.xfail(raises=NotImplementedError)
            request.applymarker(mark)

        return super().test_reduce_frame(data, all_numeric_reductions, skipna)

    def test_compare_scalar(self, data, comparison_op):
        ser = pd.Series(data)
        self._compare_other(ser, data, comparison_op, 0.5)

    def test_compare_array(self, data, comparison_op):
        ser = pd.Series(data)

        alter = np.random.default_rng(2).choice([-1, 0, 1], len(data))
        # Randomly double, halve or keep same value
        other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter]
        self._compare_other(ser, data, comparison_op, other)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        """Check arithmetic against Series and scalar operands with traps disabled."""
        op_name = all_arithmetic_operators
        ser = pd.Series(data)

        context = decimal.getcontext()
        divbyzerotrap = context.traps[decimal.DivisionByZero]
        invalidoptrap = context.traps[decimal.InvalidOperation]
        context.traps[decimal.DivisionByZero] = 0
        context.traps[decimal.InvalidOperation] = 0

        try:
            # Decimal supports ops with int, but not float
            other = pd.Series([int(d * 100) for d in data])
            self.check_opname(ser, op_name, other)

            if "mod" not in op_name:
                self.check_opname(ser, op_name, ser * 2)

            self.check_opname(ser, op_name, 0)
            self.check_opname(ser, op_name, 5)
        finally:
            # Restore the traps even when a check fails, so the modified
            # context cannot leak into tests that run afterwards.
            context.traps[decimal.DivisionByZero] = divbyzerotrap
            context.traps[decimal.InvalidOperation] = invalidoptrap

    def test_fillna_frame(self, data_missing):
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(
            DeprecationWarning, match=msg, check_stacklevel=False
        ):
            super().test_fillna_frame(data_missing)

    def test_fillna_limit_pad(self, data_missing):
        # The parent test runs once per expected warning message.
        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_pad(data_missing)

        msg = "The 'method' keyword in DecimalArray.fillna is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_pad(data_missing)

    @pytest.mark.parametrize(
        "limit_area, input_ilocs, expected_ilocs",
        [
            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
        ],
    )
    def test_ffill_limit_area(
        self, data_missing, limit_area, input_ilocs, expected_ilocs
    ):
        # GH#56616
        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            msg = "DecimalArray does not implement limit_area"
            with pytest.raises(NotImplementedError, match=msg):
                super().test_ffill_limit_area(
                    data_missing, limit_area, input_ilocs, expected_ilocs
                )

    def test_fillna_limit_backfill(self, data_missing):
        # The parent test runs once per expected warning message.
        msg = "Series.fillna with 'method' is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

        msg = "ExtensionArray.fillna 'method' keyword is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

        msg = "The 'method' keyword in DecimalArray.fillna is deprecated"
        with tm.assert_produces_warning(
            FutureWarning,
            match=msg,
            check_stacklevel=False,
            raise_on_extra_warnings=False,
        ):
            super().test_fillna_limit_backfill(data_missing)

    def test_fillna_no_op_returns_copy(self, data):
        msg = "|".join(
            [
                "ExtensionArray.fillna 'method' keyword is deprecated",
                "The 'method' keyword in DecimalArray.fillna is deprecated",
            ]
        )
        with tm.assert_produces_warning(
            (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False
        ):
            super().test_fillna_no_op_returns_copy(data)

    def test_fillna_series(self, data_missing):
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(
            DeprecationWarning, match=msg, check_stacklevel=False
        ):
            super().test_fillna_series(data_missing)

    def test_fillna_series_method(self, data_missing, fillna_method):
        msg = "|".join(
            [
                "ExtensionArray.fillna 'method' keyword is deprecated",
                "The 'method' keyword in DecimalArray.fillna is deprecated",
            ]
        )
        with tm.assert_produces_warning(
            (FutureWarning, DeprecationWarning), match=msg, check_stacklevel=False
        ):
            super().test_fillna_series_method(data_missing, fillna_method)

    def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
        # The warning is only expected when copy-on-write is disabled.
        warn = DeprecationWarning if not using_copy_on_write else None
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
            super().test_fillna_copy_frame(data_missing)

    def test_fillna_copy_series(self, data_missing, using_copy_on_write):
        # The warning is only expected when copy-on-write is disabled.
        warn = DeprecationWarning if not using_copy_on_write else None
        msg = "ExtensionArray.fillna added a 'copy' keyword"
        with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
            super().test_fillna_copy_series(data_missing)

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna, request):
        all_data = all_data[:10]
        if dropna:
            other = np.array(all_data[~all_data.isna()])
        else:
            other = all_data

        vcs = pd.Series(all_data).value_counts(dropna=dropna)
        vcs_ex = pd.Series(other).value_counts(dropna=dropna)

        with decimal.localcontext() as ctx:
            # avoid raising when comparing Decimal("NAN") < Decimal(2)
            ctx.traps[decimal.InvalidOperation] = False

            result = vcs.sort_index()
            expected = vcs_ex.sort_index()

        tm.assert_series_equal(result, expected)

    def test_series_repr(self, data):
        # Overriding this base test to explicitly test that
        # the custom _formatter is used
        ser = pd.Series(data)
        assert data.dtype.name in repr(ser)
        assert "Decimal: " in repr(ser)

    @pytest.mark.xfail(reason="Inconsistent array-vs-scalar behavior")
    @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
    def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
        super().test_unary_ufunc_dunder_equivalence(data, ufunc)

    def test_array_interface_copy(self, data):
        """Two ``copy=True`` conversions must not share memory; ``copy=False`` must."""
        result_copy1 = np.array(data, copy=True)
        result_copy2 = np.array(data, copy=True)
        assert not np.may_share_memory(result_copy1, result_copy2)
        if not np_version_gt2:
            # copy=False semantics are only supported in NumPy>=2.
            return

        try:
            result_nocopy1 = np.array(data, copy=False)
        except ValueError:
            # An error is always acceptable for `copy=False`
            return

        result_nocopy2 = np.array(data, copy=False)
        # If copy=False was given and did not raise, these must share the same data
        assert np.may_share_memory(result_nocopy1, result_nocopy2)
|
||||
|
||||
|
||||
def test_take_na_value_other_decimal():
    """``take`` with ``allow_fill`` must use an explicitly supplied fill value."""
    source = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
    fill = decimal.Decimal("-1.0")

    taken = source.take([0, -1], allow_fill=True, fill_value=fill)

    wanted = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("-1.0")])
    tm.assert_extension_array_equal(taken, wanted)
|
||||
|
||||
|
||||
def test_series_constructor_coerce_data_to_extension_dtype():
    """Plain ints passed with a decimal dtype must be coerced to decimals."""
    decimal_dtype = DecimalDtype()
    coerced = pd.Series([0, 1, 2], dtype=decimal_dtype)

    reference_values = DecimalArray(
        [decimal.Decimal(0), decimal.Decimal(1), decimal.Decimal(2)],
        dtype=decimal_dtype,
    )
    reference = pd.Series(reference_values)
    tm.assert_series_equal(coerced, reference)
|
||||
|
||||
|
||||
def test_series_constructor_with_dtype():
    """``pd.Series`` must respect ``dtype`` when given a ``DecimalArray``."""
    values = DecimalArray([decimal.Decimal("10.0")])

    # Same dtype: equivalent to not passing a dtype at all.
    same_dtype = pd.Series(values, dtype=DecimalDtype())
    tm.assert_series_equal(same_dtype, pd.Series(values))

    # Different dtype: the values are converted.
    as_int = pd.Series(values, dtype="int64")
    tm.assert_series_equal(as_int, pd.Series([10]))
|
||||
|
||||
|
||||
def test_dataframe_constructor_with_dtype():
    """``pd.DataFrame`` must respect ``dtype`` when given a ``DecimalArray``."""
    values = DecimalArray([decimal.Decimal("10.0")])

    # Same dtype: equivalent to not passing a dtype at all.
    same_dtype = pd.DataFrame({"A": values}, dtype=DecimalDtype())
    tm.assert_frame_equal(same_dtype, pd.DataFrame({"A": values}))

    # Different dtype: the values are converted.
    values = DecimalArray([decimal.Decimal("10.0")])
    as_int = pd.DataFrame({"A": values}, dtype="int64")
    tm.assert_frame_equal(as_int, pd.DataFrame({"A": [10]}))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("frame", [True, False])
def test_astype_dispatches(frame):
    """The target dtype's context must reach the result of ``astype``."""
    # This is a dtype-specific test that ensures Series[decimal].astype
    # gets all the way through to ExtensionArray.astype
    # Designing a reliable smoke test that works for arbitrary data types
    # is difficult.
    ser = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a")
    ctx = decimal.Context()
    ctx.prec = 5

    obj = ser.to_frame() if frame else ser
    converted = obj.astype(DecimalDtype(ctx))
    if frame:
        converted = converted["a"]

    assert converted.dtype.context.prec == ctx.prec
|
||||
|
||||
|
||||
class DecimalArrayWithoutFromSequence(DecimalArray):
    """Helper class for testing error handling in _from_sequence."""

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        # Always fail, whatever the arguments.
        message = "For the test"
        raise KeyError(message)
|
||||
|
||||
|
||||
class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
    """Variant whose arithmetic methods are created with ``coerce_to_dtype=False``."""

    @classmethod
    def _create_arithmetic_method(cls, op):
        method = cls._create_method(op, coerce_to_dtype=False)
        return method


# Runs at import time so this class gets arithmetic methods built by its
# own ``_create_arithmetic_method``.
DecimalArrayWithoutCoercion._add_arithmetic_ops()
|
||||
|
||||
|
||||
def test_combine_from_sequence_raises(monkeypatch):
    """``Series.combine`` must yield object dtype when ``_from_sequence`` raises."""
    # https://github.com/pandas-dev/pandas/issues/22850
    failing_cls = DecimalArrayWithoutFromSequence

    @classmethod
    def construct_array_type(cls):
        return DecimalArrayWithoutFromSequence

    monkeypatch.setattr(DecimalDtype, "construct_array_type", construct_array_type)

    values = failing_cls([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
    ser = pd.Series(values)
    combined = ser.combine(ser, operator.add)

    # note: object dtype
    wanted = pd.Series(
        [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object"
    )
    tm.assert_series_equal(combined, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion]
)
def test_scalar_ops_from_sequence_raises(class_):
    """Adding two arrays must yield an object ndarray for these helper classes."""
    # op(EA, EA) should return an EA, or an ndarray if it's not possible
    # to return an EA with the return values.
    operand = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
    total = operand + operand
    wanted = np.array(
        [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object"
    )
    tm.assert_numpy_array_equal(total, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "reverse, expected_div, expected_mod",
    [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])],
)
def test_divmod_array(reverse, expected_div, expected_mod):
    """``divmod`` must work with the array on either side of the scalar 2."""
    # https://github.com/pandas-dev/pandas/issues/22930
    arr = to_decimal([1, 2, 3, 4])
    quotient, remainder = divmod(2, arr) if reverse else divmod(arr, 2)

    wanted_quotient = to_decimal(expected_div)
    wanted_remainder = to_decimal(expected_mod)

    tm.assert_extension_array_equal(quotient, wanted_quotient)
    tm.assert_extension_array_equal(remainder, wanted_remainder)
|
||||
|
||||
|
||||
def test_ufunc_fallback(data):
    """``np.abs`` on a Series must match ``np.abs`` on the array, index preserved."""
    head = data[:5]
    index = range(3, 8)
    ser = pd.Series(head, index=index)

    outcome = np.abs(ser)

    wanted = pd.Series(np.abs(head), index=range(3, 8))
    tm.assert_series_equal(outcome, wanted)
|
||||
|
||||
|
||||
def test_array_ufunc():
    """``np.exp`` on the array must equal ``np.exp`` on its data, re-wrapped."""
    arr = to_decimal([1, 2, 3])
    outcome = np.exp(arr)
    wanted = to_decimal(np.exp(arr._data))
    tm.assert_extension_array_equal(outcome, wanted)
|
||||
|
||||
|
||||
def test_array_ufunc_series():
    """``np.exp`` on a decimal Series must return a decimal Series."""
    arr = to_decimal([1, 2, 3])
    ser = pd.Series(arr)
    outcome = np.exp(ser)
    wanted = pd.Series(to_decimal(np.exp(arr._data)))
    tm.assert_series_equal(outcome, wanted)
|
||||
|
||||
|
||||
def test_array_ufunc_series_scalar_other():
    """``np.add`` of a Series and a Decimal scalar must match the array result."""
    # check _HANDLED_TYPES
    arr = to_decimal([1, 2, 3])
    ser = pd.Series(arr)
    one = decimal.Decimal(1)
    outcome = np.add(ser, one)
    wanted = pd.Series(np.add(arr, decimal.Decimal(1)))
    tm.assert_series_equal(outcome, wanted)
|
||||
|
||||
|
||||
def test_array_ufunc_series_defer():
    """``np.add`` of a Series and its array must give a Series in either order."""
    arr = to_decimal([1, 2, 3])
    ser = pd.Series(arr)

    wanted = pd.Series(to_decimal([2, 4, 6]))
    series_first = np.add(ser, arr)
    array_first = np.add(arr, ser)

    tm.assert_series_equal(series_first, wanted)
    tm.assert_series_equal(array_first, wanted)
|
||||
|
||||
|
||||
def test_groupby_agg():
    """Taking the first element per group must keep the decimal dtype."""
    # Ensure that the result of agg is inferred to be decimal dtype
    # https://github.com/pandas-dev/pandas/issues/29141

    values = make_data()[:5]
    frame = pd.DataFrame(
        {
            "id1": [0, 0, 0, 1, 1],
            "id2": [0, 1, 0, 1, 1],
            "decimals": DecimalArray(values),
        }
    )

    # single key, selected column
    wanted = pd.Series(to_decimal([values[0], values[3]]))
    outcome = frame.groupby("id1")["decimals"].agg(lambda x: x.iloc[0])
    tm.assert_series_equal(outcome, wanted, check_names=False)
    outcome = frame["decimals"].groupby(frame["id1"]).agg(lambda x: x.iloc[0])
    tm.assert_series_equal(outcome, wanted, check_names=False)

    # multiple keys, selected column
    wanted = pd.Series(
        to_decimal([values[0], values[1], values[3]]),
        index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 1)]),
    )
    outcome = frame.groupby(["id1", "id2"])["decimals"].agg(lambda x: x.iloc[0])
    tm.assert_series_equal(outcome, wanted, check_names=False)
    outcome = (
        frame["decimals"]
        .groupby([frame["id1"], frame["id2"]])
        .agg(lambda x: x.iloc[0])
    )
    tm.assert_series_equal(outcome, wanted, check_names=False)

    # multiple columns
    wanted = pd.DataFrame(
        {"id2": [0, 1], "decimals": to_decimal([values[0], values[3]])}
    )
    outcome = frame.groupby("id1").agg(lambda x: x.iloc[0])
    tm.assert_frame_equal(outcome, wanted, check_names=False)
|
||||
|
||||
|
||||
def test_groupby_agg_ea_method(monkeypatch):
    """Aggregating with a method patched onto the array must keep the decimal dtype."""
    # Ensure that the result of agg is inferred to be decimal dtype
    # https://github.com/pandas-dev/pandas/issues/29141

    def DecimalArray__my_sum(self):
        return np.sum(np.array(self))

    monkeypatch.setattr(DecimalArray, "my_sum", DecimalArray__my_sum, raising=False)

    values = make_data()[:5]
    frame = pd.DataFrame({"id": [0, 0, 0, 1, 1], "decimals": DecimalArray(values)})
    first_group_total = values[0] + values[1] + values[2]
    second_group_total = values[3] + values[4]
    wanted = pd.Series(to_decimal([first_group_total, second_group_total]))

    outcome = frame.groupby("id")["decimals"].agg(lambda x: x.values.my_sum())
    tm.assert_series_equal(outcome, wanted, check_names=False)

    ser = pd.Series(DecimalArray(values))
    grouper = np.array([0, 0, 0, 1, 1], dtype=np.int64)
    outcome = ser.groupby(grouper).agg(lambda x: x.values.my_sum())
    tm.assert_series_equal(outcome, wanted, check_names=False)
|
||||
|
||||
|
||||
def test_indexing_no_materialize(monkeypatch):
    """Indexing must never call ``__array__`` on the ``DecimalArray``."""
    # See https://github.com/pandas-dev/pandas/issues/29708
    # Ensure that indexing operations do not materialize (convert to a numpy
    # array) the ExtensionArray unnecessary

    def DecimalArray__array__(self, dtype=None):
        raise Exception("tried to convert a DecimalArray to a numpy array")

    monkeypatch.setattr(DecimalArray, "__array__", DecimalArray__array__, raising=False)

    values = make_data()
    s = pd.Series(DecimalArray(values))
    df = pd.DataFrame({"a": s, "b": range(len(s))})

    # ensure the following operations do not raise an error
    s[s > 0.5]
    df[s > 0.5]
    s.at[0]
    df.at[0, "a"]
|
||||
|
||||
|
||||
def test_to_numpy_keyword():
    """The extra ``decimals`` keyword of ``to_numpy`` must round each element."""
    # test the extra keyword
    raw = [decimal.Decimal("1.1111"), decimal.Decimal("2.2222")]
    wanted = np.array(
        [decimal.Decimal("1.11"), decimal.Decimal("2.22")], dtype="object"
    )
    arr = pd.array(raw, dtype="decimal")

    from_array = arr.to_numpy(decimals=2)
    tm.assert_numpy_array_equal(from_array, wanted)

    from_series = pd.Series(arr).to_numpy(decimals=2)
    tm.assert_numpy_array_equal(from_series, wanted)
|
||||
|
||||
|
||||
def test_array_copy_on_write(using_copy_on_write):
    """Under copy-on-write, editing the source frame must not alter the ``astype`` result."""
    source = pd.DataFrame(
        {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object"
    )
    converted = source.astype(DecimalDtype())
    source.iloc[0, 0] = 0

    # Nothing is asserted when copy-on-write is disabled.
    if not using_copy_on_write:
        return

    wanted = pd.DataFrame(
        {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
    )
    tm.assert_equal(converted.values, wanted.values)
|
@ -0,0 +1,7 @@
|
||||
from pandas.tests.extension.json.array import (
|
||||
JSONArray,
|
||||
JSONDtype,
|
||||
make_data,
|
||||
)
|
||||
|
||||
__all__ = ["JSONArray", "JSONDtype", "make_data"]
|
@ -0,0 +1,273 @@
|
||||
"""
|
||||
Test extension array for storing nested data in a pandas container.
|
||||
|
||||
The JSONArray stores lists of dictionaries. The storage mechanism is a list,
|
||||
not an ndarray.
|
||||
|
||||
Note
|
||||
----
|
||||
We currently store lists of UserDicts. Pandas has a few places
|
||||
internally that specifically check for dicts, and does non-scalar things
|
||||
in that case. We *want* the dictionaries to be treated as scalars, so we
|
||||
hack around pandas by using UserDicts.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import (
|
||||
UserDict,
|
||||
abc,
|
||||
)
|
||||
import itertools
|
||||
import numbers
|
||||
import string
|
||||
import sys
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
)
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||||
from pandas.core.dtypes.common import (
|
||||
is_bool_dtype,
|
||||
is_list_like,
|
||||
pandas_dtype,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.extensions import (
|
||||
ExtensionArray,
|
||||
ExtensionDtype,
|
||||
)
|
||||
from pandas.core.indexers import unpack_tuple_and_ellipses
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
|
||||
from pandas._typing import type_t
|
||||
|
||||
|
||||
class JSONDtype(ExtensionDtype):
    """Extension dtype named ``"json"`` whose scalar type is ``Mapping``."""

    name = "json"
    type = abc.Mapping
    # An empty UserDict is the missing-value marker for this dtype.
    na_value: Mapping[str, Any] = UserDict()

    @classmethod
    def construct_array_type(cls) -> type_t[JSONArray]:
        """
        Give back the array class that pairs with this dtype.

        Returns
        -------
        type
        """
        return JSONArray
|
||||
|
||||
|
||||
class JSONArray(ExtensionArray):
    """
    Test extension array that keeps its elements in ``self.data``.

    Every element must be an instance of ``JSONDtype.type`` (``Mapping``).
    An element equal to ``JSONDtype.na_value`` is reported as missing by
    ``isna``.
    """

    dtype = JSONDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False) -> None:
        """
        Validate and store ``values``.

        ``dtype`` and ``copy`` are accepted but not used; ``values`` is stored
        as passed, without copying.

        Raises
        ------
        TypeError
            If any element is not an instance of ``self.dtype.type``.
        """
        for val in values:
            if not isinstance(val, self.dtype.type):
                raise TypeError("All values must be of type " + str(self.dtype.type))
        self.data = values

        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self._data = self.data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """Build an array from ``scalars``; ``dtype`` and ``copy`` are ignored."""
        return cls(scalars)

    @classmethod
    def _from_factorized(cls, values, original):
        """Rebuild from factorized tuples, dropping the empty tuple ``()``."""
        return cls([UserDict(x) for x in values if x != ()])

    def __getitem__(self, item):
        """
        Return a scalar for an integer key, otherwise a new ``JSONArray``.

        Raises
        ------
        IndexError
            If ``item`` is a non-list-like that is neither an integer nor a
            slice (e.g. ``"foo"`` or ``2.5``).
        """
        if isinstance(item, tuple):
            item = unpack_tuple_and_ellipses(item)

        if isinstance(item, numbers.Integral):
            return self.data[item]
        elif isinstance(item, slice) and item == slice(None):
            # Make sure we get a view
            return type(self)(self.data)
        elif isinstance(item, slice):
            # slice
            return type(self)(self.data[item])
        elif not is_list_like(item):
            # e.g. "foo" or 2.5
            # exception message copied from numpy
            raise IndexError(
                r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
                r"(`None`) and integer or boolean arrays are valid indices"
            )
        else:
            item = pd.api.indexers.check_array_indexer(self, item)
            if is_bool_dtype(item.dtype):
                return type(self)._from_sequence(
                    [x for x, m in zip(self, item) if m], dtype=self.dtype
                )
            # integer
            return type(self)([self.data[i] for i in item])

    def __setitem__(self, key, value) -> None:
        """
        Set one or several positions in place.

        An integer ``key`` stores ``value`` directly. For any other key, a
        ``value`` that is neither a ``JSONArray`` nor a ``Sequence`` is
        broadcast; boolean ndarray keys act as a mask and other keys are
        iterated as positions.
        """
        if isinstance(key, numbers.Integral):
            self.data[key] = value
        else:
            if not isinstance(value, (type(self), abc.Sequence)):
                # broadcast value
                value = itertools.cycle([value])

            if isinstance(key, np.ndarray) and key.dtype == "bool":
                # masking
                for i, (k, v) in enumerate(zip(key, value)):
                    if k:
                        assert isinstance(v, self.dtype.type)
                        self.data[i] = v
            else:
                for k, v in zip(key, value):
                    assert isinstance(v, self.dtype.type)
                    self.data[k] = v

    def __len__(self) -> int:
        return len(self.data)

    def __eq__(self, other):
        # Elementwise comparison is deliberately not implemented.
        return NotImplemented

    def __ne__(self, other):
        # Elementwise comparison is deliberately not implemented.
        return NotImplemented

    def __array__(self, dtype=None, copy=None):
        """
        Convert to a NumPy array, defaulting to object dtype.

        Passing ``copy=False`` emits a ``FutureWarning`` and the conversion
        then proceeds as usual.
        """
        if copy is False:
            warnings.warn(
                "Starting with NumPy 2.0, the behavior of the 'copy' keyword has "
                "changed and passing 'copy=False' raises an error when returning "
                "a zero-copy NumPy array is not possible. pandas will follow "
                "this behavior starting with pandas 3.0.\nThis conversion to "
                "NumPy requires a copy, but 'copy=False' was passed. Consider "
                "using 'np.asarray(..)' instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        if dtype is None:
            dtype = object
        if dtype == object:
            # on py38 builds it looks like numpy is inferring to a non-1D array
            return construct_1d_object_array_from_listlike(list(self))
        if copy is None:
            # Note: branch avoids `copy=None` for NumPy 1.x support
            return np.asarray(self.data, dtype=dtype)
        return np.asarray(self.data, dtype=dtype, copy=copy)

    @property
    def nbytes(self) -> int:
        # Shallow size of the backing container only (sys.getsizeof).
        return sys.getsizeof(self.data)

    def isna(self):
        """Return a boolean ndarray marking elements equal to ``dtype.na_value``."""
        return np.array([x == self.dtype.na_value for x in self.data], dtype=bool)

    def take(self, indexer, allow_fill=False, fill_value=None):
        """
        Take elements at the positions in ``indexer``.

        With ``allow_fill=True``, ``-1`` marks a position filled with
        ``fill_value`` (``dtype.na_value`` when ``fill_value`` is None).

        Raises
        ------
        ValueError
            If ``allow_fill`` is True and ``indexer`` has a value below -1.
        IndexError
            If a position cannot be looked up in the backing data.
        """
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = (
            "Index is out of bounds or cannot do a "
            "non-empty take from an empty array."
        )

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check: -1 is the only negative position allowed here
            if (indexer < -1).any():
                raise ValueError(
                    "'indexer' contains values less than -1, which is not "
                    "allowed when 'allow_fill' is True"
                )
            try:
                output = [
                    self.data[loc] if loc != -1 else fill_value for loc in indexer
                ]
            except IndexError as err:
                raise IndexError(msg) from err
        else:
            try:
                output = [self.data[loc] for loc in indexer]
            except IndexError as err:
                raise IndexError(msg) from err

        return type(self)._from_sequence(output, dtype=self.dtype)

    def copy(self):
        """Return a new ``JSONArray`` built from ``self.data[:]``."""
        return type(self)(self.data[:])

    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        Returns ``self`` (or a copy) for the array's own dtype, a string
        array for ``StringDtype``, and otherwise a NumPy array of plain dicts.
        """
        # NumPy has issues when all the dicts are the same length.
        # np.array([UserDict(...), UserDict(...)]) fails,
        # but np.array([{...}, {...}]) works, so cast.
        from pandas.core.arrays.string_ import StringDtype

        dtype = pandas_dtype(dtype)
        # needed to add this check for the Series constructor
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        elif isinstance(dtype, StringDtype):
            arr_cls = dtype.construct_array_type()
            return arr_cls._from_sequence(self, dtype=dtype, copy=False)
        elif not copy:
            return np.asarray([dict(x) for x in self], dtype=dtype)
        else:
            return np.array([dict(x) for x in self], dtype=dtype, copy=copy)

    def unique(self):
        """Return the distinct elements, deduplicated via their item tuples."""
        # Parent method doesn't work since np.array will try to infer
        # a 2-dim object.
        return type(self)([dict(x) for x in {tuple(d.items()) for d in self.data}])

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Chain the ``data`` of every array in ``to_concat`` into one array."""
        data = list(itertools.chain.from_iterable(x.data for x in to_concat))
        return cls(data)

    def _values_for_factorize(self):
        """Return the item-tuple array and ``()`` as the missing-value marker."""
        frozen = self._values_for_argsort()
        if len(frozen) == 0:
            # factorize_array expects 1-d array, this is a len-0 2-d array.
            frozen = frozen.ravel()
        return frozen, ()

    def _values_for_argsort(self):
        # Bypass NumPy's shape inference to get a (N,) array of tuples.
        frozen = [tuple(x.items()) for x in self]
        return construct_1d_object_array_from_listlike(frozen)

    def _pad_or_backfill(self, *, method, limit=None, copy=True):
        # GH#56616 - test EA method without limit_area argument
        return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
|
||||
|
||||
|
||||
def make_data():
    """Return 100 UserDicts of random letter -> integer items (fixed seed 2)."""
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    rng = np.random.default_rng(2)
    letters = list(string.ascii_letters)
    records = []
    for _ in range(100):
        # Draw the item count first, then a (key, value) pair per item, so the
        # generator is consumed in the same order on every call.
        n_items = rng.integers(0, 10)
        record = UserDict()
        for _ in range(n_items):
            key = rng.choice(letters)
            record[key] = rng.integers(0, 100)
        records.append(record)
    return records
|
@ -0,0 +1,490 @@
|
||||
import collections
|
||||
import operator
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.extension import base
|
||||
from pandas.tests.extension.json.array import (
|
||||
JSONArray,
|
||||
JSONDtype,
|
||||
make_data,
|
||||
)
|
||||
|
||||
# We intentionally don't run base.BaseSetitemTests because pandas'
|
||||
# internals have trouble setting sequences of values into scalar positions.
|
||||
unhashable = pytest.mark.xfail(reason="Unhashable")
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Fresh ``JSONDtype`` instance."""
    json_dtype = JSONDtype()
    return json_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 JSONArray for semantics test."""
    data = make_data()

    # Why the while loop? NumPy is unable to construct an ndarray from
    # equal-length ndarrays. Many of our operations involve coercing the
    # EA to an ndarray of objects. To avoid random test failures, we ensure
    # that our data is coercible to an ndarray. Several tests deal with only
    # the first two elements, so that's what we'll check.
    #
    # NOTE(review): the make_data shown in the json array module seeds its
    # generator with a fixed value, so a retry presumably returns the same
    # data and this loop could not make progress if it were ever entered.

    while len(data[0]) == len(data[1]):
        data = make_data()

    return JSONArray(data)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    missing = {}
    valid = {"a": 10}
    return JSONArray([missing, valid])
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Length 3 array of non-empty dicts used by the sorting tests."""
    records = [
        {"b": 1},
        {"c": 4},
        {"a": 2, "c": 3},
    ]
    return JSONArray(records)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Length 3 array with an empty dict (the NA value) in the middle."""
    records = [
        {"b": 1},
        {},
        {"a": 4},
    ]
    return JSONArray(records)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Comparison used for NA values: plain ``operator.eq``."""
    compare = operator.eq
    return compare
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Length 8 array with repeated dicts and two empty (NA) dicts."""
    records = [
        {"b": 1},
        {"b": 1},
        {},
        {},
        {"a": 0, "c": 2},
        {"a": 0, "c": 2},
        {"b": 1},
        {"c": 2},
    ]
    return JSONArray(records)
|
||||
|
||||
|
||||
class TestJSONArray(base.ExtensionTests):
    """Run the shared extension test suite against ``JSONArray``.

    Most overrides only attach an xfail/skip marker and delegate to the
    parent implementation.
    """

    @pytest.mark.xfail(
        reason="comparison method not implemented for JSONArray (GH-37867)"
    )
    def test_contains(self, data):
        # GH-37867
        super().test_contains(data)

    @pytest.mark.xfail(reason="not implemented constructor from dtype")
    def test_from_dtype(self, data):
        # construct from our dtype & string dtype
        super().test_from_dtype(data)

    @pytest.mark.xfail(reason="RecursionError, GH-33900")
    def test_series_constructor_no_data_with_index(self, dtype, na_value):
        # RecursionError: maximum recursion depth exceeded in comparison
        saved_limit = sys.getrecursionlimit()
        try:
            # Limit to avoid stack overflow on Windows CI
            sys.setrecursionlimit(100)
            super().test_series_constructor_no_data_with_index(dtype, na_value)
        finally:
            # Always restore the interpreter-wide limit.
            sys.setrecursionlimit(saved_limit)

    @pytest.mark.xfail(reason="RecursionError, GH-33900")
    def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
        # RecursionError: maximum recursion depth exceeded in comparison
        saved_limit = sys.getrecursionlimit()
        try:
            # Limit to avoid stack overflow on Windows CI
            sys.setrecursionlimit(100)
            super().test_series_constructor_scalar_na_with_index(dtype, na_value)
        finally:
            # Always restore the interpreter-wide limit.
            sys.setrecursionlimit(saved_limit)

    @pytest.mark.xfail(reason="collection as scalar, GH-33901")
    def test_series_constructor_scalar_with_index(self, data, dtype):
        # TypeError: All values must be of type <class 'collections.abc.Mapping'>
        saved_limit = sys.getrecursionlimit()
        try:
            # Limit to avoid stack overflow on Windows CI
            sys.setrecursionlimit(100)
            super().test_series_constructor_scalar_with_index(data, dtype)
        finally:
            # Always restore the interpreter-wide limit.
            sys.setrecursionlimit(saved_limit)

    @pytest.mark.xfail(reason="Different definitions of NA")
    def test_stack(self):
        """
        The test does .astype(object).stack(future_stack=True). If we happen to have
        any missing values in `data`, then we'll end up with different
        rows since we consider `{}` NA, but `.astype(object)` doesn't.
        """
        super().test_stack()

    @pytest.mark.xfail(reason="dict for NA")
    def test_unstack(self, data, index):
        # The base test has NaN for the expected NA value.
        # this matches otherwise
        return super().test_unstack(data, index)

    @pytest.mark.xfail(reason="Setting a dict as a scalar")
    def test_fillna_series(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""
        super().test_fillna_series()

    @pytest.mark.xfail(reason="Setting a dict as a scalar")
    def test_fillna_frame(self):
        """We treat dictionaries as a mapping in fillna, not a scalar."""
        super().test_fillna_frame()

    @pytest.mark.parametrize(
        "limit_area, input_ilocs, expected_ilocs",
        [
            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
        ],
    )
    def test_ffill_limit_area(
        self, data_missing, limit_area, input_ilocs, expected_ilocs
    ):
        # GH#56616
        expected_message = "JSONArray does not implement limit_area"
        with pytest.raises(NotImplementedError, match=expected_message):
            super().test_ffill_limit_area(
                data_missing, limit_area, input_ilocs, expected_ilocs
            )

    @unhashable
    def test_value_counts(self, all_data, dropna):
        super().test_value_counts(all_data, dropna)

    @unhashable
    def test_value_counts_with_normalize(self, data):
        super().test_value_counts_with_normalize(data)

    @unhashable
    def test_sort_values_frame(self):
        # TODO (EA.factorize): see if _values_for_factorize allows this.
        super().test_sort_values_frame()

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
        super().test_sort_values(data_for_sorting, ascending, sort_by_key)

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_missing(
        self, data_missing_for_sorting, ascending, sort_by_key
    ):
        super().test_sort_values_missing(
            data_missing_for_sorting, ascending, sort_by_key
        )

    @pytest.mark.xfail(reason="combine for JSONArray not supported")
    def test_combine_le(self, data_repeated):
        super().test_combine_le(data_repeated)

    @pytest.mark.xfail(
        reason="combine for JSONArray not supported - "
        "may pass depending on random data",
        strict=False,
        raises=AssertionError,
    )
    def test_combine_first(self, data):
        super().test_combine_first(data)

    @pytest.mark.xfail(reason="broadcasting error")
    def test_where_series(self, data, na_value):
        # Fails with
        # *** ValueError: operands could not be broadcast together
        # with shapes (4,) (4,) (0,)
        super().test_where_series(data, na_value)

    @pytest.mark.xfail(reason="Can't compare dicts.")
    def test_searchsorted(self, data_for_sorting):
        super().test_searchsorted(data_for_sorting)

    @pytest.mark.xfail(reason="Can't compare dicts.")
    def test_equals(self, data, na_value, as_series):
        super().test_equals(data, na_value, as_series)

    @pytest.mark.skip("fill-value is interpreted as a dict of values")
    def test_fillna_copy_frame(self, data_missing):
        super().test_fillna_copy_frame(data_missing)

    def test_equals_same_data_different_object(
        self, data, using_copy_on_write, request
    ):
        # Only expected to fail when copy-on-write is enabled.
        if using_copy_on_write:
            request.applymarker(pytest.mark.xfail(reason="Fails with CoW"))
        super().test_equals_same_data_different_object(data)

    @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)")
    def test_astype_str(self):
        """This currently fails in NumPy on np.array(self, dtype=str) with

        *** ValueError: setting an array element with a sequence
        """
        super().test_astype_str()

    @unhashable
    def test_groupby_extension_transform(self):
        """
        This currently fails in Series.name.setter, since the
        name must be hashable, but the value is a dictionary.
        I think this is what we want, i.e. `.name` should be the original
        values, and not the values for factorization.
        """
        super().test_groupby_extension_transform()

    @unhashable
    def test_groupby_extension_apply(self):
        """
        This fails in Index._do_unique_check with

        >   hash(val)
        E   TypeError: unhashable type: 'UserDict' with

        I suspect that once we support Index[ExtensionArray],
        we'll be able to dispatch unique.
        """
        super().test_groupby_extension_apply()

    @unhashable
    def test_groupby_extension_agg(self):
        """
        This fails when we get to tm.assert_series_equal when left.index
        contains dictionaries, which are not hashable.
        """
        super().test_groupby_extension_agg()

    @unhashable
    def test_groupby_extension_no_sort(self):
        """
        This fails when we get to tm.assert_series_equal when left.index
        contains dictionaries, which are not hashable.
        """
        super().test_groupby_extension_no_sort()

    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
        # Marked xfail unless the first element has exactly one item.
        if len(data[0]) != 1:
            request.applymarker(
                pytest.mark.xfail(reason="raises in coercing to Series")
            )
        super().test_arith_frame_with_scalar(data, all_arithmetic_operators)

    def test_compare_array(self, data, comparison_op, request):
        # __eq__ / __ne__ return NotImplemented on JSONArray.
        if comparison_op.__name__ in ("eq", "ne"):
            request.applymarker(
                pytest.mark.xfail(reason="Comparison methods not implemented")
            )
        super().test_compare_array(data, comparison_op)

    @pytest.mark.xfail(reason="ValueError: Must have equal len keys and value")
    def test_setitem_loc_scalar_mixed(self, data):
        super().test_setitem_loc_scalar_mixed(data)

    @pytest.mark.xfail(reason="ValueError: Must have equal len keys and value")
    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        super().test_setitem_loc_scalar_multiple_homogoneous(data)

    @pytest.mark.xfail(reason="ValueError: Must have equal len keys and value")
    def test_setitem_iloc_scalar_mixed(self, data):
        super().test_setitem_iloc_scalar_mixed(data)

    @pytest.mark.xfail(reason="ValueError: Must have equal len keys and value")
    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        super().test_setitem_iloc_scalar_multiple_homogoneous(data)

    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
            pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array", "boolean-array-na"],
    )
    def test_setitem_mask(self, data, mask, box_in_series, request):
        # Pick the xfail reason (if any) first, then apply a single marker.
        xfail_reason = None
        if box_in_series:
            xfail_reason = (
                "cannot set using a list-like indexer with a different length"
            )
        elif not isinstance(mask, np.ndarray):
            xfail_reason = "Issues unwanted DeprecationWarning"
        if xfail_reason is not None:
            request.applymarker(pytest.mark.xfail(reason=xfail_reason))
        super().test_setitem_mask(data, mask, box_in_series)

    def test_setitem_mask_raises(self, data, box_in_series, request):
        # The bare array does not raise; only the Series-boxed case does.
        if not box_in_series:
            request.applymarker(pytest.mark.xfail(reason="Fails to raise"))

        super().test_setitem_mask_raises(data, box_in_series)

    @pytest.mark.xfail(
        reason="cannot set using a list-like indexer with a different length"
    )
    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
        super().test_setitem_mask_boolean_array_with_na(data, box_in_series)

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series, request):
        if box_in_series:
            request.applymarker(
                pytest.mark.xfail(
                    reason="cannot set using a list-like indexer with a different length"
                )
            )
        super().test_setitem_integer_array(data, idx, box_in_series)

    @pytest.mark.xfail(reason="list indices must be integers or slices, not NAType")
    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param(
                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
            ),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
        ],
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        # NOTE(review): the last case is labelled "integer-array-True" but
        # passes box_in_series=False; confirm before changing, since the
        # whole test is marked xfail.
        super().test_setitem_integer_with_missing_raises(data, idx, box_in_series)

    @pytest.mark.xfail(reason="Fails to raise")
    def test_setitem_scalar_key_sequence_raise(self, data):
        super().test_setitem_scalar_key_sequence_raise(data)

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request):
        # Only the parametrization whose node name contains "full_slice" fails.
        if "full_slice" in request.node.name:
            request.applymarker(pytest.mark.xfail(reason="slice is not iterable"))
        super().test_setitem_with_expansion_dataframe_column(data, full_indexer)

    @pytest.mark.xfail(reason="slice is not iterable")
    def test_setitem_frame_2d_values(self, data):
        super().test_setitem_frame_2d_values(data)

    @pytest.mark.xfail(
        reason="cannot set using a list-like indexer with a different length"
    )
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        super().test_setitem_mask_broadcast(data, setter)

    @pytest.mark.xfail(
        reason="cannot set using a slice indexer with a different length"
    )
    def test_setitem_slice(self, data, box_in_series):
        super().test_setitem_slice(data, box_in_series)

    @pytest.mark.xfail(reason="slice object is not iterable")
    def test_setitem_loc_iloc_slice(self, data):
        super().test_setitem_loc_iloc_slice(data)

    @pytest.mark.xfail(reason="slice object is not iterable")
    def test_setitem_slice_mismatch_length_raises(self, data):
        super().test_setitem_slice_mismatch_length_raises(data)

    @pytest.mark.xfail(reason="slice object is not iterable")
    def test_setitem_slice_array(self, data):
        super().test_setitem_slice_array(data)

    @pytest.mark.xfail(reason="Fail to raise")
    def test_setitem_invalid(self, data, invalid_scalar):
        super().test_setitem_invalid(data, invalid_scalar)

    @pytest.mark.xfail(reason="only integer scalar arrays can be converted")
    def test_setitem_2d_values(self, data):
        super().test_setitem_2d_values(data)

    @pytest.mark.xfail(reason="data type 'json' not understood")
    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data, request):
        super().test_EA_types(engine, data, request)
|
||||
|
||||
|
||||
def custom_assert_series_equal(left, right, *args, **kwargs):
    """Assert two Series are equal, re-wrapping json-dtype data first.

    Extra positional and keyword arguments go to ``tm.assert_series_equal``.
    """

    def _rewrap(ser):
        # Round-trip through an object ndarray and back into a JSONArray,
        # keeping the index and name.
        values = JSONArray(ser.values.astype(object))
        return pd.Series(values, index=ser.index, name=ser.name)

    # NumPy doesn't handle an array of equal-length UserDicts.
    # The default assert_series_equal eventually does a
    # Series.values, which raises. We work around it by
    # converting the UserDicts to dicts.
    if left.dtype.name == "json":
        assert left.dtype == right.dtype
        left, right = _rewrap(left), _rewrap(right)
    tm.assert_series_equal(left, right, *args, **kwargs)
|
||||
|
||||
|
||||
def custom_assert_frame_equal(left, right, *args, **kwargs):
    """Assert two DataFrames are equal, handling json-dtype columns separately.

    Column labels are compared first. Each json column is then compared with
    ``custom_assert_series_equal``, and the remaining columns are compared
    with ``tm.assert_frame_equal``. Extra positional and keyword arguments
    are forwarded to both.
    """
    obj_type = kwargs.get("obj", "DataFrame")
    tm.assert_index_equal(
        left.columns,
        right.columns,
        exact=kwargs.get("check_column_type", "equiv"),
        check_names=kwargs.get("check_names", True),
        check_exact=kwargs.get("check_exact", False),
        check_categorical=kwargs.get("check_categorical", True),
        obj=f"{obj_type}.columns",
    )

    # Select only the json columns. Taking ``.index`` of the boolean Series
    # would return every column label, whatever its dtype.
    is_json = left.dtypes == "json"
    jsons = is_json.index[is_json.to_numpy(dtype=bool)]

    for col in jsons:
        custom_assert_series_equal(left[col], right[col], *args, **kwargs)

    left = left.drop(columns=jsons)
    right = right.drop(columns=jsons)
    tm.assert_frame_equal(left, right, *args, **kwargs)
|
||||
|
||||
|
||||
def test_custom_asserts():
    """Exercise the custom assert helpers on equal and unequal json data."""
    # This would always trigger the KeyError from trying to put
    # an array of equal-length UserDicts inside an ndarray.
    records = [
        collections.UserDict({"a": 1}),
        collections.UserDict({"b": 2}),
        collections.UserDict({"c": 3}),
    ]
    data = JSONArray(records)
    a = pd.Series(data)

    # Identical inputs must pass both helpers.
    custom_assert_series_equal(a, a)
    custom_assert_frame_equal(a.to_frame(), a.to_frame())

    # A reordered take must be reported as different by both helpers.
    b = pd.Series(data.take([0, 0, 1]))
    expected_message = r"Series are different"
    with pytest.raises(AssertionError, match=expected_message):
        custom_assert_series_equal(a, b)

    with pytest.raises(AssertionError, match=expected_message):
        custom_assert_frame_equal(a.to_frame(), b.to_frame())
|
@ -0,0 +1,7 @@
|
||||
from pandas.tests.extension.list.array import (
|
||||
ListArray,
|
||||
ListDtype,
|
||||
make_data,
|
||||
)
|
||||
|
||||
__all__ = ["ListArray", "ListDtype", "make_data"]
|
@ -0,0 +1,137 @@
|
||||
"""
|
||||
Test extension array for storing nested data in a pandas container.
|
||||
|
||||
The ListArray stores an ndarray of lists.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import numbers
|
||||
import string
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.base import ExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas.api.types import (
|
||||
is_object_dtype,
|
||||
is_string_dtype,
|
||||
)
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import type_t
|
||||
|
||||
|
||||
class ListDtype(ExtensionDtype):
    """Extension dtype named ``"list"`` whose scalar type is ``list``."""

    name = "list"
    type = list
    # Missing entries are represented by NaN.
    na_value = np.nan

    @classmethod
    def construct_array_type(cls) -> type_t[ListArray]:
        """
        Give back the array class that pairs with this dtype.

        Returns
        -------
        type
        """
        return ListArray
|
||||
|
||||
|
||||
class ListArray(ExtensionArray):
    """
    Test extension array that stores its elements in a NumPy ndarray.

    Each element must be a ``list`` or a value that ``pd.isna`` reports as
    missing.
    """

    dtype = ListDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False) -> None:
        """
        Validate and store ``values``.

        ``dtype`` and ``copy`` are accepted but not used; ``values`` is stored
        as passed, without copying.

        Raises
        ------
        TypeError
            If ``values`` is not an ndarray, or an element is neither a list
            nor missing.
        """
        if not isinstance(values, np.ndarray):
            raise TypeError("Need to pass a numpy array as values")
        for val in values:
            if not isinstance(val, self.dtype.type) and not pd.isna(val):
                raise TypeError("All values must be of type " + str(self.dtype.type))
        self.data = values

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """Build an array from ``scalars``; ``dtype`` and ``copy`` are ignored."""
        # Fill a pre-allocated object array so each scalar lands in one slot.
        data = np.empty(len(scalars), dtype=object)
        data[:] = scalars
        return cls(data)

    def __getitem__(self, item):
        """Return a scalar for an integer key, otherwise a new ``ListArray``."""
        if isinstance(item, numbers.Integral):
            return self.data[item]
        else:
            # slice, list-like, mask
            return type(self)(self.data[item])

    def __len__(self) -> int:
        return len(self.data)

    def isna(self):
        """Return a boolean ndarray marking non-list elements that are NaN."""
        return np.array(
            [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool
        )

    def take(self, indexer, allow_fill=False, fill_value=None):
        """
        Take elements at the positions in ``indexer``.

        With ``allow_fill=True``, ``-1`` marks a position filled with
        ``fill_value`` (``dtype.na_value`` when ``fill_value`` is None).

        Raises
        ------
        ValueError
            If ``allow_fill`` is True and ``indexer`` has a value below -1.
        IndexError
            If a position cannot be looked up in the backing data.
        """
        # re-implement here, since NumPy has trouble setting
        # sized objects like lists into scalar slots of
        # an ndarray.
        indexer = np.asarray(indexer)
        msg = (
            "Index is out of bounds or cannot do a "
            "non-empty take from an empty array."
        )

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check: -1 is the only negative position allowed here
            if (indexer < -1).any():
                raise ValueError(
                    "'indexer' contains values less than -1, which is not "
                    "allowed when 'allow_fill' is True"
                )
            try:
                output = [
                    self.data[loc] if loc != -1 else fill_value for loc in indexer
                ]
            except IndexError as err:
                raise IndexError(msg) from err
        else:
            try:
                output = [self.data[loc] for loc in indexer]
            except IndexError as err:
                raise IndexError(msg) from err

        return self._from_sequence(output)

    def copy(self):
        """Return a new ``ListArray`` whose backing ndarray is a separate copy."""
        # ``self.data[:]`` on an ndarray is a view that shares memory with the
        # original, so take an explicit copy of the container. The list
        # elements themselves are not deep-copied.
        return type(self)(self.data.copy())

    def astype(self, dtype, copy=True):
        """
        Cast to ``dtype``.

        Returns ``self`` (or a copy) for the array's own dtype, stringifies
        each element for non-object string dtypes, and otherwise defers to
        NumPy.
        """
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        elif is_string_dtype(dtype) and not is_object_dtype(dtype):
            # numpy has problems with astype(str) for nested elements
            return np.array([str(x) for x in self.data], dtype=dtype)
        elif not copy:
            return np.asarray(self.data, dtype=dtype)
        else:
            return np.array(self.data, dtype=dtype, copy=copy)

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate the backing ndarrays of every array in ``to_concat``."""
        data = np.concatenate([x.data for x in to_concat])
        return cls(data)
|
||||
|
||||
|
||||
def make_data():
    """Return a length-100 object ndarray of random letter lists (fixed seed 2)."""
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    rng = np.random.default_rng(2)
    letters = list(string.ascii_letters)
    rows = []
    for _ in range(100):
        # Draw the row length first, then one letter per slot, so the
        # generator is consumed in the same order on every call.
        row_length = rng.integers(0, 10)
        rows.append([rng.choice(letters) for _ in range(row_length)])
    data = np.empty(100, dtype=object)
    data[:] = rows
    return data
|
@ -0,0 +1,33 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.tests.extension.list.array import (
|
||||
ListArray,
|
||||
ListDtype,
|
||||
make_data,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Fresh ``ListDtype`` instance."""
    list_dtype = ListDtype()
    return list_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 ListArray for semantics test."""
    values = make_data()

    # Regenerate until the first two elements differ in length.
    while True:
        if len(values[0]) != len(values[1]):
            break
        values = make_data()

    return ListArray(values)
|
||||
|
||||
|
||||
def test_to_csv(data):
    """The string form of the first element must appear in the CSV output."""
    # https://github.com/pandas-dev/pandas/issues/28840
    # array with list-likes fail when doing astype(str) on the numpy array
    # which was done in get_values_for_csv
    frame = pd.DataFrame({"a": data})
    csv_text = frame.to_csv()
    first_as_text = str(data[0])
    assert first_as_text in csv_text
|
3420
lib/python3.11/site-packages/pandas/tests/extension/test_arrow.py
Normal file
3420
lib/python3.11/site-packages/pandas/tests/extension/test_arrow.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,200 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import string
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_string_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import CategoricalDtype
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
def make_data():
    """
    Return 100 random ASCII letters whose first two entries differ.

    Every draw uses a freshly seeded generator (seed 2), so the result is
    deterministic.
    """
    letters = list(string.ascii_letters)
    values = np.random.default_rng(2).choice(letters, size=100)
    # ensure we meet the requirements
    # 1. first two not null
    # 2. first and second are different
    while values[0] == values[1]:
        values = np.random.default_rng(2).choice(letters, size=100)
    return values
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a ``CategoricalDtype`` created without arguments."""
    cat_dtype = CategoricalDtype()
    return cat_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 array for this type.

    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal
    """
    letters = make_data()
    return Categorical(letters)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    values = [np.nan, "A"]
    return Categorical(values)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """Ordered categorical ["A", "B", "C"] with category order C < A < B."""
    values = ["A", "B", "C"]
    category_order = ["C", "A", "B"]
    return Categorical(values, categories=category_order, ordered=True)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """Ordered categorical ["A", None, "B"] with category order B < A."""
    values = ["A", None, "B"]
    category_order = ["B", "A"]
    return Categorical(values, categories=category_order, ordered=True)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """Categorical laid out as [a, a, None, None, b, b, a, c]."""
    layout = ["a", "a", None, None, "b", "b", "a", "c"]
    return Categorical(layout)
|
||||
|
||||
|
||||
class TestCategorical(base.ExtensionTests):
    """Run the shared extension-array tests against ``Categorical``.

    Only tests that need categorical-specific expectations, skips or
    xfail markers are overridden here.
    """

    @pytest.mark.xfail(reason="Memory usage doesn't match")
    def test_memory_usage(self, data):
        # TODO: Is this deliberate?
        super().test_memory_usage(data)

    def test_contains(self, data, data_missing):
        # GH-37867
        # na value handling in Categorical.__contains__ is deprecated.
        # See base.BaseInterfaceTests.test_contains for more details.
        na_value = data.dtype.na_value

        # Drop missing entries so ``data`` is NA-free from here on.
        data = data[~data.isna()]

        # Each array reports its own first element as contained.
        assert data[0] in data
        assert data_missing[0] in data_missing

        # check the presence of na_value
        assert na_value in data_missing
        assert na_value not in data

        # Categoricals can contain other nan-likes than na_value
        other_nulls = [obj for obj in tm.NULL_OBJECTS if obj is not na_value]
        for null_obj in other_nulls:
            assert null_obj not in data
            # this section suffers from super method
            if using_string_dtype():
                continue
            assert null_obj in data_missing

    def test_empty(self, dtype):
        array_type = dtype.construct_array_type()
        empty = array_type._empty((4,), dtype=dtype)

        assert isinstance(empty, array_type)
        # the dtype we passed is not initialized, so will not match the
        # dtype on our result.
        assert empty.dtype == CategoricalDtype([])

    @pytest.mark.skip(reason="Backwards compatibility")
    def test_getitem_scalar(self, data):
        # CategoricalDtype.type isn't "correct" since it should
        # be a parent of the elements (object). But don't want
        # to break things by changing.
        super().test_getitem_scalar(data)

    @pytest.mark.xfail(reason="Unobserved categories included")
    def test_value_counts(self, all_data, dropna):
        return super().test_value_counts(all_data, dropna)

    def test_combine_add(self, data_repeated):
        # GH 20825
        # When adding categoricals in combine, result is a string
        first, second = data_repeated(2)
        s1 = pd.Series(first)
        s2 = pd.Series(second)

        def add(x1, x2):
            return x1 + x2

        # Series combined with another Series.
        result = s1.combine(s2, add)
        pairwise = [a + b for a, b in zip(list(first), list(second))]
        tm.assert_series_equal(result, pd.Series(pairwise))

        # Series combined with a scalar.
        val = s1.iloc[0]
        result = s1.combine(val, add)
        with_scalar = [a + val for a in list(first)]
        tm.assert_series_equal(result, pd.Series(with_scalar))

    @pytest.mark.parametrize("na_action", [None, "ignore"])
    def test_map(self, data, na_action):
        def identity(x):
            return x

        mapped = data.map(identity, na_action=na_action)
        tm.assert_extension_array_equal(mapped, data)

    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
        # frame & scalar
        if all_arithmetic_operators == "__rmod__":
            mark = pytest.mark.xfail(
                reason="rmod never called when string is first argument"
            )
            request.applymarker(mark)
        super().test_arith_frame_with_scalar(data, all_arithmetic_operators)

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request):
        if all_arithmetic_operators == "__rmod__":
            mark = pytest.mark.xfail(
                reason="rmod never called when string is first argument"
            )
            request.applymarker(mark)
        super().test_arith_series_with_scalar(data, all_arithmetic_operators)

    def _compare_other(self, ser: pd.Series, data, op, other):
        op_name = f"__{op.__name__}__"
        if op_name in ("__eq__", "__ne__"):
            return super()._compare_other(ser, data, op, other)

        # Every other comparison is expected to raise.
        msg = "Unordered Categoricals can only compare equality or not"
        with pytest.raises(TypeError, match=msg):
            op(data, other)

    @pytest.mark.xfail(reason="Categorical overrides __repr__")
    @pytest.mark.parametrize("size", ["big", "small"])
    def test_array_repr(self, data, size):
        super().test_array_repr(data, size)

    @pytest.mark.xfail(reason="TBD")
    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        super().test_groupby_extension_agg(as_index, data_for_grouping)
|
||||
|
||||
|
||||
class Test2DCompat(base.NDArrayBacked2DTests):
    """2D-compatibility tests with a categorical-specific repr check."""

    def test_repr_2d(self, data):
        # Categorical __repr__ doesn't include "Categorical", so we need
        # to special-case
        for shape in [(1, -1), (-1, 1)]:
            res = repr(data.reshape(*shape))
            assert res.count("\nCategories") == 1
|
@ -0,0 +1,105 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes import dtypes
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
|
||||
|
||||
class DummyDtype(dtypes.ExtensionDtype):
    """Bare ``ExtensionDtype`` subclass that adds nothing of its own."""
|
||||
|
||||
|
||||
class DummyArray(ExtensionArray):
    """Minimal extension array that stores whatever it is given in ``data``."""

    def __init__(self, data) -> None:
        self.data = data

    def __array__(self, dtype=None, copy=None):
        # The stored object is handed back as-is; ``dtype`` and ``copy``
        # are accepted but not used.
        return self.data

    @property
    def dtype(self):
        """A new ``DummyDtype`` instance on every access."""
        return DummyDtype()

    def astype(self, dtype, copy=True):
        """Return a DummyArray for ``DummyDtype`` targets, else a NumPy conversion."""
        # we don't support anything but a single dtype
        if isinstance(dtype, DummyDtype):
            return type(self)(self.data) if copy else self

        if copy:
            return np.array(self, dtype=dtype, copy=copy)
        return np.asarray(self, dtype=dtype)
|
||||
|
||||
|
||||
class TestExtensionArrayDtype:
    """Checks of ``is_extension_array_dtype`` on extension and plain inputs."""

    @pytest.mark.parametrize(
        "values",
        [
            pd.Categorical([]),
            pd.Categorical([]).dtype,
            pd.Series(pd.Categorical([])),
            DummyDtype(),
            DummyArray(np.array([1, 2])),
        ],
    )
    def test_is_extension_array_dtype(self, values):
        recognised = is_extension_array_dtype(values)
        assert recognised

    @pytest.mark.parametrize("values", [np.array([]), pd.Series(np.array([]))])
    def test_is_not_extension_array_dtype(self, values):
        recognised = is_extension_array_dtype(values)
        assert not recognised
|
||||
|
||||
|
||||
def test_astype():
    """``astype`` to object gives the same ndarray for ``object`` and ``"object"``."""
    arr = DummyArray(np.array([1, 2, 3]))
    expected = np.array([1, 2, 3], dtype=object)

    for target in (object, "object"):
        result = arr.astype(target)
        tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_no_copy():
    """Same-dtype ``astype`` returns ``self`` only when ``copy=False``."""
    arr = DummyArray(np.array([1, 2, 3], dtype=np.int64))

    not_copied = arr.astype(arr.dtype, copy=False)
    assert arr is not_copied

    copied = arr.astype(arr.dtype)
    assert arr is not copied
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [dtypes.CategoricalDtype(), dtypes.IntervalDtype()])
def test_is_extension_array_dtype(dtype):
    """Each dtype is an ``ExtensionDtype`` and is recognised as such."""
    is_ext_instance = isinstance(dtype, dtypes.ExtensionDtype)
    assert is_ext_instance
    assert is_extension_array_dtype(dtype)
|
||||
|
||||
|
||||
class CapturingStringArray(pd.arrays.StringArray):
    """Extend StringArray to capture arguments to __getitem__"""

    def __getitem__(self, item):
        # Record the indexer before delegating, so it is stored even if the
        # parent lookup raises.
        self.last_item_arg = item
        looked_up = super().__getitem__(item)
        return looked_up
|
||||
|
||||
|
||||
def test_ellipsis_index():
    """A row slice of a frame reaches the array's ``__getitem__`` as a 1D slice."""
    # GH#42430 1D slices over extension types turn into N-dimensional slices
    # over ExtensionArrays
    words = np.array(["hello", "world"], dtype=object)
    df = pd.DataFrame({"col1": CapturingStringArray(words)})
    _ = df.iloc[:1]

    # String comparison because there's no native way to compare slices.
    # Before the fix for GH#42430, last_item_arg would get set to the 2D slice
    # (Ellipsis, slice(None, 1, None))
    captured = df["col1"].array.last_item_arg
    assert str(captured) == "slice(None, 1, None)"
|
@ -0,0 +1,144 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
@pytest.fixture(params=["US/Central"])
def dtype(request):
    """Nanosecond ``DatetimeTZDtype`` in the parametrized time zone."""
    zone = request.param
    return DatetimeTZDtype(unit="ns", tz=zone)
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """DatetimeArray of 100 periods starting at "2000" in the dtype's time zone."""
    stamps = pd.date_range("2000", periods=100, tz=dtype.tz)
    return DatetimeArray._from_sequence(stamps, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Length-2 DatetimeArray built from ["NaT", "2000-01-01"]."""
    raw = np.array(["NaT", "2000-01-01"], dtype="datetime64[ns]")
    return DatetimeArray._from_sequence(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """DatetimeArray ordered [B, C, A], with A < B < C one day apart."""
    a, b, c = (
        pd.Timestamp("2000-01-01"),
        pd.Timestamp("2000-01-02"),
        pd.Timestamp("2000-01-03"),
    )
    raw = np.array([b, c, a], dtype="datetime64[ns]")
    return DatetimeArray._from_sequence(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """DatetimeArray ordered [B, NaT, A], with A < B."""
    a, b = pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02")
    raw = np.array([b, "NaT", a], dtype="datetime64[ns]")
    return DatetimeArray._from_sequence(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """
    Expected to be like [B, B, NA, NA, A, A, B, C]

    Where A < B < C and NA is missing
    """
    na = "NaT"
    a, b, c = (
        pd.Timestamp("2000-01-01"),
        pd.Timestamp("2000-01-02"),
        pd.Timestamp("2000-01-03"),
    )
    layout = [b, b, na, na, a, a, b, c]
    raw = np.array(layout, dtype="datetime64[ns]")
    return DatetimeArray._from_sequence(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return a comparator that is true only when both arguments are ``pd.NaT``."""

    def both_nat(a, b):
        # ``a is b`` with ``a is pd.NaT`` means both are the NaT singleton.
        return a is pd.NaT and a is b

    return both_nat
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
class TestDatetimeArray(base.ExtensionTests):
    """Run the shared extension-array tests against tz-aware ``DatetimeArray``."""

    def _get_expected_exception(self, op_name, obj, other):
        # Subtraction is expected to succeed; defer to the parent otherwise.
        if op_name in ("__sub__", "__rsub__"):
            return None
        return super()._get_expected_exception(op_name, obj, other)

    def _supports_accumulation(self, ser, op_name: str) -> bool:
        supported = ("cummin", "cummax")
        return op_name in supported

    def _supports_reduction(self, obj, op_name: str) -> bool:
        supported = ("min", "max", "median", "mean", "std", "any", "all")
        return op_name in supported

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
        msg = (
            f"'{all_boolean_reductions}' with datetime64 dtypes is deprecated "
            "and will raise in"
        )
        with tm.assert_produces_warning(
            FutureWarning, match=msg, check_stacklevel=False
        ):
            super().test_reduce_series_boolean(data, all_boolean_reductions, skipna)

    def test_series_constructor(self, data):
        # Series construction drops any .freq attr
        without_freq = data._with_freq(None)
        super().test_series_constructor(without_freq)

    @pytest.mark.parametrize("na_action", [None, "ignore"])
    def test_map(self, data, na_action):
        def identity(x):
            return x

        mapped = data.map(identity, na_action=na_action)
        tm.assert_extension_array_equal(mapped, data)

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        if op_name not in ("median", "mean", "std"):
            return super().check_reduce(ser, op_name, skipna)

        # Compare against the same reduction on the int64 representation.
        as_int = ser.astype("int64")
        result = getattr(ser, op_name)(skipna=skipna)
        raw_expected = getattr(as_int, op_name)(skipna=skipna)

        if op_name == "std":
            expected = pd.Timedelta(raw_expected)
        else:
            # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype"
            # has no attribute "tz"
            tz = ser.dtype.tz  # type: ignore[union-attr]
            expected = pd.Timestamp(raw_expected, tz=tz)
        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class Test2DCompat(base.NDArrayBacked2DTests):
    """Inherit the 2D-compatibility tests without any overrides."""
|
@ -0,0 +1,26 @@
|
||||
"""
|
||||
Tests for behavior if an author does *not* implement EA methods.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
|
||||
|
||||
class MyEA(ExtensionArray):
    """ExtensionArray subclass that only stores its input and implements nothing else."""

    def __init__(self, values) -> None:
        # Kept as given; no validation or conversion.
        self._values = values
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """``MyEA`` wrapping ``np.arange(10)``."""
    return MyEA(np.arange(10))
|
||||
|
||||
|
||||
class TestExtensionArray:
    """Behaviour of an extension array whose author implemented no operators."""

    def test_errors(self, data, all_arithmetic_operators):
        # invalid ops
        with pytest.raises(AttributeError):
            getattr(data, all_arithmetic_operators)
|
@ -0,0 +1,123 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import IntervalDtype
|
||||
|
||||
from pandas import Interval
|
||||
from pandas.core.arrays import IntervalArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def make_data():
    """
    Return a list of 100 ``Interval`` objects.

    Left edges are the cumulative sum of seeded uniform draws; each right
    edge is the left edge plus a draw from an identically seeded generator.
    """
    count = 100
    lefts = np.random.default_rng(2).uniform(size=count).cumsum()
    widths = np.random.default_rng(2).uniform(size=count)
    rights = lefts + widths
    return [Interval(*bounds) for bounds in zip(lefts, rights)]
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide an ``IntervalDtype`` created without arguments."""
    interval_dtype = IntervalDtype()
    return interval_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Length-100 IntervalArray for semantics test."""
    intervals = make_data()
    return IntervalArray(intervals)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing():
    """Length 2 array with [NA, Valid]"""
    bounds = [None, (0, 1)]
    return IntervalArray.from_tuples(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos():
    """Skip every test that requests this fixture."""
    reason = "Interval is not a numeric dtype"
    pytest.skip(reason)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting():
    """IntervalArray built from the tuples (1, 2), (2, 3), (0, 1)."""
    bounds = [(1, 2), (2, 3), (0, 1)]
    return IntervalArray.from_tuples(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting():
    """IntervalArray built from (1, 2), a missing entry, and (0, 1)."""
    bounds = [(1, 2), None, (0, 1)]
    return IntervalArray.from_tuples(bounds)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping():
    """IntervalArray laid out as [b, b, None, None, a, a, b, c]."""
    a, b, c = (0, 1), (1, 2), (2, 3)
    layout = [b, b, None, None, a, a, b, c]
    return IntervalArray.from_tuples(layout)
|
||||
|
||||
|
||||
class TestIntervalArray(base.ExtensionTests):
    """Run the shared extension-array tests against ``IntervalArray``.

    Most overrides only wrap the parent test in a warning filter.
    """

    divmod_exc = TypeError

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        supported = ("min", "max")
        return op_name in supported

    @pytest.mark.xfail(
        reason="Raises with incorrect message bc it disallows *all* listlikes "
        "instead of just wrong-length listlikes"
    )
    def test_fillna_length_mismatch(self, data_missing):
        super().test_fillna_length_mismatch(data_missing)

    @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
    def test_hash_pandas_object(self, data):
        super().test_hash_pandas_object(data)

    @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
    def test_hash_pandas_object_works(self, data, as_frame):
        super().test_hash_pandas_object_works(data, as_frame)

    @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data, request):
        super().test_EA_types(engine, data, request)

    @pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
    def test_astype_str(self, data):
        super().test_astype_str(data)
|
||||
|
||||
|
||||
# TODO: either belongs in tests.arrays.interval or move into base tests.
|
||||
def test_fillna_non_scalar_raises(data_missing):
    """Filling with a plain list must raise ``TypeError``."""
    expected_message = "can only insert Interval objects and NA into an IntervalArray"
    fill_values = [1, 1]
    with pytest.raises(TypeError, match=expected_message):
        data_missing.fillna(fill_values)
|
@ -0,0 +1,417 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import (
|
||||
IS64,
|
||||
is_platform_windows,
|
||||
)
|
||||
from pandas.compat.numpy import np_version_gt2
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_float_dtype,
|
||||
is_signed_integer_dtype,
|
||||
is_unsigned_integer_dtype,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.boolean import BooleanDtype
|
||||
from pandas.core.arrays.floating import (
|
||||
Float32Dtype,
|
||||
Float64Dtype,
|
||||
)
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
from pandas.tests.extension import base
|
||||
|
||||
is_windows_or_32bit = (is_platform_windows() and not np_version_gt2) or not IS64
|
||||
|
||||
pytestmark = [
|
||||
pytest.mark.filterwarnings(
|
||||
"ignore:invalid value encountered in divide:RuntimeWarning"
|
||||
),
|
||||
pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning"),
|
||||
# overflow only relevant for Floating dtype cases
|
||||
pytest.mark.filterwarnings("ignore:overflow encountered in reduce:RuntimeWarning"),
|
||||
]
|
||||
|
||||
|
||||
def make_data():
    """Return 100 values: integers 1..100 with ``pd.NA`` replacing 9 and 98."""
    values = list(range(1, 9))
    values.append(pd.NA)
    values.extend(range(10, 98))
    values.append(pd.NA)
    values.extend([99, 100])
    return values
|
||||
|
||||
|
||||
def make_float_data():
    """Return two ``np.arange`` float runs, each followed by ``pd.NA``, then 9.9 and 10.0."""
    values = list(np.arange(0.1, 0.9, 0.1))
    values.append(pd.NA)
    values.extend(np.arange(1, 9.8, 0.1))
    values.append(pd.NA)
    values.extend([9.9, 10.0])
    return values
|
||||
|
||||
|
||||
def make_bool_data():
    """Return 100 values alternating True/False with ``np.nan`` at positions 8 and 97."""
    pair = [True, False]
    values = pair * 4
    values.append(np.nan)
    values.extend(pair * 44)
    values.append(np.nan)
    values.extend(pair)
    return values
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
        Float32Dtype,
        Float64Dtype,
        BooleanDtype,
    ]
)
def dtype(request):
    """Instantiate the parametrized masked dtype class."""
    dtype_cls = request.param
    return dtype_cls()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """Masked array of ``dtype`` built from the generator matching its kind."""
    builders = {"f": make_float_data, "b": make_bool_data}
    # Every kind other than float/bool uses the integer data.
    build = builders.get(dtype.kind, make_data)
    return pd.array(build(), dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos(dtype):
    """Length-100 array of twos, or of ones for the boolean kind."""
    ones = np.ones(100)
    raw = ones if dtype.kind == "b" else ones * 2
    return pd.array(raw, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Length-2 masked array holding a missing value followed by a valid one."""
    kind = dtype.kind
    if kind == "f":
        values = [pd.NA, 0.1]
    elif kind == "b":
        values = [np.nan, True]
    else:
        values = [pd.NA, 1]
    return pd.array(values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """Length-3 masked array without missing values, chosen by dtype kind."""
    kind = dtype.kind
    if kind == "f":
        values = [0.1, 0.2, 0.0]
    elif kind == "b":
        values = [True, True, False]
    else:
        values = [1, 2, 0]
    return pd.array(values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """Length-3 masked array with a missing value in the middle position."""
    kind = dtype.kind
    if kind == "f":
        values = [0.1, pd.NA, 0.0]
    elif kind == "b":
        values = [True, np.nan, False]
    else:
        values = [1, pd.NA, 0]
    return pd.array(values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return a comparator that is true only when both arguments are ``pd.NA``."""

    # we are pd.NA
    def both_na(x, y):
        return x is pd.NA and y is pd.NA

    return both_na
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """Masked array laid out as [b, b, NA, NA, a, a, b, c] for the dtype's kind."""
    kind = dtype.kind
    if kind == "f":
        a, b, c = 0.0, 0.1, 0.2
    elif kind == "b":
        a, b = False, True
        # Booleans have only two values, so ``c`` reuses ``b``.
        c = b
    else:
        a, b, c = 0, 1, 2

    na = pd.NA
    layout = [b, b, na, na, a, a, b, c]
    return pd.array(layout, dtype=dtype)
|
||||
|
||||
|
||||
class TestMaskedArrays(base.ExtensionTests):
|
||||
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map(self, data_missing, na_action):
    """Mapping the identity function gives the array's ``to_numpy`` output."""

    def identity(x):
        return x

    result = data_missing.map(identity, na_action=na_action)

    to_numpy_kwargs = {}
    if data_missing.dtype == Float32Dtype():
        # map roundtrips through objects, which converts to float64
        to_numpy_kwargs = {"dtype": "float64", "na_value": np.nan}
    expected = data_missing.to_numpy(**to_numpy_kwargs)

    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_map_na_action_ignore(self, data_missing_for_sorting):
    """With ``na_action="ignore"`` the missing slot is not passed to the mapper."""
    arr = data_missing_for_sorting
    zero = arr[2]

    def to_zero(x):
        return zero

    result = arr.map(to_zero, na_action="ignore")

    if arr.dtype.kind == "b":
        expected = np.array([False, pd.NA, False], dtype=object)
    else:
        expected = np.array([zero, np.nan, zero])
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def _get_expected_exception(self, op_name, obj, other):
    """Return the exception class expected for ``op_name``, or ``None``.

    Only boolean dtypes are expected to raise: ``NotImplementedError`` for
    pow/truediv/floordiv (and reflected forms), ``TypeError`` for
    subtraction.
    """
    try:
        dtype = tm.get_dtype(obj)
    except AttributeError:
        # passed arguments reversed
        dtype = tm.get_dtype(other)

    if dtype.kind != "b":
        return None

    # Remove the dunder underscores, then any leading "r" of a reflected op.
    bare_name = op_name.strip("_").lstrip("r")
    if bare_name in ("pow", "truediv", "floordiv"):
        # match behavior with non-masked bool dtype
        return NotImplementedError
    if op_name in ("__sub__", "__rsub__"):
        # exception message would include "numpy boolean subtract"
        return TypeError
    return None
|
||||
|
||||
def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
    """Cast the element-wise ``pointwise_result`` to the dtype the masked op yields.

    The target depends on the kind of ``obj``'s dtype and on ``op_name``.
    """
    sdtype = tm.get_dtype(obj)

    if op_name in ("eq", "ne", "le", "ge", "lt", "gt"):
        return pointwise_result.astype("boolean")

    if sdtype.kind in "iu":
        if op_name not in ("__rtruediv__", "__truediv__", "__div__"):
            # combine method result in 'biggest' (int64) dtype
            return pointwise_result.astype(sdtype)

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                "Downcasting object dtype arrays",
                category=FutureWarning,
            )
            filled = pointwise_result.fillna(np.nan)
        return filled.astype("Float64")

    if sdtype.kind != "b":
        # combine method result in 'biggest' (float64) dtype
        return pointwise_result.astype(sdtype)

    expected = pointwise_result
    int_like_ops = (
        "__floordiv__",
        "__rfloordiv__",
        "__pow__",
        "__rpow__",
        "__mod__",
        "__rmod__",
    )
    if op_name in int_like_ops:
        # combine keeps boolean type
        expected = expected.astype("Int8")
    elif op_name in ("__truediv__", "__rtruediv__"):
        # combine with bools does not generate the correct result
        # (numpy behaviour for div is to regard the bools as numeric)
        op = self.get_op_from_name(op_name)
        expected = self._combine(obj.astype(float), other, op)
        expected = expected.astype("Float64")

    if op_name == "__rpow__":
        # for rpow, combine does not propagate NaN
        result = getattr(obj, op_name)(other)
        expected[result.isna()] = np.nan
    return expected
|
||||
|
||||
def test_divmod_series_array(self, data, data_for_twos, request):
    """Run the parent divmod test, marked xfail for boolean data."""
    is_bool = data.dtype.kind == "b"
    if is_bool:
        reason = (
            "Inconsistency between floordiv and divmod; we raise for "
            "floordiv but not for divmod. This matches what we do for "
            "non-masked bool dtype."
        )
        request.applymarker(pytest.mark.xfail(reason=reason))
    super().test_divmod_series_array(data, data_for_twos)
|
||||
|
||||
def test_combine_le(self, data_repeated):
    """Set the expected result dtype on ``self``, then run the parent test."""
    # TODO: patching self is a bad pattern here
    first, _ = data_repeated(2)
    # TODO: can we make this boolean? (applies to the non-boolean case)
    expected_dtype = "boolean" if first.dtype.kind == "b" else object
    self._combine_le_expected_dtype = expected_dtype
    super().test_combine_le(data_repeated)
|
||||
|
||||
def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
    """Return True; ``any``/``all`` on non-boolean data skips the test instead."""
    is_bool_reduction = op_name in ("any", "all")
    if is_bool_reduction and ser.dtype.kind != "b":
        pytest.skip(reason="Tested in tests/reductions/test_reductions.py")
    return True
|
||||
|
||||
def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
    """Compare a reduction on ``ser`` with the same one on its NA-free NumPy-dtype copy."""
    # overwrite to ensure pd.NA is tested instead of np.nan
    # https://github.com/pandas-dev/pandas/issues/30958
    kind = ser.dtype.kind
    if kind == "f":
        # Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has
        # no attribute "numpy_dtype"
        cmp_dtype = ser.dtype.numpy_dtype  # type: ignore[union-attr]
    elif kind == "b" and op_name in ("min", "max"):
        cmp_dtype = "bool"
    else:
        cmp_dtype = "int64"

    # TODO: prod with integer dtypes does *not* match the result we would
    # get if we used object for cmp_dtype. In that case the object result
    # is a large integer while the non-object case overflows and returns 0
    alt = ser.dropna().astype(cmp_dtype)

    # ``count`` is called without arguments; everything else gets ``skipna``.
    is_count = op_name == "count"
    call_kwargs = {} if is_count else {"skipna": skipna}
    result = getattr(ser, op_name)(**call_kwargs)
    expected = getattr(alt, op_name)(**call_kwargs)

    if not is_count:
        if not skipna and ser.isna().any() and op_name not in ("any", "all"):
            expected = pd.NA
    tm.assert_almost_equal(result, expected)
|
||||
|
||||
def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
|
||||
if is_float_dtype(arr.dtype):
|
||||
cmp_dtype = arr.dtype.name
|
||||
elif op_name in ["mean", "median", "var", "std", "skew"]:
|
||||
cmp_dtype = "Float64"
|
||||
elif op_name in ["max", "min"]:
|
||||
cmp_dtype = arr.dtype.name
|
||||
elif arr.dtype in ["Int64", "UInt64"]:
|
||||
cmp_dtype = arr.dtype.name
|
||||
elif is_signed_integer_dtype(arr.dtype):
|
||||
# TODO: Why does Window Numpy 2.0 dtype depend on skipna?
|
||||
cmp_dtype = (
|
||||
"Int32"
|
||||
if (is_platform_windows() and (not np_version_gt2 or not skipna))
|
||||
or not IS64
|
||||
else "Int64"
|
||||
)
|
||||
elif is_unsigned_integer_dtype(arr.dtype):
|
||||
cmp_dtype = (
|
||||
"UInt32"
|
||||
if (is_platform_windows() and (not np_version_gt2 or not skipna))
|
||||
or not IS64
|
||||
else "UInt64"
|
||||
)
|
||||
elif arr.dtype.kind == "b":
|
||||
if op_name in ["mean", "median", "var", "std", "skew"]:
|
||||
cmp_dtype = "Float64"
|
||||
elif op_name in ["min", "max"]:
|
||||
cmp_dtype = "boolean"
|
||||
elif op_name in ["sum", "prod"]:
|
||||
cmp_dtype = (
|
||||
"Int32"
|
||||
if (is_platform_windows() and (not np_version_gt2 or not skipna))
|
||||
or not IS64
|
||||
else "Int64"
|
||||
)
|
||||
else:
|
||||
raise TypeError("not supposed to reach this")
|
||||
else:
|
||||
raise TypeError("not supposed to reach this")
|
||||
return cmp_dtype
|
||||
|
||||
def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
    """Report every accumulation as supported (always returns True)."""
    return True
|
||||
|
||||
def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
    """Compare an accumulation on ``ser`` with a float64-based expectation.

    Overridden to ensure pd.NA is tested instead of np.nan
    (https://github.com/pandas-dev/pandas/issues/30958).

    Raises
    ------
    NotImplementedError
        If ``op_name`` is not cumsum, cummax, cummin or cumprod.
    """
    # Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has
    # no attribute "itemsize"
    narrow = (
        is_windows_or_32bit
        and not ser.dtype.itemsize == 8  # type: ignore[union-attr]
    )
    length = 32 if narrow else 64

    dtype_name = ser.dtype.name
    if dtype_name.startswith("U"):
        expected_dtype = f"UInt{length}"
    elif dtype_name.startswith("I"):
        expected_dtype = f"Int{length}"
    elif dtype_name.startswith("F"):
        # Incompatible types in assignment (expression has type
        # "Union[dtype[Any], ExtensionDtype]", variable has type "str")
        expected_dtype = ser.dtype  # type: ignore[assignment]
    elif ser.dtype.kind == "b":
        expected_dtype = (
            "boolean" if op_name in ("cummin", "cummax") else f"Int{length}"
        )

    if expected_dtype == "Float32" and op_name == "cumprod" and skipna:
        # TODO: xfail?
        pytest.skip(
            f"Float32 precision lead to large differences with op {op_name} "
            f"and skipna={skipna}"
        )

    # Pick the input slice and the dtype the expectation is built with.
    if op_name == "cumsum":
        subject, out_dtype = ser, expected_dtype
    elif op_name in ["cummax", "cummin"]:
        # running min/max is compared using the input's own dtype
        subject, out_dtype = ser, ser.dtype
    elif op_name == "cumprod":
        # only the first 12 elements are used for the product
        subject, out_dtype = ser[:12], expected_dtype
    else:
        raise NotImplementedError(f"{op_name} not supported")

    result = getattr(subject, op_name)(skipna=skipna)
    reference = getattr(subject.astype("float64"), op_name)(skipna=skipna)
    expected = pd.Series(pd.array(reference, dtype=out_dtype))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class Test2DCompat(base.Dim2CompatTests):
    """Run the inherited ``base.Dim2CompatTests`` suite unchanged."""

    pass
|
@ -0,0 +1,426 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
Note: we do not bother with base.BaseIndexTests because NumpyExtensionArray
|
||||
will never be held in an Index.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import NumpyEADtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_object_dtype
|
||||
from pandas.core.arrays.numpy_ import NumpyExtensionArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
orig_assert_attr_equal = tm.assert_attr_equal
|
||||
|
||||
|
||||
def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
|
||||
"""
|
||||
patch tm.assert_attr_equal so NumpyEADtype("object") is closed enough to
|
||||
np.dtype("object")
|
||||
"""
|
||||
if attr == "dtype":
|
||||
lattr = getattr(left, "dtype", None)
|
||||
rattr = getattr(right, "dtype", None)
|
||||
if isinstance(lattr, NumpyEADtype) and not isinstance(rattr, NumpyEADtype):
|
||||
left = left.astype(lattr.numpy_dtype)
|
||||
elif isinstance(rattr, NumpyEADtype) and not isinstance(lattr, NumpyEADtype):
|
||||
right = right.astype(rattr.numpy_dtype)
|
||||
|
||||
orig_assert_attr_equal(attr, left, right, obj)
|
||||
|
||||
|
||||
@pytest.fixture(params=["float", "object"])
def dtype(request):
    """NumpyEADtype wrapping the NumPy dtype named by the fixture param."""
    return NumpyEADtype(np.dtype(request.param))
|
||||
|
||||
|
||||
@pytest.fixture
def allow_in_pandas(monkeypatch):
    """
    A monkeypatch that tells pandas to let us in.

    By default, passing a NumpyExtensionArray to an index / series / frame
    constructor will unbox that NumpyExtensionArray to an ndarray, and treat
    it as a non-EA column. We don't want people using EAs without
    reason.

    The mechanism for this is a check against ABCNumpyExtensionArray
    in each constructor.

    But, for testing, we need to allow them in pandas. So we patch
    the _typ of NumpyExtensionArray, so that we evade the ABCNumpyExtensionArray
    check.
    """
    with monkeypatch.context() as m:
        # Both patches live only for the duration of the test using this
        # fixture: they are set inside the monkeypatch context around `yield`.
        m.setattr(NumpyExtensionArray, "_typ", "extension")
        # Also swap in the dtype-tolerant attribute asserter defined in this
        # module.
        m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
        yield
|
||||
|
||||
|
||||
@pytest.fixture
def data(allow_in_pandas, dtype):
    """Length-100 array for the parametrized dtype.

    For object dtype the elements are the 1-tuples ``(0,)`` .. ``(99,)``,
    obtained through ``.array`` of a Series; otherwise the values 1..100.
    """
    if dtype.numpy_dtype == "object":
        return pd.Series([(i,) for i in range(100)]).array
    return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype))
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(allow_in_pandas, dtype):
    """Length-2 array ``[nan, valid]``; the valid value is ``(1,)`` for object
    dtype and ``1.0`` otherwise."""
    if dtype.numpy_dtype == "object":
        return NumpyExtensionArray(np.array([np.nan, (1,)], dtype=object))
    return NumpyExtensionArray(np.array([np.nan, 1.0]))
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return a comparator that is truthy only when ``np.isnan`` holds for
    both of its arguments."""

    def cmp(a, b):
        return np.isnan(a) and np.isnan(b)

    return cmp
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(allow_in_pandas, dtype):
    """Length-3 array with a known sort order.

    This should be three items [B, C, A] with
    A < B < C
    """
    if dtype.numpy_dtype == "object":
        # Use an empty tuple for first element, then remove,
        # to disable np.array's shape inference.
        return NumpyExtensionArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:])
    # No dtype is passed here, so NumPy infers it from the integer literals.
    return NumpyExtensionArray(np.array([1, 2, 0]))
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(allow_in_pandas, dtype):
    """Length-3 array with a known sort order.

    This should be three items [B, NA, A] with
    A < B and NA missing.
    """
    if dtype.numpy_dtype == "object":
        # tuples for the valid entries, np.nan as the missing marker
        return NumpyExtensionArray(np.array([(1,), np.nan, (0,)], dtype=object))
    return NumpyExtensionArray(np.array([1, np.nan, 0]))
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(allow_in_pandas, dtype):
    """Data for factorization, grouping, and unique tests.

    Expected to be like [B, B, NA, NA, A, A, B, C]

    Where A < B < C and NA is missing
    """
    if dtype.numpy_dtype == "object":
        # 1-tuples stand in for the three distinct values
        a, b, c = (1,), (2,), (3,)
    else:
        # the values 0, 1, 2
        a, b, c = np.arange(3)
    return NumpyExtensionArray(
        np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype)
    )
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos(dtype):
    """Length-100 array filled with 2.0, built through ``_from_sequence``.

    Skipped when the dtype kind is ``"O"``.
    """
    if dtype.kind == "O":
        pytest.skip(f"{dtype} is not a numeric dtype")
    arr = np.ones(100) * 2
    return NumpyExtensionArray._from_sequence(arr, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def skip_numpy_object(dtype, request):
    """
    Tests for NumpyExtensionArray with nested data. Users typically won't create
    these objects via `pd.array`, but they can show up through `.array`
    on a Series with nested data. Many of the base tests fail, as they aren't
    appropriate for nested data.

    This fixture marks such tests as expected failures for object dtype when
    used as a usefixtures marker on either an individual test or a test class.
    """
    if dtype == "object":
        # Despite the fixture's name, an xfail marker is applied, not a skip.
        mark = pytest.mark.xfail(reason="Fails for object dtype")
        request.applymarker(mark)
|
||||
|
||||
|
||||
# Decorator that attaches the ``skip_numpy_object`` fixture to a test or class.
skip_nested = pytest.mark.usefixtures("skip_numpy_object")
|
||||
|
||||
|
||||
class TestNumpyExtensionArray(base.ExtensionTests):
    """Extension-interface tests for NumpyExtensionArray.

    Most overrides either delegate to the parent test under ``skip_nested``
    (which xfails the object-dtype parametrization) or set the exception the
    parent test should expect before delegating.
    """

    @pytest.mark.skip(reason="We don't register our dtype")
    # We don't want to register. This test should probably be split in two.
    def test_from_dtype(self, data):
        pass

    @skip_nested
    def test_series_constructor_scalar_with_index(self, data, dtype):
        # ValueError: Length of passed values is 1, index implies 3.
        super().test_series_constructor_scalar_with_index(data, dtype)

    def test_check_dtype(self, data, request, using_infer_string):
        if data.dtype.numpy_dtype == "object":
            request.applymarker(
                pytest.mark.xfail(
                    reason=f"NumpyExtensionArray expectedly clashes with a "
                    f"NumPy name: {data.dtype.numpy_dtype}"
                )
            )
        super().test_check_dtype(data)

    def test_is_not_object_type(self, dtype, request):
        if dtype.numpy_dtype == "object":
            # Different from BaseDtypeTests.test_is_not_object_type
            # because NumpyEADtype(object) is an object type
            assert is_object_dtype(dtype)
        else:
            super().test_is_not_object_type(dtype)

    @skip_nested
    def test_getitem_scalar(self, data):
        # AssertionError
        super().test_getitem_scalar(data)

    @skip_nested
    def test_shift_fill_value(self, data):
        # np.array shape inference. Shift implementation fails.
        super().test_shift_fill_value(data)

    @skip_nested
    def test_fillna_copy_frame(self, data_missing):
        # The "scalar" for this array isn't a scalar.
        super().test_fillna_copy_frame(data_missing)

    @skip_nested
    def test_fillna_copy_series(self, data_missing):
        # The "scalar" for this array isn't a scalar.
        super().test_fillna_copy_series(data_missing)

    @skip_nested
    def test_searchsorted(self, data_for_sorting, as_series):
        # TODO: NumpyExtensionArray.searchsorted calls ndarray.searchsorted which
        # isn't quite what we want in nested data cases. Instead we need to
        # adapt something like libindex._bin_search.
        super().test_searchsorted(data_for_sorting, as_series)

    @pytest.mark.xfail(reason="NumpyExtensionArray.diff may fail on dtype")
    def test_diff(self, data, periods):
        return super().test_diff(data, periods)

    def test_insert(self, data, request):
        if data.dtype.numpy_dtype == object:
            mark = pytest.mark.xfail(reason="Dimension mismatch in np.concatenate")
            request.applymarker(mark)

        super().test_insert(data)

    @skip_nested
    def test_insert_invalid(self, data, invalid_scalar):
        # NumpyExtensionArray[object] can hold anything, so skip
        super().test_insert_invalid(data, invalid_scalar)

    # Class-level defaults; individual tests below overwrite these on the
    # instance before delegating to the parent implementation.
    divmod_exc = None
    series_scalar_exc = None
    frame_scalar_exc = None
    series_array_exc = None

    def test_divmod(self, data):
        divmod_exc = None
        if data.dtype.kind == "O":
            divmod_exc = TypeError
        self.divmod_exc = divmod_exc
        super().test_divmod(data)

    def test_divmod_series_array(self, data):
        ser = pd.Series(data)
        exc = None
        if data.dtype.kind == "O":
            exc = TypeError
            self.divmod_exc = exc
        self._check_divmod_op(ser, divmod, data)

    def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request):
        opname = all_arithmetic_operators
        series_scalar_exc = None
        if data.dtype.numpy_dtype == object:
            if opname in ["__mul__", "__rmul__"]:
                mark = pytest.mark.xfail(
                    reason="the Series.combine step raises but not the Series method."
                )
                # applymarker is used for consistency with the other tests
                # in this class.
                request.applymarker(mark)
            series_scalar_exc = TypeError
        self.series_scalar_exc = series_scalar_exc
        super().test_arith_series_with_scalar(data, all_arithmetic_operators)

    def test_arith_series_with_array(self, data, all_arithmetic_operators):
        opname = all_arithmetic_operators
        series_array_exc = None
        if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]:
            series_array_exc = TypeError
        self.series_array_exc = series_array_exc
        super().test_arith_series_with_array(data, all_arithmetic_operators)

    def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
        opname = all_arithmetic_operators
        frame_scalar_exc = None
        if data.dtype.numpy_dtype == object:
            if opname in ["__mul__", "__rmul__"]:
                mark = pytest.mark.xfail(
                    reason="the Series.combine step raises but not the Series method."
                )
                request.applymarker(mark)
            frame_scalar_exc = TypeError
        self.frame_scalar_exc = frame_scalar_exc
        super().test_arith_frame_with_scalar(data, all_arithmetic_operators)

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        # Object-kind data only supports a fixed subset of reductions.
        if ser.dtype.kind == "O":
            return op_name in ["sum", "min", "max", "any", "all"]
        return True

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        res_op = getattr(ser, op_name)
        # avoid coercing int -> float. Just cast to the actual numpy type.
        # error: Item "ExtensionDtype" of "dtype[Any] | ExtensionDtype" has
        # no attribute "numpy_dtype"
        cmp_dtype = ser.dtype.numpy_dtype  # type: ignore[union-attr]
        alt = ser.astype(cmp_dtype)
        exp_op = getattr(alt, op_name)
        if op_name == "count":
            result = res_op()
            expected = exp_op()
        else:
            result = res_op(skipna=skipna)
            expected = exp_op(skipna=skipna)
        tm.assert_almost_equal(result, expected)

    @pytest.mark.skip("TODO: tests not written yet")
    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
        pass

    @skip_nested
    def test_fillna_series(self, data_missing):
        # Non-scalar "scalar" values.
        super().test_fillna_series(data_missing)

    @skip_nested
    def test_fillna_frame(self, data_missing):
        # Non-scalar "scalar" values.
        super().test_fillna_frame(data_missing)

    @skip_nested
    def test_setitem_invalid(self, data, invalid_scalar):
        # object dtype can hold anything, so doesn't raise
        super().test_setitem_invalid(data, invalid_scalar)

    @skip_nested
    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        # ValueError: cannot set using a list-like indexer with a different
        # length than the value
        super().test_setitem_sequence_broadcasts(data, box_in_series)

    @skip_nested
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        # ValueError: cannot set using a list-like indexer with a different
        # length than the value
        super().test_setitem_mask_broadcast(data, setter)

    @skip_nested
    def test_setitem_scalar_key_sequence_raise(self, data):
        # Failed: DID NOT RAISE <class 'ValueError'>
        super().test_setitem_scalar_key_sequence_raise(data)

    # TODO: there is some issue with NumpyExtensionArray, therefore,
    #   skip the setitem test for now, and fix it later (GH 31446)

    @skip_nested
    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array"],
    )
    def test_setitem_mask(self, data, mask, box_in_series):
        super().test_setitem_mask(data, mask, box_in_series)

    @skip_nested
    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        super().test_setitem_integer_array(data, idx, box_in_series)

    # NOTE(review): the fourth case is labelled "integer-array-True" but passes
    # box_in_series=False, duplicating the third case. Left as-is because
    # switching it to True changes which code path runs; confirm before fixing.
    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
        ],
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        super().test_setitem_integer_with_missing_raises(data, idx, box_in_series)

    @skip_nested
    def test_setitem_slice(self, data, box_in_series):
        super().test_setitem_slice(data, box_in_series)

    @skip_nested
    def test_setitem_loc_iloc_slice(self, data):
        super().test_setitem_loc_iloc_slice(data)

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
        # https://github.com/pandas-dev/pandas/issues/32395
        df = expected = pd.DataFrame({"data": pd.Series(data)})
        result = pd.DataFrame(index=df.index)

        # because result has object dtype, the attempt to do setting inplace
        #  is successful, and object dtype is retained
        key = full_indexer(df)
        result.loc[key, "data"] = df["data"]

        # base class method has expected = df; NumpyExtensionArray behaves oddly because
        #  we patch _typ for these tests.
        if data.dtype.numpy_dtype != object:
            if not isinstance(key, slice) or key != slice(None):
                expected = pd.DataFrame({"data": data.to_numpy()})
        tm.assert_frame_equal(result, expected, check_column_type=False)

    @pytest.mark.xfail(reason="NumpyEADtype is unpacked")
    def test_index_from_listlike_with_dtype(self, data):
        super().test_index_from_listlike_with_dtype(data)

    @skip_nested
    @pytest.mark.parametrize("engine", ["c", "python"])
    def test_EA_types(self, engine, data, request):
        super().test_EA_types(engine, data, request)
|
||||
|
||||
|
||||
class Test2DCompat(base.NDArrayBacked2DTests):
    """Run the inherited ``base.NDArrayBacked2DTests`` suite unchanged."""

    pass
|
@ -0,0 +1,119 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import (
|
||||
Period,
|
||||
iNaT,
|
||||
)
|
||||
from pandas.compat import is_platform_windows
|
||||
from pandas.compat.numpy import np_version_gte1p24
|
||||
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype
|
||||
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import PeriodArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@pytest.fixture(params=["D", "2D"])
def dtype(request):
    """PeriodDtype whose ``freq`` is the fixture param ("D" or "2D")."""
    return PeriodDtype(freq=request.param)
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """Length-100 PeriodArray built from the integers 1970..2069."""
    # NOTE(review): the integers are presumably interpreted as period
    # ordinals rather than years - confirm against PeriodArray.
    return PeriodArray(np.arange(1970, 2070), dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype):
    """Length-3 PeriodArray from the values [2018, 2019, 2017]."""
    return PeriodArray([2018, 2019, 2017], dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """Length-2 PeriodArray from [iNaT, 2017]; iNaT is the missing marker."""
    return PeriodArray([iNaT, 2017], dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype):
    """Length-3 PeriodArray from [2018, iNaT, 2017], with iNaT in the middle."""
    return PeriodArray([2018, iNaT, 2017], dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype):
    """PeriodArray laid out as [B, B, NA, NA, A, A, B, C] with A < B < C."""
    B = 2018
    NA = iNaT  # missing-value marker
    A = 2017
    C = 2019
    return PeriodArray([B, B, NA, NA, A, A, B, C], dtype=dtype)
|
||||
|
||||
|
||||
class TestPeriodArray(base.ExtensionTests):
    """Extension-interface tests for PeriodArray with period-specific tweaks."""

    def _get_expected_exception(self, op_name, obj, other):
        """Return None for ``__sub__``/``__rsub__``; defer to the parent otherwise."""
        if op_name not in ("__sub__", "__rsub__"):
            return super()._get_expected_exception(op_name, obj, other)
        return None

    def _supports_accumulation(self, ser, op_name: str) -> bool:
        """Only cummin and cummax are reported as supported."""
        return op_name in ("cummin", "cummax")

    def _supports_reduction(self, obj, op_name: str) -> bool:
        """Only min, max and median are reported as supported."""
        return op_name in ("min", "max", "median")

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        """Check ``median`` against an int64-based expectation.

        Every other reduction is handled by the parent implementation.
        """
        if op_name != "median":
            return super().check_reduce(ser, op_name, skipna)

        as_int = ser.astype("int64")
        result = getattr(ser, op_name)(skipna=skipna)
        int_median = getattr(as_int, op_name)(skipna=skipna)

        # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has no
        # attribute "freq"
        freq = ser.dtype.freq  # type: ignore[union-attr]
        # Rebuild a Period from the (truncated) integer median.
        expected = Period._from_ordinal(int(int_median), freq=freq)
        tm.assert_almost_equal(result, expected)

    @pytest.mark.parametrize("periods", [1, -2])
    def test_diff(self, data, periods):
        """Run the parent test, expecting a RuntimeWarning on Windows with
        NumPy >= 1.24."""
        if not (is_platform_windows() and np_version_gte1p24):
            super().test_diff(data, periods)
            return

        with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False):
            super().test_diff(data, periods)

    @pytest.mark.parametrize("na_action", [None, "ignore"])
    def test_map(self, data, na_action):
        """Mapping the identity function must give back an equal array."""

        def identity(value):
            return value

        mapped = data.map(identity, na_action=na_action)
        tm.assert_extension_array_equal(mapped, data)
|
||||
|
||||
|
||||
class Test2DCompat(base.NDArrayBacked2DTests):
    """Run the inherited ``base.NDArrayBacked2DTests`` suite unchanged."""

    pass
|
@ -0,0 +1,503 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import SparseArray
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
def make_data(fill_value):
    """Build a length-100 array in which every third element is ``fill_value``.

    A generator with a fixed seed is created on every call, so repeated calls
    with the same ``fill_value`` return equal values. A NaN ``fill_value``
    gives uniform floats; anything else gives integers drawn from [1, 100).
    """
    rng = np.random.default_rng(2)
    if not np.isnan(fill_value):
        data = rng.integers(1, 100, size=100, dtype=int)
    else:
        data = rng.uniform(size=100)

    # make sure the first two elements differ
    first, second = data[0], data[1]
    if first == second:
        data[0] += 1

    # positions 2, 5, 8, ... hold the fill value
    data[2::3] = fill_value
    return data
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """SparseDtype constructed with its default arguments."""
    return SparseDtype()
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data(request):
    """Length-100 SparseArray for semantics test.

    The fixture param is used both to generate the values (via ``make_data``)
    and as the array's ``fill_value``.
    """
    res = SparseArray(make_data(request.param), fill_value=request.param)
    return res
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_twos():
    """Length-100 SparseArray in which every element is 2.0."""
    return SparseArray(np.ones(100) * 2)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_missing(request):
    """Length 2 array with [NA, Valid]"""
    # The fixture param (0 or NaN) only sets the fill_value; the stored
    # values are the same in both parametrizations.
    return SparseArray([np.nan, 1], fill_value=request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_repeated(request):
    """Yield a factory producing ``count`` freshly built SparseArrays.

    Each array comes from a separate ``make_data`` call; because ``make_data``
    seeds its generator with a fixed value, the arrays are distinct objects
    holding equal values.
    """

    def gen(count):
        for _ in range(count):
            yield SparseArray(make_data(request.param), fill_value=request.param)

    yield gen
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_for_sorting(request):
    """Length-3 SparseArray [2, 3, 1] with the param as ``fill_value``."""
    return SparseArray([2, 3, 1], fill_value=request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_missing_for_sorting(request):
    """Length-3 SparseArray [2, NaN, 1] with the param as ``fill_value``."""
    return SparseArray([2, np.nan, 1], fill_value=request.param)
|
||||
|
||||
|
||||
@pytest.fixture
def na_cmp():
    """Return a comparator that is truthy only when ``pd.isna`` holds for both
    of its arguments."""
    return lambda left, right: pd.isna(left) and pd.isna(right)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_for_grouping(request):
    """SparseArray [1, 1, NaN, NaN, 2, 2, 1, 3] with the param as ``fill_value``."""
    return SparseArray([1, 1, np.nan, np.nan, 2, 2, 1, 3], fill_value=request.param)
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, np.nan])
def data_for_compare(request):
    """Length-10 SparseArray mixing zeros, one NaN, negative and positive
    values, with the param as ``fill_value``."""
    return SparseArray([0, 0, np.nan, -2, -1, 4, 2, 3, 0, 0], fill_value=request.param)
|
||||
|
||||
|
||||
class TestSparseArray(base.ExtensionTests):
|
||||
def _supports_reduction(self, obj, op_name: str) -> bool:
    """Report every reduction as supported (always returns True)."""
    return True
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
    """Run the parent test, marking known-failing combinations as xfail.

    Two groups are marked: reductions listed below as not implemented, and
    sum/max/min/mean on float-kind data with ``skipna=False``.
    """
    if all_numeric_reductions in [
        "prod",
        "median",
        "var",
        "std",
        "sem",
        "skew",
        "kurt",
    ]:
        mark = pytest.mark.xfail(
            reason="This should be viable but is not implemented"
        )
        # applymarker matches how markers are applied elsewhere in this class
        request.applymarker(mark)
    elif (
        all_numeric_reductions in ["sum", "max", "min", "mean"]
        and data.dtype.kind == "f"
        and not skipna
    ):
        mark = pytest.mark.xfail(reason="getting a non-nan float")
        request.applymarker(mark)

    super().test_reduce_series_numeric(data, all_numeric_reductions, skipna)
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna, request):
    """Run the parent test, marking known-failing combinations as xfail.

    Two groups are marked: reductions listed below as not implemented, and
    sum/max/min/mean on float-kind data with ``skipna=False``.
    """
    if all_numeric_reductions in [
        "prod",
        "median",
        "var",
        "std",
        "sem",
        "skew",
        "kurt",
    ]:
        mark = pytest.mark.xfail(
            reason="This should be viable but is not implemented"
        )
        # applymarker matches how markers are applied elsewhere in this class
        request.applymarker(mark)
    elif (
        all_numeric_reductions in ["sum", "max", "min", "mean"]
        and data.dtype.kind == "f"
        and not skipna
    ):
        mark = pytest.mark.xfail(reason="ExtensionArray NA mask are different")
        request.applymarker(mark)

    super().test_reduce_frame(data, all_numeric_reductions, skipna)
|
||||
|
||||
def _check_unsupported(self, data):
    """Skip the calling test when ``data`` has dtype ``SparseDtype(int, 0)``."""
    if data.dtype == SparseDtype(int, 0):
        pytest.skip("Can't store nan in int array.")
|
||||
|
||||
def test_concat_mixed_dtypes(self, data):
    """Concatenating sparse, integer and categorical frames must equal the
    concatenation of the same frames with each column cast to object."""
    # https://github.com/pandas-dev/pandas/issues/20762
    # This should be the same, aside from concat([sparse, float])
    df1 = pd.DataFrame({"A": data[:3]})
    df2 = pd.DataFrame({"A": [1, 2, 3]})
    df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category")
    dfs = [df1, df2, df3]

    # dataframes
    result = pd.concat(dfs)
    # expectation: every column converted to an object ndarray first
    expected = pd.concat(
        [x.apply(lambda s: np.asarray(s).astype(object)) for x in dfs]
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Re-declares the parametrization and warning filter, then runs the parent test.
@pytest.mark.filterwarnings(
    "ignore:The previous implementation of stack is deprecated"
)
@pytest.mark.parametrize(
    "columns",
    [
        ["A", "B"],
        pd.MultiIndex.from_tuples(
            [("A", "a"), ("A", "b")], names=["outer", "inner"]
        ),
    ],
)
@pytest.mark.parametrize("future_stack", [True, False])
def test_stack(self, data, columns, future_stack):
    """Delegate to the parent ``test_stack`` for flat and MultiIndex columns."""
    super().test_stack(data, columns, future_stack)
|
||||
|
||||
def test_concat_columns(self, data, na_value):
    """Run the parent test unless ``_check_unsupported`` skips this data."""
    self._check_unsupported(data)
    super().test_concat_columns(data, na_value)
|
||||
|
||||
def test_concat_extension_arrays_copy_false(self, data, na_value):
    """Run the parent test unless ``_check_unsupported`` skips this data."""
    self._check_unsupported(data)
    super().test_concat_extension_arrays_copy_false(data, na_value)
|
||||
|
||||
def test_align(self, data, na_value):
    """Run the parent test unless ``_check_unsupported`` skips this data."""
    self._check_unsupported(data)
    super().test_align(data, na_value)
|
||||
|
||||
def test_align_frame(self, data, na_value):
    """Run the parent test unless ``_check_unsupported`` skips this data."""
    self._check_unsupported(data)
    super().test_align_frame(data, na_value)
|
||||
|
||||
def test_align_series_frame(self, data, na_value):
    """Run the parent test unless ``_check_unsupported`` skips this data."""
    self._check_unsupported(data)
    super().test_align_series_frame(data, na_value)
|
||||
|
||||
def test_merge(self, data, na_value):
    """Run the parent test unless ``_check_unsupported`` skips this data."""
    self._check_unsupported(data)
    super().test_merge(data, na_value)
|
||||
|
||||
def test_get(self, data):
|
||||
ser = pd.Series(data, index=[2 * i for i in range(len(data))])
|
||||
if np.isnan(ser.values.fill_value):
|
||||
assert np.isnan(ser.get(4)) and np.isnan(ser.iloc[2])
|
||||
else:
|
||||
assert ser.get(4) == ser.iloc[2]
|
||||
assert ser.get(2) == ser.iloc[1]
|
||||
|
||||
def test_reindex(self, data, na_value):
    """Run the parent test unless ``_check_unsupported`` skips this data."""
    self._check_unsupported(data)
    super().test_reindex(data, na_value)
|
||||
|
||||
def test_isna(self, data_missing):
    """Check ``isna`` on an array with a missing value and again after the
    missing values have been filled with 0."""
    sarr = SparseArray(data_missing)
    # The expected boolean dtype's fill_value is whether the input's own
    # fill_value is missing.
    expected_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value))
    expected = SparseArray([True, False], dtype=expected_dtype)
    result = sarr.isna()
    tm.assert_sp_array_equal(result, expected)

    # test isna for arr without na
    sarr = sarr.fillna(0)
    expected_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value))
    expected = SparseArray([False, False], fill_value=False, dtype=expected_dtype)
    tm.assert_equal(sarr.isna(), expected)
|
||||
|
||||
def test_fillna_limit_backfill(self, data_missing):
    """Run the inherited limited-backfill test, expecting a warning from it."""
    # Either warning class satisfies the check; stack level is not verified.
    with tm.assert_produces_warning(
        (PerformanceWarning, FutureWarning), check_stacklevel=False
    ):
        super().test_fillna_limit_backfill(data_missing)
|
||||
|
||||
def test_fillna_no_op_returns_copy(self, data, request):
    """Run the inherited no-op ``fillna`` test, xfailing for NaN fill values."""
    nan_filled = np.isnan(data.fill_value)
    if nan_filled:
        xfail = pytest.mark.xfail(reason="returns array with different fill value")
        request.applymarker(xfail)
    super().test_fillna_no_op_returns_copy(data)
|
||||
|
||||
@pytest.mark.xfail(reason="Unsupported")
def test_fillna_series(self, data_missing):
    """Expected-failure placeholder for the inherited Series ``fillna`` test."""
    # this one looks doable.
    # TODO: the parent is deliberately called without ``data_missing``, which
    # is why this fails. If the fixture were passed through, the 0-fill case
    # would xpass.
    super().test_fillna_series()
|
||||
|
||||
def test_fillna_frame(self, data_missing):
    """Check ``DataFrame.fillna`` on a sparse column, allowing for a new fill value."""
    # Have to override to specify that fill_value will change.
    fill_value = data_missing[1]

    frame = pd.DataFrame({"A": data_missing, "B": [1, 2]})
    result = frame.fillna(fill_value)

    # When the array's own fill value is NA, the filled column is expected to
    # carry the replacement value as its sparse fill value.
    dtype = (
        SparseDtype(data_missing.dtype, fill_value)
        if pd.isna(data_missing.fill_value)
        else data_missing.dtype
    )

    filled_column = data_missing._from_sequence(
        [fill_value, fill_value], dtype=dtype
    )
    expected = pd.DataFrame({"A": filled_column, "B": [1, 2]})

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
# NOTE(review): presumably read by the inherited combine-with-``<=`` test as
# the expected result dtype -- confirm against the base class.
_combine_le_expected_dtype = "Sparse[bool]"
|
||||
|
||||
def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
    """Check whether a no-op ``DataFrame.fillna`` shares or copies its buffer."""
    arr = data_missing.take([1, 1])
    df = pd.DataFrame({"A": arr}, copy=False)

    fill_with = df.iloc[0, 0]
    result = df.fillna(fill_with)

    # The buffer comparison is only made for managers that expose ``blocks``.
    if hasattr(df._mgr, "blocks"):
        if not using_copy_on_write:
            assert df.values.base is not result.values.base
        else:
            assert df.values.base is result.values.base

    # The frame must still wrap the very same dense buffer as the input array.
    assert df.A._values.to_dense() is arr.to_dense()
|
||||
|
||||
def test_fillna_copy_series(self, data_missing, using_copy_on_write):
    """Check whether a no-op ``Series.fillna`` shares or copies its values."""
    arr = data_missing.take([1, 1])
    ser = pd.Series(arr, copy=False)

    fill_with = ser[0]
    result = ser.fillna(fill_with)

    if not using_copy_on_write:
        assert ser._values is not result._values
    else:
        assert ser._values is result._values

    # The Series must still wrap the very same dense buffer as the input array.
    assert ser._values.to_dense() is arr.to_dense()
|
||||
|
||||
@pytest.mark.xfail(reason="Not Applicable")
def test_fillna_length_mismatch(self, data_missing):
    """Run the inherited length-mismatch test; marked as an expected failure."""
    super().test_fillna_length_mismatch(data_missing)
|
||||
|
||||
def test_where_series(self, data, na_value):
    """Check ``Series.where`` on sparse data, with and without ``other``."""
    assert data[0] != data[1]
    array_cls = type(data)
    a, b = data[:2]

    ser = pd.Series(array_cls._from_sequence([a, a, b, b], dtype=data.dtype))

    # Without ``other`` the masked-out positions become NA and the result is
    # expected to be float-sparse with a 0.0 fill value.
    keep = np.array([True, True, False, False])
    masked = ser.where(keep)

    float_sparse = SparseDtype("float", 0.0)
    masked_expected = pd.Series(
        array_cls._from_sequence([a, a, na_value, na_value], dtype=float_sparse)
    )
    tm.assert_series_equal(masked, masked_expected)

    # With ``other`` the masked-out positions take the replacement values and
    # the original dtype is expected to be kept.
    other = array_cls._from_sequence([a, b, a, b], dtype=data.dtype)
    keep = np.array([True, False, True, True])
    replaced = ser.where(keep, other)
    replaced_expected = pd.Series(
        array_cls._from_sequence([a, b, b, b], dtype=data.dtype)
    )
    tm.assert_series_equal(replaced, replaced_expected)
|
||||
|
||||
def test_searchsorted(self, data_for_sorting, as_series):
    """Run the inherited ``searchsorted`` test, expecting a PerformanceWarning."""
    expect_warning = tm.assert_produces_warning(
        PerformanceWarning, check_stacklevel=False
    )
    with expect_warning:
        super().test_searchsorted(data_for_sorting, as_series)
|
||||
|
||||
def test_shift_0_periods(self, data):
    """Check that ``shift(0)`` returns a copy rather than the same object."""
    # GH#33856 shifting with periods=0 should return a copy, not same obj
    shifted = data.shift(0)

    # Mutate the original's stored values in place; the shifted array must
    # not observe the change.
    data._sparse_values[0] = data._sparse_values[1]
    assert shifted._sparse_values[0] != shifted._sparse_values[1]
|
||||
|
||||
@pytest.mark.parametrize("method", ["argmax", "argmin"])
def test_argmin_argmax_all_na(self, method, data, na_value):
    """Run the inherited all-NA argmin/argmax test after the support check."""
    # overriding because Sparse[int64, 0] cannot handle na_value
    self._check_unsupported(data)
    super().test_argmin_argmax_all_na(method, data, na_value)
|
||||
|
||||
@pytest.mark.fails_arm_wheels
@pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame])
def test_equals(self, data, na_value, as_series, box):
    """Run the inherited ``equals`` test for each container after the support check."""
    self._check_unsupported(data)
    super().test_equals(data, na_value, as_series, box)
|
||||
|
||||
@pytest.mark.fails_arm_wheels
def test_equals_same_data_different_object(self, data):
    """Run the inherited test unchanged; overridden only to attach the marker."""
    super().test_equals_same_data_different_object(data)
|
||||
|
||||
@pytest.mark.parametrize(
    "func, na_action, expected",
    [
        (lambda x: x, None, SparseArray([1.0, np.nan])),
        (lambda x: x, "ignore", SparseArray([1.0, np.nan])),
        (str, None, SparseArray(["1.0", "nan"], fill_value="nan")),
        (str, "ignore", SparseArray(["1.0", np.nan])),
    ],
)
def test_map(self, func, na_action, expected):
    """Check ``SparseArray.map`` for identity and ``str`` under both ``na_action`` modes."""
    # GH52096
    source = SparseArray([1, np.nan])
    mapped = source.map(func, na_action=na_action)
    tm.assert_extension_array_equal(mapped, expected)
|
||||
|
||||
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_raises(self, data, na_action):
    """Check that a mapper returning only NaN makes ``map`` raise ``ValueError``."""
    # GH52096
    expected_error = pytest.raises(
        ValueError, match="fill value in the sparse values not supported"
    )
    with expected_error:
        data.map(lambda x: np.nan, na_action=na_action)
|
||||
|
||||
@pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype")
def test_astype_string(self, data, nullable_string_dtype):
    """Expected-failure placeholder for the inherited ``astype`` string test."""
    # TODO: the parent is deliberately called without ``nullable_string_dtype``,
    # which is why this fails. If the fixture were passed through, the
    # 0-cases would xpass.
    super().test_astype_string(data)
|
||||
|
||||
# NOTE(review): presumably these tell the inherited arithmetic tests that no
# exception is expected for any operand combination -- confirm against the
# base class that reads them.
series_scalar_exc = frame_scalar_exc = divmod_exc = series_array_exc = None
|
||||
|
||||
def _skip_if_different_combine(self, data):
    """Skip the calling test when ``data`` has a fill value of 0."""
    if data.fill_value != 0:
        return
    # arith ops call on dtype.fill_value so that the sparsity
    # is maintained. Combine can't be called on a dtype in
    # general, so we can't make the expected. This is tested elsewhere
    pytest.skip("Incorrected expected from Series.combine and tested elsewhere")
|
||||
|
||||
def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
    """Run the inherited Series-with-scalar arithmetic test unless skipped."""
    # May skip the test for zero-fill data before the parent runs.
    self._skip_if_different_combine(data)
    super().test_arith_series_with_scalar(data, all_arithmetic_operators)
|
||||
|
||||
def test_arith_series_with_array(self, data, all_arithmetic_operators):
    """Run the inherited Series-with-array arithmetic test unless skipped."""
    # May skip the test for zero-fill data before the parent runs.
    self._skip_if_different_combine(data)
    super().test_arith_series_with_array(data, all_arithmetic_operators)
|
||||
|
||||
def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
    """Run the inherited frame-with-scalar test, xfailing some zero-fill cases.

    For data whose dtype fill value is 0, every operator outside the listed
    set is marked as an expected failure before delegating to the parent.
    """
    not_xfailed = [
        "mul",
        "rmul",
        "floordiv",
        "rfloordiv",
        "pow",
        "mod",
        "rmod",
    ]
    if (
        data.dtype.fill_value == 0
        and all_arithmetic_operators.strip("_") not in not_xfailed
    ):
        request.applymarker(
            pytest.mark.xfail(reason="result dtype.fill_value mismatch")
        )
    super().test_arith_frame_with_scalar(data, all_arithmetic_operators)
|
||||
|
||||
def _compare_other(
    self, ser: pd.Series, data_for_compare: SparseArray, comparison_op, other
):
    """Compare a sparse array with ``other`` and validate the boolean result.

    Parameters
    ----------
    ser : pd.Series
        Accepted for signature compatibility; not used in the body.
    data_for_compare : SparseArray
        Left-hand operand of the comparison.
    comparison_op : callable
        Binary comparison operator applied to the operands.
    other : scalar, array-like or pd.Series
        Right-hand operand.
    """
    other_is_series = isinstance(other, pd.Series)

    result = comparison_op(data_for_compare, other)
    if other_is_series:
        assert isinstance(result, pd.Series)
        assert isinstance(result.dtype, SparseDtype)
    else:
        assert isinstance(result, SparseArray)
    assert result.dtype.subtype == np.bool_

    # The expected fill value is the comparison applied to the fill values
    # themselves (Series operand), or reduced with ``np.all`` over ``other``.
    if other_is_series:
        fill_value = comparison_op(
            data_for_compare.fill_value, other._values.fill_value
        )
    else:
        fill_value = np.all(
            comparison_op(
                np.asarray(data_for_compare.fill_value), np.asarray(other)
            )
        )

    dense_outcome = comparison_op(data_for_compare.to_dense(), np.asarray(other))
    expected = SparseArray(dense_outcome, fill_value=fill_value, dtype=np.bool_)
    if other_is_series:
        # error: Incompatible types in assignment
        expected = pd.Series(expected)  # type: ignore[assignment]
    tm.assert_equal(result, expected)
|
||||
|
||||
def test_scalar(self, data_for_compare: SparseArray, comparison_op):
    """Compare the sparse array against the scalars 0, 1, -1 and NaN in turn."""
    ser = pd.Series(data_for_compare)
    for scalar in (0, 1, -1, np.nan):
        self._compare_other(ser, data_for_compare, comparison_op, scalar)
|
||||
|
||||
def test_array(self, data_for_compare: SparseArray, comparison_op, request):
    """Compare the sparse array with a dense ndarray of ten values in [-4, 5]."""
    # eq/ge/le are expected to fail when the dtype fill value is 0.
    zero_filled = data_for_compare.dtype.fill_value == 0
    if zero_filled and comparison_op.__name__ in ["eq", "ge", "le"]:
        request.applymarker(pytest.mark.xfail(reason="Wrong fill_value"))

    dense_other = np.linspace(-4, 5, 10)
    ser = pd.Series(data_for_compare)
    self._compare_other(ser, data_for_compare, comparison_op, dense_other)
|
||||
|
||||
def test_sparse_array(self, data_for_compare: SparseArray, comparison_op, request):
    """Compare the sparse array with shifted and scaled copies of itself."""
    # With a zero dtype fill value only ``gt`` is expected to pass.
    zero_filled = data_for_compare.dtype.fill_value == 0
    if zero_filled and comparison_op.__name__ != "gt":
        request.applymarker(pytest.mark.xfail(reason="Wrong fill_value"))

    ser = pd.Series(data_for_compare)

    shifted = data_for_compare + 1
    self._compare_other(ser, data_for_compare, comparison_op, shifted)

    doubled = data_for_compare * 2
    self._compare_other(ser, data_for_compare, comparison_op, doubled)
|
||||
|
||||
@pytest.mark.xfail(reason="Different repr")
def test_array_repr(self, data, size):
    """Run the inherited repr test; marked as an expected failure."""
    super().test_array_repr(data, size)
|
||||
|
||||
@pytest.mark.xfail(reason="result does not match expected")
@pytest.mark.parametrize("as_index", [True, False])
def test_groupby_extension_agg(self, as_index, data_for_grouping):
    """Run the inherited groupby-agg test for both ``as_index`` values (xfail)."""
    super().test_groupby_extension_agg(as_index, data_for_grouping)
|
||||
|
||||
|
||||
def test_array_type_with_arg(dtype):
    """Check that the dtype reports ``SparseArray`` as its array type."""
    array_type = dtype.construct_array_type()
    assert array_type is SparseArray
|
@ -0,0 +1,277 @@
|
||||
"""
|
||||
This file contains a minimal set of tests for compliance with the extension
|
||||
array interface test suite, and should contain no other tests.
|
||||
The test suite for the full functionality of the array is located in
|
||||
`pandas/tests/arrays/`.
|
||||
|
||||
The tests in this file are inherited from the BaseExtensionTests, and only
|
||||
minimal tweaks should be applied to get the tests passing (by overwriting a
|
||||
parent method).
|
||||
|
||||
Additional tests should either be added to one of the BaseExtensionTests
|
||||
classes (if they are relevant for the extension interface for all dtypes), or
|
||||
be added to the array-specific tests in `pandas/tests/arrays/`.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import string
|
||||
from typing import cast
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import HAS_PYARROW
|
||||
|
||||
from pandas.core.dtypes.base import StorageExtensionDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_string_dtype
|
||||
from pandas.core.arrays import ArrowStringArray
|
||||
from pandas.core.arrays.string_ import StringDtype
|
||||
from pandas.tests.arrays.string_.test_string import string_dtype_highest_priority
|
||||
from pandas.tests.extension import base
|
||||
|
||||
|
||||
def maybe_split_array(arr, chunked):
    """Return ``arr`` re-wrapped around a two-chunk pyarrow array when requested.

    The input is returned untouched when ``chunked`` is falsy or when the
    array's dtype storage is not ``"pyarrow"``. Otherwise the backing pyarrow
    array is cut at its midpoint and rebuilt as a chunked array.
    """
    if not chunked or arr.dtype.storage != "pyarrow":
        return arr

    pa = pytest.importorskip("pyarrow")

    backing = arr._pa_array
    midpoint = len(backing) // 2
    head, tail = backing[:midpoint], backing[midpoint:]
    rechunked = pa.chunked_array([*head.chunks, *tail.chunks])
    assert rechunked.num_chunks == 2
    return type(arr)(rechunked)
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def chunked(request):
    """Parametrized flag: ``True`` then ``False``."""
    flag = request.param
    return flag
|
||||
|
||||
|
||||
@pytest.fixture
def dtype(string_dtype_arguments):
    """Build a ``StringDtype`` from a ``(storage, na_value)`` pair."""
    # Tuple unpacking also enforces that exactly two arguments were supplied.
    storage, na_value = string_dtype_arguments
    string_dtype = StringDtype(storage=storage, na_value=na_value)
    return string_dtype
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype, chunked):
    """Length-100 string array of random ASCII letters with ``data[0] != data[1]``.

    The array is built with the dtype's array type and, depending on
    ``chunked``, passed through ``maybe_split_array``.
    """
    letters = list(string.ascii_letters)
    # Create the seeded generator once. Re-creating ``default_rng(2)`` on every
    # retry would reproduce the identical draw each time, so the loop below
    # could never terminate if the first two letters happened to be equal.
    rng = np.random.default_rng(2)
    strings = rng.choice(letters, size=100)
    while strings[0] == strings[1]:
        strings = rng.choice(letters, size=100)

    arr = dtype.construct_array_type()._from_sequence(strings, dtype=dtype)
    return maybe_split_array(arr, chunked)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype, chunked):
    """Length 2 array with [NA, Valid]"""
    array_cls = dtype.construct_array_type()
    values = array_cls._from_sequence([pd.NA, "A"], dtype=dtype)
    return maybe_split_array(values, chunked)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_sorting(dtype, chunked):
    """Length 3 array with the values ["B", "C", "A"]."""
    array_cls = dtype.construct_array_type()
    values = array_cls._from_sequence(["B", "C", "A"], dtype=dtype)
    return maybe_split_array(values, chunked)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing_for_sorting(dtype, chunked):
    """Length 3 array with the values ["B", NA, "A"]."""
    array_cls = dtype.construct_array_type()
    values = array_cls._from_sequence(["B", pd.NA, "A"], dtype=dtype)
    return maybe_split_array(values, chunked)
|
||||
|
||||
|
||||
@pytest.fixture
def data_for_grouping(dtype, chunked):
    """Length 8 array with the values [B, B, NA, NA, A, A, B, C]."""
    group_values = ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]
    array_cls = dtype.construct_array_type()
    values = array_cls._from_sequence(group_values, dtype=dtype)
    return maybe_split_array(values, chunked)
|
||||
|
||||
|
||||
class TestStringArray(base.ExtensionTests):
    """Extension-interface tests for ``StringDtype`` arrays.

    Inherits the shared ``base.ExtensionTests`` suite and overrides only the
    tests and hooks defined below.
    """

    def test_eq_with_str(self, dtype):
        """Run the inherited check, then compare against storage-qualified aliases."""
        super().test_eq_with_str(dtype)

        if dtype.na_value is pd.NA:
            # only the NA-variant supports parametrized string alias
            alias = f"string[{dtype.storage}]"
            assert dtype == alias
        elif dtype.storage == "pyarrow":
            # Comparing with this alias is expected to emit a FutureWarning.
            with tm.assert_produces_warning(FutureWarning):
                assert dtype == "string[pyarrow_numpy]"

    def test_is_not_string_type(self, dtype):
        """Assert the opposite of the base test: this dtype is a string dtype."""
        # Different from BaseDtypeTests.test_is_not_string_type
        # because StringDtype is a string type
        assert is_string_dtype(dtype)

    def test_is_dtype_from_name(self, dtype, using_infer_string):
        """Check ``is_dtype`` on the dtype's own name.

        The NaN-variant without string inference is expected to report
        ``False``; every other case uses the inherited test.
        """
        if not (dtype.na_value is np.nan and not using_infer_string):
            super().test_is_dtype_from_name(dtype)
            return

        result = type(dtype).is_dtype(dtype.name)
        assert result is False

    def test_construct_from_string_own_name(self, dtype, using_infer_string):
        """Check ``construct_from_string`` on the dtype's own name.

        The NaN-variant without string inference is expected to raise
        ``TypeError``; every other case uses the inherited test.
        """
        if not (dtype.na_value is np.nan and not using_infer_string):
            super().test_construct_from_string_own_name(dtype)
            return

        with pytest.raises(TypeError, match="Cannot construct a 'StringDtype'"):
            dtype.construct_from_string(dtype.name)

    def test_view(self, data):
        """Run the inherited view test, skipping pyarrow-backed data."""
        pyarrow_backed = data.dtype.storage == "pyarrow"
        if pyarrow_backed:
            pytest.skip(reason="2D support not implemented for ArrowStringArray")
        super().test_view(data)

    def test_from_dtype(self, data):
        """Intentionally empty override of the inherited test."""
        # base test uses string representation of dtype
        pass

    def test_transpose(self, data):
        """Run the inherited transpose test, skipping pyarrow-backed data."""
        pyarrow_backed = data.dtype.storage == "pyarrow"
        if pyarrow_backed:
            pytest.skip(reason="2D support not implemented for ArrowStringArray")
        super().test_transpose(data)

    def test_setitem_preserves_views(self, data):
        """Run the inherited setitem/view test, skipping pyarrow-backed data."""
        pyarrow_backed = data.dtype.storage == "pyarrow"
        if pyarrow_backed:
            pytest.skip(reason="2D support not implemented for ArrowStringArray")
        super().test_setitem_preserves_views(data)

    def test_dropna_array(self, data_missing):
        """Check that ``dropna`` keeps only the second (valid) element."""
        dropped = data_missing.dropna()
        only_valid = data_missing[[1]]
        tm.assert_extension_array_equal(dropped, only_valid)

    def test_fillna_no_op_returns_copy(self, data):
        """Check that ``fillna`` on NA-free data returns an equal but new array."""
        data = data[~data.isna()]

        # Filling with a scalar value.
        first_valid = data[0]
        filled = data.fillna(first_valid)
        assert filled is not data
        tm.assert_extension_array_equal(filled, data)

        # Filling with a method.
        filled = data.fillna(method="backfill")
        assert filled is not data
        tm.assert_extension_array_equal(filled, data)

    def _get_expected_exception(
        self, op_name: str, obj, other
    ) -> type[Exception] | tuple[type[Exception], ...] | None:
        """Return the exception an arithmetic dunder is expected to raise.

        ``TypeError`` is returned for every operator listed below and
        ``None`` for anything else.
        """
        raises_type_error = (
            "__mod__",
            "__rmod__",
            "__divmod__",
            "__rdivmod__",
            "__pow__",
            "__rpow__",
            # Can only multiply strings by integers
            "__mul__",
            "__rmul__",
            "__truediv__",
            "__rtruediv__",
            "__floordiv__",
            "__rfloordiv__",
            "__sub__",
            "__rsub__",
        )
        return TypeError if op_name in raises_type_error else None

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        """Return whether reduction ``op_name`` is supported for ``ser``.

        ``min``/``max``/``sum`` are always supported; ``any``/``all`` only
        when the dtype's NA value is ``np.nan``.
        """
        if op_name in ["min", "max", "sum"]:
            return True
        return (
            ser.dtype.na_value is np.nan  # type: ignore[union-attr]
            and op_name in ("any", "all")
        )

    def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
        """Return whether accumulation ``op_name`` is supported."""
        assert isinstance(ser.dtype, StorageExtensionDtype)
        supported = ["cummin", "cummax", "cumsum"]
        return op_name in supported

    def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
        """Cast an element-wise result to the dtype the real operation yields.

        Addition keeps a string dtype (resolved by priority when ``other`` is
        also string-typed); every other operation is cast to a boolean dtype
        chosen from the NA value and storage of ``obj``'s dtype.
        """
        dtype = cast(StringDtype, tm.get_dtype(obj))
        if op_name in ["__add__", "__radd__"]:
            cast_to = dtype
            # A plain ``str`` operand has no dtype to look up.
            dtype_other = None if isinstance(other, str) else tm.get_dtype(other)
            if isinstance(dtype_other, StringDtype):
                cast_to = string_dtype_highest_priority(dtype, dtype_other)
        elif dtype.na_value is np.nan:
            cast_to = np.bool_  # type: ignore[assignment]
        elif dtype.storage == "pyarrow":
            cast_to = "bool[pyarrow]"  # type: ignore[assignment]
        else:
            cast_to = "boolean"  # type: ignore[assignment]
        return pointwise_result.astype(cast_to)

    def test_compare_scalar(self, data, comparison_op):
        """Compare the data against the scalar string ``"abc"``."""
        as_series = pd.Series(data)
        self._compare_other(as_series, data, comparison_op, "abc")

    def test_combine_add(self, data_repeated, using_infer_string, request):
        """Run the inherited combine-add test, xfailing one inference case."""
        dtype = next(data_repeated(1)).dtype
        if (
            using_infer_string
            and dtype.na_value is pd.NA
            and dtype.storage == "python"
        ):
            request.applymarker(
                pytest.mark.xfail(
                    reason="The pointwise operation result will be inferred to "
                    "string[nan, pyarrow], which does not match the input dtype"
                )
            )
        super().test_combine_add(data_repeated)

    def test_arith_series_with_array(
        self, data, all_arithmetic_operators, using_infer_string, request
    ):
        """Run the inherited Series-with-array test, xfailing one ``__radd__`` case."""
        dtype = data.dtype
        if (
            using_infer_string
            and all_arithmetic_operators == "__radd__"
            and dtype.na_value is pd.NA
            and (HAS_PYARROW or dtype.storage == "pyarrow")
        ):
            # TODO(infer_string)
            request.applymarker(
                pytest.mark.xfail(
                    reason="The pointwise operation result will be inferred to "
                    "string[nan, pyarrow], which does not match the input dtype"
                )
            )
        super().test_arith_series_with_array(data, all_arithmetic_operators)
|
||||
|
||||
|
||||
class Test2DCompat(base.Dim2CompatTests):
    """2D-compatibility tests, skipped for ``ArrowStringArray`` data."""

    @pytest.fixture(autouse=True)
    def arrow_not_supported(self, data):
        """Skip every test in this class when ``data`` is an ``ArrowStringArray``."""
        if not isinstance(data, ArrowStringArray):
            return
        pytest.skip(reason="2D support not implemented for ArrowStringArray")
|
||||
|
||||
|
||||
def test_searchsorted_with_na_raises(data_for_sorting, as_series):
    """Check that ``searchsorted`` raises ``ValueError`` when the array holds NA."""
    # GH50447
    # Unpacking also checks that the fixture holds exactly three elements.
    b, _c, _a = data_for_sorting
    haystack = data_for_sorting.take([2, 0, 1])  # to get [a, b, c]
    haystack[-1] = pd.NA

    if as_series:
        haystack = pd.Series(haystack)

    expected_message = (
        "searchsorted requires array to be sorted, "
        "which is impossible with NAs present."
    )
    with pytest.raises(ValueError, match=expected_message):
        haystack.searchsorted(b)
|
Reference in New Issue
Block a user