done
This commit is contained in:
		| @ -0,0 +1,131 @@ | ||||
| """ | ||||
| Base test suite for extension arrays. | ||||
|  | ||||
| These tests are intended for third-party libraries to subclass to validate | ||||
| that their extension arrays and dtypes satisfy the interface. Moving or | ||||
| renaming the tests should not be done lightly. | ||||
|  | ||||
| Libraries are expected to implement a few pytest fixtures to provide data | ||||
| for the tests. The fixtures may be located in either | ||||
|  | ||||
| * The same module as your test class. | ||||
| * A ``conftest.py`` in the same directory as your test class. | ||||
|  | ||||
| The full list of fixtures may be found in the ``conftest.py`` next to this | ||||
| file. | ||||
|  | ||||
| .. code-block:: python | ||||
|  | ||||
|    import pytest | ||||
|    from pandas.tests.extension.base import BaseDtypeTests | ||||
|  | ||||
|  | ||||
|    @pytest.fixture | ||||
|    def dtype(): | ||||
|        return MyDtype() | ||||
|  | ||||
|  | ||||
|    class TestMyDtype(BaseDtypeTests): | ||||
|        pass | ||||
|  | ||||
|  | ||||
| Your class ``TestMyDtype`` will inherit all the tests defined on | ||||
| ``BaseDtypeTests``. pytest's fixture discovery will supply your ``dtype`` | ||||
| wherever the test requires it. You're free to implement additional tests. | ||||
|  | ||||
| """ | ||||
| from pandas.tests.extension.base.accumulate import BaseAccumulateTests | ||||
| from pandas.tests.extension.base.casting import BaseCastingTests | ||||
| from pandas.tests.extension.base.constructors import BaseConstructorsTests | ||||
| from pandas.tests.extension.base.dim2 import (  # noqa: F401 | ||||
|     Dim2CompatTests, | ||||
|     NDArrayBacked2DTests, | ||||
| ) | ||||
| from pandas.tests.extension.base.dtype import BaseDtypeTests | ||||
| from pandas.tests.extension.base.getitem import BaseGetitemTests | ||||
| from pandas.tests.extension.base.groupby import BaseGroupbyTests | ||||
| from pandas.tests.extension.base.index import BaseIndexTests | ||||
| from pandas.tests.extension.base.interface import BaseInterfaceTests | ||||
| from pandas.tests.extension.base.io import BaseParsingTests | ||||
| from pandas.tests.extension.base.methods import BaseMethodsTests | ||||
| from pandas.tests.extension.base.missing import BaseMissingTests | ||||
| from pandas.tests.extension.base.ops import (  # noqa: F401 | ||||
|     BaseArithmeticOpsTests, | ||||
|     BaseComparisonOpsTests, | ||||
|     BaseOpsUtil, | ||||
|     BaseUnaryOpsTests, | ||||
| ) | ||||
| from pandas.tests.extension.base.printing import BasePrintingTests | ||||
| from pandas.tests.extension.base.reduce import BaseReduceTests | ||||
| from pandas.tests.extension.base.reshaping import BaseReshapingTests | ||||
| from pandas.tests.extension.base.setitem import BaseSetitemTests | ||||
|  | ||||
|  | ||||
# One test class that you can inherit as an alternative to inheriting all the
# test classes above.
# Note 1) this includes Dim2CompatTests (the last base listed below) but
#  excludes NDArrayBacked2DTests.
# Note 2) this uses BaseReduceTests and _not_ BaseBooleanReduceTests,
#  BaseNoReduceTests, or BaseNumericReduceTests
# Note 3) the order of the bases determines the method resolution order, so
#  it should not be rearranged casually.
class ExtensionTests(
    BaseAccumulateTests,
    BaseCastingTests,
    BaseConstructorsTests,
    BaseDtypeTests,
    BaseGetitemTests,
    BaseGroupbyTests,
    BaseIndexTests,
    BaseInterfaceTests,
    BaseParsingTests,
    BaseMethodsTests,
    BaseMissingTests,
    BaseArithmeticOpsTests,
    BaseComparisonOpsTests,
    BaseUnaryOpsTests,
    BasePrintingTests,
    BaseReduceTests,
    BaseReshapingTests,
    BaseSetitemTests,
    Dim2CompatTests,
):
    pass
|  | ||||
|  | ||||
def __getattr__(name: str):
    """
    Resolve deprecated test-class names on module attribute access.

    Accessing ``BaseNoReduceTests``, ``BaseNumericReduceTests`` or
    ``BaseBooleanReduceTests`` on this module emits a ``FutureWarning`` and
    then returns the class of that name from
    ``pandas.tests.extension.base.reduce``.

    Parameters
    ----------
    name : str
        The attribute that was not found through normal module lookup.

    Returns
    -------
    type
        The deprecated test class called ``name``.

    Raises
    ------
    AttributeError
        If ``name`` is not one of the deprecated class names.
    """
    import warnings

    deprecated_names = (
        "BaseNoReduceTests",
        "BaseNumericReduceTests",
        "BaseBooleanReduceTests",
    )

    if name in deprecated_names:
        warnings.warn(
            f"{name} is deprecated and will be removed in a "
            "future version. Use BaseReduceTests and override "
            "`_supports_reduction` instead.",
            FutureWarning,
            # Attribute the warning to the code that accessed the deprecated
            # name instead of to this module.
            stacklevel=2,
        )
        # Imported only after warning, and only when a deprecated name is
        # actually requested.
        from pandas.tests.extension.base import reduce as reduce_tests

        return getattr(reduce_tests, name)

    raise AttributeError(
        f"module 'pandas.tests.extension.base' has no attribute '{name}'"
    )
| @ -0,0 +1,40 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class BaseAccumulateTests:
    """
    Tests for accumulation methods on a Series holding the extension array.

    Generally these only make sense for numeric/boolean operations, so
    support is opt-in through ``_supports_accumulation``.
    """

    def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
        """Return whether ``op_name`` is expected to work on ``ser``."""
        # Conservative default of "not supported"; subclass authors override
        # this for the operations their dtype implements.
        return False

    def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
        """Check ``op_name`` on ``ser`` against the same op on a cast copy."""
        try:
            reference = ser.astype("float64")
        except (TypeError, ValueError):
            # TypeError: e.g. Period can't be cast to float64
            # ValueError: e.g. String can't be cast to float64
            reference = ser.astype(object)

        accumulate = getattr(ser, op_name)
        accumulate_reference = getattr(reference, op_name)
        tm.assert_series_equal(
            accumulate(skipna=skipna),
            accumulate_reference(skipna=skipna),
            check_dtype=False,
        )

    @pytest.mark.parametrize("skipna", [True, False])
    def test_accumulate_series(self, data, all_numeric_accumulations, skipna):
        """Supported accumulations must match; unsupported ones must raise."""
        ser = pd.Series(data)
        op_name = all_numeric_accumulations

        if not self._supports_accumulation(ser, op_name):
            with pytest.raises((NotImplementedError, TypeError)):
                # TODO: require TypeError for things that will _never_ work?
                getattr(ser, op_name)(skipna=skipna)
            return

        self.check_accumulate(ser, op_name, skipna)
| @ -0,0 +1,2 @@ | ||||
class BaseExtensionTests:
    # Intentionally empty: this class defines no tests, fixtures or helpers.
    ...
| @ -0,0 +1,87 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.internals.blocks import NumpyBlock | ||||
|  | ||||
|  | ||||
class BaseCastingTests:
    """Tests for casting extension arrays to and from other dtypes."""

    def test_astype_object_series(self, all_data):
        """A Series cast to object must be backed by an object ndarray."""
        obj_ser = pd.Series(all_data, name="A").astype(object)
        assert obj_ser.dtype == np.dtype(object)

        mgr = obj_ser._mgr
        if hasattr(mgr, "blocks"):
            first_block = mgr.blocks[0]
            assert isinstance(first_block, NumpyBlock)
            assert first_block.is_object
        assert isinstance(mgr.array, np.ndarray)
        assert mgr.array.dtype == np.dtype(object)

    def test_astype_object_frame(self, all_data):
        """A DataFrame cast to object must be backed by an object ndarray."""
        df = pd.DataFrame({"A": all_data})
        obj_df = df.astype(object)

        mgr = obj_df._mgr
        if hasattr(mgr, "blocks"):
            first_block = mgr.blocks[0]
            assert isinstance(first_block, NumpyBlock), type(first_block)
            assert first_block.is_object
        first_array = mgr.arrays[0]
        assert isinstance(first_array, np.ndarray)
        assert first_array.dtype == np.dtype(object)

        # the new and old dtypes must be comparable, and must not match
        dtypes_match = obj_df.dtypes == df.dtypes
        assert not dtypes_match.any()

    def test_tolist(self, data):
        """``Series.tolist`` must agree with iterating the array."""
        as_list = pd.Series(data).tolist()
        assert as_list == list(data)

    def test_astype_str(self, data):
        """``astype(str)`` must match calling ``str`` on each element."""
        head = data[:2]
        result = pd.Series(head).astype(str)
        expected = pd.Series(list(map(str, head)), dtype=str)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "nullable_string_dtype",
        [
            "string[python]",
            pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
        ],
    )
    def test_astype_string(self, data, nullable_string_dtype):
        """Casting to a nullable string dtype stringifies each element."""
        # GH-33465, GH#45326 as of 2.0 we decode bytes instead of calling str(obj)
        head = data[:5]
        result = pd.Series(head).astype(nullable_string_dtype)
        strings = [
            val.decode() if isinstance(val, bytes) else str(val) for val in head
        ]
        expected = pd.Series(strings, dtype=nullable_string_dtype)
        tm.assert_series_equal(result, expected)

    def test_to_numpy(self, data):
        """``to_numpy`` on the array and on a Series must match ``np.asarray``."""
        expected = np.asarray(data)

        from_array = data.to_numpy()
        tm.assert_equal(from_array, expected)

        from_series = pd.Series(data).to_numpy()
        tm.assert_equal(from_series, expected)

    def test_astype_empty_dataframe(self, dtype):
        """Casting an empty DataFrame must leave it unchanged."""
        # https://github.com/pandas-dev/pandas/issues/33113
        empty = pd.DataFrame()
        tm.assert_frame_equal(empty.astype(dtype), empty)

    @pytest.mark.parametrize("copy", [True, False])
    def test_astype_own_type(self, data, copy):
        """Casting to the array's own dtype returns ``data`` itself iff not copying."""
        # https://github.com/pandas-dev/pandas/issues/28488
        result = data.astype(data.dtype, copy=copy)
        is_same_object = result is data
        assert is_same_object is (not copy)
        tm.assert_extension_array_equal(result, data)
| @ -0,0 +1,142 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.api.extensions import ExtensionArray | ||||
| from pandas.core.internals.blocks import EABackedBlock | ||||
|  | ||||
|  | ||||
class BaseConstructorsTests:
    """Tests for constructing arrays, Series and DataFrames from the dtype."""

    def test_from_sequence_from_cls(self, data):
        """``_from_sequence`` must round-trip ``data`` and its empty slice."""
        result = type(data)._from_sequence(data, dtype=data.dtype)
        tm.assert_extension_array_equal(result, data)

        data = data[:0]
        result = type(data)._from_sequence(data, dtype=data.dtype)
        tm.assert_extension_array_equal(result, data)

    def test_array_from_scalars(self, data):
        """``_from_sequence`` on a list of scalars must return the array type."""
        scalars = [data[0], data[1], data[2]]
        result = data._from_sequence(scalars, dtype=data.dtype)
        assert isinstance(result, type(data))

    def test_series_constructor(self, data):
        """``Series(data, copy=False)`` must wrap ``data`` without copying."""
        result = pd.Series(data, copy=False)
        assert result.dtype == data.dtype
        assert len(result) == len(data)
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
        assert result._mgr.array is data

        # Series[EA] is unboxed / boxed correctly
        result2 = pd.Series(result)
        assert result2.dtype == data.dtype
        # Guard on the manager that is inspected below (result2, not result).
        if hasattr(result2._mgr, "blocks"):
            assert isinstance(result2._mgr.blocks[0], EABackedBlock)

    def test_series_constructor_no_data_with_index(self, dtype, na_value):
        """A Series built from only an index and dtype is filled with NA."""
        result = pd.Series(index=[1, 2, 3], dtype=dtype)
        expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

        # GH 33559 - empty index
        result = pd.Series(index=[], dtype=dtype)
        expected = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_series_constructor_scalar_na_with_index(self, dtype, na_value):
        """A scalar NA must be broadcast over the given index."""
        result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype)
        expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_series_constructor_scalar_with_index(self, data, dtype):
        """A scalar element must be broadcast over the given index."""
        scalar = data[0]
        result = pd.Series(scalar, index=[1, 2, 3], dtype=dtype)
        expected = pd.Series([scalar] * 3, index=[1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

        result = pd.Series(scalar, index=["foo"], dtype=dtype)
        expected = pd.Series([scalar], index=["foo"], dtype=dtype)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("from_series", [True, False])
    def test_dataframe_constructor_from_dict(self, data, from_series):
        """A dict of arrays (or Series) must yield an EA-backed column."""
        if from_series:
            data = pd.Series(data)
        result = pd.DataFrame({"A": data})
        assert result.dtypes["A"] == data.dtype
        assert result.shape == (len(data), 1)
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
        assert isinstance(result._mgr.arrays[0], ExtensionArray)

    def test_dataframe_from_series(self, data):
        """``DataFrame(Series(data))`` must keep the extension dtype."""
        result = pd.DataFrame(pd.Series(data))
        assert result.dtypes[0] == data.dtype
        assert result.shape == (len(data), 1)
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], EABackedBlock)
        assert isinstance(result._mgr.arrays[0], ExtensionArray)

    def test_series_given_mismatched_index_raises(self, data):
        """Values and index of different lengths must raise ``ValueError``."""
        msg = r"Length of values \(3\) does not match length of index \(5\)"
        with pytest.raises(ValueError, match=msg):
            pd.Series(data[:3], index=[0, 1, 2, 3, 4])

    def test_from_dtype(self, data):
        """Construction must accept both the dtype object and its string form."""
        # construct from our dtype & string dtype
        dtype = data.dtype

        expected = pd.Series(data)
        result = pd.Series(list(data), dtype=dtype)
        tm.assert_series_equal(result, expected)

        result = pd.Series(list(data), dtype=str(dtype))
        tm.assert_series_equal(result, expected)

        # gh-30280

        expected = pd.DataFrame(data).astype(dtype)
        result = pd.DataFrame(list(data), dtype=dtype)
        tm.assert_frame_equal(result, expected)

        result = pd.DataFrame(list(data), dtype=str(dtype))
        tm.assert_frame_equal(result, expected)

    def test_pandas_array(self, data):
        """``pd.array`` on an extension array must return an equal array."""
        # pd.array(extension_array) should be idempotent...
        result = pd.array(data)
        tm.assert_extension_array_equal(result, data)

    def test_pandas_array_dtype(self, data):
        """An explicit object dtype must produce a NumpyExtensionArray."""
        # ... but specifying dtype will override idempotency
        result = pd.array(data, dtype=np.dtype(object))
        expected = pd.arrays.NumpyExtensionArray(np.asarray(data, dtype=object))
        tm.assert_equal(result, expected)

    def test_construct_empty_dataframe(self, dtype):
        """An empty DataFrame with a dtype must have an empty EA column."""
        # GH 33623
        result = pd.DataFrame(columns=["a"], dtype=dtype)
        expected = pd.DataFrame(
            {"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0)
        )
        tm.assert_frame_equal(result, expected)

    def test_empty(self, dtype):
        """``_empty`` / ``dtype.empty`` must return arrays of the given shape."""
        cls = dtype.construct_array_type()
        result = cls._empty((4,), dtype=dtype)
        assert isinstance(result, cls)
        assert result.dtype == dtype
        assert result.shape == (4,)

        # GH#19600 method on ExtensionDtype
        result2 = dtype.empty((4,))
        assert isinstance(result2, cls)
        assert result2.dtype == dtype
        assert result2.shape == (4,)

        # an integer shape is accepted as well as a tuple
        result2 = dtype.empty(4)
        assert isinstance(result2, cls)
        assert result2.dtype == dtype
        assert result2.shape == (4,)
							
								
								
									
										345
									
								
								lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										345
									
								
								lib/python3.11/site-packages/pandas/tests/extension/base/dim2.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,345 @@ | ||||
| """ | ||||
| Tests for 2D compatibility. | ||||
| """ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.missing import is_matching_na | ||||
|  | ||||
| from pandas.core.dtypes.common import ( | ||||
|     is_bool_dtype, | ||||
|     is_integer_dtype, | ||||
| ) | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE | ||||
|  | ||||
|  | ||||
class Dim2CompatTests:
    """Tests for the 2D behaviour of extension arrays."""

    # Note: these are ONLY for ExtensionArray subclasses that support 2D arrays.
    #  i.e. not for pyarrow-backed EAs.

    @pytest.fixture(autouse=True)
    def skip_if_doesnt_support_2d(self, dtype, request):
        """Skip the tests defined on this class when ``dtype`` is not 2D-capable."""
        if not dtype._supports_2d:
            node = request.node
            # In cases where we are mixed in to ExtensionTests, we only want to
            #  skip tests that are defined in Dim2CompatTests
            test_func = node._obj
            if test_func.__qualname__.startswith("Dim2CompatTests"):
                # TODO: is there a less hacky way of checking this?
                pytest.skip(f"{dtype} does not support 2D.")

    def test_transpose(self, data):
        """``.T`` must reverse the shape of a non-square 2D array."""
        arr2d = data.repeat(2).reshape(-1, 2)
        shape = arr2d.shape
        assert shape[0] != shape[-1]  # otherwise the rest of the test is useless

        assert arr2d.T.shape == shape[::-1]

    def test_frame_from_2d_array(self, data):
        """A 2D array passed to DataFrame becomes one column per array column."""
        arr2d = data.repeat(2).reshape(-1, 2)

        df = pd.DataFrame(arr2d)
        expected = pd.DataFrame({0: arr2d[:, 0], 1: arr2d[:, 1]})
        tm.assert_frame_equal(df, expected)

    def test_swapaxes(self, data):
        """``swapaxes(0, 1)`` must equal the transpose."""
        arr2d = data.repeat(2).reshape(-1, 2)

        result = arr2d.swapaxes(0, 1)
        expected = arr2d.T
        tm.assert_extension_array_equal(result, expected)

    def test_delete_2d(self, data):
        """``delete`` must remove a row (axis=0) or a column (axis=1)."""
        arr2d = data.repeat(3).reshape(-1, 3)

        # axis = 0
        result = arr2d.delete(1, axis=0)
        expected = data.delete(1).repeat(3).reshape(-1, 3)
        tm.assert_extension_array_equal(result, expected)

        # axis = 1
        result = arr2d.delete(1, axis=1)
        expected = data.repeat(2).reshape(-1, 2)
        tm.assert_extension_array_equal(result, expected)

    def test_take_2d(self, data):
        """``take`` along axis 0 must match the 1D ``take`` reshaped to a column."""
        arr2d = data.reshape(-1, 1)

        result = arr2d.take([0, 0, -1], axis=0)

        expected = data.take([0, 0, -1]).reshape(-1, 1)
        tm.assert_extension_array_equal(result, expected)

    def test_repr_2d(self, data):
        """The repr of a 2D array must contain the class header exactly once."""
        # this could fail in a corner case where an element contained the name
        res = repr(data.reshape(1, -1))
        assert res.count(f"<{type(data).__name__}") == 1

        res = repr(data.reshape(-1, 1))
        assert res.count(f"<{type(data).__name__}") == 1

    def test_reshape(self, data):
        """``reshape`` accepts positional or tuple shapes and rejects bad sizes."""
        arr2d = data.reshape(-1, 1)
        assert arr2d.shape == (data.size, 1)
        assert len(arr2d) == len(data)

        arr2d = data.reshape((-1, 1))
        assert arr2d.shape == (data.size, 1)
        assert len(arr2d) == len(data)

        # a shape holding twice as many elements as ``data`` must be rejected
        with pytest.raises(ValueError):
            data.reshape((data.size, 2))
        with pytest.raises(ValueError):
            data.reshape(data.size, 2)

    def test_getitem_2d(self, data):
        """Check row, slice, column and dimension-expanding indexing."""
        arr2d = data.reshape(1, -1)

        result = arr2d[0]
        tm.assert_extension_array_equal(result, data)

        # there is only one row, so any other row index is out of bounds
        with pytest.raises(IndexError):
            arr2d[1]

        with pytest.raises(IndexError):
            arr2d[-2]

        result = arr2d[:]
        tm.assert_extension_array_equal(result, arr2d)

        result = arr2d[:, :]
        tm.assert_extension_array_equal(result, arr2d)

        result = arr2d[:, 0]
        expected = data[[0]]
        tm.assert_extension_array_equal(result, expected)

        # dimension-expanding getitem on 1D
        result = data[:, np.newaxis]
        tm.assert_extension_array_equal(result, arr2d.T)

    def test_iter_2d(self, data):
        """Iterating a 2D array must yield one 1D array per row."""
        arr2d = data.reshape(1, -1)

        objs = list(iter(arr2d))
        assert len(objs) == arr2d.shape[0]

        for obj in objs:
            assert isinstance(obj, type(data))
            assert obj.dtype == data.dtype
            assert obj.ndim == 1
            assert len(obj) == arr2d.shape[1]

    def test_tolist_2d(self, data):
        """``tolist`` on a 2D array must return a list of lists."""
        arr2d = data.reshape(1, -1)

        result = arr2d.tolist()
        expected = [data.tolist()]

        assert isinstance(result, list)
        assert all(isinstance(x, list) for x in result)

        assert result == expected

    def test_concat_2d(self, data):
        """``_concat_same_type`` must support axis 0 and 1 and reject axis 2."""
        left = type(data)._concat_same_type([data, data]).reshape(-1, 2)
        right = left.copy()

        # axis=0
        result = left._concat_same_type([left, right], axis=0)
        expected = data._concat_same_type([data] * 4).reshape(-1, 2)
        tm.assert_extension_array_equal(result, expected)

        # axis=1
        result = left._concat_same_type([left, right], axis=1)
        assert result.shape == (len(data), 4)
        tm.assert_extension_array_equal(result[:, :2], left)
        tm.assert_extension_array_equal(result[:, 2:], right)

        # axis > 1 -> invalid
        msg = "axis 2 is out of bounds for array of dimension 2"
        with pytest.raises(ValueError, match=msg):
            left._concat_same_type([left, right], axis=2)

    @pytest.mark.parametrize("method", ["backfill", "pad"])
    def test_fillna_2d_method(self, data_missing, method):
        """``_pad_or_backfill`` on 2D must match the 1D result repeated per column."""
        # pad_or_backfill is always along axis=0
        arr = data_missing.repeat(2).reshape(2, 2)
        assert arr[0].isna().all()
        assert not arr[1].isna().any()

        result = arr._pad_or_backfill(method=method, limit=None)

        expected = data_missing._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
        tm.assert_extension_array_equal(result, expected)

        # Reverse so that backfill is not a no-op.
        arr2 = arr[::-1]
        assert not arr2[0].isna().any()
        assert arr2[1].isna().all()

        result2 = arr2._pad_or_backfill(method=method, limit=None)

        expected2 = (
            data_missing[::-1]._pad_or_backfill(method=method).repeat(2).reshape(2, 2)
        )
        tm.assert_extension_array_equal(result2, expected2)

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    def test_reductions_2d_axis_none(self, data, method):
        """A reduction with ``axis=None`` must agree with the 1D reduction."""
        arr2d = data.reshape(1, -1)

        err_expected = None
        err_result = None
        try:
            expected = getattr(data, method)()
        except Exception as err:
            # if the 1D reduction is invalid, the 2D reduction should be as well
            err_expected = err
            try:
                result = getattr(arr2d, method)(axis=None)
            except Exception as err2:
                err_result = err2

        else:
            result = getattr(arr2d, method)(axis=None)

        if err_result is not None or err_expected is not None:
            # only the exception types are compared, not the messages
            assert type(err_result) == type(err_expected)
            return

        assert is_matching_na(result, expected) or result == expected

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    @pytest.mark.parametrize("min_count", [0, 1])
    def test_reductions_2d_axis0(self, data, method, min_count):
        """Check reductions along axis 0 of a single-row 2D array."""
        if min_count == 1 and method not in ["sum", "prod"]:
            pytest.skip(f"min_count not relevant for {method}")

        arr2d = data.reshape(1, -1)

        kwargs = {}
        if method in ["std", "var"]:
            # pass ddof=0 so we get all-zero std instead of all-NA std
            kwargs["ddof"] = 0
        elif method in ["prod", "sum"]:
            kwargs["min_count"] = min_count

        try:
            result = getattr(arr2d, method)(axis=0, **kwargs)
        except Exception as err:
            # the 2D reduction raised, so the 1D one must raise the same type
            try:
                getattr(data, method)()
            except Exception as err2:
                assert type(err) == type(err2)
                return
            else:
                raise AssertionError("Both reductions should raise or neither")

        def get_reduction_result_dtype(dtype):
            # windows and 32bit builds will in some cases have int32/uint32
            #  where other builds will have int64/uint64.
            if dtype.itemsize == 8:
                return dtype
            elif dtype.kind in "ib":
                return NUMPY_INT_TO_DTYPE[np.dtype(int)]
            else:
                # i.e. dtype.kind == "u"
                return NUMPY_INT_TO_DTYPE[np.dtype("uint")]

        if method in ["sum", "prod"]:
            # reducing the single row gives back ``data`` itself; integer and
            #  bool dtypes are compared after casting to the result dtype
            expected = data
            if data.dtype.kind in "iub":
                dtype = get_reduction_result_dtype(data.dtype)
                expected = data.astype(dtype)
                assert dtype == expected.dtype

            if min_count == 0:
                # with min_count=0, missing entries are expected to come back
                #  as the identity of the operation
                fill_value = 1 if method == "prod" else 0
                expected = expected.fillna(fill_value)

            tm.assert_extension_array_equal(result, expected)
        elif method == "median":
            # the median over the single row is expected to equal ``data``
            expected = data
            tm.assert_extension_array_equal(result, expected)
        elif method in ["mean", "std", "var"]:
            # integer and bool inputs are compared after casting to Float64
            if is_integer_dtype(data) or is_bool_dtype(data):
                data = data.astype("Float64")
            if method == "mean":
                tm.assert_extension_array_equal(result, data)
            else:
                # std/var (with ddof=0) are compared against ``data - data``
                tm.assert_extension_array_equal(result, data - data)

    @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
    def test_reductions_2d_axis1(self, data, method):
        """A reduction along axis 1 of one row must match the 1D scalar result."""
        arr2d = data.reshape(1, -1)

        try:
            result = getattr(arr2d, method)(axis=1)
        except Exception as err:
            # the 2D reduction raised, so the 1D one must raise the same type
            try:
                getattr(data, method)()
            except Exception as err2:
                assert type(err) == type(err2)
                return
            else:
                raise AssertionError("Both reductions should raise or neither")

        # not necessarily type/dtype-preserving, so weaker assertions
        assert result.shape == (1,)
        expected_scalar = getattr(data, method)()
        res = result[0]
        assert is_matching_na(res, expected_scalar) or res == expected_scalar
|  | ||||
|  | ||||
class NDArrayBacked2DTests(Dim2CompatTests):
    """Additional 2D tests for NDArrayBackedExtensionArray subclasses."""

    def test_copy_order(self, data):
        """``copy`` must follow numpy's semantics for the ``order`` keyword."""

        def c_contig(arr):
            # C-contiguity flag of the backing ndarray
            return arr._ndarray.flags["C_CONTIGUOUS"]

        def f_contig(arr):
            # F-contiguity flag of the backing ndarray
            return arr._ndarray.flags["F_CONTIGUOUS"]

        arr2d = data.repeat(2).reshape(-1, 2)
        assert c_contig(arr2d)

        # default copies are C-ordered, even for a strided selection
        assert c_contig(arr2d.copy())
        assert c_contig(arr2d[::2, ::2].copy())

        fortran_copy = arr2d.copy("F")
        assert not c_contig(fortran_copy)
        assert f_contig(fortran_copy)

        # "K" keeps the layout of the source
        assert c_contig(arr2d.copy("K"))

        transposed_copy = arr2d.T.copy("K")
        assert not c_contig(transposed_copy)
        assert f_contig(transposed_copy)

        # order not accepted by numpy
        msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)"
        with pytest.raises(ValueError, match=msg):
            arr2d.copy("Q")

        # neither contiguity
        arr_nc = arr2d[::2]
        assert not c_contig(arr_nc)
        assert not f_contig(arr_nc)

        # default, "C" and "K" copies come back C-ordered, "F" comes back F-ordered
        cases = (((), True), (("C",), True), (("F",), False), (("K",), True))
        for order_args, expect_c in cases:
            assert bool(c_contig(arr_nc.copy(*order_args))) is expect_c
            assert bool(f_contig(arr_nc.copy(*order_args))) is (not expect_c)
| @ -0,0 +1,123 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import ( | ||||
|     infer_dtype, | ||||
|     is_object_dtype, | ||||
|     is_string_dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BaseDtypeTests: | ||||
|     """Base class for ExtensionDtype classes""" | ||||
|  | ||||
|     def test_name(self, dtype): | ||||
|         assert isinstance(dtype.name, str) | ||||
|  | ||||
|     def test_kind(self, dtype): | ||||
|         valid = set("biufcmMOSUV") | ||||
|         assert dtype.kind in valid | ||||
|  | ||||
|     def test_is_dtype_from_name(self, dtype): | ||||
|         result = type(dtype).is_dtype(dtype.name) | ||||
|         assert result is True | ||||
|  | ||||
|     def test_is_dtype_unboxes_dtype(self, data, dtype): | ||||
|         assert dtype.is_dtype(data) is True | ||||
|  | ||||
|     def test_is_dtype_from_self(self, dtype): | ||||
|         result = type(dtype).is_dtype(dtype) | ||||
|         assert result is True | ||||
|  | ||||
|     def test_is_dtype_other_input(self, dtype): | ||||
|         assert dtype.is_dtype([1, 2, 3]) is False | ||||
|  | ||||
|     def test_is_not_string_type(self, dtype): | ||||
|         assert not is_string_dtype(dtype) | ||||
|  | ||||
|     def test_is_not_object_type(self, dtype): | ||||
|         assert not is_object_dtype(dtype) | ||||
|  | ||||
|     def test_eq_with_str(self, dtype): | ||||
|         assert dtype == dtype.name | ||||
|         assert dtype != dtype.name + "-suffix" | ||||
|  | ||||
|     def test_eq_with_numpy_object(self, dtype): | ||||
|         assert dtype != np.dtype("object") | ||||
|  | ||||
|     def test_eq_with_self(self, dtype): | ||||
|         assert dtype == dtype | ||||
|         assert dtype != object() | ||||
|  | ||||
|     def test_array_type(self, data, dtype): | ||||
|         assert dtype.construct_array_type() is type(data) | ||||
|  | ||||
|     def test_check_dtype(self, data): | ||||
|         dtype = data.dtype | ||||
|  | ||||
|         # check equivalency for using .dtypes | ||||
|         df = pd.DataFrame( | ||||
|             { | ||||
|                 "A": pd.Series(data, dtype=dtype), | ||||
|                 "B": data, | ||||
|                 "C": pd.Series(["foo"] * len(data), dtype=object), | ||||
|                 "D": 1, | ||||
|             } | ||||
|         ) | ||||
|         result = df.dtypes == str(dtype) | ||||
|         assert np.dtype("int64") != "Int64" | ||||
|  | ||||
|         expected = pd.Series([True, True, False, False], index=list("ABCD")) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = pd.Series([True, True, False, False], index=list("ABCD")) | ||||
|         result = df.dtypes.apply(str) == str(dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_hashable(self, dtype): | ||||
|         hash(dtype)  # no error | ||||
|  | ||||
|     def test_str(self, dtype): | ||||
|         assert str(dtype) == dtype.name | ||||
|  | ||||
|     def test_eq(self, dtype): | ||||
|         assert dtype == dtype.name | ||||
|         assert dtype != "anonther_type" | ||||
|  | ||||
|     def test_construct_from_string_own_name(self, dtype): | ||||
|         result = dtype.construct_from_string(dtype.name) | ||||
|         assert type(result) is type(dtype) | ||||
|  | ||||
|         # check OK as classmethod | ||||
|         result = type(dtype).construct_from_string(dtype.name) | ||||
|         assert type(result) is type(dtype) | ||||
|  | ||||
|     def test_construct_from_string_another_type_raises(self, dtype): | ||||
|         msg = f"Cannot construct a '{type(dtype).__name__}' from 'another_type'" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             type(dtype).construct_from_string("another_type") | ||||
|  | ||||
|     def test_construct_from_string_wrong_type_raises(self, dtype): | ||||
|         with pytest.raises( | ||||
|             TypeError, | ||||
|             match="'construct_from_string' expects a string, got <class 'int'>", | ||||
|         ): | ||||
|             type(dtype).construct_from_string(0) | ||||
|  | ||||
|     def test_get_common_dtype(self, dtype): | ||||
|         # in practice we will not typically call this with a 1-length list | ||||
|         # (we shortcut to just use that dtype as the common dtype), but | ||||
|         # still testing as good practice to have this working (and it is the | ||||
|         # only case we can test in general) | ||||
|         assert dtype._get_common_dtype([dtype]) == dtype | ||||
|  | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     def test_infer_dtype(self, data, data_missing, skipna): | ||||
|         # only testing that this works without raising an error | ||||
|         res = infer_dtype(data, skipna=skipna) | ||||
|         assert isinstance(res, str) | ||||
|         res = infer_dtype(data_missing, skipna=skipna) | ||||
|         assert isinstance(res, str) | ||||
| @ -0,0 +1,469 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class BaseGetitemTests: | ||||
|     """Tests for ExtensionArray.__getitem__.""" | ||||
|  | ||||
|     def test_iloc_series(self, data): | ||||
|         ser = pd.Series(data) | ||||
|         result = ser.iloc[:4] | ||||
|         expected = pd.Series(data[:4]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser.iloc[[0, 1, 2, 3]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_iloc_frame(self, data): | ||||
|         df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")}) | ||||
|         expected = pd.DataFrame({"A": data[:4]}) | ||||
|  | ||||
|         # slice -> frame | ||||
|         result = df.iloc[:4, [0]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # sequence -> frame | ||||
|         result = df.iloc[[0, 1, 2, 3], [0]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = pd.Series(data[:4], name="A") | ||||
|  | ||||
|         # slice -> series | ||||
|         result = df.iloc[:4, 0] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # sequence -> series | ||||
|         result = df.iloc[:4, 0] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # GH#32959 slice columns with step | ||||
|         result = df.iloc[:, ::2] | ||||
|         tm.assert_frame_equal(result, df[["A"]]) | ||||
|         result = df[["B", "A"]].iloc[:, ::2] | ||||
|         tm.assert_frame_equal(result, df[["B"]]) | ||||
|  | ||||
|     def test_iloc_frame_single_block(self, data): | ||||
|         # GH#32959 null slice along index, slice along columns with single-block | ||||
|         df = pd.DataFrame({"A": data}) | ||||
|  | ||||
|         result = df.iloc[:, :] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         result = df.iloc[:, :1] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         result = df.iloc[:, :2] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         result = df.iloc[:, ::2] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|         result = df.iloc[:, 1:2] | ||||
|         tm.assert_frame_equal(result, df.iloc[:, :0]) | ||||
|  | ||||
|         result = df.iloc[:, -1:] | ||||
|         tm.assert_frame_equal(result, df) | ||||
|  | ||||
|     def test_loc_series(self, data): | ||||
|         ser = pd.Series(data) | ||||
|         result = ser.loc[:3] | ||||
|         expected = pd.Series(data[:4]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser.loc[[0, 1, 2, 3]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_loc_frame(self, data): | ||||
|         df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")}) | ||||
|         expected = pd.DataFrame({"A": data[:4]}) | ||||
|  | ||||
|         # slice -> frame | ||||
|         result = df.loc[:3, ["A"]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # sequence -> frame | ||||
|         result = df.loc[[0, 1, 2, 3], ["A"]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         expected = pd.Series(data[:4], name="A") | ||||
|  | ||||
|         # slice -> series | ||||
|         result = df.loc[:3, "A"] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # sequence -> series | ||||
|         result = df.loc[:3, "A"] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_loc_iloc_frame_single_dtype(self, data): | ||||
|         # GH#27110 bug in ExtensionBlock.iget caused df.iloc[n] to incorrectly | ||||
|         #  return a scalar | ||||
|         df = pd.DataFrame({"A": data}) | ||||
|         expected = pd.Series([data[2]], index=["A"], name=2, dtype=data.dtype) | ||||
|  | ||||
|         result = df.loc[2] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = pd.Series( | ||||
|             [data[-1]], index=["A"], name=len(data) - 1, dtype=data.dtype | ||||
|         ) | ||||
|         result = df.iloc[-1] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_getitem_scalar(self, data): | ||||
|         result = data[0] | ||||
|         assert isinstance(result, data.dtype.type) | ||||
|  | ||||
|         result = pd.Series(data)[0] | ||||
|         assert isinstance(result, data.dtype.type) | ||||
|  | ||||
|     def test_getitem_invalid(self, data): | ||||
|         # TODO: box over scalar, [scalar], (scalar,)? | ||||
|  | ||||
|         msg = ( | ||||
|             r"only integers, slices \(`:`\), ellipsis \(`...`\), numpy.newaxis " | ||||
|             r"\(`None`\) and integer or boolean arrays are valid indices" | ||||
|         ) | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             data["foo"] | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             data[2.5] | ||||
|  | ||||
|         ub = len(data) | ||||
|         msg = "|".join( | ||||
|             [ | ||||
|                 "list index out of range",  # json | ||||
|                 "index out of bounds",  # pyarrow | ||||
|                 "Out of bounds access",  # Sparse | ||||
|                 f"loc must be an integer between -{ub} and {ub}",  # Sparse | ||||
|                 f"index {ub+1} is out of bounds for axis 0 with size {ub}", | ||||
|                 f"index -{ub+1} is out of bounds for axis 0 with size {ub}", | ||||
|             ] | ||||
|         ) | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             data[ub + 1] | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             data[-ub - 1] | ||||
|  | ||||
|     def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): | ||||
|         result = data_missing[0] | ||||
|         assert na_cmp(result, na_value) | ||||
|  | ||||
|     def test_getitem_empty(self, data): | ||||
|         # Indexing with empty list | ||||
|         result = data[[]] | ||||
|         assert len(result) == 0 | ||||
|         assert isinstance(result, type(data)) | ||||
|  | ||||
|         expected = data[np.array([], dtype="int64")] | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_getitem_mask(self, data): | ||||
|         # Empty mask, raw array | ||||
|         mask = np.zeros(len(data), dtype=bool) | ||||
|         result = data[mask] | ||||
|         assert len(result) == 0 | ||||
|         assert isinstance(result, type(data)) | ||||
|  | ||||
|         # Empty mask, in series | ||||
|         mask = np.zeros(len(data), dtype=bool) | ||||
|         result = pd.Series(data)[mask] | ||||
|         assert len(result) == 0 | ||||
|         assert result.dtype == data.dtype | ||||
|  | ||||
|         # non-empty mask, raw array | ||||
|         mask[0] = True | ||||
|         result = data[mask] | ||||
|         assert len(result) == 1 | ||||
|         assert isinstance(result, type(data)) | ||||
|  | ||||
|         # non-empty mask, in series | ||||
|         result = pd.Series(data)[mask] | ||||
|         assert len(result) == 1 | ||||
|         assert result.dtype == data.dtype | ||||
|  | ||||
|     def test_getitem_mask_raises(self, data): | ||||
|         mask = np.array([True, False]) | ||||
|         msg = f"Boolean index has wrong length: 2 instead of {len(data)}" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             data[mask] | ||||
|  | ||||
|         mask = pd.array(mask, dtype="boolean") | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             data[mask] | ||||
|  | ||||
|     def test_getitem_boolean_array_mask(self, data): | ||||
|         mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") | ||||
|         result = data[mask] | ||||
|         assert len(result) == 0 | ||||
|         assert isinstance(result, type(data)) | ||||
|  | ||||
|         result = pd.Series(data)[mask] | ||||
|         assert len(result) == 0 | ||||
|         assert result.dtype == data.dtype | ||||
|  | ||||
|         mask[:5] = True | ||||
|         expected = data.take([0, 1, 2, 3, 4]) | ||||
|         result = data[mask] | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         expected = pd.Series(expected) | ||||
|         result = pd.Series(data)[mask] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_getitem_boolean_na_treated_as_false(self, data): | ||||
|         # https://github.com/pandas-dev/pandas/issues/31503 | ||||
|         mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") | ||||
|         mask[:2] = pd.NA | ||||
|         mask[2:4] = True | ||||
|  | ||||
|         result = data[mask] | ||||
|         expected = data[mask.fillna(False)] | ||||
|  | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         s = pd.Series(data) | ||||
|  | ||||
|         result = s[mask] | ||||
|         expected = s[mask.fillna(False)] | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx", | ||||
|         [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], | ||||
|         ids=["list", "integer-array", "numpy-array"], | ||||
|     ) | ||||
|     def test_getitem_integer_array(self, data, idx): | ||||
|         result = data[idx] | ||||
|         assert len(result) == 3 | ||||
|         assert isinstance(result, type(data)) | ||||
|         expected = data.take([0, 1, 2]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         expected = pd.Series(expected) | ||||
|         result = pd.Series(data)[idx] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx", | ||||
|         [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], | ||||
|         ids=["list", "integer-array"], | ||||
|     ) | ||||
|     def test_getitem_integer_with_missing_raises(self, data, idx): | ||||
|         msg = "Cannot index with an integer indexer containing NA values" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             data[idx] | ||||
|  | ||||
|     @pytest.mark.xfail( | ||||
|         reason="Tries label-based and raises KeyError; " | ||||
|         "in some cases raises when calling np.asarray" | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx", | ||||
|         [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], | ||||
|         ids=["list", "integer-array"], | ||||
|     ) | ||||
|     def test_getitem_series_integer_with_missing_raises(self, data, idx): | ||||
|         msg = "Cannot index with an integer indexer containing NA values" | ||||
|         # TODO: this raises KeyError about labels not found (it tries label-based) | ||||
|  | ||||
|         ser = pd.Series(data, index=[chr(100 + i) for i in range(len(data))]) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser[idx] | ||||
|  | ||||
|     def test_getitem_slice(self, data): | ||||
|         # getitem[slice] should return an array | ||||
|         result = data[slice(0)]  # empty | ||||
|         assert isinstance(result, type(data)) | ||||
|  | ||||
|         result = data[slice(1)]  # scalar | ||||
|         assert isinstance(result, type(data)) | ||||
|  | ||||
|     def test_getitem_ellipsis_and_slice(self, data): | ||||
|         # GH#40353 this is called from slice_block_rows | ||||
|         result = data[..., :] | ||||
|         tm.assert_extension_array_equal(result, data) | ||||
|  | ||||
|         result = data[:, ...] | ||||
|         tm.assert_extension_array_equal(result, data) | ||||
|  | ||||
|         result = data[..., :3] | ||||
|         tm.assert_extension_array_equal(result, data[:3]) | ||||
|  | ||||
|         result = data[:3, ...] | ||||
|         tm.assert_extension_array_equal(result, data[:3]) | ||||
|  | ||||
|         result = data[..., ::2] | ||||
|         tm.assert_extension_array_equal(result, data[::2]) | ||||
|  | ||||
|         result = data[::2, ...] | ||||
|         tm.assert_extension_array_equal(result, data[::2]) | ||||
|  | ||||
|     def test_get(self, data): | ||||
|         # GH 20882 | ||||
|         s = pd.Series(data, index=[2 * i for i in range(len(data))]) | ||||
|         assert s.get(4) == s.iloc[2] | ||||
|  | ||||
|         result = s.get([4, 6]) | ||||
|         expected = s.iloc[[2, 3]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.get(slice(2)) | ||||
|         expected = s.iloc[[0, 1]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         assert s.get(-1) is None | ||||
|         assert s.get(s.index.max() + 1) is None | ||||
|  | ||||
|         s = pd.Series(data[:6], index=list("abcdef")) | ||||
|         assert s.get("c") == s.iloc[2] | ||||
|  | ||||
|         result = s.get(slice("b", "d")) | ||||
|         expected = s.iloc[[1, 2, 3]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.get("Z") | ||||
|         assert result is None | ||||
|  | ||||
|         msg = "Series.__getitem__ treating keys as positions is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             assert s.get(4) == s.iloc[4] | ||||
|             assert s.get(-1) == s.iloc[-1] | ||||
|             assert s.get(len(s)) is None | ||||
|  | ||||
|         # GH 21257 | ||||
|         s = pd.Series(data) | ||||
|         with tm.assert_produces_warning(None): | ||||
|             # GH#45324 make sure we aren't giving a spurious FutureWarning | ||||
|             s2 = s[::2] | ||||
|         assert s2.get(1) is None | ||||
|  | ||||
|     def test_take_sequence(self, data): | ||||
|         result = pd.Series(data)[[0, 1, 3]] | ||||
|         assert result.iloc[0] == data[0] | ||||
|         assert result.iloc[1] == data[1] | ||||
|         assert result.iloc[2] == data[3] | ||||
|  | ||||
|     def test_take(self, data, na_value, na_cmp): | ||||
|         result = data.take([0, -1]) | ||||
|         assert result.dtype == data.dtype | ||||
|         assert result[0] == data[0] | ||||
|         assert result[1] == data[-1] | ||||
|  | ||||
|         result = data.take([0, -1], allow_fill=True, fill_value=na_value) | ||||
|         assert result[0] == data[0] | ||||
|         assert na_cmp(result[1], na_value) | ||||
|  | ||||
|         with pytest.raises(IndexError, match="out of bounds"): | ||||
|             data.take([len(data) + 1]) | ||||
|  | ||||
|     def test_take_empty(self, data, na_value, na_cmp): | ||||
|         empty = data[:0] | ||||
|  | ||||
|         result = empty.take([-1], allow_fill=True) | ||||
|         assert na_cmp(result[0], na_value) | ||||
|  | ||||
|         msg = "cannot do a non-empty take from an empty axes|out of bounds" | ||||
|  | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             empty.take([-1]) | ||||
|  | ||||
|         with pytest.raises(IndexError, match="cannot do a non-empty take"): | ||||
|             empty.take([0, 1]) | ||||
|  | ||||
|     def test_take_negative(self, data): | ||||
|         # https://github.com/pandas-dev/pandas/issues/20640 | ||||
|         n = len(data) | ||||
|         result = data.take([0, -n, n - 1, -1]) | ||||
|         expected = data.take([0, 0, n - 1, n - 1]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_take_non_na_fill_value(self, data_missing): | ||||
|         fill_value = data_missing[1]  # valid | ||||
|         na = data_missing[0] | ||||
|  | ||||
|         arr = data_missing._from_sequence( | ||||
|             [na, fill_value, na], dtype=data_missing.dtype | ||||
|         ) | ||||
|         result = arr.take([-1, 1], fill_value=fill_value, allow_fill=True) | ||||
|         expected = arr.take([1, 1]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_take_pandas_style_negative_raises(self, data, na_value): | ||||
|         with pytest.raises(ValueError, match=""): | ||||
|             data.take([0, -2], fill_value=na_value, allow_fill=True) | ||||
|  | ||||
|     @pytest.mark.parametrize("allow_fill", [True, False]) | ||||
|     def test_take_out_of_bounds_raises(self, data, allow_fill): | ||||
|         arr = data[:3] | ||||
|  | ||||
|         with pytest.raises(IndexError, match="out of bounds|out-of-bounds"): | ||||
|             arr.take(np.asarray([0, 3]), allow_fill=allow_fill) | ||||
|  | ||||
|     def test_take_series(self, data): | ||||
|         s = pd.Series(data) | ||||
|         result = s.take([0, -1]) | ||||
|         expected = pd.Series( | ||||
|             data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype), | ||||
|             index=[0, len(data) - 1], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_reindex(self, data, na_value): | ||||
|         s = pd.Series(data) | ||||
|         result = s.reindex([0, 1, 3]) | ||||
|         expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         n = len(data) | ||||
|         result = s.reindex([-1, 0, n]) | ||||
|         expected = pd.Series( | ||||
|             data._from_sequence([na_value, data[0], na_value], dtype=s.dtype), | ||||
|             index=[-1, 0, n], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.reindex([n, n + 1]) | ||||
|         expected = pd.Series( | ||||
|             data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1] | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_reindex_non_na_fill_value(self, data_missing): | ||||
|         valid = data_missing[1] | ||||
|         na = data_missing[0] | ||||
|  | ||||
|         arr = data_missing._from_sequence([na, valid], dtype=data_missing.dtype) | ||||
|         ser = pd.Series(arr) | ||||
|         result = ser.reindex([0, 1, 2], fill_value=valid) | ||||
|         expected = pd.Series( | ||||
|             data_missing._from_sequence([na, valid, valid], dtype=data_missing.dtype) | ||||
|         ) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_loc_len1(self, data): | ||||
|         # see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim | ||||
|         df = pd.DataFrame({"A": data}) | ||||
|         res = df.loc[[0], "A"] | ||||
|         assert res.ndim == 1 | ||||
|         assert res._mgr.arrays[0].ndim == 1 | ||||
|         if hasattr(res._mgr, "blocks"): | ||||
|             assert res._mgr._block.ndim == 1 | ||||
|  | ||||
|     def test_item(self, data): | ||||
|         # https://github.com/pandas-dev/pandas/pull/30175 | ||||
|         s = pd.Series(data) | ||||
|         result = s[:1].item() | ||||
|         assert result == data[0] | ||||
|  | ||||
|         msg = "can only convert an array of size 1 to a Python scalar" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s[:0].item() | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.item() | ||||
| @ -0,0 +1,174 @@ | ||||
| import re | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import ( | ||||
|     is_bool_dtype, | ||||
|     is_numeric_dtype, | ||||
|     is_object_dtype, | ||||
|     is_string_dtype, | ||||
| ) | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings( | ||||
|     "ignore:The default of observed=False is deprecated:FutureWarning" | ||||
| ) | ||||
| class BaseGroupbyTests: | ||||
|     """Groupby-specific tests.""" | ||||
|  | ||||
|     def test_grouping_grouper(self, data_for_grouping): | ||||
|         df = pd.DataFrame( | ||||
|             { | ||||
|                 "A": pd.Series( | ||||
|                     ["B", "B", None, None, "A", "A", "B", "C"], dtype=object | ||||
|                 ), | ||||
|                 "B": data_for_grouping, | ||||
|             } | ||||
|         ) | ||||
|         gr1 = df.groupby("A")._grouper.groupings[0] | ||||
|         gr2 = df.groupby("B")._grouper.groupings[0] | ||||
|  | ||||
|         tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values) | ||||
|         tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping) | ||||
|  | ||||
|     @pytest.mark.parametrize("as_index", [True, False]) | ||||
|     def test_groupby_extension_agg(self, as_index, data_for_grouping): | ||||
|         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) | ||||
|  | ||||
|         is_bool = data_for_grouping.dtype._is_boolean | ||||
|         if is_bool: | ||||
|             # only 2 unique values, and the final entry has c==b | ||||
|             #  (see data_for_grouping docstring) | ||||
|             df = df.iloc[:-1] | ||||
|  | ||||
|         result = df.groupby("B", as_index=as_index).A.mean() | ||||
|         _, uniques = pd.factorize(data_for_grouping, sort=True) | ||||
|  | ||||
|         exp_vals = [3.0, 1.0, 4.0] | ||||
|         if is_bool: | ||||
|             exp_vals = exp_vals[:-1] | ||||
|         if as_index: | ||||
|             index = pd.Index(uniques, name="B") | ||||
|             expected = pd.Series(exp_vals, index=index, name="A") | ||||
|             tm.assert_series_equal(result, expected) | ||||
|         else: | ||||
|             expected = pd.DataFrame({"B": uniques, "A": exp_vals}) | ||||
|             tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_groupby_agg_extension(self, data_for_grouping): | ||||
|         # GH#38980 groupby agg on extension type fails for non-numeric types | ||||
|         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) | ||||
|  | ||||
|         expected = df.iloc[[0, 2, 4, 7]] | ||||
|         expected = expected.set_index("A") | ||||
|  | ||||
|         result = df.groupby("A").agg({"B": "first"}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.groupby("A").agg("first") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.groupby("A").first() | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_groupby_extension_no_sort(self, data_for_grouping): | ||||
|         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) | ||||
|  | ||||
|         is_bool = data_for_grouping.dtype._is_boolean | ||||
|         if is_bool: | ||||
|             # only 2 unique values, and the final entry has c==b | ||||
|             #  (see data_for_grouping docstring) | ||||
|             df = df.iloc[:-1] | ||||
|  | ||||
|         result = df.groupby("B", sort=False).A.mean() | ||||
|         _, index = pd.factorize(data_for_grouping, sort=False) | ||||
|  | ||||
|         index = pd.Index(index, name="B") | ||||
|         exp_vals = [1.0, 3.0, 4.0] | ||||
|         if is_bool: | ||||
|             exp_vals = exp_vals[:-1] | ||||
|         expected = pd.Series(exp_vals, index=index, name="A") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_groupby_extension_transform(self, data_for_grouping): | ||||
|         is_bool = data_for_grouping.dtype._is_boolean | ||||
|  | ||||
|         valid = data_for_grouping[~data_for_grouping.isna()] | ||||
|         df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid}) | ||||
|         is_bool = data_for_grouping.dtype._is_boolean | ||||
|         if is_bool: | ||||
|             # only 2 unique values, and the final entry has c==b | ||||
|             #  (see data_for_grouping docstring) | ||||
|             df = df.iloc[:-1] | ||||
|  | ||||
|         result = df.groupby("B").A.transform(len) | ||||
|         expected = pd.Series([3, 3, 2, 2, 3, 1], name="A") | ||||
|         if is_bool: | ||||
|             expected = expected[:-1] | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): | ||||
|         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) | ||||
|         msg = "DataFrameGroupBy.apply operated on the grouping columns" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             df.groupby("B", group_keys=False, observed=False).apply(groupby_apply_op) | ||||
|         df.groupby("B", group_keys=False, observed=False).A.apply(groupby_apply_op) | ||||
|         msg = "DataFrameGroupBy.apply operated on the grouping columns" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             df.groupby("A", group_keys=False, observed=False).apply(groupby_apply_op) | ||||
|         df.groupby("A", group_keys=False, observed=False).B.apply(groupby_apply_op) | ||||
|  | ||||
|     def test_groupby_apply_identity(self, data_for_grouping): | ||||
|         """Applying ``lambda x: x.array`` per group must return each group's array.""" | ||||
|         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) | ||||
|         result = df.groupby("A").B.apply(lambda x: x.array) | ||||
|         # one array per key of A, taken at the row positions where A has that key | ||||
|         expected = pd.Series( | ||||
|             [ | ||||
|                 df.B.iloc[[0, 1, 6]].array, | ||||
|                 df.B.iloc[[2, 3]].array, | ||||
|                 df.B.iloc[[4, 5]].array, | ||||
|                 df.B.iloc[[7]].array, | ||||
|             ], | ||||
|             index=pd.Index([1, 2, 3, 4], name="A"), | ||||
|             name="B", | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_in_numeric_groupby(self, data_for_grouping): | ||||
|         """Check which columns survive ``groupby("A").sum()``. | ||||
|  | ||||
|         For the dtypes listed in the condition below, column B is expected in | ||||
|         the result next to C. For every other dtype ``sum()`` must raise | ||||
|         ``TypeError`` and only C remains when ``numeric_only=True`` is passed. | ||||
|         """ | ||||
|         df = pd.DataFrame( | ||||
|             { | ||||
|                 "A": [1, 1, 2, 2, 3, 3, 1, 4], | ||||
|                 "B": data_for_grouping, | ||||
|                 "C": [1, 1, 1, 1, 1, 1, 1, 1], | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         dtype = data_for_grouping.dtype | ||||
|         if ( | ||||
|             is_numeric_dtype(dtype) | ||||
|             or is_bool_dtype(dtype) | ||||
|             or dtype.name == "decimal" | ||||
|             or is_string_dtype(dtype) | ||||
|             or is_object_dtype(dtype) | ||||
|             or dtype.kind == "m"  # in particular duration[*][pyarrow] | ||||
|         ): | ||||
|             expected = pd.Index(["B", "C"]) | ||||
|             result = df.groupby("A").sum().columns | ||||
|         else: | ||||
|             expected = pd.Index(["C"]) | ||||
|  | ||||
|             # either of the two messages is accepted for the TypeError | ||||
|             msg = "|".join( | ||||
|                 [ | ||||
|                     # period/datetime | ||||
|                     "does not support sum operations", | ||||
|                     # all others | ||||
|                     re.escape(f"agg function failed [how->sum,dtype->{dtype}"), | ||||
|                 ] | ||||
|             ) | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 df.groupby("A").sum() | ||||
|             result = df.groupby("A").sum(numeric_only=True).columns | ||||
|         tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,19 @@ | ||||
| """ | ||||
| Tests for Indexes backed by arbitrary ExtensionArrays. | ||||
| """ | ||||
| import pandas as pd | ||||
|  | ||||
|  | ||||
| class BaseIndexTests: | ||||
|     """Tests for Index object backed by an ExtensionArray""" | ||||
|  | ||||
|     def test_index_from_array(self, data): | ||||
|         """An Index built directly from the array must keep the array's dtype.""" | ||||
|         idx = pd.Index(data) | ||||
|         assert data.dtype == idx.dtype | ||||
|  | ||||
|     def test_index_from_listlike_with_dtype(self, data): | ||||
|         """An explicit ``dtype`` must be honored for array and plain-list input.""" | ||||
|         idx = pd.Index(data, dtype=data.dtype) | ||||
|         assert idx.dtype == data.dtype | ||||
|  | ||||
|         # same check with the elements passed as a plain Python list | ||||
|         idx = pd.Index(list(data), dtype=data.dtype) | ||||
|         assert idx.dtype == data.dtype | ||||
| @ -0,0 +1,172 @@ | ||||
| import warnings | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat.numpy import np_version_gt2 | ||||
|  | ||||
| from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike | ||||
| from pandas.core.dtypes.common import is_extension_array_dtype | ||||
| from pandas.core.dtypes.dtypes import ExtensionDtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class BaseInterfaceTests: | ||||
|     """Tests that the basic interface is satisfied.""" | ||||
|  | ||||
|     # ------------------------------------------------------------------------ | ||||
|     # Interface | ||||
|     # ------------------------------------------------------------------------ | ||||
|  | ||||
|     def test_len(self, data): | ||||
|         """The ``data`` fixture is expected to hold exactly 100 elements.""" | ||||
|         assert len(data) == 100 | ||||
|  | ||||
|     def test_size(self, data): | ||||
|         """``size`` must report the same 100 elements.""" | ||||
|         assert data.size == 100 | ||||
|  | ||||
|     def test_ndim(self, data): | ||||
|         """The ``data`` fixture must be one-dimensional.""" | ||||
|         assert data.ndim == 1 | ||||
|  | ||||
|     def test_can_hold_na_valid(self, data): | ||||
|         """``_can_hold_na`` must be the literal ``True``, not merely truthy.""" | ||||
|         # GH-20761 | ||||
|         assert data._can_hold_na is True | ||||
|  | ||||
|     def test_contains(self, data, data_missing): | ||||
|         """Check ``in`` for regular values, the dtype's NA value and other nulls.""" | ||||
|         # GH-37867 | ||||
|         # Tests for membership checks. Membership checks for nan-likes is tricky and | ||||
|         # the settled on rule is: `nan_like in arr` is True if nan_like is | ||||
|         # arr.dtype.na_value and arr.isna().any() is True. Else the check returns False. | ||||
|  | ||||
|         na_value = data.dtype.na_value | ||||
|         # ensure data without missing values | ||||
|         data = data[~data.isna()] | ||||
|  | ||||
|         # first elements are non-missing | ||||
|         assert data[0] in data | ||||
|         assert data_missing[0] in data_missing | ||||
|  | ||||
|         # check the presence of na_value | ||||
|         assert na_value in data_missing | ||||
|         assert na_value not in data | ||||
|  | ||||
|         # the data can never contain other nan-likes than na_value | ||||
|         for na_value_obj in tm.NULL_OBJECTS: | ||||
|             if na_value_obj is na_value or type(na_value_obj) == type(na_value): | ||||
|                 # type check for e.g. two instances of Decimal("NAN") | ||||
|                 continue | ||||
|             assert na_value_obj not in data | ||||
|             assert na_value_obj not in data_missing | ||||
|  | ||||
|     def test_memory_usage(self, data): | ||||
|         """``Series.memory_usage(index=False)`` must equal ``Series.nbytes``.""" | ||||
|         s = pd.Series(data) | ||||
|         result = s.memory_usage(index=False) | ||||
|         assert result == s.nbytes | ||||
|  | ||||
|     def test_array_interface(self, data): | ||||
|         """``np.array(data)`` must preserve elements, also with ``dtype=object``.""" | ||||
|         result = np.array(data) | ||||
|         assert result[0] == data[0] | ||||
|  | ||||
|         result = np.array(data, dtype=object) | ||||
|         expected = np.array(list(data), dtype=object) | ||||
|         if expected.ndim > 1: | ||||
|             # nested data, explicitly construct as 1D | ||||
|             expected = construct_1d_object_array_from_listlike(list(data)) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_array_interface_copy(self, data): | ||||
|         """Check the ``copy`` keyword of ``np.array(data, ...)``. | ||||
|  | ||||
|         ``copy=True`` must always yield independent memory. ``copy=False`` is | ||||
|         only exercised when ``np_version_gt2`` is true: each call may emit at | ||||
|         most one warning carrying the NumPy 2.0 message, and if no call warned | ||||
|         the two results must share memory. | ||||
|         """ | ||||
|         result_copy1 = np.array(data, copy=True) | ||||
|         result_copy2 = np.array(data, copy=True) | ||||
|         assert not np.may_share_memory(result_copy1, result_copy2) | ||||
|  | ||||
|         if not np_version_gt2: | ||||
|             # copy=False semantics are only supported in NumPy>=2. | ||||
|             return | ||||
|  | ||||
|         warning_raised = False | ||||
|         msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed" | ||||
|         with warnings.catch_warnings(record=True) as w: | ||||
|             warnings.simplefilter("always") | ||||
|             result_nocopy1 = np.array(data, copy=False) | ||||
|             assert len(w) <= 1 | ||||
|             if len(w): | ||||
|                 warning_raised = True | ||||
|                 assert msg in str(w[0].message) | ||||
|  | ||||
|         # second, independent copy=False conversion checked the same way | ||||
|         with warnings.catch_warnings(record=True) as w: | ||||
|             warnings.simplefilter("always") | ||||
|             result_nocopy2 = np.array(data, copy=False) | ||||
|             assert len(w) <= 1 | ||||
|             if len(w): | ||||
|                 warning_raised = True | ||||
|                 assert msg in str(w[0].message) | ||||
|  | ||||
|         if not warning_raised: | ||||
|             # If copy=False was given and did not raise, these must share the same data | ||||
|             assert np.may_share_memory(result_nocopy1, result_nocopy2) | ||||
|  | ||||
|     def test_is_extension_array_dtype(self, data): | ||||
|         """The array, its dtype and a wrapping Series must be seen as extension.""" | ||||
|         assert is_extension_array_dtype(data) | ||||
|         assert is_extension_array_dtype(data.dtype) | ||||
|         assert is_extension_array_dtype(pd.Series(data)) | ||||
|         assert isinstance(data.dtype, ExtensionDtype) | ||||
|  | ||||
|     def test_no_values_attribute(self, data): | ||||
|         """The array must expose neither ``values`` nor ``_values``.""" | ||||
|         # GH-20735: EA's with .values attribute give problems with internal | ||||
|         # code, disallowing this for now until solved | ||||
|         assert not hasattr(data, "values") | ||||
|         assert not hasattr(data, "_values") | ||||
|  | ||||
|     def test_is_numeric_honored(self, data): | ||||
|         """The first block's ``is_numeric`` must mirror ``dtype._is_numeric``.""" | ||||
|         result = pd.Series(data) | ||||
|         # only checked when the Series manager exposes a ``blocks`` attribute | ||||
|         if hasattr(result._mgr, "blocks"): | ||||
|             assert result._mgr.blocks[0].is_numeric is data.dtype._is_numeric | ||||
|  | ||||
|     def test_isna_extension_array(self, data_missing): | ||||
|         """If ``isna`` returns an extension array, it must support any/all.""" | ||||
|         # If your `isna` returns an ExtensionArray, you must also implement | ||||
|         # _reduce. At the *very* least, you must implement any and all | ||||
|         na = data_missing.isna() | ||||
|         if is_extension_array_dtype(na): | ||||
|             assert na._reduce("any") | ||||
|             assert na.any() | ||||
|  | ||||
|             assert not na._reduce("all") | ||||
|             assert not na.all() | ||||
|  | ||||
|             assert na.dtype._is_boolean | ||||
|  | ||||
|     def test_copy(self, data): | ||||
|         """Mutating the original after ``copy()`` must not affect the copy.""" | ||||
|         # GH#27083 removing deep keyword from EA.copy | ||||
|         assert data[0] != data[1] | ||||
|         result = data.copy() | ||||
|  | ||||
|         if data.dtype._is_immutable: | ||||
|             pytest.skip(f"test_copy assumes mutability and {data.dtype} is immutable") | ||||
|  | ||||
|         data[1] = data[0] | ||||
|         assert result[1] != result[0] | ||||
|  | ||||
|     def test_view(self, data): | ||||
|         """``view()`` must return a new object whose writes reach the original.""" | ||||
|         # view with no dtype should return a shallow copy, *not* the same | ||||
|         #  object | ||||
|         assert data[1] != data[0] | ||||
|  | ||||
|         result = data.view() | ||||
|         assert result is not data | ||||
|         assert type(result) == type(data) | ||||
|  | ||||
|         if data.dtype._is_immutable: | ||||
|             pytest.skip(f"test_view assumes mutability and {data.dtype} is immutable") | ||||
|  | ||||
|         # a write through the view must be visible in the original array | ||||
|         result[1] = result[0] | ||||
|         assert data[1] == data[0] | ||||
|  | ||||
|         # check specifically that the `dtype` kwarg is accepted | ||||
|         data.view(dtype=None) | ||||
|  | ||||
|     def test_tolist(self, data): | ||||
|         """``tolist()`` must return a ``list`` equal to ``list(data)``.""" | ||||
|         result = data.tolist() | ||||
|         expected = list(data) | ||||
|         assert isinstance(result, list) | ||||
|         assert result == expected | ||||
| @ -0,0 +1,39 @@ | ||||
| from io import StringIO | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import ExtensionArray | ||||
|  | ||||
|  | ||||
| class BaseParsingTests: | ||||
|     """CSV round-trip test for extension arrays.""" | ||||
|  | ||||
|     @pytest.mark.parametrize("engine", ["c", "python"]) | ||||
|     def test_EA_types(self, engine, data, request): | ||||
|         """Write the data with ``to_csv`` and read it back with the same dtype. | ||||
|  | ||||
|         The test is marked xfail (raising ``NotImplementedError``) when the | ||||
|         array class has not overridden ``_from_sequence_of_strings``, except | ||||
|         for the categorical and ``NumpyEADtype`` cases handled first. | ||||
|         """ | ||||
|         if isinstance(data.dtype, pd.CategoricalDtype): | ||||
|             # in parsers.pyx _convert_with_dtype there is special-casing for | ||||
|             #  Categorical that pre-empts _from_sequence_of_strings | ||||
|             pass | ||||
|         elif isinstance(data.dtype, pd.core.dtypes.dtypes.NumpyEADtype): | ||||
|             # These get unwrapped internally so are treated as numpy dtypes | ||||
|             #  in the parsers.pyx code | ||||
|             pass | ||||
|         elif ( | ||||
|             type(data)._from_sequence_of_strings.__func__ | ||||
|             is ExtensionArray._from_sequence_of_strings.__func__ | ||||
|         ): | ||||
|             # i.e. the EA hasn't overridden _from_sequence_of_strings | ||||
|             mark = pytest.mark.xfail( | ||||
|                 reason="_from_sequence_of_strings not implemented", | ||||
|                 raises=NotImplementedError, | ||||
|             ) | ||||
|             request.node.add_marker(mark) | ||||
|  | ||||
|         # the dtype is passed by its string form both when building and when reading | ||||
|         df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))}) | ||||
|         csv_output = df.to_csv(index=False, na_rep=np.nan) | ||||
|         result = pd.read_csv( | ||||
|             StringIO(csv_output), dtype={"with_dtype": str(data.dtype)}, engine=engine | ||||
|         ) | ||||
|         expected = df | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,720 @@ | ||||
| import inspect | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._typing import Dtype | ||||
|  | ||||
| from pandas.core.dtypes.common import is_bool_dtype | ||||
| from pandas.core.dtypes.dtypes import NumpyEADtype | ||||
| from pandas.core.dtypes.missing import na_value_for_dtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.sorting import nargsort | ||||
|  | ||||
|  | ||||
| class BaseMethodsTests: | ||||
|     """Various Series and DataFrame methods.""" | ||||
|  | ||||
|     def test_hash_pandas_object(self, data): | ||||
|         """Check dtype and shape of the ``_hash_pandas_object`` result.""" | ||||
|         # _hash_pandas_object should return a uint64 ndarray of the same length | ||||
|         # as the data | ||||
|         from pandas.core.util.hashing import _default_hash_key | ||||
|  | ||||
|         res = data._hash_pandas_object( | ||||
|             encoding="utf-8", hash_key=_default_hash_key, categorize=False | ||||
|         ) | ||||
|         assert res.dtype == np.uint64 | ||||
|         assert res.shape == data.shape | ||||
|  | ||||
|     def test_value_counts_default_dropna(self, data): | ||||
|         """``value_counts`` must declare ``dropna`` with a default of ``True``.""" | ||||
|         # make sure we have consistent default dropna kwarg | ||||
|         if not hasattr(data, "value_counts"): | ||||
|             pytest.skip(f"value_counts is not implemented for {type(data)}") | ||||
|         # inspect the signature instead of calling the method | ||||
|         sig = inspect.signature(data.value_counts) | ||||
|         kwarg = sig.parameters["dropna"] | ||||
|         assert kwarg.default is True | ||||
|  | ||||
|     @pytest.mark.parametrize("dropna", [True, False]) | ||||
|     def test_value_counts(self, all_data, dropna): | ||||
|         all_data = all_data[:10] | ||||
|         if dropna: | ||||
|             other = all_data[~all_data.isna()] | ||||
|         else: | ||||
|             other = all_data | ||||
|  | ||||
|         result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() | ||||
|         expected = pd.Series(other).value_counts(dropna=dropna).sort_index() | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_value_counts_with_normalize(self, data): | ||||
|         """Check ``value_counts(normalize=True)`` on the unique head of ``data``. | ||||
|  | ||||
|         Every non-missing unique value is expected to have the same proportion, | ||||
|         ``1 / len(values)``; the expected result dtype depends on the array dtype. | ||||
|         """ | ||||
|         # GH 33172 | ||||
|         data = data[:10].unique() | ||||
|         values = np.array(data[~data.isna()]) | ||||
|         ser = pd.Series(data, dtype=data.dtype) | ||||
|  | ||||
|         result = ser.value_counts(normalize=True).sort_index() | ||||
|  | ||||
|         if not isinstance(data, pd.Categorical): | ||||
|             expected = pd.Series( | ||||
|                 [1 / len(values)] * len(values), index=result.index, name="proportion" | ||||
|             ) | ||||
|         else: | ||||
|             # Categorical: entries with a zero proportion in the result stay at 0.0 | ||||
|             expected = pd.Series(0.0, index=result.index, name="proportion") | ||||
|             expected[result > 0] = 1 / len(values) | ||||
|  | ||||
|         if isinstance(data.dtype, pd.StringDtype) and data.dtype.na_value is np.nan: | ||||
|             # TODO: avoid special-casing | ||||
|             expected = expected.astype("float64") | ||||
|         elif getattr(data.dtype, "storage", "") == "pyarrow" or isinstance( | ||||
|             data.dtype, pd.ArrowDtype | ||||
|         ): | ||||
|             # TODO: avoid special-casing | ||||
|             expected = expected.astype("double[pyarrow]") | ||||
|         elif na_value_for_dtype(data.dtype) is pd.NA: | ||||
|             # TODO(GH#44692): avoid special-casing | ||||
|             expected = expected.astype("Float64") | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_count(self, data_missing): | ||||
|         """Row-wise ``DataFrame.count`` must skip the missing entry.""" | ||||
|         df = pd.DataFrame({"A": data_missing}) | ||||
|         result = df.count(axis="columns") | ||||
|         # expects 0 counted values in the first row and 1 in the second | ||||
|         expected = pd.Series([0, 1]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_series_count(self, data_missing): | ||||
|         """``Series.count`` on ``data_missing`` must report one counted value.""" | ||||
|         # GH#26835 | ||||
|         ser = pd.Series(data_missing) | ||||
|         result = ser.count() | ||||
|         expected = 1 | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_apply_simple_series(self, data): | ||||
|         """``Series.apply`` with the builtin ``id`` must return a Series.""" | ||||
|         result = pd.Series(data).apply(id) | ||||
|         assert isinstance(result, pd.Series) | ||||
|  | ||||
|     @pytest.mark.parametrize("na_action", [None, "ignore"]) | ||||
|     def test_map(self, data_missing, na_action): | ||||
|         """Mapping the identity function must match ``to_numpy()`` of the array.""" | ||||
|         result = data_missing.map(lambda x: x, na_action=na_action) | ||||
|         expected = data_missing.to_numpy() | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_argsort(self, data_for_sorting): | ||||
|         """``Series.argsort`` on ``data_for_sorting`` must give positions [2, 0, 1].""" | ||||
|         result = pd.Series(data_for_sorting).argsort() | ||||
|         # argsort result gets passed to take, so should be np.intp | ||||
|         expected = pd.Series(np.array([2, 0, 1], dtype=np.intp)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_argsort_missing_array(self, data_missing_for_sorting): | ||||
|         """Array-level ``argsort`` with a missing entry must give [2, 0, 1].""" | ||||
|         result = data_missing_for_sorting.argsort() | ||||
|         # argsort result gets passed to take, so should be np.intp | ||||
|         expected = np.array([2, 0, 1], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_argsort_missing(self, data_missing_for_sorting): | ||||
|         """``Series.argsort`` with a missing entry must warn and give [1, -1, 0].""" | ||||
|         msg = "The behavior of Series.argsort in the presence of NA values" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = pd.Series(data_missing_for_sorting).argsort() | ||||
|         # -1 is expected at the second position | ||||
|         expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): | ||||
|         """Check array-level ``argmin``/``argmax`` on the sorting fixtures. | ||||
|  | ||||
|         Covers distinct values, repeated values (first occurrence expected) and | ||||
|         an array with a missing entry. ``na_value`` is accepted but not used | ||||
|         in the body. | ||||
|         """ | ||||
|         # GH 24382 | ||||
|         is_bool = data_for_sorting.dtype._is_boolean | ||||
|  | ||||
|         exp_argmax = 1 | ||||
|         exp_argmax_repeated = 3 | ||||
|         if is_bool: | ||||
|             # See data_for_sorting docstring | ||||
|             exp_argmax = 0 | ||||
|             exp_argmax_repeated = 1 | ||||
|  | ||||
|         # data_for_sorting -> [B, C, A] with A < B < C | ||||
|         assert data_for_sorting.argmax() == exp_argmax | ||||
|         assert data_for_sorting.argmin() == 2 | ||||
|  | ||||
|         # with repeated values -> first occurrence | ||||
|         data = data_for_sorting.take([2, 0, 0, 1, 1, 2]) | ||||
|         assert data.argmax() == exp_argmax_repeated | ||||
|         assert data.argmin() == 0 | ||||
|  | ||||
|         # with missing values | ||||
|         # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. | ||||
|         assert data_missing_for_sorting.argmax() == 0 | ||||
|         assert data_missing_for_sorting.argmin() == 2 | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["argmax", "argmin"]) | ||||
|     def test_argmin_argmax_empty_array(self, method, data): | ||||
|         """``argmax``/``argmin`` on an empty slice must raise ``ValueError``.""" | ||||
|         # GH 24382 | ||||
|         err_msg = "attempt to get" | ||||
|         with pytest.raises(ValueError, match=err_msg): | ||||
|             getattr(data[:0], method)() | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["argmax", "argmin"]) | ||||
|     def test_argmin_argmax_all_na(self, method, data, na_value): | ||||
|         """``argmax``/``argmin`` on an all-missing array must raise ``ValueError``.""" | ||||
|         # all missing with skipna=True is the same as empty | ||||
|         err_msg = "attempt to get" | ||||
|         # two-element array holding only the NA value, with the dtype of ``data`` | ||||
|         data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype) | ||||
|         with pytest.raises(ValueError, match=err_msg): | ||||
|             getattr(data_na, method)() | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "op_name, skipna, expected", | ||||
|         [ | ||||
|             ("idxmax", True, 0), | ||||
|             ("idxmin", True, 2), | ||||
|             ("argmax", True, 0), | ||||
|             ("argmin", True, 2), | ||||
|             ("idxmax", False, np.nan), | ||||
|             ("idxmin", False, np.nan), | ||||
|             ("argmax", False, -1), | ||||
|             ("argmin", False, -1), | ||||
|         ], | ||||
|     ) | ||||
|     def test_argreduce_series( | ||||
|         self, data_missing_for_sorting, op_name, skipna, expected | ||||
|     ): | ||||
|         """Check Series ``idxmax/idxmin/argmax/argmin`` with a missing entry. | ||||
|  | ||||
|         A ``FutureWarning`` is required for the cases whose expected result is | ||||
|         ``-1`` (arg*) or NaN (idx*); the other cases must not warn. | ||||
|         """ | ||||
|         # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. | ||||
|         warn = None | ||||
|         msg = "The behavior of Series.argmax/argmin" | ||||
|         if op_name.startswith("arg") and expected == -1: | ||||
|             warn = FutureWarning | ||||
|         if op_name.startswith("idx") and np.isnan(expected): | ||||
|             warn = FutureWarning | ||||
|             # the idx* warning names the specific method | ||||
|             msg = f"The behavior of Series.{op_name}" | ||||
|         ser = pd.Series(data_missing_for_sorting) | ||||
|         with tm.assert_produces_warning(warn, match=msg): | ||||
|             result = getattr(ser, op_name)(skipna=skipna) | ||||
|         tm.assert_almost_equal(result, expected) | ||||
|  | ||||
|     def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting): | ||||
|         """Array-level ``argmin``/``argmax`` with ``skipna=False`` must raise.""" | ||||
|         # GH#38733 | ||||
|         data = data_missing_for_sorting | ||||
|  | ||||
|         # match="" places no constraint on the error message | ||||
|         with pytest.raises(NotImplementedError, match=""): | ||||
|             data.argmin(skipna=False) | ||||
|  | ||||
|         with pytest.raises(NotImplementedError, match=""): | ||||
|             data.argmax(skipna=False) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "na_position, expected", | ||||
|         [ | ||||
|             ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))), | ||||
|             ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))), | ||||
|         ], | ||||
|     ) | ||||
|     def test_nargsort(self, data_missing_for_sorting, na_position, expected): | ||||
|         """``nargsort`` must place position 1 last or first per ``na_position``.""" | ||||
|         # GH 25439 | ||||
|         result = nargsort(data_missing_for_sorting, na_position=na_position) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("ascending", [True, False]) | ||||
|     def test_sort_values(self, data_for_sorting, ascending, sort_by_key): | ||||
|         """Check ``Series.sort_values`` in both directions, with the ``key`` fixture.""" | ||||
|         ser = pd.Series(data_for_sorting) | ||||
|         result = ser.sort_values(ascending=ascending, key=sort_by_key) | ||||
|         expected = ser.iloc[[2, 0, 1]] | ||||
|         if not ascending: | ||||
|             # GH 35922. Expect stable sort | ||||
|             if ser.nunique() == 2: | ||||
|                 # two entries compare equal and must keep their original order | ||||
|                 expected = ser.iloc[[0, 1, 2]] | ||||
|             else: | ||||
|                 expected = ser.iloc[[1, 0, 2]] | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("ascending", [True, False]) | ||||
|     def test_sort_values_missing( | ||||
|         self, data_missing_for_sorting, ascending, sort_by_key | ||||
|     ): | ||||
|         ser = pd.Series(data_missing_for_sorting) | ||||
|         result = ser.sort_values(ascending=ascending, key=sort_by_key) | ||||
|         if ascending: | ||||
|             expected = ser.iloc[[2, 0, 1]] | ||||
|         else: | ||||
|             expected = ser.iloc[[0, 2, 1]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("ascending", [True, False]) | ||||
|     def test_sort_values_frame(self, data_for_sorting, ascending): | ||||
|         df = pd.DataFrame({"A": [1, 2, 1], "B": data_for_sorting}) | ||||
|         result = df.sort_values(["A", "B"]) | ||||
|         expected = pd.DataFrame( | ||||
|             {"A": [1, 1, 2], "B": data_for_sorting.take([2, 0, 1])}, index=[2, 0, 1] | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("keep", ["first", "last", False]) | ||||
|     def test_duplicated(self, data, keep): | ||||
|         arr = data.take([0, 1, 0, 1]) | ||||
|         result = arr.duplicated(keep=keep) | ||||
|         if keep == "first": | ||||
|             expected = np.array([False, False, True, True]) | ||||
|         elif keep == "last": | ||||
|             expected = np.array([True, True, False, False]) | ||||
|         else: | ||||
|             expected = np.array([True, True, True, True]) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) | ||||
|     @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) | ||||
|     def test_unique(self, data, box, method): | ||||
|         """``unique`` on two copies of one element must return a length-1 array. | ||||
|  | ||||
|         Runs for the bare array and a Series, via the method and ``pd.unique``. | ||||
|         """ | ||||
|         duplicated = box(data._from_sequence([data[0], data[0]], dtype=data.dtype)) | ||||
|  | ||||
|         result = method(duplicated) | ||||
|  | ||||
|         assert len(result) == 1 | ||||
|         # the result must be of the extension array type even for Series input | ||||
|         assert isinstance(result, type(data)) | ||||
|         assert result[0] == duplicated[0] | ||||
|  | ||||
|     def test_factorize(self, data_for_grouping): | ||||
|         """Check codes and uniques from ``pd.factorize`` with the NA sentinel.""" | ||||
|         codes, uniques = pd.factorize(data_for_grouping, use_na_sentinel=True) | ||||
|  | ||||
|         is_bool = data_for_grouping.dtype._is_boolean | ||||
|         if is_bool: | ||||
|             # only 2 unique values | ||||
|             expected_codes = np.array([0, 0, -1, -1, 1, 1, 0, 0], dtype=np.intp) | ||||
|             expected_uniques = data_for_grouping.take([0, 4]) | ||||
|         else: | ||||
|             # -1 is expected at positions 2 and 3 | ||||
|             expected_codes = np.array([0, 0, -1, -1, 1, 1, 0, 2], dtype=np.intp) | ||||
|             expected_uniques = data_for_grouping.take([0, 4, 7]) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(codes, expected_codes) | ||||
|         tm.assert_extension_array_equal(uniques, expected_uniques) | ||||
|  | ||||
|     def test_factorize_equivalence(self, data_for_grouping): | ||||
|         """``pd.factorize`` and the array's ``factorize`` method must agree.""" | ||||
|         codes_1, uniques_1 = pd.factorize(data_for_grouping, use_na_sentinel=True) | ||||
|         codes_2, uniques_2 = data_for_grouping.factorize(use_na_sentinel=True) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(codes_1, codes_2) | ||||
|         tm.assert_extension_array_equal(uniques_1, uniques_2) | ||||
|         # the uniques must contain no duplicates and keep the input dtype | ||||
|         assert len(uniques_1) == len(pd.unique(uniques_1)) | ||||
|         assert uniques_1.dtype == data_for_grouping.dtype | ||||
|  | ||||
|     def test_factorize_empty(self, data): | ||||
|         """Factorizing an empty slice must give empty codes and empty uniques.""" | ||||
|         codes, uniques = pd.factorize(data[:0]) | ||||
|         expected_codes = np.array([], dtype=np.intp) | ||||
|         # empty array of the same type, using the dtype of the empty slice | ||||
|         expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(codes, expected_codes) | ||||
|         tm.assert_extension_array_equal(uniques, expected_uniques) | ||||
|  | ||||
|     def test_fillna_copy_frame(self, data_missing): | ||||
|         """Writing into the result of ``DataFrame.fillna`` must not alter the input.""" | ||||
|         arr = data_missing.take([1, 1]) | ||||
|         df = pd.DataFrame({"A": arr}) | ||||
|         df_orig = df.copy() | ||||
|  | ||||
|         filled_val = df.iloc[0, 0] | ||||
|         result = df.fillna(filled_val) | ||||
|  | ||||
|         # mutate the result, then verify the original frame is unchanged | ||||
|         result.iloc[0, 0] = filled_val | ||||
|  | ||||
|         tm.assert_frame_equal(df, df_orig) | ||||
|  | ||||
|     def test_fillna_copy_series(self, data_missing): | ||||
|         """Writing into the result of ``Series.fillna`` must not alter the input.""" | ||||
|         arr = data_missing.take([1, 1]) | ||||
|         ser = pd.Series(arr, copy=False) | ||||
|         ser_orig = ser.copy() | ||||
|  | ||||
|         filled_val = ser[0] | ||||
|         result = ser.fillna(filled_val) | ||||
|         # mutate the result, then verify the original series is unchanged | ||||
|         result.iloc[0] = filled_val | ||||
|  | ||||
|         tm.assert_series_equal(ser, ser_orig) | ||||
|  | ||||
|     def test_fillna_length_mismatch(self, data_missing): | ||||
|         """``fillna`` with a length-1 array as value must raise ``ValueError``.""" | ||||
|         msg = "Length of 'value' does not match." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             data_missing.fillna(data_missing.take([1])) | ||||
|  | ||||
|     # Subclasses can override if we expect e.g Sparse[bool], boolean, pyarrow[bool] | ||||
|     # Used as the dtype of the expected result in test_combine_le. | ||||
|     _combine_le_expected_dtype: Dtype = NumpyEADtype("bool") | ||||
|  | ||||
|     def test_combine_le(self, data_repeated): | ||||
|         """Check ``Series.combine`` with ``<=`` against another Series and a scalar.""" | ||||
|         # GH 20825 | ||||
|         # Test that combine works when doing a <= (le) comparison | ||||
|         orig_data1, orig_data2 = data_repeated(2) | ||||
|         s1 = pd.Series(orig_data1) | ||||
|         s2 = pd.Series(orig_data2) | ||||
|         result = s1.combine(s2, lambda x1, x2: x1 <= x2) | ||||
|         # expected values come from comparing the elements pairwise in Python | ||||
|         expected = pd.Series( | ||||
|             pd.array( | ||||
|                 [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], | ||||
|                 dtype=self._combine_le_expected_dtype, | ||||
|             ) | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # same comparison against a single scalar taken from the first Series | ||||
|         val = s1.iloc[0] | ||||
|         result = s1.combine(val, lambda x1, x2: x1 <= x2) | ||||
|         expected = pd.Series( | ||||
|             pd.array( | ||||
|                 [a <= val for a in list(orig_data1)], | ||||
|                 dtype=self._combine_le_expected_dtype, | ||||
|             ) | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_combine_add(self, data_repeated): | ||||
|         """Check ``Series.combine`` with ``+`` against another Series and a scalar. | ||||
|  | ||||
|         If adding the scalars pointwise raises ``TypeError``, ``combine`` is | ||||
|         required to raise ``TypeError`` too and the test ends there. | ||||
|         """ | ||||
|         # GH 20825 | ||||
|         orig_data1, orig_data2 = data_repeated(2) | ||||
|         s1 = pd.Series(orig_data1) | ||||
|         s2 = pd.Series(orig_data2) | ||||
|  | ||||
|         # Check if the operation is supported pointwise for our scalars. If not, | ||||
|         #  we will expect Series.combine to raise as well. | ||||
|         try: | ||||
|             # overflow floating-point errors are ignored while building expected | ||||
|             with np.errstate(over="ignore"): | ||||
|                 expected = pd.Series( | ||||
|                     orig_data1._from_sequence( | ||||
|                         [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))] | ||||
|                     ) | ||||
|                 ) | ||||
|         except TypeError: | ||||
|             # If the operation is not supported pointwise for our scalars, | ||||
|             #  then Series.combine should also raise | ||||
|             with pytest.raises(TypeError): | ||||
|                 s1.combine(s2, lambda x1, x2: x1 + x2) | ||||
|             return | ||||
|  | ||||
|         result = s1.combine(s2, lambda x1, x2: x1 + x2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # same operation against a single scalar taken from the first Series | ||||
|         val = s1.iloc[0] | ||||
|         result = s1.combine(val, lambda x1, x2: x1 + x2) | ||||
|         expected = pd.Series( | ||||
|             orig_data1._from_sequence([a + val for a in list(orig_data1)]) | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_combine_first(self, data): | ||||
|         """``combine_first`` of two overlapping Series must rebuild ``data[:5]``.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/24147 | ||||
|         a = pd.Series(data[:3]) | ||||
|         # b overlaps a at label 2 and contributes labels 3 and 4 | ||||
|         b = pd.Series(data[2:5], index=[2, 3, 4]) | ||||
|         result = a.combine_first(b) | ||||
|         expected = pd.Series(data[:5]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("frame", [True, False]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "periods, indices", | ||||
|         [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])], | ||||
|     ) | ||||
|     def test_container_shift(self, data, frame, periods, indices): | ||||
|         # https://github.com/pandas-dev/pandas/issues/22386 | ||||
|         subset = data[:5] | ||||
|         data = pd.Series(subset, name="A") | ||||
|         expected = pd.Series(subset.take(indices, allow_fill=True), name="A") | ||||
|  | ||||
|         if frame: | ||||
|             result = data.to_frame(name="A").assign(B=1).shift(periods) | ||||
|             expected = pd.concat( | ||||
|                 [expected, pd.Series([1] * 5, name="B").shift(periods)], axis=1 | ||||
|             ) | ||||
|             compare = tm.assert_frame_equal | ||||
|         else: | ||||
|             result = data.shift(periods) | ||||
|             compare = tm.assert_series_equal | ||||
|  | ||||
|         compare(result, expected) | ||||
|  | ||||
|     def test_shift_0_periods(self, data): | ||||
|         # GH#33856 shifting with periods=0 should return a copy, not same obj | ||||
|         # The shifted result is taken first and ``data`` is mutated afterwards; | ||||
|         #  if ``result`` were ``data`` itself or a view on it, the write below | ||||
|         #  would show through and the last assert would fail. | ||||
|         result = data.shift(0) | ||||
|         assert data[0] != data[1]  # otherwise below is invalid | ||||
|         data[0] = data[1] | ||||
|         assert result[0] != result[1]  # i.e. not the same object/view | ||||
|  | ||||
|     @pytest.mark.parametrize("periods", [1, -2]) | ||||
|     def test_diff(self, data, periods): | ||||
|         data = data[:5] | ||||
|         if is_bool_dtype(data.dtype): | ||||
|             op = operator.xor | ||||
|         else: | ||||
|             op = operator.sub | ||||
|         try: | ||||
|             # does this array implement ops? | ||||
|             op(data, data) | ||||
|         except Exception: | ||||
|             pytest.skip(f"{type(data)} does not support diff") | ||||
|         s = pd.Series(data) | ||||
|         result = s.diff(periods) | ||||
|         expected = pd.Series(op(data, data.shift(periods))) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         df = pd.DataFrame({"A": data, "B": [1.0] * 5}) | ||||
|         result = df.diff(periods) | ||||
|         if periods == 1: | ||||
|             b = [np.nan, 0, 0, 0, 0] | ||||
|         else: | ||||
|             b = [0, 0, 0, np.nan, np.nan] | ||||
|         expected = pd.DataFrame({"A": expected, "B": b}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "periods, indices", | ||||
|         [[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]], | ||||
|     ) | ||||
|     def test_shift_non_empty_array(self, data, periods, indices): | ||||
|         # https://github.com/pandas-dev/pandas/issues/23911 | ||||
|         subset = data[:2] | ||||
|         result = subset.shift(periods) | ||||
|         expected = subset.take(indices, allow_fill=True) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("periods", [-4, -1, 0, 1, 4]) | ||||
|     def test_shift_empty_array(self, data, periods): | ||||
|         # https://github.com/pandas-dev/pandas/issues/23911 | ||||
|         empty = data[:0] | ||||
|         result = empty.shift(periods) | ||||
|         expected = empty | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_shift_zero_copies(self, data): | ||||
|         # GH#31502 | ||||
|         result = data.shift(0) | ||||
|         assert result is not data | ||||
|  | ||||
|         result = data[:0].shift(2) | ||||
|         assert result is not data | ||||
|  | ||||
|     def test_shift_fill_value(self, data): | ||||
|         arr = data[:4] | ||||
|         fill_value = data[0] | ||||
|         result = arr.shift(1, fill_value=fill_value) | ||||
|         expected = data.take([0, 0, 1, 2]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = arr.shift(-2, fill_value=fill_value) | ||||
|         expected = data.take([2, 3, 0, 0]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_not_hashable(self, data): | ||||
|         # We are in general mutable, so not hashable | ||||
|         # ``hash`` must raise TypeError with a message containing | ||||
|         #  "unhashable type". | ||||
|         with pytest.raises(TypeError, match="unhashable type"): | ||||
|             hash(data) | ||||
|  | ||||
|     def test_hash_pandas_object_works(self, data, as_frame): | ||||
|         # https://github.com/pandas-dev/pandas/issues/23066 | ||||
|         data = pd.Series(data) | ||||
|         if as_frame: | ||||
|             data = data.to_frame() | ||||
|         a = pd.util.hash_pandas_object(data) | ||||
|         b = pd.util.hash_pandas_object(data) | ||||
|         tm.assert_equal(a, b) | ||||
|  | ||||
|     def test_searchsorted(self, data_for_sorting, as_series): | ||||
|         if data_for_sorting.dtype._is_boolean: | ||||
|             return self._test_searchsorted_bool_dtypes(data_for_sorting, as_series) | ||||
|  | ||||
|         b, c, a = data_for_sorting | ||||
|         arr = data_for_sorting.take([2, 0, 1])  # to get [a, b, c] | ||||
|  | ||||
|         if as_series: | ||||
|             arr = pd.Series(arr) | ||||
|         assert arr.searchsorted(a) == 0 | ||||
|         assert arr.searchsorted(a, side="right") == 1 | ||||
|  | ||||
|         assert arr.searchsorted(b) == 1 | ||||
|         assert arr.searchsorted(b, side="right") == 2 | ||||
|  | ||||
|         assert arr.searchsorted(c) == 2 | ||||
|         assert arr.searchsorted(c, side="right") == 3 | ||||
|  | ||||
|         result = arr.searchsorted(arr.take([0, 2])) | ||||
|         expected = np.array([0, 2], dtype=np.intp) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # sorter | ||||
|         sorter = np.array([1, 2, 0]) | ||||
|         assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 | ||||
|  | ||||
|     def _test_searchsorted_bool_dtypes(self, data_for_sorting, as_series): | ||||
|         # We call this from test_searchsorted in cases where we have a | ||||
|         #  boolean-like dtype. The non-bool test assumes we have more than 2 | ||||
|         #  unique values. | ||||
|         dtype = data_for_sorting.dtype | ||||
|         data_for_sorting = pd.array([True, False], dtype=dtype) | ||||
|         b, a = data_for_sorting | ||||
|         arr = type(data_for_sorting)._from_sequence([a, b]) | ||||
|  | ||||
|         if as_series: | ||||
|             arr = pd.Series(arr) | ||||
|         assert arr.searchsorted(a) == 0 | ||||
|         assert arr.searchsorted(a, side="right") == 1 | ||||
|  | ||||
|         assert arr.searchsorted(b) == 1 | ||||
|         assert arr.searchsorted(b, side="right") == 2 | ||||
|  | ||||
|         result = arr.searchsorted(arr.take([0, 1])) | ||||
|         expected = np.array([0, 1], dtype=np.intp) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # sorter | ||||
|         sorter = np.array([1, 0]) | ||||
|         assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 | ||||
|  | ||||
|     def test_where_series(self, data, na_value, as_frame): | ||||
|         # Check Series/DataFrame ``where`` and in-place ``mask`` on an | ||||
|         #  extension-array column, first without ``other`` (masked positions | ||||
|         #  become ``na_value``) and then with an array/frame ``other``. | ||||
|         assert data[0] != data[1] | ||||
|         cls = type(data) | ||||
|         a, b = data[:2] | ||||
|  | ||||
|         orig = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype)) | ||||
|         ser = orig.copy() | ||||
|         cond = np.array([True, True, False, False]) | ||||
|  | ||||
|         if as_frame: | ||||
|             ser = ser.to_frame(name="a") | ||||
|             # one column, so the condition needs shape (4, 1) | ||||
|             cond = cond.reshape(-1, 1) | ||||
|  | ||||
|         result = ser.where(cond) | ||||
|         expected = pd.Series( | ||||
|             cls._from_sequence([a, a, na_value, na_value], dtype=data.dtype) | ||||
|         ) | ||||
|  | ||||
|         if as_frame: | ||||
|             expected = expected.to_frame(name="a") | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|         # mask with the inverted condition, applied in place, must match | ||||
|         ser.mask(~cond, inplace=True) | ||||
|         tm.assert_equal(ser, expected) | ||||
|  | ||||
|         # array other | ||||
|         # start again from an unmodified copy, since ``ser`` was mutated above | ||||
|         ser = orig.copy() | ||||
|         if as_frame: | ||||
|             ser = ser.to_frame(name="a") | ||||
|         cond = np.array([True, False, True, True]) | ||||
|         other = cls._from_sequence([a, b, a, b], dtype=data.dtype) | ||||
|         if as_frame: | ||||
|             other = pd.DataFrame({"a": other}) | ||||
|             cond = pd.DataFrame({"a": cond}) | ||||
|         result = ser.where(cond, other) | ||||
|         expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) | ||||
|         if as_frame: | ||||
|             expected = expected.to_frame(name="a") | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|         ser.mask(~cond, other, inplace=True) | ||||
|         tm.assert_equal(ser, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) | ||||
|     def test_repeat(self, data, repeats, as_series, use_numpy): | ||||
|         arr = type(data)._from_sequence(data[:3], dtype=data.dtype) | ||||
|         if as_series: | ||||
|             arr = pd.Series(arr) | ||||
|  | ||||
|         result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats) | ||||
|  | ||||
|         repeats = [repeats] * 3 if isinstance(repeats, int) else repeats | ||||
|         expected = [x for x, n in zip(arr, repeats) for _ in range(n)] | ||||
|         expected = type(data)._from_sequence(expected, dtype=data.dtype) | ||||
|         if as_series: | ||||
|             expected = pd.Series(expected, index=arr.index.repeat(repeats)) | ||||
|  | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "repeats, kwargs, error, msg", | ||||
|         [ | ||||
|             (2, {"axis": 1}, ValueError, "axis"), | ||||
|             (-1, {}, ValueError, "negative"), | ||||
|             ([1, 2], {}, ValueError, "shape"), | ||||
|             (2, {"foo": "bar"}, TypeError, "'foo'"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): | ||||
|         with pytest.raises(error, match=msg): | ||||
|             if use_numpy: | ||||
|                 np.repeat(data, repeats, **kwargs) | ||||
|             else: | ||||
|                 data.repeat(repeats, **kwargs) | ||||
|  | ||||
|     def test_delete(self, data): | ||||
|         result = data.delete(0) | ||||
|         expected = data[1:] | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = data.delete([1, 3]) | ||||
|         expected = data._concat_same_type([data[[0]], data[[2]], data[4:]]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_insert(self, data): | ||||
|         # insert at the beginning | ||||
|         result = data[1:].insert(0, data[0]) | ||||
|         tm.assert_extension_array_equal(result, data) | ||||
|  | ||||
|         result = data[1:].insert(-len(data[1:]), data[0]) | ||||
|         tm.assert_extension_array_equal(result, data) | ||||
|  | ||||
|         # insert at the middle | ||||
|         result = data[:-1].insert(4, data[-1]) | ||||
|  | ||||
|         taker = np.arange(len(data)) | ||||
|         taker[5:] = taker[4:-1] | ||||
|         taker[4] = len(data) - 1 | ||||
|         expected = data.take(taker) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_insert_invalid(self, data, invalid_scalar): | ||||
|         item = invalid_scalar | ||||
|  | ||||
|         with pytest.raises((TypeError, ValueError)): | ||||
|             data.insert(0, item) | ||||
|  | ||||
|         with pytest.raises((TypeError, ValueError)): | ||||
|             data.insert(4, item) | ||||
|  | ||||
|         with pytest.raises((TypeError, ValueError)): | ||||
|             data.insert(len(data) - 1, item) | ||||
|  | ||||
|     def test_insert_invalid_loc(self, data): | ||||
|         ub = len(data) | ||||
|  | ||||
|         with pytest.raises(IndexError): | ||||
|             data.insert(ub + 1, data[0]) | ||||
|  | ||||
|         with pytest.raises(IndexError): | ||||
|             data.insert(-ub - 1, data[0]) | ||||
|  | ||||
|         with pytest.raises(TypeError): | ||||
|             # we expect TypeError here instead of IndexError to match np.insert | ||||
|             data.insert(1.5, data[0]) | ||||
|  | ||||
|     @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) | ||||
|     def test_equals(self, data, na_value, as_series, box): | ||||
|         data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype) | ||||
|         data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype) | ||||
|  | ||||
|         data = tm.box_expected(data, box, transpose=False) | ||||
|         data2 = tm.box_expected(data2, box, transpose=False) | ||||
|         data_na = tm.box_expected(data_na, box, transpose=False) | ||||
|  | ||||
|         # we are asserting with `is True/False` explicitly, to test that the | ||||
|         # result is an actual Python bool, and not something "truthy" | ||||
|  | ||||
|         assert data.equals(data) is True | ||||
|         assert data.equals(data.copy()) is True | ||||
|  | ||||
|         # unequal other data | ||||
|         assert data.equals(data2) is False | ||||
|         assert data.equals(data_na) is False | ||||
|  | ||||
|         # different length | ||||
|         assert data[:2].equals(data[:3]) is False | ||||
|  | ||||
|         # empty are equal | ||||
|         assert data[:0].equals(data[:0]) is True | ||||
|  | ||||
|         # other types | ||||
|         assert data.equals(None) is False | ||||
|         assert data[[0]].equals(data[0]) is False | ||||
|  | ||||
|     def test_equals_same_data_different_object(self, data): | ||||
|         # https://github.com/pandas-dev/pandas/issues/34660 | ||||
|         assert pd.Series(data).equals(pd.Series(data)) | ||||
| @ -0,0 +1,190 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class BaseMissingTests: | ||||
|     def test_isna(self, data_missing): | ||||
|         expected = np.array([True, False]) | ||||
|  | ||||
|         result = pd.isna(data_missing) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = pd.Series(data_missing).isna() | ||||
|         expected = pd.Series(expected) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # GH 21189 | ||||
|         result = pd.Series(data_missing).drop([0, 1]).isna() | ||||
|         expected = pd.Series([], dtype=bool) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("na_func", ["isna", "notna"]) | ||||
|     def test_isna_returns_copy(self, data_missing, na_func): | ||||
|         result = pd.Series(data_missing) | ||||
|         expected = result.copy() | ||||
|         mask = getattr(result, na_func)() | ||||
|         if isinstance(mask.dtype, pd.SparseDtype): | ||||
|             # TODO: GH 57739 | ||||
|             mask = np.array(mask) | ||||
|             mask.flags.writeable = True | ||||
|  | ||||
|         mask[:] = True | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_dropna_array(self, data_missing): | ||||
|         result = data_missing.dropna() | ||||
|         expected = data_missing[[1]] | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_dropna_series(self, data_missing): | ||||
|         ser = pd.Series(data_missing) | ||||
|         result = ser.dropna() | ||||
|         expected = ser.iloc[[1]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_dropna_frame(self, data_missing): | ||||
|         df = pd.DataFrame({"A": data_missing}, columns=pd.Index(["A"], dtype=object)) | ||||
|  | ||||
|         # defaults | ||||
|         result = df.dropna() | ||||
|         expected = df.iloc[[1]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # axis = 1 | ||||
|         result = df.dropna(axis="columns") | ||||
|         expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([])) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # multiple | ||||
|         df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]}) | ||||
|         result = df.dropna() | ||||
|         expected = df.iloc[:0] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_fillna_scalar(self, data_missing): | ||||
|         valid = data_missing[1] | ||||
|         result = data_missing.fillna(valid) | ||||
|         expected = data_missing.fillna(valid) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings( | ||||
|         "ignore:Series.fillna with 'method' is deprecated:FutureWarning" | ||||
|     ) | ||||
|     def test_fillna_limit_pad(self, data_missing): | ||||
|         arr = data_missing.take([1, 0, 0, 0, 1]) | ||||
|         result = pd.Series(arr).ffill(limit=2) | ||||
|         expected = pd.Series(data_missing.take([1, 1, 1, 0, 1])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "limit_area, input_ilocs, expected_ilocs", | ||||
|         [ | ||||
|             ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]), | ||||
|             ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]), | ||||
|             ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]), | ||||
|             ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]), | ||||
|             ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]), | ||||
|             ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]), | ||||
|             ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]), | ||||
|             ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_ffill_limit_area( | ||||
|         self, data_missing, limit_area, input_ilocs, expected_ilocs | ||||
|     ): | ||||
|         # GH#56616 | ||||
|         arr = data_missing.take(input_ilocs) | ||||
|         result = pd.Series(arr).ffill(limit_area=limit_area) | ||||
|         expected = pd.Series(data_missing.take(expected_ilocs)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings( | ||||
|         "ignore:Series.fillna with 'method' is deprecated:FutureWarning" | ||||
|     ) | ||||
|     def test_fillna_limit_backfill(self, data_missing): | ||||
|         arr = data_missing.take([1, 0, 0, 0, 1]) | ||||
|         result = pd.Series(arr).fillna(method="backfill", limit=2) | ||||
|         expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_fillna_no_op_returns_copy(self, data): | ||||
|         data = data[~data.isna()] | ||||
|  | ||||
|         valid = data[0] | ||||
|         result = data.fillna(valid) | ||||
|         assert result is not data | ||||
|         tm.assert_extension_array_equal(result, data) | ||||
|  | ||||
|         result = data._pad_or_backfill(method="backfill") | ||||
|         assert result is not data | ||||
|         tm.assert_extension_array_equal(result, data) | ||||
|  | ||||
|     def test_fillna_series(self, data_missing): | ||||
|         fill_value = data_missing[1] | ||||
|         ser = pd.Series(data_missing) | ||||
|  | ||||
|         result = ser.fillna(fill_value) | ||||
|         expected = pd.Series( | ||||
|             data_missing._from_sequence( | ||||
|                 [fill_value, fill_value], dtype=data_missing.dtype | ||||
|             ) | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # Fill with a series | ||||
|         result = ser.fillna(expected) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # Fill with a series not affecting the missing values | ||||
|         result = ser.fillna(ser) | ||||
|         tm.assert_series_equal(result, ser) | ||||
|  | ||||
|     def test_fillna_series_method(self, data_missing, fillna_method): | ||||
|         fill_value = data_missing[1] | ||||
|  | ||||
|         if fillna_method == "ffill": | ||||
|             data_missing = data_missing[::-1] | ||||
|  | ||||
|         result = getattr(pd.Series(data_missing), fillna_method)() | ||||
|         expected = pd.Series( | ||||
|             data_missing._from_sequence( | ||||
|                 [fill_value, fill_value], dtype=data_missing.dtype | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_fillna_frame(self, data_missing): | ||||
|         fill_value = data_missing[1] | ||||
|  | ||||
|         result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value) | ||||
|  | ||||
|         expected = pd.DataFrame( | ||||
|             { | ||||
|                 "A": data_missing._from_sequence( | ||||
|                     [fill_value, fill_value], dtype=data_missing.dtype | ||||
|                 ), | ||||
|                 "B": [1, 2], | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_fillna_fill_other(self, data): | ||||
|         result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0}) | ||||
|  | ||||
|         expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)}) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_use_inf_as_na_no_effect(self, data_missing): | ||||
|         ser = pd.Series(data_missing) | ||||
|         expected = ser.isna() | ||||
|         msg = "use_inf_as_na option is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             with pd.option_context("mode.use_inf_as_na", True): | ||||
|                 result = ser.isna() | ||||
|         tm.assert_series_equal(result, expected) | ||||
							
								
								
									
										289
									
								
								lib/python3.11/site-packages/pandas/tests/extension/base/ops.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										289
									
								
								lib/python3.11/site-packages/pandas/tests/extension/base/ops.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,289 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from typing import final | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_string_dtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core import ops | ||||
|  | ||||
|  | ||||
| class BaseOpsUtil: | ||||
|     series_scalar_exc: type[Exception] | None = TypeError | ||||
|     frame_scalar_exc: type[Exception] | None = TypeError | ||||
|     series_array_exc: type[Exception] | None = TypeError | ||||
|     divmod_exc: type[Exception] | None = TypeError | ||||
|  | ||||
|     def _get_expected_exception( | ||||
|         self, op_name: str, obj, other | ||||
|     ) -> type[Exception] | tuple[type[Exception], ...] | None: | ||||
|         # Find the Exception, if any we expect to raise calling | ||||
|         #  obj.__op_name__(other) | ||||
|  | ||||
|         # The self.obj_bar_exc pattern isn't great in part because it can depend | ||||
|         #  on op_name or dtypes, but we use it here for backward-compatibility. | ||||
|         if op_name in ["__divmod__", "__rdivmod__"]: | ||||
|             result = self.divmod_exc | ||||
|         elif isinstance(obj, pd.Series) and isinstance(other, pd.Series): | ||||
|             result = self.series_array_exc | ||||
|         elif isinstance(obj, pd.Series): | ||||
|             result = self.series_scalar_exc | ||||
|         else: | ||||
|             result = self.frame_scalar_exc | ||||
|  | ||||
|         return result | ||||
|  | ||||
|     def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): | ||||
|         # In _check_op we check that the result of a pointwise operation | ||||
|         #  (found via _combine) matches the result of the vectorized | ||||
|         #  operation obj.__op_name__(other). | ||||
|         #  In some cases pandas dtype inference on the scalar result may not | ||||
|         #  give a matching dtype even if both operations are behaving "correctly". | ||||
|         #  In these cases, do extra required casting here. | ||||
|         return pointwise_result | ||||
|  | ||||
|     def get_op_from_name(self, op_name: str): | ||||
|         return tm.get_op_from_name(op_name) | ||||
|  | ||||
|     # Subclasses are not expected to need to override check_opname, _check_op, | ||||
|     #  _check_divmod_op, or _combine. | ||||
|     #  Ideally any relevant overriding can be done in _cast_pointwise_result, | ||||
|     #  get_op_from_name, and the specification of `exc`. If you find a use | ||||
|     #  case that still requires overriding _check_op or _combine, please let | ||||
|     #  us know at github.com/pandas-dev/pandas/issues | ||||
|     @final | ||||
|     def check_opname(self, ser: pd.Series, op_name: str, other): | ||||
|         exc = self._get_expected_exception(op_name, ser, other) | ||||
|         op = self.get_op_from_name(op_name) | ||||
|  | ||||
|         self._check_op(ser, op, other, op_name, exc) | ||||
|  | ||||
|     # see comment on check_opname | ||||
|     @final | ||||
|     def _combine(self, obj, other, op): | ||||
|         if isinstance(obj, pd.DataFrame): | ||||
|             if len(obj.columns) != 1: | ||||
|                 raise NotImplementedError | ||||
|             expected = obj.iloc[:, 0].combine(other, op).to_frame() | ||||
|         else: | ||||
|             expected = obj.combine(other, op) | ||||
|         return expected | ||||
|  | ||||
|     # see comment on check_opname | ||||
|     @final | ||||
|     def _check_op( | ||||
|         self, ser: pd.Series, op, other, op_name: str, exc=NotImplementedError | ||||
|     ): | ||||
|         # Check that the Series/DataFrame arithmetic/comparison method matches | ||||
|         #  the pointwise result from _combine. | ||||
|  | ||||
|         if exc is None: | ||||
|             result = op(ser, other) | ||||
|             expected = self._combine(ser, other, op) | ||||
|             expected = self._cast_pointwise_result(op_name, ser, other, expected) | ||||
|             assert isinstance(result, type(ser)) | ||||
|             tm.assert_equal(result, expected) | ||||
|         else: | ||||
|             with pytest.raises(exc): | ||||
|                 op(ser, other) | ||||
|  | ||||
|     # see comment on check_opname | ||||
|     @final | ||||
|     def _check_divmod_op(self, ser: pd.Series, op, other): | ||||
|         # check that divmod behavior matches behavior of floordiv+mod | ||||
|         if op is divmod: | ||||
|             exc = self._get_expected_exception("__divmod__", ser, other) | ||||
|         else: | ||||
|             exc = self._get_expected_exception("__rdivmod__", ser, other) | ||||
|         if exc is None: | ||||
|             result_div, result_mod = op(ser, other) | ||||
|             if op is divmod: | ||||
|                 expected_div, expected_mod = ser // other, ser % other | ||||
|             else: | ||||
|                 expected_div, expected_mod = other // ser, other % ser | ||||
|             tm.assert_series_equal(result_div, expected_div) | ||||
|             tm.assert_series_equal(result_mod, expected_mod) | ||||
|         else: | ||||
|             with pytest.raises(exc): | ||||
|                 divmod(ser, other) | ||||
|  | ||||
|  | ||||
| class BaseArithmeticOpsTests(BaseOpsUtil): | ||||
|     """ | ||||
|     Various Series and DataFrame arithmetic ops methods. | ||||
|  | ||||
|     Subclasses supporting various ops should set the class variables | ||||
|     to indicate that they support ops of that kind | ||||
|  | ||||
|     * series_scalar_exc = TypeError | ||||
|     * frame_scalar_exc = TypeError | ||||
|     * series_array_exc = TypeError | ||||
|     * divmod_exc = TypeError | ||||
|     """ | ||||
|  | ||||
|     series_scalar_exc: type[Exception] | None = TypeError | ||||
|     frame_scalar_exc: type[Exception] | None = TypeError | ||||
|     series_array_exc: type[Exception] | None = TypeError | ||||
|     divmod_exc: type[Exception] | None = TypeError | ||||
|  | ||||
|     def test_arith_series_with_scalar(self, data, all_arithmetic_operators): | ||||
|         # series & scalar | ||||
|         if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): | ||||
|             pytest.skip("Skip testing Python string formatting") | ||||
|  | ||||
|         op_name = all_arithmetic_operators | ||||
|         ser = pd.Series(data) | ||||
|         self.check_opname(ser, op_name, ser.iloc[0]) | ||||
|  | ||||
|     def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): | ||||
|         # frame & scalar | ||||
|         if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): | ||||
|             pytest.skip("Skip testing Python string formatting") | ||||
|  | ||||
|         op_name = all_arithmetic_operators | ||||
|         df = pd.DataFrame({"A": data}) | ||||
|         self.check_opname(df, op_name, data[0]) | ||||
|  | ||||
|     def test_arith_series_with_array(self, data, all_arithmetic_operators): | ||||
|         # ndarray & other series | ||||
|         op_name = all_arithmetic_operators | ||||
|         ser = pd.Series(data) | ||||
|         self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) | ||||
|  | ||||
|     def test_divmod(self, data): | ||||
|         ser = pd.Series(data) | ||||
|         self._check_divmod_op(ser, divmod, 1) | ||||
|         self._check_divmod_op(1, ops.rdivmod, ser) | ||||
|  | ||||
|     def test_divmod_series_array(self, data, data_for_twos): | ||||
|         ser = pd.Series(data) | ||||
|         self._check_divmod_op(ser, divmod, data) | ||||
|  | ||||
|         other = data_for_twos | ||||
|         self._check_divmod_op(other, ops.rdivmod, ser) | ||||
|  | ||||
|         other = pd.Series(other) | ||||
|         self._check_divmod_op(other, ops.rdivmod, ser) | ||||
|  | ||||
|     def test_add_series_with_extension_array(self, data): | ||||
|         # Check adding an ExtensionArray to a Series of the same dtype matches | ||||
|         # the behavior of adding the arrays directly and then wrapping in a | ||||
|         # Series. | ||||
|  | ||||
|         ser = pd.Series(data) | ||||
|  | ||||
|         exc = self._get_expected_exception("__add__", ser, data) | ||||
|         if exc is not None: | ||||
|             with pytest.raises(exc): | ||||
|                 ser + data | ||||
|             return | ||||
|  | ||||
|         result = ser + data | ||||
|         expected = pd.Series(data + data) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame, pd.Index]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "op_name", | ||||
|         [ | ||||
|             # every arithmetic/comparison dunder whose name does not start | ||||
|             # with "__r", which filters out the reflected variants | ||||
|             x | ||||
|             for x in tm.arithmetic_dunder_methods + tm.comparison_dunder_methods | ||||
|             if not x.startswith("__r") | ||||
|         ], | ||||
|     ) | ||||
|     def test_direct_arith_with_ndframe_returns_not_implemented( | ||||
|         self, data, box, op_name | ||||
|     ): | ||||
|         # EAs should return NotImplemented for ops with Series/DataFrame/Index | ||||
|         # Pandas takes care of unboxing the series and calling the EA's op. | ||||
|         other = box(data) | ||||
|  | ||||
|         # Arrays that do not define the dunder at all pass trivially: nothing | ||||
|         # is asserted when the attribute is missing. | ||||
|         if hasattr(data, op_name): | ||||
|             result = getattr(data, op_name)(other) | ||||
|             assert result is NotImplemented | ||||
|  | ||||
|  | ||||
| class BaseComparisonOpsTests(BaseOpsUtil): | ||||
|     """Various Series and DataFrame comparison ops methods.""" | ||||
|  | ||||
|     def _compare_other(self, ser: pd.Series, data, op, other): | ||||
|         # Shared checker: compare the vectorized ``op(ser, other)`` against | ||||
|         # the pointwise result of ``ser.combine(other, op)``. | ||||
|         # NOTE: ``data`` is accepted but not used by this implementation. | ||||
|         if op.__name__ in ["eq", "ne"]: | ||||
|             # comparison should match point-wise comparisons | ||||
|             # (no exception handling here: eq/ne raising makes the test error) | ||||
|             result = op(ser, other) | ||||
|             expected = ser.combine(other, op) | ||||
|             expected = self._cast_pointwise_result(op.__name__, ser, other, expected) | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         else: | ||||
|             # Other comparisons are allowed to raise; remember the exception | ||||
|             # instead of failing so both outcomes can be validated below. | ||||
|             exc = None | ||||
|             try: | ||||
|                 result = op(ser, other) | ||||
|             except Exception as err: | ||||
|                 exc = err | ||||
|  | ||||
|             if exc is None: | ||||
|                 # Didn't error, then should match pointwise behavior | ||||
|                 expected = ser.combine(other, op) | ||||
|                 expected = self._cast_pointwise_result( | ||||
|                     op.__name__, ser, other, expected | ||||
|                 ) | ||||
|                 tm.assert_series_equal(result, expected) | ||||
|             else: | ||||
|                 # The vectorized op raised, so the pointwise path is expected | ||||
|                 # to raise the same exception type. | ||||
|                 with pytest.raises(type(exc)): | ||||
|                     ser.combine(other, op) | ||||
|  | ||||
|     def test_compare_scalar(self, data, comparison_op): | ||||
|         # Compare a Series wrapping ``data`` against the scalar 0. | ||||
|         ser = pd.Series(data) | ||||
|         self._compare_other(ser, data, comparison_op, 0) | ||||
|  | ||||
|     def test_compare_array(self, data, comparison_op): | ||||
|         # Compare against a same-length, same-dtype Series that repeats the | ||||
|         # first element of ``data``. | ||||
|         ser = pd.Series(data) | ||||
|         other = pd.Series([data[0]] * len(data), dtype=data.dtype) | ||||
|         self._compare_other(ser, data, comparison_op, other) | ||||
|  | ||||
|  | ||||
| class BaseUnaryOpsTests(BaseOpsUtil): | ||||
|     """Tests for the unary operators ``~``, ``+``, ``-`` and ``abs``.""" | ||||
|  | ||||
|     def test_invert(self, data): | ||||
|         # Whether ``~`` is expected to work on the array/Series is decided by | ||||
|         # whether it works on the individual scalars. | ||||
|         ser = pd.Series(data, name="name") | ||||
|         try: | ||||
|             # 10 is an arbitrary choice here, just avoid iterating over | ||||
|             #  the whole array to trim test runtime | ||||
|             [~x for x in data[:10]] | ||||
|         except TypeError: | ||||
|             # scalars don't support invert -> we don't expect the vectorized | ||||
|             #  operation to succeed | ||||
|             with pytest.raises(TypeError): | ||||
|                 ~ser | ||||
|             with pytest.raises(TypeError): | ||||
|                 ~data | ||||
|         else: | ||||
|             # Note we do not reuse the pointwise result to construct expected | ||||
|             #  because python semantics for negating bools are weird see GH#54569 | ||||
|             result = ~ser | ||||
|             expected = pd.Series(~data, name="name") | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs]) | ||||
|     def test_unary_ufunc_dunder_equivalence(self, data, ufunc): | ||||
|         # the dunder __pos__ works if and only if np.positive works, | ||||
|         #  same for __neg__/np.negative and __abs__/np.abs | ||||
|         # Map the parametrized ufunc to the name of its matching dunder. | ||||
|         attr = {np.positive: "__pos__", np.negative: "__neg__", np.abs: "__abs__"}[ | ||||
|             ufunc | ||||
|         ] | ||||
|  | ||||
|         exc = None | ||||
|         try: | ||||
|             result = getattr(data, attr)() | ||||
|         except Exception as err: | ||||
|             exc = err | ||||
|  | ||||
|             # if the dunder (e.g. __pos__) raised, then so should the ufunc; | ||||
|             #  either the same exception type or a TypeError is accepted | ||||
|             with pytest.raises((type(exc), TypeError)): | ||||
|                 ufunc(data) | ||||
|         else: | ||||
|             # both succeeded: the two results must be equal arrays | ||||
|             alt = ufunc(data) | ||||
|             tm.assert_extension_array_equal(result, alt) | ||||
| @ -0,0 +1,41 @@ | ||||
| import io | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
|  | ||||
|  | ||||
| class BasePrintingTests: | ||||
|     """Tests checking the formatting of your EA when printed.""" | ||||
|  | ||||
|     @pytest.mark.parametrize("size", ["big", "small"]) | ||||
|     def test_array_repr(self, data, size): | ||||
|         # "small" uses the first five elements; "big" concatenates five | ||||
|         # copies of ``data`` so that the repr is expected to be truncated. | ||||
|         if size == "small": | ||||
|             data = data[:5] | ||||
|         else: | ||||
|             data = type(data)._concat_same_type([data] * 5) | ||||
|  | ||||
|         # The repr must mention the array class name, its length and dtype. | ||||
|         result = repr(data) | ||||
|         assert type(data).__name__ in result | ||||
|         assert f"Length: {len(data)}" in result | ||||
|         assert str(data.dtype) in result | ||||
|         if size == "big": | ||||
|             # an ellipsis marks the elided middle of the big array | ||||
|             assert "..." in result | ||||
|  | ||||
|     def test_array_repr_unicode(self, data): | ||||
|         # str() on the array must succeed and produce a ``str`` instance. | ||||
|         result = str(data) | ||||
|         assert isinstance(result, str) | ||||
|  | ||||
|     def test_series_repr(self, data): | ||||
|         # The Series repr must include the dtype's name. | ||||
|         ser = pd.Series(data) | ||||
|         assert data.dtype.name in repr(ser) | ||||
|  | ||||
|     def test_dataframe_repr(self, data): | ||||
|         # Smoke test: nothing is asserted, repr() just must not raise. | ||||
|         df = pd.DataFrame({"A": data}) | ||||
|         repr(df) | ||||
|  | ||||
|     def test_dtype_name_in_info(self, data): | ||||
|         # DataFrame.info() output, captured in a buffer, must include the | ||||
|         # dtype's name. | ||||
|         buf = io.StringIO() | ||||
|         pd.DataFrame({"A": data}).info(buf=buf) | ||||
|         result = buf.getvalue() | ||||
|         assert data.dtype.name in result | ||||
| @ -0,0 +1,153 @@ | ||||
| from typing import final | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import is_numeric_dtype | ||||
|  | ||||
|  | ||||
| class BaseReduceTests: | ||||
|     """ | ||||
|     Reduction specific tests. Generally these only | ||||
|     make sense for numeric/boolean operations. | ||||
|     """ | ||||
|  | ||||
|     def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: | ||||
|         # Specify if we expect this reduction to succeed. | ||||
|         # The default is False for every op. The series tests below use the | ||||
|         #  return value to choose between asserting a TypeError and checking | ||||
|         #  the result; test_reduce_frame skips when it is False. | ||||
|         return False | ||||
|  | ||||
|     def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): | ||||
|         # We perform the same operation on the np.float64 data and check | ||||
|         #  that the results match. Override if you need to cast to something | ||||
|         #  other than float64. | ||||
|         res_op = getattr(ser, op_name) | ||||
|  | ||||
|         try: | ||||
|             alt = ser.astype("float64") | ||||
|         except (TypeError, ValueError): | ||||
|             # e.g. Interval can't cast (TypeError), StringArray can't cast | ||||
|             #  (ValueError), so let's cast to object and do | ||||
|             #  the reduction pointwise | ||||
|             alt = ser.astype(object) | ||||
|  | ||||
|         exp_op = getattr(alt, op_name) | ||||
|         # "count" is called without the ``skipna`` keyword; every other | ||||
|         #  reduction receives it. | ||||
|         if op_name == "count": | ||||
|             result = res_op() | ||||
|             expected = exp_op() | ||||
|         else: | ||||
|             result = res_op(skipna=skipna) | ||||
|             expected = exp_op(skipna=skipna) | ||||
|         tm.assert_almost_equal(result, expected) | ||||
|  | ||||
|     def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): | ||||
|         # Find the expected dtype when the given reduction is done on a DataFrame | ||||
|         # column with this array.  The default assumes float64-like behavior, | ||||
|         # i.e. retains the dtype. | ||||
|         return arr.dtype | ||||
|  | ||||
|     # We anticipate that authors should not need to override check_reduce_frame, | ||||
|     #  but should be able to do any necessary overriding in | ||||
|     #  _get_expected_reduction_dtype. If you have a use case where this | ||||
|     #  does not hold, please let us know at github.com/pandas-dev/pandas/issues. | ||||
|     @final | ||||
|     def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): | ||||
|         # Check that the 2D reduction done in a DataFrame reduction "looks like" | ||||
|         # a wrapped version of the 1D reduction done by Series. | ||||
|         arr = ser.array | ||||
|         df = pd.DataFrame({"a": arr}) | ||||
|  | ||||
|         # var/std are invoked with an explicit ddof=1; other ops get no extras | ||||
|         kwargs = {"ddof": 1} if op_name in ["var", "std"] else {} | ||||
|  | ||||
|         cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna) | ||||
|  | ||||
|         # The DataFrame method just calls arr._reduce with keepdims=True, | ||||
|         #  so this first check is perfunctory. | ||||
|         result1 = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs) | ||||
|         result2 = getattr(df, op_name)(skipna=skipna, **kwargs).array | ||||
|         tm.assert_extension_array_equal(result1, result2) | ||||
|  | ||||
|         # Check that the 2D reduction looks like a wrapped version of the | ||||
|         #  1D reduction | ||||
|         # With skipna=False and at least one missing value the expected | ||||
|         #  result is a single NA; otherwise it is the 1D reduction of the | ||||
|         #  non-missing values, in both cases as a length-1 array of cmp_dtype. | ||||
|         if not skipna and ser.isna().any(): | ||||
|             expected = pd.array([pd.NA], dtype=cmp_dtype) | ||||
|         else: | ||||
|             exp_value = getattr(ser.dropna(), op_name)() | ||||
|             expected = pd.array([exp_value], dtype=cmp_dtype) | ||||
|  | ||||
|         tm.assert_extension_array_equal(result1, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna): | ||||
|         # Boolean reductions on a Series: expect a TypeError when the | ||||
|         # subclass says the op is unsupported, otherwise compare results. | ||||
|         op_name = all_boolean_reductions | ||||
|         ser = pd.Series(data) | ||||
|  | ||||
|         if not self._supports_reduction(ser, op_name): | ||||
|             # TODO: the message being checked here isn't actually checking anything | ||||
|             # NOTE: the pattern ends with "|", i.e. its last alternative is | ||||
|             #  empty and therefore matches any message; only the TypeError | ||||
|             #  type is effectively enforced. | ||||
|             msg = ( | ||||
|                 "[Cc]annot perform|Categorical is not ordered for operation|" | ||||
|                 "does not support reduction|" | ||||
|             ) | ||||
|  | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 getattr(ser, op_name)(skipna=skipna) | ||||
|  | ||||
|         else: | ||||
|             self.check_reduce(ser, op_name, skipna) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore::RuntimeWarning") | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): | ||||
|         # Numeric reductions on a Series; same structure as the boolean test | ||||
|         # above, with RuntimeWarning ignored for the whole test. | ||||
|         op_name = all_numeric_reductions | ||||
|         ser = pd.Series(data) | ||||
|  | ||||
|         if not self._supports_reduction(ser, op_name): | ||||
|             # TODO: the message being checked here isn't actually checking anything | ||||
|             # NOTE: the trailing "|" adds an empty alternative that matches | ||||
|             #  any message; only the TypeError type is effectively enforced. | ||||
|             msg = ( | ||||
|                 "[Cc]annot perform|Categorical is not ordered for operation|" | ||||
|                 "does not support reduction|" | ||||
|             ) | ||||
|  | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 getattr(ser, op_name)(skipna=skipna) | ||||
|  | ||||
|         else: | ||||
|             # min/max with empty produce numpy warnings | ||||
|             self.check_reduce(ser, op_name, skipna) | ||||
|  | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     def test_reduce_frame(self, data, all_numeric_reductions, skipna): | ||||
|         # DataFrame-level reduction check. Skipped for non-numeric dtypes, | ||||
|         # for count/kurt/sem, and for ops the subclass does not support. | ||||
|         op_name = all_numeric_reductions | ||||
|         ser = pd.Series(data) | ||||
|         if not is_numeric_dtype(ser.dtype): | ||||
|             pytest.skip(f"{ser.dtype} is not numeric dtype") | ||||
|  | ||||
|         if op_name in ["count", "kurt", "sem"]: | ||||
|             pytest.skip(f"{op_name} not an array method") | ||||
|  | ||||
|         if not self._supports_reduction(ser, op_name): | ||||
|             pytest.skip(f"Reduction {op_name} not supported for this dtype") | ||||
|  | ||||
|         self.check_reduce_frame(ser, op_name, skipna) | ||||
|  | ||||
|  | ||||
| # TODO(3.0): remove BaseNoReduceTests, BaseNumericReduceTests, | ||||
| #  BaseBooleanReduceTests | ||||
| class BaseNoReduceTests(BaseReduceTests): | ||||
|     """ | ||||
|     We don't define any reductions. | ||||
|  | ||||
|     Nothing is overridden, so ``_supports_reduction`` keeps the inherited | ||||
|     default from ``BaseReduceTests`` (always ``False``). | ||||
|     """ | ||||
|  | ||||
|  | ||||
| class BaseNumericReduceTests(BaseReduceTests): | ||||
|     # For backward compatibility only, this only runs the numeric reductions | ||||
|     def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: | ||||
|         # "any"/"all" are skipped here; every other op is reported as | ||||
|         #  supported. | ||||
|         if op_name in ["any", "all"]: | ||||
|             pytest.skip("These are tested in BaseBooleanReduceTests") | ||||
|         return True | ||||
|  | ||||
|  | ||||
| class BaseBooleanReduceTests(BaseReduceTests): | ||||
|     # For backward compatibility only, this only runs the boolean reductions | ||||
|     def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: | ||||
|         # Only "any"/"all" are run here; every other op is skipped. | ||||
|         if op_name not in ["any", "all"]: | ||||
|             pytest.skip("These are tested in BaseNumericReduceTests") | ||||
|         return True | ||||
| @ -0,0 +1,379 @@ | ||||
| import itertools | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.api.extensions import ExtensionArray | ||||
| from pandas.core.internals.blocks import EABackedBlock | ||||
|  | ||||
|  | ||||
| class BaseReshapingTests: | ||||
|     """Tests for reshaping and concatenation.""" | ||||
|  | ||||
|     @pytest.mark.parametrize("in_frame", [True, False]) | ||||
|     def test_concat(self, data, in_frame): | ||||
|         # Concatenating an EA-backed Series (or one-column DataFrame) with | ||||
|         # itself must double the length and keep the extension dtype. | ||||
|         wrapped = pd.Series(data) | ||||
|         if in_frame: | ||||
|             wrapped = pd.DataFrame(wrapped) | ||||
|         result = pd.concat([wrapped, wrapped], ignore_index=True) | ||||
|  | ||||
|         assert len(result) == len(data) * 2 | ||||
|  | ||||
|         if in_frame: | ||||
|             dtype = result.dtypes[0] | ||||
|         else: | ||||
|             dtype = result.dtype | ||||
|  | ||||
|         assert dtype == data.dtype | ||||
|         # The block-type check only runs when the manager exposes ``blocks``; | ||||
|         # the array-type check is unconditional. | ||||
|         if hasattr(result._mgr, "blocks"): | ||||
|             assert isinstance(result._mgr.blocks[0], EABackedBlock) | ||||
|         assert isinstance(result._mgr.arrays[0], ExtensionArray) | ||||
|  | ||||
|     @pytest.mark.parametrize("in_frame", [True, False]) | ||||
|     def test_concat_all_na_block(self, data_missing, in_frame): | ||||
|         # The variable names assume position 1 of ``data_missing`` is valid | ||||
|         # and position 0 is missing -- confirm against the fixture definition. | ||||
|         valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) | ||||
|         na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3]) | ||||
|         if in_frame: | ||||
|             valid_block = pd.DataFrame({"a": valid_block}) | ||||
|             na_block = pd.DataFrame({"a": na_block}) | ||||
|         result = pd.concat([valid_block, na_block]) | ||||
|         # Expected: the same four elements taken in one go from the array. | ||||
|         if in_frame: | ||||
|             expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) | ||||
|             tm.assert_frame_equal(result, expected) | ||||
|         else: | ||||
|             expected = pd.Series(data_missing.take([1, 1, 0, 0])) | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_mixed_dtypes(self, data): | ||||
|         # https://github.com/pandas-dev/pandas/issues/20762 | ||||
|         # Concatenating the EA with other dtypes must give the same result as | ||||
|         # concatenating after casting every piece to object. | ||||
|         df1 = pd.DataFrame({"A": data[:3]}) | ||||
|         df2 = pd.DataFrame({"A": [1, 2, 3]}) | ||||
|         df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category") | ||||
|         dfs = [df1, df2, df3] | ||||
|  | ||||
|         # dataframes | ||||
|         result = pd.concat(dfs) | ||||
|         expected = pd.concat([x.astype(object) for x in dfs]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # series | ||||
|         result = pd.concat([x["A"] for x in dfs]) | ||||
|         expected = pd.concat([x["A"].astype(object) for x in dfs]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # simple test for just EA and one other | ||||
|         result = pd.concat([df1, df2.astype(object)]) | ||||
|         expected = pd.concat([df1.astype("object"), df2.astype("object")]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = pd.concat([df1["A"], df2["A"].astype(object)]) | ||||
|         expected = pd.concat([df1["A"].astype("object"), df2["A"].astype("object")]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_concat_columns(self, data, na_value): | ||||
|         # Column-wise (axis=1) concat of an EA column with an integer column, | ||||
|         # given both as DataFrames and as Series. | ||||
|         df1 = pd.DataFrame({"A": data[:3]}) | ||||
|         df2 = pd.DataFrame({"B": [1, 2, 3]}) | ||||
|  | ||||
|         expected = pd.DataFrame({"A": data[:3], "B": [1, 2, 3]}) | ||||
|         result = pd.concat([df1, df2], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         result = pd.concat([df1["A"], df2["B"]], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # non-aligned | ||||
|         # df2 is indexed 1..3, so the result has four rows: "A" is padded | ||||
|         # with ``na_value`` at the end and "B" gets NaN in the first row. | ||||
|         df2 = pd.DataFrame({"B": [1, 2, 3]}, index=[1, 2, 3]) | ||||
|         expected = pd.DataFrame( | ||||
|             { | ||||
|                 "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype), | ||||
|                 "B": [np.nan, 1, 2, 3], | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         result = pd.concat([df1, df2], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         result = pd.concat([df1["A"], df2["B"]], axis=1) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_extension_arrays_copy_false(self, data, na_value): | ||||
|         # GH 20756 | ||||
|         # axis=1 concat with copy=False of a 3-row and a 4-row EA frame; | ||||
|         # the shorter column "A" is expected to be padded with ``na_value``. | ||||
|         df1 = pd.DataFrame({"A": data[:3]}) | ||||
|         df2 = pd.DataFrame({"B": data[3:7]}) | ||||
|         expected = pd.DataFrame( | ||||
|             { | ||||
|                 "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype), | ||||
|                 "B": data[3:7], | ||||
|             } | ||||
|         ) | ||||
|         result = pd.concat([df1, df2], axis=1, copy=False) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_concat_with_reindex(self, data): | ||||
|         # GH-33027 | ||||
|         # Row-wise concat of two frames with different column names: each | ||||
|         # column holds its five values plus five filled entries. | ||||
|         a = pd.DataFrame({"a": data[:5]}) | ||||
|         b = pd.DataFrame({"b": data[:5]}) | ||||
|         result = pd.concat([a, b], ignore_index=True) | ||||
|         # The expected columns use ``take`` with allow_fill=True, where the | ||||
|         # -1 entries mark the positions to be filled. | ||||
|         expected = pd.DataFrame( | ||||
|             { | ||||
|                 "a": data.take(list(range(5)) + ([-1] * 5), allow_fill=True), | ||||
|                 "b": data.take(([-1] * 5) + list(range(5)), allow_fill=True), | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_align(self, data, na_value): | ||||
|         # Align two EA Series with indexes 0..2 and 1..3; each side is | ||||
|         # expected to gain one ``na_value`` entry. | ||||
|         a = data[:3] | ||||
|         b = data[2:5] | ||||
|         r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) | ||||
|  | ||||
|         # Assumes that the ctor can take a list of scalars of the type | ||||
|         e1 = pd.Series(data._from_sequence(list(a) + [na_value], dtype=data.dtype)) | ||||
|         e2 = pd.Series(data._from_sequence([na_value] + list(b), dtype=data.dtype)) | ||||
|         tm.assert_series_equal(r1, e1) | ||||
|         tm.assert_series_equal(r2, e2) | ||||
|  | ||||
|     def test_align_frame(self, data, na_value): | ||||
|         # DataFrame counterpart of test_align, using a single column "A". | ||||
|         a = data[:3] | ||||
|         b = data[2:5] | ||||
|         r1, r2 = pd.DataFrame({"A": a}).align(pd.DataFrame({"A": b}, index=[1, 2, 3])) | ||||
|  | ||||
|         # Assumes that the ctor can take a list of scalars of the type | ||||
|         e1 = pd.DataFrame( | ||||
|             {"A": data._from_sequence(list(a) + [na_value], dtype=data.dtype)} | ||||
|         ) | ||||
|         e2 = pd.DataFrame( | ||||
|             {"A": data._from_sequence([na_value] + list(b), dtype=data.dtype)} | ||||
|         ) | ||||
|         tm.assert_frame_equal(r1, e1) | ||||
|         tm.assert_frame_equal(r2, e2) | ||||
|  | ||||
|     def test_align_series_frame(self, data, na_value): | ||||
|         # https://github.com/pandas-dev/pandas/issues/20576 | ||||
|         # The frame has one more row than the Series, so the aligned Series | ||||
|         # is expected to end with ``na_value``; the frame comes back as-is. | ||||
|         ser = pd.Series(data, name="a") | ||||
|         df = pd.DataFrame({"col": np.arange(len(ser) + 1)}) | ||||
|         r1, r2 = ser.align(df) | ||||
|  | ||||
|         e1 = pd.Series( | ||||
|             data._from_sequence(list(data) + [na_value], dtype=data.dtype), | ||||
|             name=ser.name, | ||||
|         ) | ||||
|  | ||||
|         tm.assert_series_equal(r1, e1) | ||||
|         tm.assert_frame_equal(r2, df) | ||||
|  | ||||
|     def test_set_frame_expand_regular_with_extension(self, data): | ||||
|         # Adding an EA column to a frame that only has an integer column. | ||||
|         df = pd.DataFrame({"A": [1] * len(data)}) | ||||
|         df["B"] = data | ||||
|         expected = pd.DataFrame({"A": [1] * len(data), "B": data}) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_set_frame_expand_extension_with_regular(self, data): | ||||
|         # Adding an integer column to a frame that only has an EA column. | ||||
|         df = pd.DataFrame({"A": data}) | ||||
|         df["B"] = [1] * len(data) | ||||
|         expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_set_frame_overwrite_object(self, data): | ||||
|         # https://github.com/pandas-dev/pandas/issues/20555 | ||||
|         # Overwriting an object-dtype column with the EA must leave the | ||||
|         # column with the extension dtype. | ||||
|         df = pd.DataFrame({"A": [1] * len(data)}, dtype=object) | ||||
|         df["A"] = data | ||||
|         assert df.dtypes["A"] == data.dtype | ||||
|  | ||||
|     def test_merge(self, data, na_value): | ||||
|         # GH-20743 | ||||
|         # Merge on the shared "key" column while carrying an EA column. | ||||
|         df1 = pd.DataFrame({"ext": data[:3], "int1": [1, 2, 3], "key": [0, 1, 2]}) | ||||
|         df2 = pd.DataFrame({"int2": [1, 2, 3, 4], "key": [0, 0, 1, 3]}) | ||||
|  | ||||
|         # default ``how``; ``exp`` is built in a different column order and | ||||
|         # reordered in the assertion | ||||
|         res = pd.merge(df1, df2) | ||||
|         exp = pd.DataFrame( | ||||
|             { | ||||
|                 "int1": [1, 1, 2], | ||||
|                 "int2": [1, 2, 3], | ||||
|                 "key": [0, 0, 1], | ||||
|                 "ext": data._from_sequence( | ||||
|                     [data[0], data[0], data[1]], dtype=data.dtype | ||||
|                 ), | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]]) | ||||
|  | ||||
|         # outer merge: the row for key 3 has no df1 match, so "ext" is | ||||
|         # expected to hold ``na_value`` there | ||||
|         res = pd.merge(df1, df2, how="outer") | ||||
|         exp = pd.DataFrame( | ||||
|             { | ||||
|                 "int1": [1, 1, 2, 3, np.nan], | ||||
|                 "int2": [1, 2, 3, np.nan, 4], | ||||
|                 "key": [0, 0, 1, 2, 3], | ||||
|                 "ext": data._from_sequence( | ||||
|                     [data[0], data[0], data[1], data[2], na_value], dtype=data.dtype | ||||
|                 ), | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]]) | ||||
|  | ||||
|     def test_merge_on_extension_array(self, data): | ||||
|         # GH 23020 | ||||
|         # Self-merge using an EA column (the first two elements) as the key. | ||||
|         a, b = data[:2] | ||||
|         key = type(data)._from_sequence([a, b], dtype=data.dtype) | ||||
|  | ||||
|         df = pd.DataFrame({"key": key, "val": [1, 2]}) | ||||
|         result = pd.merge(df, df, on="key") | ||||
|         expected = pd.DataFrame({"key": key, "val_x": [1, 2], "val_y": [1, 2]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # order | ||||
|         # (the result is expected to follow the row order of the left frame) | ||||
|         result = pd.merge(df.iloc[[1, 0]], df, on="key") | ||||
|         expected = expected.iloc[[1, 0]].reset_index(drop=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_merge_on_extension_array_duplicates(self, data): | ||||
|         # GH 23020 | ||||
|         # Same as above but the key [a, b, a] contains a duplicate, so the | ||||
|         # expected result has five rows. | ||||
|         a, b = data[:2] | ||||
|         key = type(data)._from_sequence([a, b, a], dtype=data.dtype) | ||||
|         df1 = pd.DataFrame({"key": key, "val": [1, 2, 3]}) | ||||
|         df2 = pd.DataFrame({"key": key, "val": [1, 2, 3]}) | ||||
|  | ||||
|         result = pd.merge(df1, df2, on="key") | ||||
|         expected = pd.DataFrame( | ||||
|             { | ||||
|                 "key": key.take([0, 0, 1, 2, 2]), | ||||
|                 "val_x": [1, 1, 2, 3, 3], | ||||
|                 "val_y": [1, 3, 2, 1, 3], | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings( | ||||
|         "ignore:The previous implementation of stack is deprecated" | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "columns", | ||||
|         [ | ||||
|             ["A", "B"], | ||||
|             pd.MultiIndex.from_tuples( | ||||
|                 [("A", "a"), ("A", "b")], names=["outer", "inner"] | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("future_stack", [True, False]) | ||||
|     def test_stack(self, data, columns, future_stack): | ||||
|         # Stacking two EA columns must keep the extension dtype, and the | ||||
|         # values must match stacking the object-cast frame. | ||||
|         df = pd.DataFrame({"A": data[:5], "B": data[:5]}) | ||||
|         df.columns = columns | ||||
|         result = df.stack(future_stack=future_stack) | ||||
|         expected = df.astype(object).stack(future_stack=future_stack) | ||||
|         # we need a second astype(object), in case the constructor inferred | ||||
|         # object -> specialized, as is done for period. | ||||
|         expected = expected.astype(object) | ||||
|  | ||||
|         # stack returns a Series or a DataFrame depending on ``columns``; | ||||
|         # check the dtype accordingly | ||||
|         if isinstance(expected, pd.Series): | ||||
|             assert result.dtype == df.iloc[:, 0].dtype | ||||
|         else: | ||||
|             assert all(result.dtypes == df.iloc[:, 0].dtype) | ||||
|  | ||||
|         # values are compared after casting the result to object as well | ||||
|         result = result.astype(object) | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "index", | ||||
|         [ | ||||
|             # Two levels, uniform. | ||||
|             pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]), | ||||
|             # non-uniform | ||||
|             pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]), | ||||
|             # three levels, non-uniform | ||||
|             pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]), | ||||
|             pd.MultiIndex.from_tuples( | ||||
|                 [ | ||||
|                     ("A", "a", 1), | ||||
|                     ("A", "b", 0), | ||||
|                     ("A", "a", 0), | ||||
|                     ("B", "a", 0), | ||||
|                     ("B", "c", 1), | ||||
|                 ] | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("obj", ["series", "frame"]) | ||||
|     def test_unstack(self, data, index, obj): | ||||
|         # Unstack an EA-backed Series/DataFrame over every ordered selection | ||||
|         # of fewer than all index levels. | ||||
|         # Trim ``data`` so that it has exactly one element per index entry. | ||||
|         data = data[: len(index)] | ||||
|         if obj == "series": | ||||
|             ser = pd.Series(data, index=index) | ||||
|         else: | ||||
|             ser = pd.DataFrame({"A": data, "B": data}, index=index) | ||||
|  | ||||
|         n = index.nlevels | ||||
|         levels = list(range(n)) | ||||
|         # e.g. for a three-level index, ``levels`` and ``combinations`` are: | ||||
|         # [0, 1, 2] | ||||
|         # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] | ||||
|         combinations = itertools.chain.from_iterable( | ||||
|             itertools.permutations(levels, i) for i in range(1, n) | ||||
|         ) | ||||
|  | ||||
|         for level in combinations: | ||||
|             result = ser.unstack(level=level) | ||||
|             # every resulting column must still be backed by the EA type | ||||
|             assert all( | ||||
|                 isinstance(result[col].array, type(data)) for col in result.columns | ||||
|             ) | ||||
|  | ||||
|             if obj == "series": | ||||
|                 # We should get the same result with to_frame+unstack+droplevel | ||||
|                 df = ser.to_frame() | ||||
|  | ||||
|                 alt = df.unstack(level=level).droplevel(0, axis=1) | ||||
|                 tm.assert_frame_equal(result, alt) | ||||
|  | ||||
|             # Reference result: unstack the object-cast data, filling holes | ||||
|             # with the dtype's na_value, and compare after casting to object. | ||||
|             obj_ser = ser.astype(object) | ||||
|  | ||||
|             expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value) | ||||
|             if obj == "series": | ||||
|                 assert (expected.dtypes == object).all() | ||||
|  | ||||
|             result = result.astype(object) | ||||
|             tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_ravel(self, data): | ||||
|         # as long as EA is 1D-only, ravel is a no-op | ||||
|         result = data.ravel() | ||||
|         assert type(result) == type(data) | ||||
|  | ||||
|         # the view check below writes into the array, so it cannot run for | ||||
|         # immutable dtypes | ||||
|         if data.dtype._is_immutable: | ||||
|             pytest.skip(f"test_ravel assumes mutability and {data.dtype} is immutable") | ||||
|  | ||||
|         # Check that we have a view, not a copy | ||||
|         result[0] = result[1] | ||||
|         assert data[0] == data[1] | ||||
|  | ||||
|     def test_transpose(self, data): | ||||
|         # transpose must return a new object of the same type with the | ||||
|         # reversed shape that shares data with the original. | ||||
|         result = data.transpose() | ||||
|         assert type(result) == type(data) | ||||
|  | ||||
|         # check we get a new object | ||||
|         assert result is not data | ||||
|  | ||||
|         # If we ever _did_ support 2D, shape should be reversed | ||||
|         assert result.shape == data.shape[::-1] | ||||
|  | ||||
|         # the view check below writes into the array, so it cannot run for | ||||
|         # immutable dtypes | ||||
|         if data.dtype._is_immutable: | ||||
|             pytest.skip( | ||||
|                 f"test_transpose assumes mutability and {data.dtype} is immutable" | ||||
|             ) | ||||
|  | ||||
|         # Check that we have a view, not a copy | ||||
|         result[0] = result[1] | ||||
|         assert data[0] == data[1] | ||||
|  | ||||
|     def test_transpose_frame(self, data): | ||||
|         # Transposing a 4x2 frame of identical EA columns: each original row | ||||
|         # label becomes a column holding that row's element twice. | ||||
|         df = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=["a", "b", "c", "d"]) | ||||
|         result = df.T | ||||
|         expected = pd.DataFrame( | ||||
|             { | ||||
|                 "a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype), | ||||
|                 "b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype), | ||||
|                 "c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype), | ||||
|                 "d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype), | ||||
|             }, | ||||
|             index=["A", "B"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|         # transposing twice via np.transpose must round-trip, for both the | ||||
|         # two-column and the single-column frame | ||||
|         tm.assert_frame_equal(np.transpose(np.transpose(df)), df) | ||||
|         tm.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]]) | ||||
| @ -0,0 +1,451 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class BaseSetitemTests: | ||||
|     @pytest.fixture( | ||||
|         params=[ | ||||
|             lambda x: x.index, | ||||
|             lambda x: list(x.index), | ||||
|             lambda x: slice(None), | ||||
|             lambda x: slice(0, len(x)), | ||||
|             lambda x: range(len(x)), | ||||
|             lambda x: list(range(len(x))), | ||||
|             lambda x: np.ones(len(x), dtype=bool), | ||||
|         ], | ||||
|         ids=[ | ||||
|             "index", | ||||
|             "list[index]", | ||||
|             "null_slice", | ||||
|             "full_slice", | ||||
|             "range", | ||||
|             "list(range)", | ||||
|             "mask", | ||||
|         ], | ||||
|     ) | ||||
|     def full_indexer(self, request): | ||||
|         """ | ||||
|         Fixture for an indexer to pass to obj.loc to get/set the full length of the | ||||
|         object. | ||||
|  | ||||
|         The fixture value is a callable: it takes the object and returns the | ||||
|         indexer (its index, a list, a slice, a range or an all-True mask). | ||||
|  | ||||
|         In some cases, assumes that obj.index is the default RangeIndex. | ||||
|         """ | ||||
|         return request.param | ||||
|  | ||||
|     @pytest.fixture(autouse=True) | ||||
|     def skip_if_immutable(self, dtype, request): | ||||
|         # Auto-used fixture: for immutable dtypes, skip the tests defined on | ||||
|         # BaseSetitemTests, except test_is_immutable. | ||||
|         if dtype._is_immutable: | ||||
|             node = request.node | ||||
|             # strip the "[param-id]" suffix of parametrized test names | ||||
|             if node.name.split("[")[0] == "test_is_immutable": | ||||
|                 # This fixture is auto-used, but we want to not-skip | ||||
|                 # test_is_immutable. | ||||
|                 return | ||||
|  | ||||
|             # When BaseSetitemTests is mixed into ExtensionTests, we only | ||||
|             #  want this fixture to operate on the tests defined in this | ||||
|             #  class/file. | ||||
|             # The first component of the function's __qualname__ is the | ||||
|             #  class the test function was defined on. | ||||
|             defined_in = node.function.__qualname__.split(".")[0] | ||||
|             if defined_in == "BaseSetitemTests": | ||||
|                 pytest.skip("__setitem__ test not applicable with immutable dtype") | ||||
|  | ||||
|     def test_is_immutable(self, data): | ||||
|         if data.dtype._is_immutable: | ||||
|             with pytest.raises(TypeError): | ||||
|                 data[0] = data[0] | ||||
|         else: | ||||
|             data[0] = data[1] | ||||
|             assert data[0] == data[1] | ||||
|  | ||||
|     def test_setitem_scalar_series(self, data, box_in_series): | ||||
|         if box_in_series: | ||||
|             data = pd.Series(data) | ||||
|         data[0] = data[1] | ||||
|         assert data[0] == data[1] | ||||
|  | ||||
|     def test_setitem_sequence(self, data, box_in_series): | ||||
|         if box_in_series: | ||||
|             data = pd.Series(data) | ||||
|         original = data.copy() | ||||
|  | ||||
|         data[[0, 1]] = [data[1], data[0]] | ||||
|         assert data[0] == original[1] | ||||
|         assert data[1] == original[0] | ||||
|  | ||||
|     def test_setitem_sequence_mismatched_length_raises(self, data, as_array): | ||||
|         ser = pd.Series(data) | ||||
|         original = ser.copy() | ||||
|         value = [data[0]] | ||||
|         if as_array: | ||||
|             value = data._from_sequence(value, dtype=data.dtype) | ||||
|  | ||||
|         xpr = "cannot set using a {} indexer with a different length" | ||||
|         with pytest.raises(ValueError, match=xpr.format("list-like")): | ||||
|             ser[[0, 1]] = value | ||||
|         # Ensure no modifications made before the exception | ||||
|         tm.assert_series_equal(ser, original) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=xpr.format("slice")): | ||||
|             ser[slice(3)] = value | ||||
|         tm.assert_series_equal(ser, original) | ||||
|  | ||||
|     def test_setitem_empty_indexer(self, data, box_in_series): | ||||
|         if box_in_series: | ||||
|             data = pd.Series(data) | ||||
|         original = data.copy() | ||||
|         data[np.array([], dtype=int)] = [] | ||||
|         tm.assert_equal(data, original) | ||||
|  | ||||
|     def test_setitem_sequence_broadcasts(self, data, box_in_series): | ||||
|         if box_in_series: | ||||
|             data = pd.Series(data) | ||||
|         data[[0, 1]] = data[2] | ||||
|         assert data[0] == data[2] | ||||
|         assert data[1] == data[2] | ||||
|  | ||||
|     @pytest.mark.parametrize("setter", ["loc", "iloc"]) | ||||
|     def test_setitem_scalar(self, data, setter): | ||||
|         arr = pd.Series(data) | ||||
|         setter = getattr(arr, setter) | ||||
|         setter[0] = data[1] | ||||
|         assert arr[0] == data[1] | ||||
|  | ||||
|     def test_setitem_loc_scalar_mixed(self, data): | ||||
|         df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) | ||||
|         df.loc[0, "B"] = data[1] | ||||
|         assert df.loc[0, "B"] == data[1] | ||||
|  | ||||
|     def test_setitem_loc_scalar_single(self, data): | ||||
|         df = pd.DataFrame({"B": data}) | ||||
|         df.loc[10, "B"] = data[1] | ||||
|         assert df.loc[10, "B"] == data[1] | ||||
|  | ||||
|     def test_setitem_loc_scalar_multiple_homogoneous(self, data): | ||||
|         df = pd.DataFrame({"A": data, "B": data}) | ||||
|         df.loc[10, "B"] = data[1] | ||||
|         assert df.loc[10, "B"] == data[1] | ||||
|  | ||||
|     def test_setitem_iloc_scalar_mixed(self, data): | ||||
|         df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) | ||||
|         df.iloc[0, 1] = data[1] | ||||
|         assert df.loc[0, "B"] == data[1] | ||||
|  | ||||
|     def test_setitem_iloc_scalar_single(self, data): | ||||
|         df = pd.DataFrame({"B": data}) | ||||
|         df.iloc[10, 0] = data[1] | ||||
|         assert df.loc[10, "B"] == data[1] | ||||
|  | ||||
|     def test_setitem_iloc_scalar_multiple_homogoneous(self, data): | ||||
|         df = pd.DataFrame({"A": data, "B": data}) | ||||
|         df.iloc[10, 1] = data[1] | ||||
|         assert df.loc[10, "B"] == data[1] | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "mask", | ||||
|         [ | ||||
|             np.array([True, True, True, False, False]), | ||||
|             pd.array([True, True, True, False, False], dtype="boolean"), | ||||
|             pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), | ||||
|         ], | ||||
|         ids=["numpy-array", "boolean-array", "boolean-array-na"], | ||||
|     ) | ||||
|     def test_setitem_mask(self, data, mask, box_in_series): | ||||
|         arr = data[:5].copy() | ||||
|         expected = arr.take([0, 0, 0, 3, 4]) | ||||
|         if box_in_series: | ||||
|             arr = pd.Series(arr) | ||||
|             expected = pd.Series(expected) | ||||
|         arr[mask] = data[0] | ||||
|         tm.assert_equal(expected, arr) | ||||
|  | ||||
|     def test_setitem_mask_raises(self, data, box_in_series): | ||||
|         # wrong length | ||||
|         mask = np.array([True, False]) | ||||
|  | ||||
|         if box_in_series: | ||||
|             data = pd.Series(data) | ||||
|  | ||||
|         with pytest.raises(IndexError, match="wrong length"): | ||||
|             data[mask] = data[0] | ||||
|  | ||||
|         mask = pd.array(mask, dtype="boolean") | ||||
|         with pytest.raises(IndexError, match="wrong length"): | ||||
|             data[mask] = data[0] | ||||
|  | ||||
|     def test_setitem_mask_boolean_array_with_na(self, data, box_in_series): | ||||
|         mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") | ||||
|         mask[:3] = True | ||||
|         mask[3:5] = pd.NA | ||||
|  | ||||
|         if box_in_series: | ||||
|             data = pd.Series(data) | ||||
|  | ||||
|         data[mask] = data[0] | ||||
|  | ||||
|         assert (data[:3] == data[0]).all() | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx", | ||||
|         [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], | ||||
|         ids=["list", "integer-array", "numpy-array"], | ||||
|     ) | ||||
|     def test_setitem_integer_array(self, data, idx, box_in_series): | ||||
|         arr = data[:5].copy() | ||||
|         expected = data.take([0, 0, 0, 3, 4]) | ||||
|  | ||||
|         if box_in_series: | ||||
|             arr = pd.Series(arr) | ||||
|             expected = pd.Series(expected) | ||||
|  | ||||
|         arr[idx] = arr[0] | ||||
|         tm.assert_equal(arr, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx, box_in_series", | ||||
|         [ | ||||
|             ([0, 1, 2, pd.NA], False), | ||||
|             pytest.param( | ||||
|                 [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948") | ||||
|             ), | ||||
|             (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), | ||||
|             # NOTE(review): this case is labelled "integer-array-True" in ``ids`` | ||||
|             #  but passes box_in_series=False, so it repeats the case above and | ||||
|             #  the boxed integer-array path is never run -- confirm intent. | ||||
|             (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), | ||||
|         ], | ||||
|         ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], | ||||
|     ) | ||||
|     def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): | ||||
|         """An integer indexer that contains NA raises ValueError on setitem.""" | ||||
|         arr = data.copy() | ||||
|  | ||||
|         # TODO(xfail) this raises KeyError about labels not found (it tries label-based) | ||||
|         # for list of labels with Series | ||||
|         if box_in_series: | ||||
|             # Single-character labels starting at chr(100), i.e. a non-integer index. | ||||
|             arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))]) | ||||
|  | ||||
|         msg = "Cannot index with an integer indexer containing NA values" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr[idx] = arr[0] | ||||
|  | ||||
|     @pytest.mark.parametrize("as_callable", [True, False]) | ||||
|     @pytest.mark.parametrize("setter", ["loc", None]) | ||||
|     def test_setitem_mask_aligned(self, data, as_callable, setter): | ||||
|         ser = pd.Series(data) | ||||
|         mask = np.zeros(len(data), dtype=bool) | ||||
|         mask[:2] = True | ||||
|  | ||||
|         if as_callable: | ||||
|             mask2 = lambda x: mask | ||||
|         else: | ||||
|             mask2 = mask | ||||
|  | ||||
|         if setter: | ||||
|             # loc | ||||
|             target = getattr(ser, setter) | ||||
|         else: | ||||
|             # Series.__setitem__ | ||||
|             target = ser | ||||
|  | ||||
|         target[mask2] = data[5:7] | ||||
|  | ||||
|         ser[mask2] = data[5:7] | ||||
|         assert ser[0] == data[5] | ||||
|         assert ser[1] == data[6] | ||||
|  | ||||
|     @pytest.mark.parametrize("setter", ["loc", None]) | ||||
|     def test_setitem_mask_broadcast(self, data, setter): | ||||
|         ser = pd.Series(data) | ||||
|         mask = np.zeros(len(data), dtype=bool) | ||||
|         mask[:2] = True | ||||
|  | ||||
|         if setter:  # loc | ||||
|             target = getattr(ser, setter) | ||||
|         else:  # __setitem__ | ||||
|             target = ser | ||||
|  | ||||
|         target[mask] = data[10] | ||||
|         assert ser[0] == data[10] | ||||
|         assert ser[1] == data[10] | ||||
|  | ||||
|     def test_setitem_expand_columns(self, data): | ||||
|         df = pd.DataFrame({"A": data}) | ||||
|         result = df.copy() | ||||
|         result["B"] = 1 | ||||
|         expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.copy() | ||||
|         result.loc[:, "B"] = 1 | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # overwrite with new type | ||||
|         result["B"] = data | ||||
|         expected = pd.DataFrame({"A": data, "B": data}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_setitem_expand_with_extension(self, data): | ||||
|         df = pd.DataFrame({"A": [1] * len(data)}) | ||||
|         result = df.copy() | ||||
|         result["B"] = data | ||||
|         expected = pd.DataFrame({"A": [1] * len(data), "B": data}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = df.copy() | ||||
|         result.loc[:, "B"] = data | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_setitem_frame_invalid_length(self, data): | ||||
|         df = pd.DataFrame({"A": [1] * len(data)}) | ||||
|         xpr = ( | ||||
|             rf"Length of values \({len(data[:5])}\) " | ||||
|             rf"does not match length of index \({len(df)}\)" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=xpr): | ||||
|             df["B"] = data[:5] | ||||
|  | ||||
|     def test_setitem_tuple_index(self, data): | ||||
|         ser = pd.Series(data[:2], index=[(0, 0), (0, 1)]) | ||||
|         expected = pd.Series(data.take([1, 1]), index=ser.index) | ||||
|         ser[(0, 0)] = data[1] | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     def test_setitem_slice(self, data, box_in_series): | ||||
|         arr = data[:5].copy() | ||||
|         expected = data.take([0, 0, 0, 3, 4]) | ||||
|         if box_in_series: | ||||
|             arr = pd.Series(arr) | ||||
|             expected = pd.Series(expected) | ||||
|  | ||||
|         arr[:3] = data[0] | ||||
|         tm.assert_equal(arr, expected) | ||||
|  | ||||
|     def test_setitem_loc_iloc_slice(self, data): | ||||
|         arr = data[:5].copy() | ||||
|         s = pd.Series(arr, index=["a", "b", "c", "d", "e"]) | ||||
|         expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index) | ||||
|  | ||||
|         result = s.copy() | ||||
|         result.iloc[:3] = data[0] | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|         result = s.copy() | ||||
|         result.loc[:"c"] = data[0] | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     def test_setitem_slice_mismatch_length_raises(self, data): | ||||
|         arr = data[:5] | ||||
|         with pytest.raises(ValueError): | ||||
|             arr[:1] = arr[:2] | ||||
|  | ||||
|     def test_setitem_slice_array(self, data): | ||||
|         arr = data[:5].copy() | ||||
|         arr[:5] = data[-5:] | ||||
|         tm.assert_extension_array_equal(arr, data[-5:]) | ||||
|  | ||||
|     def test_setitem_scalar_key_sequence_raise(self, data): | ||||
|         arr = data[:5].copy() | ||||
|         with pytest.raises(ValueError): | ||||
|             arr[0] = arr[[0, 1]] | ||||
|  | ||||
|     def test_setitem_preserves_views(self, data): | ||||
|         # GH#28150 setitem shouldn't swap the underlying data | ||||
|         view1 = data.view() | ||||
|         view2 = data[:] | ||||
|  | ||||
|         data[0] = data[1] | ||||
|         assert view1[0] == data[1] | ||||
|         assert view2[0] == data[1] | ||||
|  | ||||
|     def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): | ||||
|         # https://github.com/pandas-dev/pandas/issues/32395 | ||||
|         df = expected = pd.DataFrame({0: pd.Series(data)}) | ||||
|         result = pd.DataFrame(index=df.index) | ||||
|  | ||||
|         key = full_indexer(df) | ||||
|         result.loc[key, 0] = df[0] | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_setitem_with_expansion_row(self, data, na_value): | ||||
|         df = pd.DataFrame({"data": data[:1]}) | ||||
|  | ||||
|         df.loc[1, "data"] = data[1] | ||||
|         expected = pd.DataFrame({"data": data[:2]}) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         # https://github.com/pandas-dev/pandas/issues/47284 | ||||
|         df.loc[2, "data"] = na_value | ||||
|         expected = pd.DataFrame( | ||||
|             {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)} | ||||
|         ) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     def test_setitem_series(self, data, full_indexer): | ||||
|         # https://github.com/pandas-dev/pandas/issues/32395 | ||||
|         ser = pd.Series(data, name="data") | ||||
|         result = pd.Series(index=ser.index, dtype=object, name="data") | ||||
|  | ||||
|         # because result has object dtype, the attempt to do setting inplace | ||||
|         #  is successful, and object dtype is retained | ||||
|         key = full_indexer(ser) | ||||
|         result.loc[key] = ser | ||||
|  | ||||
|         expected = pd.Series( | ||||
|             data.astype(object), index=ser.index, name="data", dtype=object | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_setitem_frame_2d_values(self, data): | ||||
|         """ | ||||
|         Assign a frame's own contents back to it through ``iloc``. | ||||
|  | ||||
|         The value is given as a DataFrame copy and as ``df.values``, for the | ||||
|         whole frame and for all rows but the last; the frame must stay equal | ||||
|         to the copy taken before any assignment. | ||||
|         """ | ||||
|         # GH#44514 | ||||
|         df = pd.DataFrame({"A": data}) | ||||
|  | ||||
|         # Avoiding using_array_manager fixture | ||||
|         #  https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410 | ||||
|         using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager) | ||||
|         using_copy_on_write = pd.options.mode.copy_on_write | ||||
|  | ||||
|         # Keep a reference to the first manager array for the identity check below. | ||||
|         blk_data = df._mgr.arrays[0] | ||||
|  | ||||
|         orig = df.copy() | ||||
|  | ||||
|         df.iloc[:] = df.copy() | ||||
|         tm.assert_frame_equal(df, orig) | ||||
|  | ||||
|         df.iloc[:-1] = df.iloc[:-1].copy() | ||||
|         tm.assert_frame_equal(df, orig) | ||||
|  | ||||
|         df.iloc[:] = df.values | ||||
|         tm.assert_frame_equal(df, orig) | ||||
|         if not using_array_manager and not using_copy_on_write: | ||||
|             # GH#33457 Check that this setting occurred in-place | ||||
|             # FIXME(ArrayManager): this should work there too | ||||
|             assert df._mgr.arrays[0] is blk_data | ||||
|  | ||||
|         df.iloc[:-1] = df.values[:-1] | ||||
|         tm.assert_frame_equal(df, orig) | ||||
|  | ||||
|     def test_delitem_series(self, data): | ||||
|         # GH#40763 | ||||
|         ser = pd.Series(data, name="data") | ||||
|  | ||||
|         taker = np.arange(len(ser)) | ||||
|         taker = np.delete(taker, 1) | ||||
|  | ||||
|         expected = ser[taker] | ||||
|         del ser[1] | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     def test_setitem_invalid(self, data, invalid_scalar): | ||||
|         msg = ""  # messages vary by subclass, so we do not test it | ||||
|         with pytest.raises((ValueError, TypeError), match=msg): | ||||
|             data[0] = invalid_scalar | ||||
|  | ||||
|         with pytest.raises((ValueError, TypeError), match=msg): | ||||
|             data[:] = invalid_scalar | ||||
|  | ||||
|     def test_setitem_2d_values(self, data): | ||||
|         # GH50085 | ||||
|         original = data.copy() | ||||
|         df = pd.DataFrame({"a": data, "b": data}) | ||||
|         df.loc[[0, 1], :] = df.loc[[1, 0], :].values | ||||
|         assert (df.loc[0, :] == original[1]).all() | ||||
|         assert (df.loc[1, :] == original[0]).all() | ||||
		Reference in New Issue
	
	Block a user