done
This commit is contained in:
		| @ -0,0 +1,248 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from typing import Any | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Parametrization data for the ``data`` fixture: one masked array per nullable
# dtype, paired position-by-position with a scalar operand of a matching kind.
# Every scalar list is sized from the dtype list it accompanies, so the two
# sequences stay aligned if a dtype list ever grows or shrinks.

# integer dtypes
arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES]
scalars: list[Any] = [2] * len(arrays)
# floating dtypes
arrays += [pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES]
scalars += [0.2] * len(tm.FLOAT_EA_DTYPES)
# boolean
arrays += [pd.array([True, False, True, None], dtype="boolean")]
scalars += [False]
|  | ||||
|  | ||||
@pytest.fixture(
    params=zip(arrays, scalars),
    ids=[arr.dtype.name for arr in arrays],
)
def data(request):
    """
    Provide one ``(array, scalar)`` pair per parametrized dtype.

    The pair feeds the tests comparing scalar and numpy-array operands with
    array operands, and those comparing DataFrame and Series ops.
    """
    return request.param
|  | ||||
|  | ||||
def check_skip(data, op_name):
    """Skip the running test when a boolean array meets a subtraction operator."""
    if not isinstance(data.dtype, pd.BooleanDtype):
        return
    if "sub" in op_name:
        pytest.skip("subtract not implemented for boolean")
|  | ||||
|  | ||||
def is_bool_not_implemented(data, op_name):
    """
    Tell whether ``op_name`` is pow / truediv / floordiv (plain or reflected)
    applied to an array whose dtype kind is boolean.
    """
    # match non-masked behavior
    if data.dtype.kind != "b":
        return False
    # drop the dunder underscores, then the leading "r" of reflected names
    bare_name = op_name.strip("_").lstrip("r")
    return bare_name in ["pow", "truediv", "floordiv"]
|  | ||||
|  | ||||
| # Test equivalence of scalars, numpy arrays with array ops | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_array_scalar_like_equivalence(data, all_arithmetic_operators):
    """
    Operating with a scalar must match operating with a same-length masked
    array filled with that scalar.
    """
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    filled = pd.array([scalar] * len(arr), dtype=arr.dtype)

    # both the plain Python scalar and the dtype's own scalar type are tried
    # TODO also add len-1 array (np.array([scalar], dtype=data.dtype.numpy_dtype))
    candidates = [scalar, arr.dtype.type(scalar)]

    for candidate in candidates:
        if not is_bool_not_implemented(arr, op_name):
            result = op(arr, candidate)
            expected = op(arr, filled)
            tm.assert_extension_array_equal(result, expected)
            continue

        msg = "operator '.*' not implemented for bool dtypes"
        with pytest.raises(NotImplementedError, match=msg):
            op(arr, candidate)
        with pytest.raises(NotImplementedError, match=msg):
            op(arr, filled)
|  | ||||
|  | ||||
def test_array_NA(data, all_arithmetic_operators):
    """
    ``pd.NA`` as operand must give the same result as an all-NA masked array,
    and no operation may modify the mask of the input array.
    """
    arr, _ = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    all_na = pd.array([pd.NA] * len(arr), dtype=arr.dtype)

    # GH#45421 snapshot used to check ops don't alter data._mask inplace
    mask_before = arr._mask.copy()

    if is_bool_not_implemented(arr, op_name):
        msg = "operator '.*' not implemented for bool dtypes"
        with pytest.raises(NotImplementedError, match=msg):
            op(arr, pd.NA)
        tm.assert_numpy_array_equal(mask_before, arr._mask)
    else:
        result = op(arr, pd.NA)
        tm.assert_numpy_array_equal(mask_before, arr._mask)

        expected = op(arr, all_na)
        tm.assert_numpy_array_equal(mask_before, arr._mask)

        tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_numpy_array_equivalence(data, all_arithmetic_operators):
    """A numpy-array operand must give the same result as its masked-array twin."""
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    np_other = np.array([scalar] * len(arr), dtype=arr.dtype.numpy_dtype)
    ea_other = pd.array(np_other, dtype=arr.dtype)

    if not is_bool_not_implemented(arr, op_name):
        result = op(arr, np_other)
        expected = op(arr, ea_other)
        tm.assert_extension_array_equal(result, expected)
        return

    msg = "operator '.*' not implemented for bool dtypes"
    for other in (np_other, ea_other):
        with pytest.raises(NotImplementedError, match=msg):
            op(arr, other)
|  | ||||
|  | ||||
| # Test equivalence with Series and DataFrame ops | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_frame(data, all_arithmetic_operators):
    """
    A DataFrame-with-scalar op must equal a frame built from the
    array-with-scalar op.
    """
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    # DataFrame with scalar
    frame = pd.DataFrame({"A": arr})

    if not is_bool_not_implemented(arr, op_name):
        result = op(frame, scalar)
        expected = pd.DataFrame({"A": op(arr, scalar)})
        tm.assert_frame_equal(result, expected)
        return

    # frame and bare array must both refuse the operation
    msg = "operator '.*' not implemented for bool dtypes"
    for left in (frame, arr):
        with pytest.raises(NotImplementedError, match=msg):
            op(left, scalar)
|  | ||||
|  | ||||
def test_series(data, all_arithmetic_operators):
    """
    A Series op must equal a Series wrapping the array op, for a scalar,
    numpy-array, masked-array and Series operand alike.
    """
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    ser = pd.Series(arr)

    repeated = [scalar] * len(arr)
    others = [
        scalar,
        np.array(repeated, dtype=arr.dtype.numpy_dtype),
        pd.array(repeated, dtype=arr.dtype),
        pd.Series(repeated, dtype=arr.dtype),
    ]

    expect_error = is_bool_not_implemented(arr, op_name)
    for other in others:
        if expect_error:
            msg = "operator '.*' not implemented for bool dtypes"
            with pytest.raises(NotImplementedError, match=msg):
                op(ser, other)
            continue

        result = op(ser, other)
        expected = pd.Series(op(arr, other))
        tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
| # Test generic characteristics / errors | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_error_invalid_object(data, all_arithmetic_operators):
    """Check how the operator dunder reacts to two-dimensional operands."""
    arr, _ = data
    bound_op = getattr(arr, all_arithmetic_operators)

    # 2d -> return NotImplemented
    frame = pd.DataFrame({"A": arr})
    assert bound_op(frame) is NotImplemented

    # a two-dimensional numpy array raises instead
    two_dim = np.arange(len(arr)).reshape(-1, len(arr))
    msg = r"can only perform ops with 1-d structures"
    with pytest.raises(NotImplementedError, match=msg):
        bound_op(two_dim)
|  | ||||
|  | ||||
def test_error_len_mismatch(data, all_arithmetic_operators):
    """
    Operating with a list-like that is one element shorter than the array
    must raise, for the bare array and for a Series wrapping it.
    """
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)

    short_list = [scalar] * (len(arr) - 1)

    # pick the exception type and message expected for this dtype / operator
    if arr.dtype.kind == "b" and op_name.strip("_") in ["sub", "rsub"]:
        err = TypeError
        msg = (
            r"numpy boolean subtract, the `\-` operator, is not supported, use "
            r"the bitwise_xor, the `\^` operator, or the logical_xor function instead"
        )
    elif is_bool_not_implemented(arr, op_name):
        err = NotImplementedError
        msg = "operator '.*' not implemented for bool dtypes"
    else:
        err = ValueError
        msg = "|".join(
            [
                r"operands could not be broadcast together with shapes \(3,\) \(4,\)",
                r"operands could not be broadcast together with shapes \(4,\) \(3,\)",
            ]
        )

    # the operand is tried as a plain list and as a numpy array
    for other in (short_list, np.array(short_list)):
        with pytest.raises(err, match=msg):
            op(arr, other)

        ser = pd.Series(arr)
        with pytest.raises(err, match=msg):
            op(ser, other)
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["__neg__", "__abs__", "__invert__"])
def test_unary_op_does_not_propagate_mask(data, op):
    """
    Assigning into the source Series after a unary op must leave the result
    of that op unchanged.
    """
    # https://github.com/pandas-dev/pandas/issues/39943
    arr, _ = data
    ser = pd.Series(arr)

    if op == "__invert__" and arr.dtype.kind == "f":
        # we follow numpy in raising
        msg = "ufunc 'invert' not supported for the input types"
        # Series, masked array, then the raw numpy values; the last one
        # checks that this is still the numpy behavior
        for obj in (ser, arr, arr._data):
            with pytest.raises(TypeError, match=msg):
                getattr(obj, op)()
        return

    result = getattr(ser, op)()
    # deep copy taken before the source Series is modified
    expected = result.copy(deep=True)
    ser[0] = None
    tm.assert_series_equal(result, expected)
| @ -0,0 +1,210 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
| pytestmark = pytest.mark.filterwarnings( | ||||
|     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" | ||||
| ) | ||||
|  | ||||
|  | ||||
| pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
| from pandas.core.arrays.arrow._arrow_utils import pyarrow_array_to_numpy_and_mask | ||||
|  | ||||
# One masked array per dtype: the integer dtypes, the floating dtypes, boolean.
arrays = [
    *(pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES),
    *(pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES),
    pd.array([True, False, True, None], dtype="boolean"),
]
|  | ||||
|  | ||||
@pytest.fixture(
    params=arrays,
    ids=[arr.dtype.name for arr in arrays],
)
def data(request):
    """
    Provide one parametrized array per dtype: integer, float and boolean.
    """
    return request.param
|  | ||||
|  | ||||
def test_arrow_array(data):
    """
    ``pa.array`` on the masked array must equal an arrow array built from its
    object-dtype values with the type derived from the numpy dtype.
    """
    converted = pa.array(data)

    as_objects = data.to_numpy(object, na_value=None)
    arrow_type = pa.from_numpy_dtype(data.dtype.numpy_dtype)
    expected = pa.array(as_objects, type=arrow_type)

    assert converted.equals(expected)
|  | ||||
|  | ||||
def test_arrow_roundtrip(data):
    """A frame converted to an arrow table and back must come out unchanged."""
    original = pd.DataFrame({"a": data})
    table = pa.table(original)
    # the arrow column type is compared against the numpy dtype's name
    assert table.field("a").type == str(data.dtype.numpy_dtype)

    restored = table.to_pandas()
    assert restored["a"].dtype == data.dtype
    tm.assert_frame_equal(restored, original)
|  | ||||
|  | ||||
def test_dataframe_from_arrow_types_mapper():
    """``to_pandas`` must apply the dtypes chosen by a ``types_mapper`` callable."""

    def types_mapper(arrow_type):
        # boolean -> BooleanDtype, any integer -> Int64Dtype, otherwise None
        if pa.types.is_boolean(arrow_type):
            return pd.BooleanDtype()
        if pa.types.is_integer(arrow_type):
            return pd.Int64Dtype()
        return None

    arrow_columns = {
        "bools": pa.array([True, None, False], type=pa.bool_()),
        "ints": pa.array([1, None, 2], type=pa.int64()),
        "small_ints": pa.array([-1, 0, 7], type=pa.int8()),
    }
    record_batch = pa.RecordBatch.from_arrays(
        list(arrow_columns.values()), list(arrow_columns)
    )
    result = record_batch.to_pandas(types_mapper=types_mapper)

    # the int8 column is expected as Int64 as well
    expected = pd.DataFrame(
        {
            "bools": pd.Series([True, None, False], dtype="boolean"),
            "ints": pd.Series([1, None, 2], dtype="Int64"),
            "small_ints": pd.Series([-1, 0, 7], dtype="Int64"),
        }
    )
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_arrow_load_from_zero_chunks(data):
    """A table whose column holds zero chunks must convert to an empty frame."""
    # GH-41040

    empty_df = pd.DataFrame({"a": data[0:0]})
    template = pa.table(empty_df)
    column_type = template.field("a").type
    assert column_type == str(data.dtype.numpy_dtype)

    # same schema, but the column is a chunked array built from no chunks
    no_chunks = pa.chunked_array([], type=column_type)
    table = pa.table([no_chunks], schema=template.schema)

    result = table.to_pandas()
    assert result["a"].dtype == data.dtype
    tm.assert_frame_equal(result, empty_df)
|  | ||||
|  | ||||
def test_arrow_from_arrow_uint():
    """``UInt32Dtype.__from_arrow__`` must accept an int64 arrow array."""
    # https://github.com/pandas-dev/pandas/issues/31896
    # possible mismatch in types

    values = [1, 2, 3, 4, None]
    int64_arrow = pa.array(values, type="int64")

    result = pd.UInt32Dtype().__from_arrow__(int64_arrow)
    expected = pd.array(values, dtype="UInt32")

    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_arrow_sliced(data):
    """A sliced arrow table must convert to the matching tail of the frame."""
    # https://github.com/pandas-dev/pandas/issues/38525

    def check_tail(frame):
        # slice from row 2 on the arrow side, compare with the pandas tail
        result = pa.table(frame).slice(2, None).to_pandas()
        expected = frame.iloc[2:].reset_index(drop=True)
        tm.assert_frame_equal(result, expected)

    df = pd.DataFrame({"a": data})
    check_tail(df)

    # no missing values
    check_tail(df.fillna(data[0]))
|  | ||||
|  | ||||
@pytest.fixture
def np_dtype_to_arrays(any_real_numpy_dtype):
    """
    Build the inputs and expectations for one real numpy dtype.

    Returns the tuple ``(np_dtype, pa_array, np_expected, mask_expected)``.
    """
    np_dtype = np.dtype(any_real_numpy_dtype)

    # None ensures the creation of a bitmask buffer.
    pa_array = pa.array([0, 1, 2, None], type=pa.from_numpy_dtype(np_dtype))

    # Since masked Arrow buffer slots are not required to contain a specific
    # value, only the first three values of the created np.array are expected
    np_expected = np.array([0, 1, 2], dtype=np_dtype)
    mask_expected = np.array([True, True, True, False])

    return np_dtype, pa_array, np_expected, mask_expected
|  | ||||
|  | ||||
def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
    """
    Test conversion from pyarrow array to numpy array.

    The pyarrow buffers are rebuilt with trailing padding and with an offset,
    both of which pyarrow considers valid.

    Empty pyarrow arrays backed by non-empty buffers are covered as well.
    See https://github.com/pandas-dev/pandas/issues/40896
    """
    np_dtype, pa_array, np_expected, mask_expected = np_dtype_to_arrays

    def rebuild(buffers, length, offset):
        # new array of the same type over the given buffers, validated by pyarrow
        rebuilt = pa.Array.from_buffers(
            type=pa_array.type,
            length=length,
            buffers=buffers,
            offset=offset,
        )
        rebuilt.validate()
        return rebuilt

    def check(array, values_expected, validity_expected):
        # only the first three values are compared
        values, validity = pyarrow_array_to_numpy_and_mask(array, np_dtype)
        tm.assert_numpy_array_equal(values[:3], values_expected)
        tm.assert_numpy_array_equal(validity, validity_expected)

    # Unmodified array.
    check(pa_array, np_expected, mask_expected)

    validity_buffer, values_buffer = pa_array.buffers()[:2]
    values_bytes = values_buffer.to_pybytes()
    n_rows = len(pa_array)

    # Add trailing padding to the buffer.
    padded_values = pa.py_buffer(values_bytes + b"\x00")
    padded = rebuild([validity_buffer, padded_values], n_rows, pa_array.offset)
    check(padded, np_expected, mask_expected)

    # Add offset to the buffer.
    item_padding = b"\x00" * (pa_array.type.bit_width // 8)
    shifted_values = pa.py_buffer(item_padding + values_bytes)
    # 0b00001110: bits 1-3 set, bits 0 and 4 clear
    shifted_validity = pa.py_buffer(b"\x0E")
    shifted = rebuild(
        [shifted_validity, shifted_values], n_rows, pa_array.offset + 1
    )
    check(shifted, np_expected, mask_expected)

    # Empty array
    empty = rebuild([validity_buffer, values_buffer], 0, pa_array.offset)
    check(empty, np.array([], dtype=np_dtype), np.array([], dtype=np.bool_))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "arr",
    [pa.nulls(10), pa.chunked_array([pa.nulls(4), pa.nulls(6)])],
)
def test_from_arrow_null(data, arr):
    """Ten arrow nulls, contiguous or chunked, must convert to ten missing values."""
    converted = data.dtype.__from_arrow__(arr)
    assert converted.isna().all()
    assert len(converted) == 10
|  | ||||
|  | ||||
def test_from_arrow_type_error(data):
    """``__from_arrow__`` must raise TypeError when handed a string-typed array."""
    string_arr = pa.array(data).cast("string")

    # match=None: the exact error message is not tested, only the fact that
    # a TypeError is raised is relevant
    with pytest.raises(TypeError, match=None):
        data.dtype.__from_arrow__(string_arr)
| @ -0,0 +1,74 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_integer_dtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import BaseMaskedArray | ||||
|  | ||||
# One masked array per dtype: the integer dtypes first, then the floating ones.
arrays = [
    *(pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES),
    *(
        pd.array([0.141, -0.268, 5.895, None], dtype=dtype)
        for dtype in tm.FLOAT_EA_DTYPES
    ),
]
|  | ||||
|  | ||||
@pytest.fixture(
    params=arrays,
    ids=[arr.dtype.name for arr in arrays],
)
def data(request):
    """
    Provide one parametrized 'data' array per integer and floating point type.
    """
    return request.param
|  | ||||
|  | ||||
@pytest.fixture()
def numpy_dtype(data):
    """
    Pick the numpy dtype used to convert the 'data' input array.
    """
    # For integer dtype, the numpy conversion must be done to float;
    # otherwise the scalar type of the array's own dtype is used
    return float if is_integer_dtype(data) else data.dtype.type
|  | ||||
|  | ||||
def test_round(data, numpy_dtype):
    """``round`` on the masked array must agree with ``np.round`` on its values."""
    # first without arguments, then with an explicit ``decimals``
    for kwargs in ({}, {"decimals": 2}):
        result = data.round(**kwargs)

        as_numpy = data.to_numpy(dtype=numpy_dtype, na_value=None)
        expected = pd.array(np.round(as_numpy, **kwargs), dtype=data.dtype)

        tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_tolist(data):
    """``tolist`` must give the same elements as iterating over the array."""
    tm.assert_equal(data.tolist(), list(data))
|  | ||||
|  | ||||
def test_to_numpy():
    """``to_numpy`` on a string-dtype masked subclass must put NA in masked slots."""
    # GH#56991

    class MyStringArray(BaseMaskedArray):
        dtype = pd.StringDtype()
        _dtype_cls = pd.StringDtype
        _internal_fill_value = pd.NA

    values = np.array(["a", "b", "c"])
    # the middle element is masked
    mask = np.array([False, True, False])
    arr = MyStringArray(values=values, mask=mask)

    expected = np.array(["a", pd.NA, "c"])
    tm.assert_numpy_array_equal(arr.to_numpy(), expected)
| @ -0,0 +1,60 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
|  | ||||
|  | ||||
class TestSetitemValidation:
    """Setting an invalid scalar into a masked array must raise TypeError."""

    # scalars that every test below expects to be rejected
    _invalid_scalars = [
        1 + 2j,
        "True",
        "1",
        "1.0",
        pd.NaT,
        np.datetime64("NaT"),
        np.timedelta64("NaT"),
    ]

    def _check_setitem_invalid(self, arr, invalid):
        """
        Assert that assigning ``invalid`` into ``arr``, or into a Series
        built from it, raises TypeError with the expected message.
        """
        msg = re.escape(f"Invalid value '{invalid!s}' for dtype '{arr.dtype}'")

        # single position, full slice, list indexer
        for key in (0, slice(None), [0]):
            with pytest.raises(TypeError, match=msg):
                arr[key] = invalid

        # FIXME: don't leave commented-out
        # with pytest.raises(TypeError):
        #    arr[[0]] = [invalid]

        # with pytest.raises(TypeError):
        #    arr[[0]] = np.array([invalid], dtype=object)

        # Series non-coercion, behavior subject to change
        ser = pd.Series(arr)
        with pytest.raises(TypeError, match=msg):
            ser[0] = invalid
            # TODO: so, so many other variants of this...

    @pytest.mark.parametrize(
        "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
    )
    def test_setitem_validation_scalar_bool(self, invalid):
        boolean_arr = pd.array([True, False, None], dtype="boolean")
        self._check_setitem_invalid(boolean_arr, invalid)

    @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)])
    def test_setitem_validation_scalar_int(self, invalid, any_int_ea_dtype):
        integer_arr = pd.array([1, 2, None], dtype=any_int_ea_dtype)
        self._check_setitem_invalid(integer_arr, invalid)

    @pytest.mark.parametrize("invalid", _invalid_scalars + [True])
    def test_setitem_validation_scalar_float(self, invalid, float_ea_dtype):
        floating_arr = pd.array([1, 2, None], dtype=float_ea_dtype)
        self._check_setitem_invalid(floating_arr, invalid)
		Reference in New Issue
	
	Block a user