done
This commit is contained in:
		| @ -0,0 +1,68 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas.core.arrays.integer import ( | ||||
|     Int8Dtype, | ||||
|     Int16Dtype, | ||||
|     Int32Dtype, | ||||
|     Int64Dtype, | ||||
|     UInt8Dtype, | ||||
|     UInt16Dtype, | ||||
|     UInt32Dtype, | ||||
|     UInt64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    ]
)
def dtype(request):
    """
    Parametrized fixture returning an instance of each integer dtype class.

    ``request.param`` holds the dtype class; a new instance is created for
    every request.
    """
    dtype_cls = request.param
    return dtype_cls()
|  | ||||
|  | ||||
@pytest.fixture
def data(dtype):
    """
    Fixture returning a 'data' array for the parametrized integer 'dtype'.

    The values are 0-7, NaN, 10-97, NaN, 99 and 100, so the array mixes
    valid entries with two missing ones.

    Used to test dtype conversion with and without missing values.
    """
    values = [*range(8), np.nan, *range(10, 98), np.nan, 99, 100]
    return pd.array(values, dtype=dtype)
|  | ||||
|  | ||||
@pytest.fixture
def data_missing(dtype):
    """
    Fixture returning a two-element array ``[NaN, 1]`` for the parametrized
    integer 'dtype': one missing value followed by one valid integer.

    Used to test dtype conversion with and without missing values.
    """
    values = [np.nan, 1]
    return pd.array(values, dtype=dtype)
|  | ||||
|  | ||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """
    Parametrized fixture returning either the 'data' or the 'data_missing'
    integer array, selected by the parameter name.

    Used to test dtype conversion with and without missing values.
    """
    arrays_by_name = {"data": data, "data_missing": data_missing}
    # .get keeps the implicit ``None`` result for an unrecognised parameter
    return arrays_by_name.get(request.param)
| @ -0,0 +1,345 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core import ops | ||||
| from pandas.core.arrays import FloatingArray | ||||
|  | ||||
| # Basic test for the arithmetic array ops | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "opname, exp",
    [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])],
    ids=["add", "mul"],
)
def test_add_mul(dtype, opname, exp):
    """
    Check array-with-array ``add``/``mul`` in forward and reflected form.

    The forward function is looked up on ``operator`` and the reflected one
    (``r`` + opname) on ``pandas.core.ops``; both results are compared with
    the same expected array.
    """
    lhs = pd.array([0, 1, None, 3, 4], dtype=dtype)
    rhs = pd.array([1, 2, 3, None, 5], dtype=dtype)
    expected = pd.array(exp, dtype=dtype)

    forward = getattr(operator, opname)
    reflected = getattr(ops, f"r{opname}")
    for func in (forward, reflected):
        tm.assert_extension_array_equal(func(lhs, rhs), expected)
|  | ||||
|  | ||||
def test_sub(dtype):
    """Subtraction keeps the dtype and is NA wherever either operand is NA."""
    minuend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    subtrahend = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([1, 1, None, None, 1], dtype=dtype)

    tm.assert_extension_array_equal(minuend - subtrahend, expected)
|  | ||||
|  | ||||
def test_div(dtype):
    """True division is expected to give Float64, with ``1 / 0`` equal to inf."""
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")

    tm.assert_extension_array_equal(numerator / denominator, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
def test_divide_by_zero(zero, negative):
    """
    Divide an Int64 array by integer zero, positive zero and negative zero.

    For negative zero the expected values are multiplied by -1.
    """
    # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
    expected_values = np.array([np.nan, np.inf, -np.inf, 1], dtype="float64")
    expected_mask = np.array([False, False, False, True])
    expected = FloatingArray(expected_values, expected_mask)
    if negative:
        expected *= -1

    dividend = pd.array([0, 1, -1, None], dtype="Int64")
    tm.assert_extension_array_equal(dividend / zero, expected)
|  | ||||
|  | ||||
def test_floordiv(dtype):
    """Floor division keeps the integer dtype; ``1 // 0`` is expected to be 0."""
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)

    # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
    expected = pd.array([0, 2, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(numerator // denominator, expected)
|  | ||||
|  | ||||
def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype):
    """
    ``1 // Series`` containing a zero gives the same values for a nullable
    integer Series and for its numpy-dtype counterpart.
    """
    # GH 48223: Aligns with non-masked floordiv
    # but differs from numpy
    # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740
    nullable = pd.Series([0, 1], dtype=any_int_ea_dtype)
    nullable_expected = pd.Series([np.inf, 1.0], dtype="Float64")
    tm.assert_series_equal(1 // nullable, nullable_expected)

    non_nullable = nullable.astype(nullable.dtype.numpy_dtype)
    non_nullable_expected = nullable_expected.astype(np.float64)
    tm.assert_series_equal(1 // non_nullable, non_nullable_expected)
|  | ||||
|  | ||||
def test_mod(dtype):
    """Modulo keeps the integer dtype; ``1 % 0`` is expected to be 0."""
    dividend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    divisor = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([0, 0, None, None, 1], dtype=dtype)

    tm.assert_extension_array_equal(dividend % divisor, expected)
|  | ||||
|  | ||||
def test_pow_scalar():
    """
    Raise an Int64 array to scalar powers, and scalars to an Int64 array.

    Exponents/bases covered: 0, 1, ``pd.NA`` and ``np.nan``. With ``np.nan``
    the expected result is a FloatingArray whose NaN entries are unmasked.
    """

    def floating(values, mask):
        # Build the expected FloatingArray from plain Python lists.
        return FloatingArray(np.array(values, dtype="float64"), np.array(mask))

    base = pd.array([-1, 0, 1, None, 2], dtype="Int64")
    forward_cases = [
        (0, pd.array([1, 1, 1, 1, 1], dtype="Int64")),
        (1, pd.array([-1, 0, 1, None, 2], dtype="Int64")),
        (pd.NA, pd.array([None, None, 1, None, None], dtype="Int64")),
        (
            np.nan,
            floating(
                [np.nan, np.nan, 1, np.nan, np.nan],
                [False, False, False, True, False],
            ),
        ),
    ]
    for exponent, expected in forward_cases:
        tm.assert_extension_array_equal(base**exponent, expected)

    # reversed
    exponents = base[1:]  # Can't raise integers to negative powers.
    reversed_cases = [
        (0, pd.array([1, 0, None, 0], dtype="Int64")),
        (1, pd.array([1, 1, 1, 1], dtype="Int64")),
        (pd.NA, pd.array([1, None, None, None], dtype="Int64")),
        (
            np.nan,
            floating([1, np.nan, np.nan, np.nan], [False, False, True, False]),
        ),
    ]
    for scalar, expected in reversed_cases:
        tm.assert_extension_array_equal(scalar**exponents, expected)
|  | ||||
|  | ||||
def test_pow_array():
    """Element-wise power over every pairing of 0, 1 and NA as base/exponent."""
    bases = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
    exponents = pd.array([0, 1, None, 0, 1, None, 0, 1, None])
    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])

    tm.assert_extension_array_equal(bases**exponents, expected)
|  | ||||
|  | ||||
def test_rpow_one_to_na():
    """A float ndarray raised to an all-NA Int64 array: ``1.0 ** NA`` is 1.0."""
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    all_na = pd.array([np.nan, np.nan], dtype="Int64")
    bases = np.array([1.0, 2.0])
    expected = pd.array([1.0, np.nan], dtype="Float64")

    tm.assert_extension_array_equal(bases**all_na, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
    """Adding a zero-dimensional ndarray must match adding the bare scalar."""
    arr = pd.array([1, None, 2])
    zero_dim = np.array(other)

    tm.assert_equal(arr + zero_dim, arr + other)
|  | ||||
|  | ||||
| # Test generic characteristics / errors | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_error_invalid_values(data, all_arithmetic_operators):
    """
    Arithmetic between a nullable integer Series and invalid operands.

    Strings, Timestamps and datetime Series must raise ``TypeError``. The one
    exception is ``__mul__``/``__rmul__`` with a string Series, which is
    expected to produce repeated strings instead of raising.
    """
    op_name = all_arithmetic_operators
    ser = pd.Series(data)
    # Named ``bound_op`` so the imported ``pandas.core.ops`` module is not shadowed.
    bound_op = getattr(ser, op_name)

    # invalid scalars
    with tm.external_error_raised(TypeError):
        bound_op("foo")
    with tm.external_error_raised(TypeError):
        bound_op(pd.Timestamp("20180101"))

    # invalid array-likes
    str_ser = pd.Series("foo", index=ser.index)
    if op_name in ("__mul__", "__rmul__"):
        res = bound_op(str_ser)
        expected = pd.Series(["foo" * x for x in data], index=ser.index)
        expected = expected.fillna(np.nan)
        # TODO: doing this fillna to keep tests passing as we make
        #  assert_almost_equal stricter, but the expected with pd.NA seems
        #  more-correct than np.nan here.
        tm.assert_series_equal(res, expected)
    else:
        with tm.external_error_raised(TypeError):
            bound_op(str_ser)

    with tm.external_error_raised(TypeError):
        bound_op(pd.Series(pd.date_range("20180101", periods=len(ser))))
|  | ||||
|  | ||||
| # Various | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
| # TODO test unsigned overflow | ||||
|  | ||||
|  | ||||
def test_arith_coerce_scalar(data, all_arithmetic_operators):
    """
    An arithmetic op with the float scalar 0.01 must match the same op on the
    float-cast Series, converted to Float64.
    """
    func = tm.get_op_from_name(all_arithmetic_operators)
    ser = pd.Series(data)
    scalar = 0.01

    result = func(ser, scalar)
    expected = func(ser.astype(float), scalar).astype("Float64")

    # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
    if all_arithmetic_operators == "__rmod__":
        zero_positions = (ser == 0).fillna(False).to_numpy(bool)
        expected.array._mask[zero_positions] = False

    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("other", [1.0, np.array(1.0)])
def test_arithmetic_conversion(all_arithmetic_operators, other):
    """
    A float operand must give a Float64 result, even when that float is
    equal to an integer.
    """
    func = tm.get_op_from_name(all_arithmetic_operators)
    ints = pd.Series([1, 2, 3], dtype="Int64")

    assert func(ints, other).dtype == "Float64"
|  | ||||
|  | ||||
def test_cross_type_arithmetic():
    """
    Add an Int64 column to a plain integer column and to a UInt8 column, and
    compare a derived Int64 result against a scalar.
    """
    frame = pd.DataFrame(
        {
            "A": pd.Series([1, 2, np.nan], dtype="Int64"),
            "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
            "C": [1, 2, 3],
        }
    )

    # Int64 + plain integer column
    expected_sum = pd.Series([2, 4, np.nan], dtype="Int64")
    tm.assert_series_equal(frame.A + frame.C, expected_sum)

    # comparison of the scaled sum yields a nullable boolean Series
    expected_flags = pd.Series([False, True, None], dtype="boolean")
    tm.assert_series_equal((frame.A + frame.C) * 3 == 12, expected_flags)

    # Int64 + UInt8
    expected_mixed = pd.Series([2, np.nan, np.nan], dtype="Int64")
    tm.assert_series_equal(frame.A + frame.B, expected_mixed)
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["mean"])
def test_reduce_to_float(op):
    """
    Some reductions always return float, even if the result is a rounded
    number; check both the Series reduction and the groupby reduction.
    """
    frame = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # op
    scalar = getattr(frame.C, op)()
    assert isinstance(scalar, float)

    # groupby
    grouped = getattr(frame.groupby("A"), op)()
    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(grouped, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "source, neg_target, abs_target",
    [
        ([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
        ([1, 2, None], [-1, -2, None], [1, 2, None]),
        ([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
    ],
)
def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
    """Unary ``-``, ``+`` and ``abs`` on signed nullable integer arrays."""
    ea_dtype = any_signed_int_ea_dtype
    arr = pd.array(source, dtype=ea_dtype)

    negated = -arr
    positive = +arr
    absolute = abs(arr)

    expected_neg = pd.array(neg_target, dtype=ea_dtype)
    expected_abs = pd.array(abs_target, dtype=ea_dtype)

    tm.assert_extension_array_equal(negated, expected_neg)
    tm.assert_extension_array_equal(positive, arr)
    # unary plus must not share memory with its operand
    assert not tm.shares_memory(positive, arr)
    tm.assert_extension_array_equal(absolute, expected_abs)
|  | ||||
|  | ||||
def test_values_multiplying_large_series_by_NA():
    """``pd.NA`` times a 10001-element float Series gives all ``pd.NA``."""
    # GH#33701
    length = 10001
    zeros = pd.Series(np.zeros(length))
    expected = pd.Series([pd.NA] * length)

    tm.assert_series_equal(pd.NA * zeros, expected)
|  | ||||
|  | ||||
def test_bitwise(dtype):
    """
    Bitwise ``|``, ``&`` and ``^`` between nullable integer arrays propagate
    NA, and raise ``TypeError`` when the right operand is a Float64 array.
    """
    lhs = pd.array([1, None, 3, 4], dtype=dtype)
    rhs = pd.array([None, 3, 5, 4], dtype=dtype)
    bitwise_ops = (operator.or_, operator.and_, operator.xor)

    for bit_op in bitwise_ops:
        result = bit_op(lhs, rhs)
        expected = pd.array([None, None, bit_op(3, 5), bit_op(4, 4)], dtype=dtype)
        tm.assert_extension_array_equal(result, expected)

    # TODO: desired behavior when operating with boolean?  defer?

    floats = rhs.astype("Float64")
    for bit_op in bitwise_ops:
        with pytest.raises(TypeError, match="unsupported operand type"):
            bit_op(lhs, floats)
| @ -0,0 +1,39 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.arrays.masked_shared import ( | ||||
|     ComparisonOps, | ||||
|     NumericOps, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestComparisonOps(NumericOps, ComparisonOps):
    """Comparison tests for nullable integer arrays, built on the shared mixins."""

    @pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1])
    def test_scalar(self, other, comparison_op, dtype):
        # Re-run the shared scalar test with this module's list of scalars.
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_compare_to_int(self, dtype, comparison_op):
        """Comparing with the int 2 must match the float Series, with NA kept."""
        # GH 28930
        dunder = f"__{comparison_op.__name__}__"
        nullable = pd.Series([1, None, 3], dtype=dtype)
        floats = pd.Series([1, None, 3], dtype="float")

        result = getattr(nullable, dunder)(2)

        expected = getattr(floats, dunder)(2).astype("boolean")
        expected[floats.isna()] = pd.NA

        tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_equals():
    """Arrays of the same class but different dtype must not compare equal."""
    # GH-30652
    # equals is generally tested in /tests/extension/base/methods, but this
    # specifically tests that two arrays of the same class but different dtype
    # do not evaluate equal
    as_int64 = pd.array([1, 2, None], dtype="Int64")
    as_int32 = pd.array([1, 2, None], dtype="Int32")

    assert as_int64.equals(as_int32) is False
| @ -0,0 +1,69 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "Int64"], "Int64"),
        (["UInt64", "UInt64"], "UInt64"),
        (["Int8", "Int8"], "Int8"),
        (["Int8", "Int16"], "Int16"),
        (["UInt8", "Int8"], "Int16"),
        (["Int32", "UInt32"], "Int64"),
        (["Int64", "UInt64"], "Float64"),
        (["Int64", "boolean"], "object"),
        (["UInt8", "boolean"], "object"),
    ],
)
def test_concat_series(to_concat_dtypes, result_dtype):
    """
    Concatenate two masked Series and check the resulting dtype.

    We expect the same dtypes as we would get with non-masked inputs, just
    masked where available. Both input orders are checked.
    """
    object_piece = pd.Series([0, 1, pd.NA], dtype=object)
    expected = pd.concat([object_piece, object_piece]).astype(result_dtype)

    # order doesn't matter for result
    for dtypes in (to_concat_dtypes, to_concat_dtypes[::-1]):
        pieces = [pd.Series([0, 1, pd.NA], dtype=dt) for dt in dtypes]
        tm.assert_series_equal(pd.concat(pieces), expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "int64"], "Int64"),
        (["UInt64", "uint64"], "UInt64"),
        (["Int8", "int8"], "Int8"),
        (["Int8", "int16"], "Int16"),
        (["UInt8", "int8"], "Int16"),
        (["Int32", "uint32"], "Int64"),
        (["Int64", "uint64"], "Float64"),
        (["Int64", "bool"], "object"),
        (["UInt8", "bool"], "object"),
    ],
)
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
    """
    Concatenate a masked Series with a numpy-backed Series.

    We expect the same dtypes as we would get with non-masked inputs, just
    masked where available. Both input orders are checked.
    """
    masked = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0])
    plain = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1]))

    masked_first = pd.concat([masked, plain], ignore_index=True)
    expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(result_dtype)
    tm.assert_series_equal(masked_first, expected)

    # order doesn't matter for result
    plain_first = pd.concat([plain, masked], ignore_index=True)
    expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(result_dtype)
    tm.assert_series_equal(plain_first, expected)
| @ -0,0 +1,245 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import is_integer | ||||
| from pandas.core.arrays import IntegerArray | ||||
| from pandas.core.arrays.integer import ( | ||||
|     Int8Dtype, | ||||
|     Int32Dtype, | ||||
|     Int64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[pd.array, IntegerArray._from_sequence])
def constructor(request):
    """
    Parametrized fixture returning a callable that builds an array from a
    sequence: ``pd.array`` or ``IntegerArray._from_sequence``.

    Used to test dtype conversions.
    """
    build = request.param
    return build
|  | ||||
|  | ||||
def test_uses_pandas_na():
    """A missing entry of an Int64 array is the ``pd.NA`` singleton."""
    arr = pd.array([1, None], dtype=Int64Dtype())
    missing = arr[1]
    assert missing is pd.NA
|  | ||||
|  | ||||
def test_from_dtype_from_float(data):
    """
    Build a Series with the dtype's string name from a float ndarray, a
    Python list and an NA-free integer ndarray.
    """
    # construct from our dtype & string dtype
    dtype = data.dtype
    dtype_name = str(dtype)

    # from float
    as_float = data.to_numpy(na_value=np.nan, dtype="float")
    tm.assert_series_equal(pd.Series(as_float, dtype=dtype_name), pd.Series(data))

    # from int / list
    as_list = np.array(data).tolist()
    tm.assert_series_equal(pd.Series(as_list, dtype=dtype_name), pd.Series(data))

    # from int / array
    expected = pd.Series(data).dropna().reset_index(drop=True)
    without_na = np.array(data.dropna()).astype(np.dtype(dtype.type))
    tm.assert_series_equal(pd.Series(without_na, dtype=dtype_name), expected)
|  | ||||
|  | ||||
def test_conversions(data_missing):
    """
    Convert a nullable integer column to object dtype, both as a Series and
    as an ndarray, and check the scalars element by element.
    """
    # astype to object series
    df = pd.DataFrame({"A": data_missing})
    result = df["A"].astype("object")
    expected = pd.Series(np.array([pd.NA, 1], dtype=object), name="A")
    tm.assert_series_equal(result, expected)

    # convert to object ndarray
    # we assert that we are exactly equal
    # including type conversions of scalars
    result = df["A"].astype("object").values
    expected = np.array([pd.NA, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for r, e in zip(result, expected):
        if pd.isnull(r):
            assert pd.isnull(e)
        elif is_integer(r):
            assert r == e
            assert is_integer(e)
        else:
            assert r == e
            # exact type match, compared by identity rather than equality
            assert type(r) is type(e)
|  | ||||
|  | ||||
def test_integer_array_constructor():
    """
    ``IntegerArray(values, mask)`` accepts an int64 ndarray with a bool
    ndarray, and raises ``TypeError`` for lists, float values or no mask.
    """
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    expected = pd.array([1, 2, 3, np.nan], dtype="Int64")
    tm.assert_extension_array_equal(IntegerArray(values, mask), expected)

    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    rejected_arguments = [
        (values.tolist(), mask),
        (values, mask.tolist()),
        (values.astype(float), mask),
    ]
    for bad_values, bad_mask in rejected_arguments:
        with pytest.raises(TypeError, match=msg):
            IntegerArray(bad_values, bad_mask)

    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values)
|  | ||||
|  | ||||
def test_integer_array_constructor_copy():
    """
    Without ``copy`` the constructor keeps the very same ``values`` and
    ``mask`` objects; with ``copy=True`` it holds different objects.
    """
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    shared = IntegerArray(values, mask)
    assert shared._data is values
    assert shared._mask is mask

    copied = IntegerArray(values, mask, copy=True)
    assert copied._data is not values
    assert copied._mask is not mask
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, np.nan]),
        ([None], [np.nan]),
        ([None, np.nan], [np.nan, np.nan]),
        ([np.nan, np.nan], [np.nan, np.nan]),
    ],
)
def test_to_integer_array_none_is_nan(a, b):
    """``None`` and ``np.nan`` inputs produce equal Int64 arrays."""
    from_a = pd.array(a, dtype="Int64")
    from_b = pd.array(b, dtype="Int64")

    tm.assert_extension_array_equal(from_a, from_b)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
    ],
)
def test_to_integer_array_error(values):
    """
    Inputs that cannot become an IntegerArray raise ``ValueError`` or
    ``TypeError`` from both ``pd.array`` and ``IntegerArray._from_sequence``.
    """
    # error in converting existing arrays to IntegerArrays
    accepted_messages = (
        r"cannot be converted to IntegerDtype",
        r"invalid literal for int\(\) with base 10:",
        r"values must be a 1D list-like",
        r"Cannot pass scalar",
        r"int\(\) argument must be a string",
    )
    msg = "|".join(accepted_messages)
    expected_errors = (ValueError, TypeError)

    with pytest.raises(expected_errors, match=msg):
        pd.array(values, dtype="Int64")

    with pytest.raises(expected_errors, match=msg):
        IntegerArray._from_sequence(values)
|  | ||||
|  | ||||
def test_to_integer_array_inferred_dtype(constructor):
    """The result dtype follows the ndarray's dtype, else falls back to Int64."""
    # if values has dtype -> respect it
    respected = [("int8", Int8Dtype), ("int32", Int32Dtype)]
    for numpy_name, expected_cls in respected:
        built = constructor(np.array([1, 2], dtype=numpy_name))
        assert built.dtype == expected_cls()

    # if values have no dtype -> always int64
    built = constructor([1, 2])
    assert built.dtype == Int64Dtype()
|  | ||||
|  | ||||
def test_to_integer_array_dtype_keyword(constructor):
    """An explicit ``dtype`` keyword decides the result dtype."""
    from_list = constructor([1, 2], dtype="Int8")
    assert from_list.dtype == Int8Dtype()

    # if values has dtype -> override it
    int8_values = np.array([1, 2], dtype="int8")
    from_ndarray = constructor(int8_values, dtype="Int32")
    assert from_ndarray.dtype == Int32Dtype()
|  | ||||
|  | ||||
def test_to_integer_array_float():
    """
    Integer-valued floats convert to Int64; a non-integer float raises
    ``TypeError``.
    """
    converted = IntegerArray._from_sequence([1.0, 2.0], dtype="Int64")
    tm.assert_extension_array_equal(converted, pd.array([1, 2], dtype="Int64"))

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        IntegerArray._from_sequence([1.5, 2.0], dtype="Int64")

    # for float dtypes, the itemsize is not preserved
    float32_values = np.array([1.0, 2.0], dtype="float32")
    from_float32 = IntegerArray._from_sequence(float32_values, dtype="Int64")
    assert from_float32.dtype == Int64Dtype()
|  | ||||
|  | ||||
def test_to_integer_array_str():
    """
    Integer strings are parsed into Int64; empty or decimal strings raise
    ``ValueError``.
    """
    parsed = IntegerArray._from_sequence(["1", "2", None], dtype="Int64")
    tm.assert_extension_array_equal(parsed, pd.array([1, 2, np.nan], dtype="Int64"))

    msg = r"invalid literal for int\(\) with base 10: .*"
    for unparsable in (["1", "2", ""], ["1.5", "2.0"]):
        with pytest.raises(ValueError, match=msg):
            IntegerArray._from_sequence(unparsable, dtype="Int64")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "bool_values, int_values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Int64Dtype(), Int64Dtype()),
        ([False, True], [0, 1], "Int64", Int64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),
    ],
)
def test_to_integer_array_bool(
    constructor, bool_values, int_values, target_dtype, expected_dtype
):
    """Boolean input converts to the same array as its 0/1 integer form."""
    from_bools = constructor(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_ints = pd.array(int_values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_ints)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values, to_dtype, result_dtype",
    [
        (np.array([1], dtype="int64"), None, Int64Dtype),
        (np.array([1, np.nan]), None, Int64Dtype),
        (np.array([1, np.nan]), "int8", Int8Dtype),
    ],
)
def test_to_integer_array(values, to_dtype, result_dtype):
    """``_from_sequence`` on an ndarray matches ``pd.array`` with the result dtype."""
    # convert existing arrays to IntegerArrays
    expected_dtype = result_dtype()
    converted = IntegerArray._from_sequence(values, dtype=to_dtype)
    assert converted.dtype == expected_dtype

    expected = pd.array(values, dtype=expected_dtype)
    tm.assert_extension_array_equal(converted, expected)
|  | ||||
|  | ||||
def test_integer_array_from_boolean():
    """An object-dtype boolean ndarray converts like a bool-dtype one."""
    # GH31104
    bools = [True, False]
    from_bool_dtype = pd.array(np.array(bools), dtype="Int64")
    from_object_dtype = pd.array(np.array(bools, dtype=object), dtype="Int64")

    tm.assert_extension_array_equal(from_object_dtype, from_bool_dtype)
| @ -0,0 +1,301 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.generic import ABCIndex | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.integer import ( | ||||
|     Int8Dtype, | ||||
|     UInt32Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_dtypes(dtype):
    """
    Smoke test on auto dtype construction: the numpy kind is "i" for signed
    dtypes and "u" otherwise, and the dtype has a name.
    """
    expected_kind = "i" if dtype.is_signed_integer else "u"
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """
    Reductions that keep the integer dtype.

    ``mean`` would also run here, but it generally returns a float, so it is
    not part of this parametrization. The Series reduction must return a
    ``np.int64`` scalar and the groupby reduction must keep the Int64 column.
    """
    df = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # op: every parametrized reduction is expected to return np.int64
    result = getattr(df.C, op)()
    assert isinstance(result, np.int64)

    # groupby
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_astype_nansafe():
    """Casting an array holding NA to a NumPy integer dtype raises (gh-22343)."""
    with_na = pd.array([np.nan, 1, 2], dtype="Int8")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        with_na.astype("uint32")
|  | ||||
|  | ||||
@pytest.mark.parametrize("dropna", [True, False])
def test_construct_index(all_data, dropna):
    """Building an Index from an integer array must keep the array's dtype."""
    head = all_data[:10]
    other = np.array(head[~head.isna()]) if dropna else head

    result = pd.Index(pd.array(other, dtype=head.dtype))
    expected = pd.Index(other, dtype=head.dtype)
    # the Index must not have been coerced to object
    assert head.dtype == expected.dtype

    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("dropna", [True, False])
def test_astype_index(all_data, dropna):
    """Casting an Index to the integer dtype matches casting via object."""
    head = all_data[:10]
    other = head[~head.isna()] if dropna else head

    target_dtype = head.dtype
    idx = pd.Index(np.array(other))
    assert isinstance(idx, ABCIndex)

    direct = idx.astype(target_dtype)
    via_object = idx.astype(object).astype(target_dtype)
    tm.assert_index_equal(direct, via_object)
|  | ||||
|  | ||||
def test_astype(all_data):
    """
    Exercise Series.astype between integer, NumPy and object dtypes.

    Each cast is checked on the non-missing values only ("ints") and on the
    unfiltered values ("mixed").
    """
    head = all_data[:10]
    own_dtype = head.dtype
    np_dtype = own_dtype.numpy_dtype
    other_dtype = Int8Dtype()

    ints = head[~head.isna()]
    mixed = head

    # ints -> same integer dtype
    ser = pd.Series(ints)
    result = ser.astype(own_dtype)
    tm.assert_series_equal(result, pd.Series(ints))

    # ints -> a different integer dtype
    ser = pd.Series(ints)
    result = ser.astype(other_dtype)
    tm.assert_series_equal(result, pd.Series(ints, dtype=other_dtype))

    # ints -> matching NumPy dtype
    ser = pd.Series(ints)
    result = ser.astype(np_dtype)
    tm.assert_series_equal(result, pd.Series(ints._data.astype(np_dtype)))

    # mixed -> same integer dtype
    ser = pd.Series(mixed)
    result = ser.astype(own_dtype)
    tm.assert_series_equal(result, pd.Series(mixed))

    # mixed -> a different integer dtype
    ser = pd.Series(mixed)
    result = ser.astype(other_dtype)
    tm.assert_series_equal(result, pd.Series(mixed, dtype=other_dtype))

    # mixed -> matching NumPy dtype is expected to fail
    ser = pd.Series(mixed)
    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        ser.astype(np_dtype)

    # mixed -> object
    ser = pd.Series(mixed)
    result = ser.astype("object")
    tm.assert_series_equal(result, pd.Series(np.asarray(mixed, dtype=object)))
|  | ||||
|  | ||||
def test_astype_copy():
    """Check when astype copies, or shares, both the data and the mask."""
    values = [1, 2, 3, None]
    arr = pd.array(values, dtype="Int64")
    orig = pd.array(values, dtype="Int64")

    # copy=True: writes to the result (value or NA) must not reach ``arr``
    copied = arr.astype("Int64", copy=True)
    assert copied is not arr
    assert not tm.shares_memory(copied, arr)
    copied[0] = 10
    tm.assert_extension_array_equal(arr, orig)
    copied[0] = pd.NA
    tm.assert_extension_array_equal(arr, orig)

    # copy=False with the same dtype: the very same object comes back
    same = arr.astype("Int64", copy=False)
    assert same is arr
    assert np.shares_memory(same._data, arr._data)
    assert np.shares_memory(same._mask, arr._mask)
    same[0] = 10
    assert arr[0] == 10
    same[0] = pd.NA
    assert arr[0] is pd.NA

    # a different target dtype always needs a copy, even with copy=False,
    # and the mask has to be copied too
    arr = pd.array(values, dtype="Int64")
    orig = pd.array(values, dtype="Int64")

    narrowed = arr.astype("Int32", copy=False)
    assert not tm.shares_memory(narrowed, arr)
    narrowed[0] = 10
    tm.assert_extension_array_equal(arr, orig)
    narrowed[0] = pd.NA
    tm.assert_extension_array_equal(arr, orig)
|  | ||||
|  | ||||
def test_astype_to_larger_numpy():
    """Casting to a wider NumPy dtype returns an ndarray of that dtype."""
    for source, target in (("Int32", "int64"), ("UInt32", "uint64")):
        result = pd.array([1, 2], dtype=source).astype(target)
        expected = np.array([1, 2], dtype=target)
        tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
def test_astype_specific_casting(dtype):
    """Casting Int64 Series to another integer dtype, without and with NA."""
    for values in ([1, 2, 3], [1, 2, 3, None]):
        result = pd.Series(values, dtype="Int64").astype(dtype)
        expected = pd.Series(values, dtype=dtype)
        tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_astype_floating():
    """Int64 -> Float64 is expected to convert values and keep the missing one."""
    result = pd.array([1, 2, None], dtype="Int64").astype("Float64")
    expected = pd.array([1.0, 2.0, None], dtype="Float64")
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_astype_dt64():
    """Integer nanoseconds cast to datetime64[ns]; NA is expected as NaT (GH#32435)."""
    nanos = pd.array([1, 2, 3, pd.NA]) * 10**9

    result = nanos.astype("datetime64[ns]")

    seconds = np.array([1, 2, 3, "NaT"], dtype="M8[s]")
    tm.assert_numpy_array_equal(result, seconds.astype("M8[ns]"))
|  | ||||
|  | ||||
def test_construct_cast_invalid(dtype):
    """Non-integral floats must be rejected by both pd.array and Series.astype."""
    msg = "cannot safely"
    for values in ([1.2, 2.3, 3.7], [1.2, 2.3, 3.7, np.nan]):
        with pytest.raises(TypeError, match=msg):
            pd.array(values, dtype=dtype)

        with pytest.raises(TypeError, match=msg):
            pd.Series(values).astype(dtype)
|  | ||||
|  | ||||
@pytest.mark.parametrize("in_series", [True, False])
def test_to_numpy_na_nan(in_series):
    """to_numpy puts the given ``na_value`` in place of the missing entry."""
    a = pd.array([0, 1, None], dtype="Int64")
    if in_series:
        a = pd.Series(a)

    # (target dtype, fill value for NA, expected values)
    cases = [
        ("float64", np.nan, [0.0, 1.0, np.nan]),
        ("int64", -1, [0, 1, -1]),
        ("bool", False, [False, True, False]),
    ]
    for np_dtype, fill, values in cases:
        result = a.to_numpy(dtype=np_dtype, na_value=fill)
        expected = np.array(values, dtype=np_dtype)
        tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("in_series", [True, False])
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
def test_to_numpy_dtype(dtype, in_series):
    """Without missing values, to_numpy converts to the requested dtype."""
    source = pd.array([0, 1], dtype="Int64")
    source = pd.Series(source) if in_series else source

    converted = source.to_numpy(dtype=dtype)
    tm.assert_numpy_array_equal(converted, np.array([0, 1], dtype=dtype))
|  | ||||
|  | ||||
@pytest.mark.parametrize("dtype", ["int64", "bool"])
def test_to_numpy_na_raises(dtype):
    """to_numpy with NA present and no ``na_value`` raises, naming the dtype."""
    with_na = pd.array([0, 1, None], dtype="Int64")
    with pytest.raises(ValueError, match=dtype):
        with_na.to_numpy(dtype=dtype)
|  | ||||
|  | ||||
def test_astype_str(using_infer_string):
    """astype(str) / astype("str") result depends on the infer-string option."""
    a = pd.array([1, 2, None], dtype="Int64")

    if using_infer_string:
        # expected: string extension array with NaN as the missing value
        expected = pd.array(["1", "2", None], dtype=pd.StringDtype(na_value=np.nan))
        check = tm.assert_extension_array_equal
    else:
        # expected: plain unicode ndarray; NA is rendered as text
        expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
        check = tm.assert_numpy_array_equal

    for target in (str, "str"):
        check(a.astype(target), expected)
|  | ||||
|  | ||||
def test_astype_boolean():
    """Int64 -> boolean: non-zero is True, zero is False, NA is kept.

    See https://github.com/pandas-dev/pandas/issues/31102
    """
    ints = pd.array([1, 0, -1, 2, None], dtype="Int64")
    expected = pd.array([True, False, True, True, None], dtype="boolean")
    tm.assert_extension_array_equal(ints.astype("boolean"), expected)
| @ -0,0 +1,203 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import FloatingArray | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
def test_ufuncs_single_int(ufunc):
    """Unary ufuncs with integer output give Int64 results on array and Series."""
    a = pd.array([1, 2, -3, np.nan])

    # bare array
    expected_arr = pd.array(ufunc(a.astype(float)), dtype="Int64")
    tm.assert_extension_array_equal(ufunc(a), expected_arr)

    # wrapped in a Series
    ser = pd.Series(a)
    result = ufunc(ser)
    expected_ser = pd.Series(pd.array(ufunc(a.astype(float)), dtype="Int64"))
    tm.assert_series_equal(result, expected_ser)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """Unary float-valued ufuncs give a FloatingArray that reuses the input mask."""
    a = pd.array([1, 2, -3, np.nan])

    # the array contains -3, so log/sqrt hit invalid values; silence that
    with np.errstate(invalid="ignore"):
        arr_result = ufunc(a)
        arr_expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask)
    tm.assert_extension_array_equal(arr_result, arr_expected)

    ser = pd.Series(a)
    with np.errstate(invalid="ignore"):
        ser_result = ufunc(ser)
    tm.assert_series_equal(ser_result, pd.Series(arr_expected))
|  | ||||
|  | ||||
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_int(ufunc):
    """Binary ufuncs on an IntegerArray against array, ndarray and scalar operands."""
    a = pd.array([1, 2, -3, np.nan])
    arr = np.array([1, 2, 3, 4])

    # (left, right) operand pairs, in the order they are checked
    operand_pairs = [
        (a, a),  # two IntegerArrays
        (a, arr),  # IntegerArray with numpy array
        (arr, a),
        (a, 1),  # IntegerArray with scalar
        (1, a),
    ]

    def as_float(operand):
        # the reference computation swaps the IntegerArray for its float cast
        return a.astype(float) if operand is a else operand

    for left, right in operand_pairs:
        result = ufunc(left, right)
        expected = pd.array(ufunc(as_float(left), as_float(right)), dtype="Int64")
        tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_ufunc_binary_output():
    """A two-output ufunc (np.modf) returns a tuple of two extension arrays."""
    a = pd.array([1, 2, np.nan])
    result = np.modf(a)

    reference = np.modf(a.to_numpy(na_value=np.nan, dtype="float"))
    expected = (pd.array(reference[0]), pd.array(reference[1]))

    assert isinstance(result, tuple)
    assert len(result) == 2

    for actual, wanted in zip(result, expected):
        tm.assert_extension_array_equal(actual, wanted)
|  | ||||
|  | ||||
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """
    np.add.reduce on the array must agree with ``sum(skipna=False)``.

    Despite the test's name, the body does not expect anything to raise.
    """
    arr = pd.array(values)

    reduced = np.add.reduce(arr)
    tm.assert_almost_equal(reduced, arr.sum(skipna=False))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "pandasmethname, kwargs",
    [
        ("var", {"ddof": 0}),
        ("var", {"ddof": 1}),
        ("std", {"ddof": 0}),
        ("std", {"ddof": 1}),
        ("kurtosis", {}),
        ("skew", {}),
        ("sem", {}),
    ],
)
def test_stat_method(pandasmethname, kwargs):
    """A statistic over data with NA equals the statistic over the data without it."""
    with_na = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64")
    result = getattr(with_na, pandasmethname)(**kwargs)

    without_na = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64")
    expected = getattr(without_na, pandasmethname)(**kwargs)

    assert expected == result
|  | ||||
|  | ||||
def test_value_counts_na():
    """value_counts keeps an Int64 index, with and without the NA bucket."""
    arr = pd.array([1, 2, 1, pd.NA], dtype="Int64")

    # dropna=False: NA gets its own entry
    index_with_na = pd.Index([1, 2, pd.NA], dtype="Int64")
    assert index_with_na.dtype == "Int64"
    expected = pd.Series([2, 1, 1], index=index_with_na, dtype="Int64", name="count")
    tm.assert_series_equal(arr.value_counts(dropna=False), expected)

    # dropna=True: only the two distinct non-missing values remain
    counts = arr.value_counts(dropna=True)
    expected = pd.Series([2, 1], index=arr[:2], dtype="Int64", name="count")
    assert expected.index.dtype == arr.dtype
    tm.assert_series_equal(counts, expected)
|  | ||||
|  | ||||
def test_value_counts_empty():
    """value_counts on an empty Int64 Series gives an empty Int64 result.

    See https://github.com/pandas-dev/pandas/issues/33317
    """
    empty = pd.Series([], dtype="Int64")
    counts = empty.value_counts()

    empty_index = pd.Index([], dtype=empty.dtype)
    assert empty_index.dtype == empty.dtype
    expected = pd.Series([], index=empty_index, dtype="Int64", name="count")
    tm.assert_series_equal(counts, expected)
|  | ||||
|  | ||||
def test_value_counts_with_normalize():
    """Normalized value_counts gives Float64 proportions on an Int64 index (GH 33172)."""
    ser = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
    proportions = ser.value_counts(normalize=True)

    # three non-missing values in total: 1 appears twice, 2 appears once
    raw_counts = pd.Series([2, 1], index=ser[:2], dtype="Float64", name="proportion")
    expected = raw_counts / 3
    assert expected.index.dtype == ser.dtype
    tm.assert_series_equal(proportions, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 4])
def test_integer_array_sum(skipna, min_count, any_int_ea_dtype):
    """sum gives 6 only when NA is skipped and min_count is 0; otherwise NA."""
    arr = pd.array([1, 2, 3, None], dtype=any_int_ea_dtype)
    total = arr.sum(skipna=skipna, min_count=min_count)

    if not (skipna and min_count == 0):
        assert total is pd.NA
        return
    assert total == 6
|  | ||||
|  | ||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("method", ["min", "max"])
def test_integer_array_min_max(skipna, method, any_int_ea_dtype):
    """min/max skip NA when asked to, and give NA when skipna is False."""
    arr = pd.array([0, 1, None], dtype=any_int_ea_dtype)
    outcome = getattr(arr, method)(skipna=skipna)

    if not skipna:
        assert outcome is pd.NA
        return
    wanted = 0 if method == "min" else 1
    assert outcome == wanted
|  | ||||
|  | ||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 9])
def test_integer_array_prod(skipna, min_count, any_int_ea_dtype):
    """prod gives 2 only when NA is skipped and min_count is 0; otherwise NA."""
    arr = pd.array([1, 2, None], dtype=any_int_ea_dtype)
    product = arr.prod(skipna=skipna, min_count=min_count)

    if not (skipna and min_count == 0):
        assert product is pd.NA
        return
    assert product == 2
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)]
)
def test_integer_array_numpy_sum(values, expected):
    """np.sum on an Int64 array ignores missing entries (an all-NA array sums to 0)."""
    total = np.sum(pd.array(values, dtype="Int64"))
    assert total == expected
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"])
def test_dataframe_reductions(op):
    """
    Ensure DataFrame reductions do not cast Int64 columns to float.

    See https://github.com/pandas-dev/pandas/pull/32867

    Parameters
    ----------
    op : str
        Name of the DataFrame reduction method under test.
    """
    df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
    # Dispatch on ``op``: calling ``df.max()`` unconditionally would leave the
    # other parametrized reductions untested.
    result = getattr(df, op)()
    assert isinstance(result["a"], np.int64)
|  | ||||
|  | ||||
| # TODO(jreback) - these need testing / are broken | ||||
|  | ||||
| # shift | ||||
|  | ||||
| # set_index (destroys type) | ||||
| @ -0,0 +1,19 @@ | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_array_setitem_nullable_boolean_mask():
    """Series.where with a comparison mask turns unselected entries into NA (GH 31446)."""
    ser = pd.Series([1, 2], dtype="Int64")
    masked = ser.where(ser > 1)
    tm.assert_series_equal(masked, pd.Series([pd.NA, 2], dtype="Int64"))
|  | ||||
|  | ||||
def test_array_setitem():
    """Assigning through a comparison mask built from the array itself (GH 31446)."""
    values = pd.Series([1, 2], dtype="Int64").array
    above_one = values > 1
    values[above_one] = 1

    tm.assert_extension_array_equal(values, pd.array([1, 1], dtype="Int64"))
| @ -0,0 +1,123 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     array, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "op, expected",
    [
        ("sum", np.int64(3)),
        ("prod", np.int64(2)),
        ("min", np.int64(1)),
        ("max", np.int64(2)),
        ("mean", np.float64(1.5)),
        ("median", np.float64(1.5)),
        ("var", np.float64(0.5)),
        ("std", np.float64(0.5**0.5)),
        ("skew", pd.NA),
        ("kurt", pd.NA),
        ("any", True),
        ("all", True),
    ],
)
def test_series_reductions(op, expected):
    """Each reduction of the Int64 Series [1, 2] returns the listed scalar."""
    reduce = getattr(Series([1, 2], dtype="Int64"), op)
    tm.assert_equal(reduce(), expected)
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op, expected", | ||||
|     [ | ||||
|         ["sum", Series([3], index=["a"], dtype="Int64")], | ||||
|         ["prod", Series([2], index=["a"], dtype="Int64")], | ||||
|         ["min", Series([1], index=["a"], dtype="Int64")], | ||||
|         ["max", Series([2], index=["a"], dtype="Int64")], | ||||
|         ["mean", Series([1.5], index=["a"], dtype="Float64")], | ||||
|         ["median", Series([1.5], index=["a"], dtype="Float64")], | ||||
|         ["var", Series([0.5], index=["a"], dtype="Float64")], | ||||
|         ["std", Series([0.5**0.5], index=["a"], dtype="Float64")], | ||||
|         ["skew", Series([pd.NA], index=["a"], dtype="Float64")], | ||||
|         ["kurt", Series([pd.NA], index=["a"], dtype="Float64")], | ||||
|         ["any", Series([True], index=["a"], dtype="boolean")], | ||||
|         ["all", Series([True], index=["a"], dtype="boolean")], | ||||
|     ], | ||||
| ) | ||||
| def test_dataframe_reductions(op, expected): | ||||
|     df = DataFrame({"a": array([1, 2], dtype="Int64")}) | ||||
|     result = getattr(df, op)() | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op, expected", | ||||
|     [ | ||||
|         ["sum", array([1, 3], dtype="Int64")], | ||||
|         ["prod", array([1, 3], dtype="Int64")], | ||||
|         ["min", array([1, 3], dtype="Int64")], | ||||
|         ["max", array([1, 3], dtype="Int64")], | ||||
|         ["mean", array([1, 3], dtype="Float64")], | ||||
|         ["median", array([1, 3], dtype="Float64")], | ||||
|         ["var", array([pd.NA], dtype="Float64")], | ||||
|         ["std", array([pd.NA], dtype="Float64")], | ||||
|         ["skew", array([pd.NA], dtype="Float64")], | ||||
|         ["any", array([True, True], dtype="boolean")], | ||||
|         ["all", array([True, True], dtype="boolean")], | ||||
|     ], | ||||
| ) | ||||
| def test_groupby_reductions(op, expected): | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "A": ["a", "b", "b"], | ||||
|             "B": array([1, None, 3], dtype="Int64"), | ||||
|         } | ||||
|     ) | ||||
|     result = getattr(df.groupby("A"), op)() | ||||
|     expected = DataFrame(expected, index=pd.Index(["a", "b"], name="A"), columns=["B"]) | ||||
|  | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op, expected", | ||||
|     [ | ||||
|         ["sum", Series([4, 4], index=["B", "C"], dtype="Float64")], | ||||
|         ["prod", Series([3, 3], index=["B", "C"], dtype="Float64")], | ||||
|         ["min", Series([1, 1], index=["B", "C"], dtype="Float64")], | ||||
|         ["max", Series([3, 3], index=["B", "C"], dtype="Float64")], | ||||
|         ["mean", Series([2, 2], index=["B", "C"], dtype="Float64")], | ||||
|         ["median", Series([2, 2], index=["B", "C"], dtype="Float64")], | ||||
|         ["var", Series([2, 2], index=["B", "C"], dtype="Float64")], | ||||
|         ["std", Series([2**0.5, 2**0.5], index=["B", "C"], dtype="Float64")], | ||||
|         ["skew", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")], | ||||
|         ["kurt", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")], | ||||
|         ["any", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")], | ||||
|         ["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")], | ||||
|     ], | ||||
| ) | ||||
| def test_mixed_reductions(op, expected): | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "A": ["a", "b", "b"], | ||||
|             "B": [1, None, 3], | ||||
|             "C": array([1, None, 3], dtype="Int64"), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     # series | ||||
|     result = getattr(df.C, op)() | ||||
|     tm.assert_equal(result, expected["C"]) | ||||
|  | ||||
|     # frame | ||||
|     if op in ["any", "all"]: | ||||
|         result = getattr(df, op)() | ||||
|     else: | ||||
|         result = getattr(df, op)(numeric_only=True) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,67 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas.core.arrays.integer import ( | ||||
|     Int8Dtype, | ||||
|     Int16Dtype, | ||||
|     Int32Dtype, | ||||
|     Int64Dtype, | ||||
|     UInt8Dtype, | ||||
|     UInt16Dtype, | ||||
|     UInt32Dtype, | ||||
|     UInt64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_dtypes(dtype):
    """Smoke-test that the dtype's NumPy kind agrees with its signedness flag."""
    numpy_kind = np.dtype(dtype.type).kind
    if not dtype.is_signed_integer:
        assert numpy_kind == "u"
    else:
        assert numpy_kind == "i"
    assert dtype.name is not None
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (Int8Dtype(), "Int8Dtype()"),
        (Int16Dtype(), "Int16Dtype()"),
        (Int32Dtype(), "Int32Dtype()"),
        (Int64Dtype(), "Int64Dtype()"),
        (UInt8Dtype(), "UInt8Dtype()"),
        (UInt16Dtype(), "UInt16Dtype()"),
        (UInt32Dtype(), "UInt32Dtype()"),
        (UInt64Dtype(), "UInt64Dtype()"),
    ],
)
def test_repr_dtype(dtype, expected):
    """The repr of each integer dtype must equal the paired expected string."""
    rendered = repr(dtype)
    assert rendered == expected
|  | ||||
|  | ||||
def test_repr_array():
    """Check the full repr of a short integer array that contains a missing value."""
    rendered = repr(pd.array([1, None, 3]))
    assert rendered == "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64"
|  | ||||
|  | ||||
def test_repr_array_long():
    """Check the repr of a 3000-element array: first row, ellipsis row, last row."""
    rendered = repr(pd.array([1, 2, None] * 1000))
    assert rendered == (
        "<IntegerArray>\n"
        "[   1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,\n"
        " ...\n"
        " <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>]\n"
        "Length: 3000, dtype: Int64"
    )
|  | ||||
|  | ||||
def test_frame_repr(data_missing):
    """Check the repr of a one-column frame built from ``data_missing``."""
    rendered = repr(pd.DataFrame({"A": data_missing}))
    assert rendered == "      A\n0  <NA>\n1     1"
		Reference in New Issue
	
	Block a user