done
This commit is contained in:
		| @ -0,0 +1,134 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def data(): | ||||
|     """ | ||||
|     Fixture returning a 100-element "boolean" array with valid and missing values. | ||||
|  | ||||
|     The valid entries alternate True/False; the entries at positions 8 and 97 | ||||
|     are passed in as ``np.nan``. | ||||
|     """ | ||||
|     return pd.array( | ||||
|         [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], | ||||
|         dtype="boolean", | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def left_array(): | ||||
|     """ | ||||
|     Fixture returning a nine-element "boolean" array: three True, three False, | ||||
|     then three missing values. | ||||
|     """ | ||||
|     return pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def right_array(): | ||||
|     """ | ||||
|     Fixture returning a nine-element "boolean" array: the pattern | ||||
|     True, False, missing repeated three times. | ||||
|     """ | ||||
|     return pd.array([True, False, None] * 3, dtype="boolean") | ||||
|  | ||||
|  | ||||
| # Basic tests for the arithmetic array ops | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "opname, exp", | ||||
|     [ | ||||
|         ("add", [True, True, None, True, False, None, None, None, None]), | ||||
|         ("mul", [True, False, None, False, False, None, None, None, None]), | ||||
|     ], | ||||
|     ids=["add", "mul"], | ||||
| ) | ||||
| def test_add_mul(left_array, right_array, opname, exp): | ||||
|     """ | ||||
|     Check ``+`` and ``*`` between two boolean arrays against hard-coded results. | ||||
|  | ||||
|     In both expected lists every position where either operand is missing | ||||
|     is itself missing. | ||||
|     """ | ||||
|     # look the operator up by name so one test body serves both parametrizations | ||||
|     op = getattr(operator, opname) | ||||
|     result = op(left_array, right_array) | ||||
|     expected = pd.array(exp, dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_sub(left_array, right_array): | ||||
|     """Check that subtracting two boolean arrays raises ``TypeError``.""" | ||||
|     # the pattern accepts both the "deprecated" and the "not supported" wording | ||||
|     msg = ( | ||||
|         r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), " | ||||
|         r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\." | ||||
|     ) | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         left_array - right_array | ||||
|  | ||||
|  | ||||
| def test_div(left_array, right_array): | ||||
|     """ | ||||
|     Check that ``/`` raises ``NotImplementedError`` both for Series built from | ||||
|     the arrays' underlying ``_data`` and for the boolean arrays themselves. | ||||
|     """ | ||||
|     msg = "operator '.*' not implemented for bool dtypes" | ||||
|     with pytest.raises(NotImplementedError, match=msg): | ||||
|         # check that we are matching the non-masked Series behavior | ||||
|         pd.Series(left_array._data) / pd.Series(right_array._data) | ||||
|  | ||||
|     with pytest.raises(NotImplementedError, match=msg): | ||||
|         left_array / right_array | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "opname", | ||||
|     [ | ||||
|         "floordiv", | ||||
|         "mod", | ||||
|         "pow", | ||||
|     ], | ||||
| ) | ||||
| def test_op_int8(left_array, right_array, opname): | ||||
|     op = getattr(operator, opname) | ||||
|     if opname != "mod": | ||||
|         msg = "operator '.*' not implemented for bool dtypes" | ||||
|         with pytest.raises(NotImplementedError, match=msg): | ||||
|             result = op(left_array, right_array) | ||||
|         return | ||||
|     result = op(left_array, right_array) | ||||
|     expected = op(left_array.astype("Int8"), right_array.astype("Int8")) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| # Test generic characteristics / errors | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
| def test_error_invalid_values(data, all_arithmetic_operators): | ||||
|     """ | ||||
|     Check that arithmetic between a boolean Series and invalid operands raises | ||||
|     ``TypeError``. | ||||
|  | ||||
|     ``all_arithmetic_operators`` is a fixture defined outside this file; it | ||||
|     presumably supplies dunder method names (the ``__mul__``/``__rmul__`` | ||||
|     check below relies on that). | ||||
|     """ | ||||
|     # invalid ops | ||||
|     op = all_arithmetic_operators | ||||
|     s = pd.Series(data) | ||||
|     # bound method of the Series, called with each invalid operand below | ||||
|     ops = getattr(s, op) | ||||
|  | ||||
|     # invalid scalars | ||||
|     # each message is a regex of alternatives; any one of them may match | ||||
|     msg = ( | ||||
|         "did not contain a loop with signature matching types|" | ||||
|         "BooleanArray cannot perform the operation|" | ||||
|         "not supported for the input types, and the inputs could not be safely coerced " | ||||
|         "to any supported types according to the casting rule ''safe''|" | ||||
|         "not supported for dtype" | ||||
|     ) | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         ops("foo") | ||||
|     msg = "|".join( | ||||
|         [ | ||||
|             r"unsupported operand type\(s\) for", | ||||
|             "Concatenation operation is not implemented for NumPy arrays", | ||||
|             "has no kernel", | ||||
|             "not supported for dtype", | ||||
|         ] | ||||
|     ) | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         ops(pd.Timestamp("20180101")) | ||||
|  | ||||
|     # invalid array-likes | ||||
|     if op not in ("__mul__", "__rmul__"): | ||||
|         # TODO(extension) numpy's mul with object array sees booleans as numbers | ||||
|         msg = "|".join( | ||||
|             [ | ||||
|                 r"unsupported operand type\(s\) for", | ||||
|                 "can only concatenate str", | ||||
|                 "not all arguments converted during string formatting", | ||||
|                 "has no kernel", | ||||
|                 "not implemented", | ||||
|                 "not supported for dtype", | ||||
|             ] | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ops(pd.Series("foo", index=s.index)) | ||||
| @ -0,0 +1,59 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_astype(using_infer_string): | ||||
|     """ | ||||
|     Check ``astype`` from a boolean array to int64, bool, float64 and str. | ||||
|  | ||||
|     With a missing value present, casting to int64 or bool is expected to | ||||
|     raise ``ValueError`` while float64 gives NaN for the missing entry. The | ||||
|     expected str result depends on the ``using_infer_string`` fixture (defined | ||||
|     outside this file). | ||||
|     """ | ||||
|     # with missing values | ||||
|     arr = pd.array([True, False, None], dtype="boolean") | ||||
|  | ||||
|     with pytest.raises(ValueError, match="cannot convert NA to integer"): | ||||
|         arr.astype("int64") | ||||
|  | ||||
|     with pytest.raises(ValueError, match="cannot convert float NaN to"): | ||||
|         arr.astype("bool") | ||||
|  | ||||
|     result = arr.astype("float64") | ||||
|     expected = np.array([1, 0, np.nan], dtype="float64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = arr.astype("str") | ||||
|     if using_infer_string: | ||||
|         # expected: a string extension array whose missing entry stays missing | ||||
|         expected = pd.array( | ||||
|             ["True", "False", None], dtype=pd.StringDtype(na_value=np.nan) | ||||
|         ) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|     else: | ||||
|         # expected: a NumPy unicode array with the missing entry as "<NA>" | ||||
|         expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # no missing values | ||||
|     arr = pd.array([True, False, True], dtype="boolean") | ||||
|     result = arr.astype("int64") | ||||
|     expected = np.array([1, 0, 1], dtype="int64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = arr.astype("bool") | ||||
|     expected = np.array([True, False, True], dtype="bool") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_astype_to_boolean_array(): | ||||
|     """ | ||||
|     Check that ``astype`` to "boolean", given either as a string or as a | ||||
|     ``pd.BooleanDtype`` instance, returns an array equal to the input. | ||||
|     """ | ||||
|     # astype to BooleanArray | ||||
|     arr = pd.array([True, False, None], dtype="boolean") | ||||
|  | ||||
|     result = arr.astype("boolean") | ||||
|     tm.assert_extension_array_equal(result, arr) | ||||
|     result = arr.astype(pd.BooleanDtype()) | ||||
|     tm.assert_extension_array_equal(result, arr) | ||||
|  | ||||
|  | ||||
| def test_astype_to_integer_array(): | ||||
|     """ | ||||
|     Check that ``astype("Int64")`` maps True/False to 1/0 and keeps the | ||||
|     missing entry missing. | ||||
|     """ | ||||
|     # astype to IntegerArray | ||||
|     arr = pd.array([True, False, None], dtype="boolean") | ||||
|  | ||||
|     result = arr.astype("Int64") | ||||
|     expected = pd.array([1, 0, None], dtype="Int64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
| @ -0,0 +1,60 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.arrays import BooleanArray | ||||
| from pandas.tests.arrays.masked_shared import ComparisonOps | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def data(): | ||||
|     """ | ||||
|     Fixture returning a 100-element "boolean" array with valid and missing data. | ||||
|  | ||||
|     The valid entries alternate True/False; the entries at positions 8 and 97 | ||||
|     are passed in as ``np.nan``. | ||||
|     """ | ||||
|     return pd.array( | ||||
|         [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False], | ||||
|         dtype="boolean", | ||||
|     ) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def dtype(): | ||||
|     """Fixture returning a new ``pd.BooleanDtype`` instance.""" | ||||
|     return pd.BooleanDtype() | ||||
|  | ||||
|  | ||||
| class TestComparisonOps(ComparisonOps): | ||||
|     """ | ||||
|     Comparison-operator tests for boolean arrays. | ||||
|  | ||||
|     ``_compare_other`` and the base ``test_scalar`` come from ``ComparisonOps`` | ||||
|     and ``comparison_op`` is a fixture; all three are defined outside this file. | ||||
|     """ | ||||
|  | ||||
|     def test_compare_scalar(self, data, comparison_op): | ||||
|         """Run the shared comparison check against the scalar ``True``.""" | ||||
|         self._compare_other(data, comparison_op, True) | ||||
|  | ||||
|     def test_compare_array(self, data, comparison_op): | ||||
|         """ | ||||
|         Run the shared comparison check against an all-True operand given as | ||||
|         a boolean array, a NumPy array and a Series. | ||||
|         """ | ||||
|         other = pd.array([True] * len(data), dtype="boolean") | ||||
|         self._compare_other(data, comparison_op, other) | ||||
|         other = np.array([True] * len(data)) | ||||
|         self._compare_other(data, comparison_op, other) | ||||
|         other = pd.Series([True] * len(data)) | ||||
|         self._compare_other(data, comparison_op, other) | ||||
|  | ||||
|     @pytest.mark.parametrize("other", [True, False, pd.NA]) | ||||
|     def test_scalar(self, other, comparison_op, dtype): | ||||
|         """Run the base-class scalar test for True, False and ``pd.NA``.""" | ||||
|         ComparisonOps.test_scalar(self, other, comparison_op, dtype) | ||||
|  | ||||
|     def test_array(self, comparison_op): | ||||
|         """ | ||||
|         Compare two boolean arrays and check the result against one built | ||||
|         from the raw data and the union of both masks. | ||||
|         """ | ||||
|         op = comparison_op | ||||
|         a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         b = pd.array([True, False, None] * 3, dtype="boolean") | ||||
|  | ||||
|         result = op(a, b) | ||||
|  | ||||
|         # expected: op applied to the underlying values, missing where either is | ||||
|         values = op(a._data, b._data) | ||||
|         mask = a._mask | b._mask | ||||
|         expected = BooleanArray(values, mask) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         result[0] = None | ||||
|         tm.assert_extension_array_equal( | ||||
|             a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         ) | ||||
|         tm.assert_extension_array_equal( | ||||
|             b, pd.array([True, False, None] * 3, dtype="boolean") | ||||
|         ) | ||||
| @ -0,0 +1,325 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.arrays import BooleanArray | ||||
| from pandas.core.arrays.boolean import coerce_to_array | ||||
|  | ||||
|  | ||||
| def test_boolean_array_constructor(): | ||||
|     """ | ||||
|     Check the ``BooleanArray(values, mask)`` constructor. | ||||
|  | ||||
|     Two boolean ndarrays give an array equal to the matching ``pd.array``. | ||||
|     Lists, an integer ndarray or a ``None`` mask raise ``TypeError``, and | ||||
|     mismatched shapes raise ``ValueError``. | ||||
|     """ | ||||
|     values = np.array([True, False, True, False], dtype="bool") | ||||
|     mask = np.array([False, False, False, True], dtype="bool") | ||||
|  | ||||
|     result = BooleanArray(values, mask) | ||||
|     expected = pd.array([True, False, True, None], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     with pytest.raises(TypeError, match="values should be boolean numpy array"): | ||||
|         BooleanArray(values.tolist(), mask) | ||||
|  | ||||
|     with pytest.raises(TypeError, match="mask should be boolean numpy array"): | ||||
|         BooleanArray(values, mask.tolist()) | ||||
|  | ||||
|     with pytest.raises(TypeError, match="values should be boolean numpy array"): | ||||
|         BooleanArray(values.astype(int), mask) | ||||
|  | ||||
|     with pytest.raises(TypeError, match="mask should be boolean numpy array"): | ||||
|         BooleanArray(values, None) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="values.shape must match mask.shape"): | ||||
|         BooleanArray(values.reshape(1, -1), mask) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="values.shape must match mask.shape"): | ||||
|         BooleanArray(values, mask.reshape(1, -1)) | ||||
|  | ||||
|  | ||||
| def test_boolean_array_constructor_copy(): | ||||
|     """ | ||||
|     Check that the constructor keeps the very same ``values`` and ``mask`` | ||||
|     objects by default and holds different objects when ``copy=True``. | ||||
|     """ | ||||
|     values = np.array([True, False, True, False], dtype="bool") | ||||
|     mask = np.array([False, False, False, True], dtype="bool") | ||||
|  | ||||
|     result = BooleanArray(values, mask) | ||||
|     assert result._data is values | ||||
|     assert result._mask is mask | ||||
|  | ||||
|     result = BooleanArray(values, mask, copy=True) | ||||
|     assert result._data is not values | ||||
|     assert result._mask is not mask | ||||
|  | ||||
|  | ||||
| def test_to_boolean_array(): | ||||
|     """ | ||||
|     Check ``pd.array(..., dtype="boolean")`` for a list, a bool ndarray and an | ||||
|     object ndarray, first without and then with a ``None`` entry. | ||||
|     """ | ||||
|     expected = BooleanArray( | ||||
|         np.array([True, False, True]), np.array([False, False, False]) | ||||
|     ) | ||||
|  | ||||
|     result = pd.array([True, False, True], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|     result = pd.array(np.array([True, False, True]), dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|     result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # with missing values | ||||
|     expected = BooleanArray( | ||||
|         np.array([True, False, True]), np.array([False, False, True]) | ||||
|     ) | ||||
|  | ||||
|     result = pd.array([True, False, None], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|     result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_boolean_array_all_none(): | ||||
|     """ | ||||
|     Check that an all-``None`` list or object ndarray converts to a boolean | ||||
|     array whose mask is set at every position. | ||||
|     """ | ||||
|     expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True])) | ||||
|  | ||||
|     result = pd.array([None, None, None], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|     result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "a, b", | ||||
|     [ | ||||
|         ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]), | ||||
|         ([True, np.nan], [True, None]), | ||||
|         ([True, pd.NA], [True, None]), | ||||
|         ([np.nan, np.nan], [None, None]), | ||||
|         (np.array([np.nan, np.nan], dtype=float), [None, None]), | ||||
|     ], | ||||
| ) | ||||
| def test_to_boolean_array_missing_indicators(a, b): | ||||
|     """ | ||||
|     Check that ``None``, ``np.nan`` and ``pd.NA`` in the input all convert to | ||||
|     the same missing value: ``a`` must convert to the same array as ``b``. | ||||
|     """ | ||||
|     result = pd.array(a, dtype="boolean") | ||||
|     expected = pd.array(b, dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "values", | ||||
|     [ | ||||
|         ["foo", "bar"], | ||||
|         ["1", "2"], | ||||
|         # "foo", | ||||
|         [1, 2], | ||||
|         [1.0, 2.0], | ||||
|         pd.date_range("20130101", periods=2), | ||||
|         np.array(["foo"]), | ||||
|         np.array([1, 2]), | ||||
|         np.array([1.0, 2.0]), | ||||
|         [np.nan, {"a": 1}], | ||||
|     ], | ||||
| ) | ||||
| def test_to_boolean_array_error(values): | ||||
|     """ | ||||
|     Check that each parametrized input raises ``TypeError`` when converted | ||||
|     with ``dtype="boolean"``. | ||||
|     """ | ||||
|     # error in converting existing arrays to BooleanArray | ||||
|     msg = "Need to pass bool-like value" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         pd.array(values, dtype="boolean") | ||||
|  | ||||
|  | ||||
| def test_to_boolean_array_from_integer_array(): | ||||
|     """ | ||||
|     Check that NumPy arrays holding 1/0 convert to True/False, and that a | ||||
|     ``None`` entry becomes a missing value. | ||||
|     """ | ||||
|     result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean") | ||||
|     expected = pd.array([True, False, True, False], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # with missing values | ||||
|     result = pd.array(np.array([1, 0, 1, None]), dtype="boolean") | ||||
|     expected = pd.array([True, False, True, None], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_boolean_array_from_float_array(): | ||||
|     """ | ||||
|     Check that NumPy arrays holding 1.0/0.0 convert to True/False, and that | ||||
|     an ``np.nan`` entry becomes a missing value. | ||||
|     """ | ||||
|     result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean") | ||||
|     expected = pd.array([True, False, True, False], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # with missing values | ||||
|     result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean") | ||||
|     expected = pd.array([True, False, True, None], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_boolean_array_integer_like(): | ||||
|     """ | ||||
|     Check that plain lists of 1/0 convert to True/False, and that a ``None`` | ||||
|     entry becomes a missing value. | ||||
|     """ | ||||
|     # integers of 0's and 1's | ||||
|     result = pd.array([1, 0, 1, 0], dtype="boolean") | ||||
|     expected = pd.array([True, False, True, False], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # with missing values | ||||
|     result = pd.array([1, 0, 1, None], dtype="boolean") | ||||
|     expected = pd.array([True, False, True, None], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_coerce_to_array(): | ||||
|     """ | ||||
|     Check ``coerce_to_array`` with an explicit ``mask``. | ||||
|  | ||||
|     Covers the no-copy default, ``copy=True``, missing values coming from | ||||
|     both ``values`` and ``mask``, and shape mismatches between the two. | ||||
|     """ | ||||
|     # TODO this is currently not public API | ||||
|     values = np.array([True, False, True, False], dtype="bool") | ||||
|     mask = np.array([False, False, False, True], dtype="bool") | ||||
|     result = BooleanArray(*coerce_to_array(values, mask=mask)) | ||||
|     expected = BooleanArray(values, mask) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|     # default: the very same input objects are reused | ||||
|     assert result._data is values | ||||
|     assert result._mask is mask | ||||
|     result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True)) | ||||
|     expected = BooleanArray(values, mask) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|     assert result._data is not values | ||||
|     assert result._mask is not mask | ||||
|  | ||||
|     # mixed missing from values and mask | ||||
|     values = [True, False, None, False] | ||||
|     mask = np.array([False, False, False, True], dtype="bool") | ||||
|     result = BooleanArray(*coerce_to_array(values, mask=mask)) | ||||
|     # expected mask marks both the None in values and the entry set in mask | ||||
|     expected = BooleanArray( | ||||
|         np.array([True, False, True, True]), np.array([False, False, True, True]) | ||||
|     ) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|     result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask)) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|     result = BooleanArray(*coerce_to_array(values, mask=mask.tolist())) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # raise errors for wrong dimension | ||||
|     values = np.array([True, False, True, False], dtype="bool") | ||||
|     mask = np.array([False, False, False, True], dtype="bool") | ||||
|  | ||||
|     # passing 2D values is OK as long as no mask | ||||
|     coerce_to_array(values.reshape(1, -1)) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="values.shape and mask.shape must match"): | ||||
|         coerce_to_array(values.reshape(1, -1), mask=mask) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="values.shape and mask.shape must match"): | ||||
|         coerce_to_array(values, mask=mask.reshape(1, -1)) | ||||
|  | ||||
|  | ||||
| def test_coerce_to_array_from_boolean_array(): | ||||
|     """ | ||||
|     Check ``coerce_to_array`` when it is given a ``BooleanArray``: the data | ||||
|     and mask objects are passed through, and supplying a ``mask`` as well | ||||
|     raises ``ValueError``. | ||||
|     """ | ||||
|     # passing BooleanArray to coerce_to_array | ||||
|     values = np.array([True, False, True, False], dtype="bool") | ||||
|     mask = np.array([False, False, False, True], dtype="bool") | ||||
|     arr = BooleanArray(values, mask) | ||||
|     result = BooleanArray(*coerce_to_array(arr)) | ||||
|     tm.assert_extension_array_equal(result, arr) | ||||
|     # no copy | ||||
|     assert result._data is arr._data | ||||
|     assert result._mask is arr._mask | ||||
|  | ||||
|     # here copy=True is passed to the BooleanArray constructor | ||||
|     result = BooleanArray(*coerce_to_array(arr), copy=True) | ||||
|     tm.assert_extension_array_equal(result, arr) | ||||
|     assert result._data is not arr._data | ||||
|     assert result._mask is not arr._mask | ||||
|  | ||||
|     with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"): | ||||
|         coerce_to_array(arr, mask=mask) | ||||
|  | ||||
|  | ||||
| def test_coerce_to_numpy_array(): | ||||
|     """ | ||||
|     Check ``np.array(arr)`` on boolean arrays: object dtype when a value is | ||||
|     missing, bool dtype otherwise, and ``ValueError`` when bool dtype is | ||||
|     requested while a value is missing. | ||||
|     """ | ||||
|     # with missing values -> object dtype | ||||
|     arr = pd.array([True, False, None], dtype="boolean") | ||||
|     result = np.array(arr) | ||||
|     expected = np.array([True, False, pd.NA], dtype="object") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # with no missing values -> bool dtype | ||||
|     arr = pd.array([True, False, True], dtype="boolean") | ||||
|     result = np.array(arr) | ||||
|     expected = np.array([True, False, True], dtype="bool") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # force bool dtype | ||||
|     result = np.array(arr, dtype="bool") | ||||
|     expected = np.array([True, False, True], dtype="bool") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|     # with missing values will raise error | ||||
|     arr = pd.array([True, False, None], dtype="boolean") | ||||
|     msg = ( | ||||
|         "cannot convert to 'bool'-dtype NumPy array with missing values. " | ||||
|         "Specify an appropriate 'na_value' for this dtype." | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         np.array(arr, dtype="bool") | ||||
|  | ||||
|  | ||||
| def test_to_boolean_array_from_strings(): | ||||
|     """ | ||||
|     Check ``BooleanArray._from_sequence_of_strings``: "True", "1" and "1.0" | ||||
|     parse as True, "False", "0" and "0.0" as False, and ``np.nan`` as missing. | ||||
|     """ | ||||
|     result = BooleanArray._from_sequence_of_strings( | ||||
|         np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object), | ||||
|         dtype="boolean", | ||||
|     ) | ||||
|     expected = BooleanArray( | ||||
|         np.array([True, False, True, True, False, False, False]), | ||||
|         np.array([False, False, False, False, False, False, True]), | ||||
|     ) | ||||
|  | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_boolean_array_from_strings_invalid_string(): | ||||
|     """Check that an unparseable string ("donkey") raises ``ValueError``.""" | ||||
|     with pytest.raises(ValueError, match="cannot be cast"): | ||||
|         BooleanArray._from_sequence_of_strings(["donkey"], dtype="boolean") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) | ||||
| def test_to_numpy(box): | ||||
|     con = pd.Series if box else pd.array | ||||
|     # default (with or without missing values) -> object dtype | ||||
|     arr = con([True, False, True], dtype="boolean") | ||||
|     result = arr.to_numpy() | ||||
|     expected = np.array([True, False, True], dtype="bool") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     arr = con([True, False, None], dtype="boolean") | ||||
|     result = arr.to_numpy() | ||||
|     expected = np.array([True, False, pd.NA], dtype="object") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     arr = con([True, False, None], dtype="boolean") | ||||
|     result = arr.to_numpy(dtype="str") | ||||
|     expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # no missing values -> can convert to bool, otherwise raises | ||||
|     arr = con([True, False, True], dtype="boolean") | ||||
|     result = arr.to_numpy(dtype="bool") | ||||
|     expected = np.array([True, False, True], dtype="bool") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     arr = con([True, False, None], dtype="boolean") | ||||
|     with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"): | ||||
|         result = arr.to_numpy(dtype="bool") | ||||
|  | ||||
|     # specify dtype and na_value | ||||
|     arr = con([True, False, None], dtype="boolean") | ||||
|     result = arr.to_numpy(dtype=object, na_value=None) | ||||
|     expected = np.array([True, False, None], dtype="object") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = arr.to_numpy(dtype=bool, na_value=False) | ||||
|     expected = np.array([True, False, False], dtype="bool") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = arr.to_numpy(dtype="int64", na_value=-99) | ||||
|     expected = np.array([1, 0, -99], dtype="int64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = arr.to_numpy(dtype="float64", na_value=np.nan) | ||||
|     expected = np.array([1, 0, np.nan], dtype="float64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     # converting to int or float without specifying na_value raises | ||||
|     with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): | ||||
|         arr.to_numpy(dtype="int64") | ||||
|  | ||||
|  | ||||
| def test_to_numpy_copy(): | ||||
|     """ | ||||
|     Check the ``copy`` behaviour of ``to_numpy(dtype=bool)``: without | ||||
|     ``copy=True`` a write to the result shows up in the source array, with | ||||
|     ``copy=True`` it does not. | ||||
|     """ | ||||
|     # to_numpy can be zero-copy if no missing values | ||||
|     arr = pd.array([True, False, True], dtype="boolean") | ||||
|     result = arr.to_numpy(dtype=bool) | ||||
|     result[0] = False | ||||
|     tm.assert_extension_array_equal( | ||||
|         arr, pd.array([False, False, True], dtype="boolean") | ||||
|     ) | ||||
|  | ||||
|     arr = pd.array([True, False, True], dtype="boolean") | ||||
|     result = arr.to_numpy(dtype=bool, copy=True) | ||||
|     result[0] = False | ||||
|     tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) | ||||
| @ -0,0 +1,126 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor] | ||||
| ) | ||||
| def test_ufuncs_binary(ufunc): | ||||
|     """ | ||||
|     Check binary NumPy ufuncs with a boolean array on either side. | ||||
|  | ||||
|     Each expected value is built by applying the ufunc to the underlying | ||||
|     ``_data`` and then marking the array's masked positions as missing. | ||||
|     A string operand is expected to raise ``TypeError``. | ||||
|     """ | ||||
|     # two BooleanArrays | ||||
|     a = pd.array([True, False, None], dtype="boolean") | ||||
|     result = ufunc(a, a) | ||||
|     expected = pd.array(ufunc(a._data, a._data), dtype="boolean") | ||||
|     expected[a._mask] = np.nan | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # Series with BooleanArray | ||||
|     s = pd.Series(a) | ||||
|     result = ufunc(s, a) | ||||
|     expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") | ||||
|     expected[a._mask] = np.nan | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # Boolean with numpy array | ||||
|     arr = np.array([True, True, False]) | ||||
|     result = ufunc(a, arr) | ||||
|     expected = pd.array(ufunc(a._data, arr), dtype="boolean") | ||||
|     expected[a._mask] = np.nan | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     result = ufunc(arr, a) | ||||
|     expected = pd.array(ufunc(arr, a._data), dtype="boolean") | ||||
|     expected[a._mask] = np.nan | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # BooleanArray with scalar | ||||
|     result = ufunc(a, True) | ||||
|     expected = pd.array(ufunc(a._data, True), dtype="boolean") | ||||
|     expected[a._mask] = np.nan | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     result = ufunc(True, a) | ||||
|     expected = pd.array(ufunc(True, a._data), dtype="boolean") | ||||
|     expected[a._mask] = np.nan | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # not handled types | ||||
|     msg = r"operand type\(s\) all returned NotImplemented from __array_ufunc__" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         ufunc(a, "test") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ufunc", [np.logical_not]) | ||||
| def test_ufuncs_unary(ufunc): | ||||
|     """ | ||||
|     Check a unary NumPy ufunc on a boolean array and on a Series wrapping it. | ||||
|  | ||||
|     Each expected value is built by applying the ufunc to the underlying | ||||
|     ``_data`` and then marking the array's masked positions as missing. | ||||
|     """ | ||||
|     a = pd.array([True, False, None], dtype="boolean") | ||||
|     result = ufunc(a) | ||||
|     expected = pd.array(ufunc(a._data), dtype="boolean") | ||||
|     expected[a._mask] = np.nan | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     ser = pd.Series(a) | ||||
|     result = ufunc(ser) | ||||
|     expected = pd.Series(ufunc(a._data), dtype="boolean") | ||||
|     expected[a._mask] = np.nan | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_ufunc_numeric(): | ||||
|     """ | ||||
|     Check that ``np.sqrt`` on a boolean array gives a "Float32" array with | ||||
|     the missing entry kept missing. | ||||
|     """ | ||||
|     # np.sqrt on np.bool_ returns float16, which we upcast to Float32 | ||||
|     #  bc we do not have Float16 | ||||
|     arr = pd.array([True, False, None], dtype="boolean") | ||||
|  | ||||
|     res = np.sqrt(arr) | ||||
|  | ||||
|     expected = pd.array([1, 0, None], dtype="Float32") | ||||
|     tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("values", [[True, False], [True, None]]) | ||||
| def test_ufunc_reduce_raises(values): | ||||
|     """ | ||||
|     Check the value returned by ``np.add.reduce`` on a boolean array. | ||||
|  | ||||
|     Despite the name, nothing here is expected to raise: the result must be | ||||
|     ``pd.NA`` when the last element is missing, otherwise the sum of the | ||||
|     underlying data. | ||||
|     """ | ||||
|     arr = pd.array(values, dtype="boolean") | ||||
|  | ||||
|     res = np.add.reduce(arr) | ||||
|     # only the last element is missing in the parametrized inputs | ||||
|     if arr[-1] is pd.NA: | ||||
|         expected = pd.NA | ||||
|     else: | ||||
|         expected = arr._data.sum() | ||||
|     tm.assert_almost_equal(res, expected) | ||||
|  | ||||
|  | ||||
| def test_value_counts_na(): | ||||
|     """ | ||||
|     Check ``value_counts`` on a boolean array with ``dropna`` False and True: | ||||
|     an "Int64" Series named "count" whose index has the array's dtype. | ||||
|     """ | ||||
|     arr = pd.array([True, False, pd.NA], dtype="boolean") | ||||
|     result = arr.value_counts(dropna=False) | ||||
|     expected = pd.Series([1, 1, 1], index=arr, dtype="Int64", name="count") | ||||
|     assert expected.index.dtype == arr.dtype | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = arr.value_counts(dropna=True) | ||||
|     # arr[:-1] leaves out the trailing missing value | ||||
|     expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64", name="count") | ||||
|     assert expected.index.dtype == arr.dtype | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_value_counts_with_normalize(): | ||||
|     """ | ||||
|     Check ``value_counts(normalize=True)`` on a boolean Series: a "Float64" | ||||
|     Series named "proportion" with 1/2 for each of True and False. | ||||
|     """ | ||||
|     ser = pd.Series([True, False, pd.NA], dtype="boolean") | ||||
|     result = ser.value_counts(normalize=True) | ||||
|     expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64", name="proportion") / 2 | ||||
|     assert expected.index.dtype == "boolean" | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_diff(): | ||||
|     """ | ||||
|     Check a one-step ``diff`` on a boolean array and on a Series wrapping it. | ||||
|  | ||||
|     In the expected result the first entry is missing, as is every entry | ||||
|     whose own value or predecessor is missing. | ||||
|     """ | ||||
|     a = pd.array( | ||||
|         [True, True, False, False, True, None, True, None, False], dtype="boolean" | ||||
|     ) | ||||
|     result = pd.core.algorithms.diff(a, 1) | ||||
|     expected = pd.array( | ||||
|         [None, False, True, False, True, None, None, None, None], dtype="boolean" | ||||
|     ) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     ser = pd.Series(a) | ||||
|     result = ser.diff() | ||||
|     expected = pd.Series(expected) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,13 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("na", [None, np.nan, pd.NA]) | ||||
| def test_setitem_missing_values(na): | ||||
|     """ | ||||
|     Check that assigning ``None``, ``np.nan`` or ``pd.NA`` to an element of a | ||||
|     boolean array makes that element missing. | ||||
|     """ | ||||
|     arr = pd.array([True, False, None], dtype="boolean") | ||||
|     expected = pd.array([True, None, None], dtype="boolean") | ||||
|     arr[1] = na | ||||
|     tm.assert_extension_array_equal(arr, expected) | ||||
| @ -0,0 +1,254 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.arrays import BooleanArray | ||||
| from pandas.core.ops.mask_ops import ( | ||||
|     kleene_and, | ||||
|     kleene_or, | ||||
|     kleene_xor, | ||||
| ) | ||||
| from pandas.tests.extension.base import BaseOpsUtil | ||||
|  | ||||
|  | ||||
| class TestLogicalOps(BaseOpsUtil): | ||||
|     def test_numpy_scalars_ok(self, all_logical_operators): | ||||
|         """ | ||||
|         Check that a logical op gives the same result for a Python bool and | ||||
|         for the matching ``np.bool_`` scalar. | ||||
|  | ||||
|         ``all_logical_operators`` is a fixture defined outside this file; it | ||||
|         is used here as the name of a method on the array. | ||||
|         """ | ||||
|         a = pd.array([True, False, None], dtype="boolean") | ||||
|         op = getattr(a, all_logical_operators) | ||||
|  | ||||
|         tm.assert_extension_array_equal(op(True), op(np.bool_(True))) | ||||
|         tm.assert_extension_array_equal(op(False), op(np.bool_(False))) | ||||
|  | ||||
|     def get_op_from_name(self, op_name): | ||||
|         short_opname = op_name.strip("_") | ||||
|         short_opname = short_opname if "xor" in short_opname else short_opname + "_" | ||||
|         try: | ||||
|             op = getattr(operator, short_opname) | ||||
|         except AttributeError: | ||||
|             # Assume it is the reverse operator | ||||
|             rop = getattr(operator, short_opname[1:]) | ||||
|             op = lambda x, y: rop(y, x) | ||||
|  | ||||
|         return op | ||||
|  | ||||
|     def test_empty_ok(self, all_logical_operators): | ||||
|         a = pd.array([], dtype="boolean") | ||||
|         op_name = all_logical_operators | ||||
|         result = getattr(a, op_name)(True) | ||||
|         tm.assert_extension_array_equal(a, result) | ||||
|  | ||||
|         result = getattr(a, op_name)(False) | ||||
|         tm.assert_extension_array_equal(a, result) | ||||
|  | ||||
|         result = getattr(a, op_name)(pd.NA) | ||||
|         tm.assert_extension_array_equal(a, result) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)] | ||||
|     ) | ||||
|     def test_eq_mismatched_type(self, other): | ||||
|         # GH-44499 | ||||
|         arr = pd.array([True, False]) | ||||
|         result = arr == other | ||||
|         expected = pd.array([False, False]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = arr != other | ||||
|         expected = pd.array([True, True]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_logical_length_mismatch_raises(self, all_logical_operators): | ||||
|         op_name = all_logical_operators | ||||
|         a = pd.array([True, False, None], dtype="boolean") | ||||
|         msg = "Lengths must match" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             getattr(a, op_name)([True, False]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             getattr(a, op_name)(np.array([True, False])) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             getattr(a, op_name)(pd.array([True, False], dtype="boolean")) | ||||
|  | ||||
|     def test_logical_nan_raises(self, all_logical_operators): | ||||
|         op_name = all_logical_operators | ||||
|         a = pd.array([True, False, None], dtype="boolean") | ||||
|         msg = "Got float instead" | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             getattr(a, op_name)(np.nan) | ||||
|  | ||||
|     @pytest.mark.parametrize("other", ["a", 1]) | ||||
|     def test_non_bool_or_na_other_raises(self, other, all_logical_operators): | ||||
|         a = pd.array([True, False], dtype="boolean") | ||||
|         with pytest.raises(TypeError, match=str(type(other).__name__)): | ||||
|             getattr(a, all_logical_operators)(other) | ||||
|  | ||||
|     def test_kleene_or(self): | ||||
|         # A clear test of behavior. | ||||
|         a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         b = pd.array([True, False, None] * 3, dtype="boolean") | ||||
|         result = a | b | ||||
|         expected = pd.array( | ||||
|             [True, True, True, True, False, None, True, None, None], dtype="boolean" | ||||
|         ) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = b | a | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         tm.assert_extension_array_equal( | ||||
|             a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         ) | ||||
|         tm.assert_extension_array_equal( | ||||
|             b, pd.array([True, False, None] * 3, dtype="boolean") | ||||
|         ) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other, expected", | ||||
|         [ | ||||
|             (pd.NA, [True, None, None]), | ||||
|             (True, [True, True, True]), | ||||
|             (np.bool_(True), [True, True, True]), | ||||
|             (False, [True, False, None]), | ||||
|             (np.bool_(False), [True, False, None]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_kleene_or_scalar(self, other, expected): | ||||
|         # TODO: test True & False | ||||
|         a = pd.array([True, False, None], dtype="boolean") | ||||
|         result = a | other | ||||
|         expected = pd.array(expected, dtype="boolean") | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = other | a | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         tm.assert_extension_array_equal( | ||||
|             a, pd.array([True, False, None], dtype="boolean") | ||||
|         ) | ||||
|  | ||||
|     def test_kleene_and(self): | ||||
|         # A clear test of behavior. | ||||
|         a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         b = pd.array([True, False, None] * 3, dtype="boolean") | ||||
|         result = a & b | ||||
|         expected = pd.array( | ||||
|             [True, False, None, False, False, False, None, False, None], dtype="boolean" | ||||
|         ) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = b & a | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         tm.assert_extension_array_equal( | ||||
|             a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         ) | ||||
|         tm.assert_extension_array_equal( | ||||
|             b, pd.array([True, False, None] * 3, dtype="boolean") | ||||
|         ) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other, expected", | ||||
|         [ | ||||
|             (pd.NA, [None, False, None]), | ||||
|             (True, [True, False, None]), | ||||
|             (False, [False, False, False]), | ||||
|             (np.bool_(True), [True, False, None]), | ||||
|             (np.bool_(False), [False, False, False]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_kleene_and_scalar(self, other, expected): | ||||
|         a = pd.array([True, False, None], dtype="boolean") | ||||
|         result = a & other | ||||
|         expected = pd.array(expected, dtype="boolean") | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = other & a | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         tm.assert_extension_array_equal( | ||||
|             a, pd.array([True, False, None], dtype="boolean") | ||||
|         ) | ||||
|  | ||||
|     def test_kleene_xor(self): | ||||
|         a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         b = pd.array([True, False, None] * 3, dtype="boolean") | ||||
|         result = a ^ b | ||||
|         expected = pd.array( | ||||
|             [False, True, None, True, False, None, None, None, None], dtype="boolean" | ||||
|         ) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = b ^ a | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         tm.assert_extension_array_equal( | ||||
|             a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         ) | ||||
|         tm.assert_extension_array_equal( | ||||
|             b, pd.array([True, False, None] * 3, dtype="boolean") | ||||
|         ) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other, expected", | ||||
|         [ | ||||
|             (pd.NA, [None, None, None]), | ||||
|             (True, [False, True, None]), | ||||
|             (np.bool_(True), [False, True, None]), | ||||
|             (np.bool_(False), [True, False, None]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_kleene_xor_scalar(self, other, expected): | ||||
|         a = pd.array([True, False, None], dtype="boolean") | ||||
|         result = a ^ other | ||||
|         expected = pd.array(expected, dtype="boolean") | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = other ^ a | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         tm.assert_extension_array_equal( | ||||
|             a, pd.array([True, False, None], dtype="boolean") | ||||
|         ) | ||||
|  | ||||
|     @pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3]) | ||||
|     def test_no_masked_assumptions(self, other, all_logical_operators): | ||||
|         # The logical operations should not assume that masked values are False! | ||||
|         a = pd.arrays.BooleanArray( | ||||
|             np.array([True, True, True, False, False, False, True, False, True]), | ||||
|             np.array([False] * 6 + [True, True, True]), | ||||
|         ) | ||||
|         b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") | ||||
|         if isinstance(other, list): | ||||
|             other = pd.array(other, dtype="boolean") | ||||
|  | ||||
|         result = getattr(a, all_logical_operators)(other) | ||||
|         expected = getattr(b, all_logical_operators)(other) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         if isinstance(other, BooleanArray): | ||||
|             other._data[other._mask] = True | ||||
|             a._data[a._mask] = False | ||||
|  | ||||
|             result = getattr(a, all_logical_operators)(other) | ||||
|             expected = getattr(b, all_logical_operators)(other) | ||||
|             tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
def test_error_both_scalar(operation):
    """The Kleene helpers raise TypeError when both operands are scalars."""
    # masks need to be non-None, otherwise it ends up in an infinite recursion
    left_mask = np.zeros(1)
    right_mask = np.zeros(1)

    expected_msg = r"Either `left` or `right` need to be a np\.ndarray."
    with pytest.raises(TypeError, match=expected_msg):
        operation(True, True, left_mask, right_mask)
| @ -0,0 +1,27 @@ | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestUnaryOps:
    """Unary operators on nullable boolean data."""

    def test_invert(self):
        """``~`` swaps True/False and keeps NA, for array, Series and DataFrame."""
        labels = ["a", "b", "c"]
        arr = pd.array([True, False, None], dtype="boolean")
        flipped = pd.array([False, True, None], dtype="boolean")

        # Extension array
        tm.assert_extension_array_equal(~arr, flipped)

        # Series: index and name are carried through
        flipped_ser = pd.Series(flipped, index=labels, name="name")
        inverted_ser = ~pd.Series(arr, index=labels, name="name")
        tm.assert_series_equal(inverted_ser, flipped_ser)

        # DataFrame mixing a nullable boolean column with a plain bool column
        frame = pd.DataFrame({"A": arr, "B": [True, False, False]}, index=labels)
        inverted_frame = ~frame
        flipped_frame = pd.DataFrame(
            {"A": flipped_ser, "B": [False, True, True]}, index=labels
        )
        tm.assert_frame_equal(inverted_frame, flipped_frame)

    def test_abs(self):
        """``abs`` returns an array equal to its input."""
        # matching numpy behavior, abs is the identity function
        values = pd.array([True, False, None], dtype="boolean")
        tm.assert_extension_array_equal(abs(values), values)
| @ -0,0 +1,62 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
|  | ||||
|  | ||||
@pytest.fixture
def data():
    """Fixture: a boolean array of alternating True/False with two missing values."""
    pair = [True, False]
    values = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(values, dtype="boolean")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
    [
        ([True, pd.NA], True, True, True, pd.NA),
        ([False, pd.NA], False, False, pd.NA, False),
        ([pd.NA], False, True, pd.NA, pd.NA),
        ([], False, True, False, True),
        # GH-33253: all True / all False values buggy with skipna=False
        ([True, True], True, True, True, True),
        ([False, False], False, False, False, False),
    ],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
    """Check ``any``/``all`` with and without ``skipna`` on array and Series.

    Results are compared by identity, so each expectation is first converted
    to the exact scalar the reduction is expected to return.
    """

    def as_result_scalar(flag):
        # the methods return numpy scalars
        return flag if flag is pd.NA else np.bool_(flag)

    want_any, want_all, want_any_noskip, want_all_noskip = (
        as_result_scalar(flag)
        for flag in (exp_any, exp_all, exp_any_noskip, exp_all_noskip)
    )

    for constructor in (pd.array, pd.Series):
        obj = constructor(values, dtype="boolean")
        assert obj.any() is want_any
        assert obj.all() is want_all
        assert obj.any(skipna=False) is want_any_noskip
        assert obj.all(skipna=False) is want_all_noskip

        # Feeding the scalar results through the NumPy functions keeps them intact.
        assert np.any(obj.any()) is want_any
        assert np.all(obj.all()) is want_all
|  | ||||
|  | ||||
@pytest.mark.parametrize("dropna", [True, False])
def test_reductions_return_types(dropna, data, all_numeric_reductions):
    """Each numeric reduction on a boolean Series returns the expected scalar type."""
    op = all_numeric_reductions
    ser = pd.Series(data)
    if dropna:
        ser = ser.dropna()

    if op in ("sum", "prod"):
        expected_type = np.int_
    elif op == "count":
        # Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
        expected_type = np.integer
    elif op in ("min", "max"):
        expected_type = np.bool_
    else:
        # "mean", "std", "var", "median", "kurt", "skew"
        expected_type = np.float64

    assert isinstance(getattr(ser, op)(), expected_type)
| @ -0,0 +1,13 @@ | ||||
| import pandas as pd | ||||
|  | ||||
|  | ||||
def test_repr():
    """Pin the text repr of a boolean column as DataFrame, Series and array."""
    frame = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})

    frame_text = "       A\n0   True\n1  False\n2   <NA>"
    series_text = "0     True\n1    False\n2     <NA>\nName: A, dtype: boolean"
    array_text = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"

    assert repr(frame) == frame_text
    assert repr(frame.A) == series_text
    assert repr(frame.A.array) == array_text
| @ -0,0 +1,89 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("ordered", [True, False])
@pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]])
def test_factorize(categories, ordered):
    """Factorizing a Categorical yields first-appearance codes and -1 for missing."""
    dtype_kwargs = {"categories": categories, "ordered": ordered}
    cat = pd.Categorical(["b", "b", "a", "c", None], **dtype_kwargs)

    codes, uniques = pd.factorize(cat)

    tm.assert_numpy_array_equal(codes, np.array([0, 0, 1, 2, -1], dtype=np.intp))
    # The uniques keep the categories and orderedness of the input.
    tm.assert_categorical_equal(
        uniques, pd.Categorical(["b", "a", "c"], **dtype_kwargs)
    )
|  | ||||
|  | ||||
def test_factorized_sort():
    """With ``sort=True`` the uniques are sorted and codes index into them."""
    cat = pd.Categorical(["b", "b", None, "a"])

    codes, uniques = pd.factorize(cat, sort=True)

    tm.assert_numpy_array_equal(codes, np.array([1, 1, -1, 0], dtype=np.intp))
    tm.assert_categorical_equal(uniques, pd.Categorical(["a", "b"]))
|  | ||||
|  | ||||
def test_factorized_sort_ordered():
    """For an ordered Categorical, ``sort=True`` follows the category order."""
    category_order = ["c", "b", "a"]
    cat = pd.Categorical(
        ["b", "b", None, "a"], categories=category_order, ordered=True
    )

    codes, uniques = pd.factorize(cat, sort=True)

    # "b" precedes "a" in the declared category order, so it gets code 0.
    tm.assert_numpy_array_equal(codes, np.array([0, 0, -1, 1], dtype=np.intp))
    tm.assert_categorical_equal(
        uniques, pd.Categorical(["b", "a"], categories=category_order, ordered=True)
    )
|  | ||||
|  | ||||
def test_isin_cats():
    """``Categorical.isin`` matches NaN when asked and ignores unknown values."""
    # GH2003
    cat = pd.Categorical(["a", "b", np.nan])

    lookups = [
        (["a", np.nan], [True, False, True]),
        (["a", "c"], [True, False, False]),
    ]
    for candidates, flags in lookups:
        result = cat.isin(candidates)
        tm.assert_numpy_array_equal(np.array(flags, dtype=bool), result)
|  | ||||
|  | ||||
@pytest.mark.parametrize("value", [[""], [None, ""], [pd.NaT, ""]])
def test_isin_cats_corner_cases(value):
    """The empty string is found even when the candidates also contain a null."""
    # GH36550
    found = pd.Categorical([""]).isin(value)
    tm.assert_numpy_array_equal(np.array([True], dtype=bool), found)
|  | ||||
|  | ||||
@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
    """``isin`` against an empty collection is False for every element."""
    cat = pd.Categorical(["a", "b"])
    all_false = np.array([False, False], dtype=bool)

    tm.assert_numpy_array_equal(all_false, cat.isin(empty))
|  | ||||
|  | ||||
def test_diff():
    """``diff`` on categorical data raises TypeError for Series and DataFrame."""
    ser = pd.Series([1, 2, 3], dtype="category")
    frame = ser.to_frame(name="A")

    for obj in (ser, frame):
        with pytest.raises(TypeError, match="Convert to a suitable dtype"):
            obj.diff()
| @ -0,0 +1,355 @@ | ||||
| import re | ||||
| import sys | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import PYPY | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     NaT, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import is_scalar | ||||
|  | ||||
|  | ||||
| class TestCategoricalAnalytics: | ||||
|     @pytest.mark.parametrize("aggregation", ["min", "max"]) | ||||
|     def test_min_max_not_ordered_raises(self, aggregation): | ||||
|         # unordered cats have no min/max | ||||
|         cat = Categorical(["a", "b", "c", "d"], ordered=False) | ||||
|         msg = f"Categorical is not ordered for operation {aggregation}" | ||||
|         agg_func = getattr(cat, aggregation) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             agg_func() | ||||
|  | ||||
|         ufunc = np.minimum if aggregation == "min" else np.maximum | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ufunc.reduce(cat) | ||||
|  | ||||
|     def test_min_max_ordered(self, index_or_series_or_array): | ||||
|         cat = Categorical(["a", "b", "c", "d"], ordered=True) | ||||
|         obj = index_or_series_or_array(cat) | ||||
|         _min = obj.min() | ||||
|         _max = obj.max() | ||||
|         assert _min == "a" | ||||
|         assert _max == "d" | ||||
|  | ||||
|         assert np.minimum.reduce(obj) == "a" | ||||
|         assert np.maximum.reduce(obj) == "d" | ||||
|         # TODO: raises if we pass axis=0  (on Index and Categorical, not Series) | ||||
|  | ||||
|         cat = Categorical( | ||||
|             ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True | ||||
|         ) | ||||
|         obj = index_or_series_or_array(cat) | ||||
|         _min = obj.min() | ||||
|         _max = obj.max() | ||||
|         assert _min == "d" | ||||
|         assert _max == "a" | ||||
|         assert np.minimum.reduce(obj) == "d" | ||||
|         assert np.maximum.reduce(obj) == "a" | ||||
|  | ||||
|     def test_min_max_reduce(self): | ||||
|         # GH52788 | ||||
|         cat = Categorical(["a", "b", "c", "d"], ordered=True) | ||||
|         df = DataFrame(cat) | ||||
|  | ||||
|         result_max = df.agg("max") | ||||
|         expected_max = Series(Categorical(["d"], dtype=cat.dtype)) | ||||
|         tm.assert_series_equal(result_max, expected_max) | ||||
|  | ||||
|         result_min = df.agg("min") | ||||
|         expected_min = Series(Categorical(["a"], dtype=cat.dtype)) | ||||
|         tm.assert_series_equal(result_min, expected_min) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "categories,expected", | ||||
|         [ | ||||
|             (list("ABC"), np.nan), | ||||
|             ([1, 2, 3], np.nan), | ||||
|             pytest.param( | ||||
|                 Series(date_range("2020-01-01", periods=3), dtype="category"), | ||||
|                 NaT, | ||||
|                 marks=pytest.mark.xfail( | ||||
|                     reason="https://github.com/pandas-dev/pandas/issues/29962" | ||||
|                 ), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("aggregation", ["min", "max"]) | ||||
|     def test_min_max_ordered_empty(self, categories, expected, aggregation): | ||||
|         # GH 30227 | ||||
|         cat = Categorical([], categories=categories, ordered=True) | ||||
|  | ||||
|         agg_func = getattr(cat, aggregation) | ||||
|         result = agg_func() | ||||
|         assert result is expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, categories", | ||||
|         [(["a", "b", "c", np.nan], list("cba")), ([1, 2, 3, np.nan], [3, 2, 1])], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     @pytest.mark.parametrize("function", ["min", "max"]) | ||||
|     def test_min_max_with_nan(self, values, categories, function, skipna): | ||||
|         # GH 25303 | ||||
|         cat = Categorical(values, categories=categories, ordered=True) | ||||
|         result = getattr(cat, function)(skipna=skipna) | ||||
|  | ||||
|         if skipna is False: | ||||
|             assert result is np.nan | ||||
|         else: | ||||
|             expected = categories[0] if function == "min" else categories[2] | ||||
|             assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize("function", ["min", "max"]) | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     def test_min_max_only_nan(self, function, skipna): | ||||
|         # https://github.com/pandas-dev/pandas/issues/33450 | ||||
|         cat = Categorical([np.nan], categories=[1, 2], ordered=True) | ||||
|         result = getattr(cat, function)(skipna=skipna) | ||||
|         assert result is np.nan | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["min", "max"]) | ||||
|     def test_numeric_only_min_max_raises(self, method): | ||||
|         # GH 25303 | ||||
|         cat = Categorical( | ||||
|             [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=".* got an unexpected keyword"): | ||||
|             getattr(cat, method)(numeric_only=True) | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["min", "max"]) | ||||
|     def test_numpy_min_max_raises(self, method): | ||||
|         cat = Categorical(["a", "b", "c", "b"], ordered=False) | ||||
|         msg = ( | ||||
|             f"Categorical is not ordered for operation {method}\n" | ||||
|             "you can use .as_ordered() to change the Categorical to an ordered one" | ||||
|         ) | ||||
|         method = getattr(np, method) | ||||
|         with pytest.raises(TypeError, match=re.escape(msg)): | ||||
|             method(cat) | ||||
|  | ||||
|     @pytest.mark.parametrize("kwarg", ["axis", "out", "keepdims"]) | ||||
|     @pytest.mark.parametrize("method", ["min", "max"]) | ||||
|     def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg): | ||||
|         cat = Categorical(["a", "b", "c", "b"], ordered=True) | ||||
|         msg = ( | ||||
|             f"the '{kwarg}' parameter is not supported in the pandas implementation " | ||||
|             f"of {method}" | ||||
|         ) | ||||
|         if kwarg == "axis": | ||||
|             msg = r"`axis` must be fewer than the number of dimensions \(1\)" | ||||
|         kwargs = {kwarg: 42} | ||||
|         method = getattr(np, method) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             method(cat, **kwargs) | ||||
|  | ||||
|     @pytest.mark.parametrize("method, expected", [("min", "a"), ("max", "c")]) | ||||
|     def test_numpy_min_max_axis_equals_none(self, method, expected): | ||||
|         cat = Categorical(["a", "b", "c", "b"], ordered=True) | ||||
|         method = getattr(np, method) | ||||
|         result = method(cat, axis=None) | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values,categories,exp_mode", | ||||
|         [ | ||||
|             ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]), | ||||
|             ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]), | ||||
|             ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]), | ||||
|             ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]), | ||||
|             ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), | ||||
|             ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_mode(self, values, categories, exp_mode): | ||||
|         cat = Categorical(values, categories=categories, ordered=True) | ||||
|         res = Series(cat).mode()._values | ||||
|         exp = Categorical(exp_mode, categories=categories, ordered=True) | ||||
|         tm.assert_categorical_equal(res, exp) | ||||
|  | ||||
|     def test_searchsorted(self, ordered): | ||||
|         # https://github.com/pandas-dev/pandas/issues/8420 | ||||
|         # https://github.com/pandas-dev/pandas/issues/14522 | ||||
|  | ||||
|         cat = Categorical( | ||||
|             ["cheese", "milk", "apple", "bread", "bread"], | ||||
|             categories=["cheese", "milk", "apple", "bread"], | ||||
|             ordered=ordered, | ||||
|         ) | ||||
|         ser = Series(cat) | ||||
|  | ||||
|         # Searching for single item argument, side='left' (default) | ||||
|         res_cat = cat.searchsorted("apple") | ||||
|         assert res_cat == 2 | ||||
|         assert is_scalar(res_cat) | ||||
|  | ||||
|         res_ser = ser.searchsorted("apple") | ||||
|         assert res_ser == 2 | ||||
|         assert is_scalar(res_ser) | ||||
|  | ||||
|         # Searching for single item array, side='left' (default) | ||||
|         res_cat = cat.searchsorted(["bread"]) | ||||
|         res_ser = ser.searchsorted(["bread"]) | ||||
|         exp = np.array([3], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(res_cat, exp) | ||||
|         tm.assert_numpy_array_equal(res_ser, exp) | ||||
|  | ||||
|         # Searching for several items array, side='right' | ||||
|         res_cat = cat.searchsorted(["apple", "bread"], side="right") | ||||
|         res_ser = ser.searchsorted(["apple", "bread"], side="right") | ||||
|         exp = np.array([3, 5], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(res_cat, exp) | ||||
|         tm.assert_numpy_array_equal(res_ser, exp) | ||||
|  | ||||
|         # Searching for a single value that is not from the Categorical | ||||
|         with pytest.raises(TypeError, match="cucumber"): | ||||
|             cat.searchsorted("cucumber") | ||||
|         with pytest.raises(TypeError, match="cucumber"): | ||||
|             ser.searchsorted("cucumber") | ||||
|  | ||||
|         # Searching for multiple values one of each is not from the Categorical | ||||
|         msg = ( | ||||
|             "Cannot setitem on a Categorical with a new category, " | ||||
|             "set the categories first" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat.searchsorted(["bread", "cucumber"]) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser.searchsorted(["bread", "cucumber"]) | ||||
|  | ||||
|     def test_unique(self, ordered): | ||||
|         # GH38140 | ||||
|         dtype = CategoricalDtype(["a", "b", "c"], ordered=ordered) | ||||
|  | ||||
|         # categories are reordered based on value when ordered=False | ||||
|         cat = Categorical(["a", "b", "c"], dtype=dtype) | ||||
|         res = cat.unique() | ||||
|         tm.assert_categorical_equal(res, cat) | ||||
|  | ||||
|         cat = Categorical(["a", "b", "a", "a"], dtype=dtype) | ||||
|         res = cat.unique() | ||||
|         tm.assert_categorical_equal(res, Categorical(["a", "b"], dtype=dtype)) | ||||
|  | ||||
|         cat = Categorical(["c", "a", "b", "a", "a"], dtype=dtype) | ||||
|         res = cat.unique() | ||||
|         exp_cat = Categorical(["c", "a", "b"], dtype=dtype) | ||||
|         tm.assert_categorical_equal(res, exp_cat) | ||||
|  | ||||
|         # nan must be removed | ||||
|         cat = Categorical(["b", np.nan, "b", np.nan, "a"], dtype=dtype) | ||||
|         res = cat.unique() | ||||
|         exp_cat = Categorical(["b", np.nan, "a"], dtype=dtype) | ||||
|         tm.assert_categorical_equal(res, exp_cat) | ||||
|  | ||||
|     def test_unique_index_series(self, ordered): | ||||
|         # GH38140 | ||||
|         dtype = CategoricalDtype([3, 2, 1], ordered=ordered) | ||||
|  | ||||
|         c = Categorical([3, 1, 2, 2, 1], dtype=dtype) | ||||
|         # Categorical.unique sorts categories by appearance order | ||||
|         # if ordered=False | ||||
|         exp = Categorical([3, 1, 2], dtype=dtype) | ||||
|         tm.assert_categorical_equal(c.unique(), exp) | ||||
|  | ||||
|         tm.assert_index_equal(Index(c).unique(), Index(exp)) | ||||
|         tm.assert_categorical_equal(Series(c).unique(), exp) | ||||
|  | ||||
|         c = Categorical([1, 1, 2, 2], dtype=dtype) | ||||
|         exp = Categorical([1, 2], dtype=dtype) | ||||
|         tm.assert_categorical_equal(c.unique(), exp) | ||||
|         tm.assert_index_equal(Index(c).unique(), Index(exp)) | ||||
|         tm.assert_categorical_equal(Series(c).unique(), exp) | ||||
|  | ||||
|     def test_shift(self): | ||||
|         # GH 9416 | ||||
|         cat = Categorical(["a", "b", "c", "d", "a"]) | ||||
|  | ||||
|         # shift forward | ||||
|         sp1 = cat.shift(1) | ||||
|         xp1 = Categorical([np.nan, "a", "b", "c", "d"]) | ||||
|         tm.assert_categorical_equal(sp1, xp1) | ||||
|         tm.assert_categorical_equal(cat[:-1], sp1[1:]) | ||||
|  | ||||
|         # shift back | ||||
|         sn2 = cat.shift(-2) | ||||
|         xp2 = Categorical( | ||||
|             ["c", "d", "a", np.nan, np.nan], categories=["a", "b", "c", "d"] | ||||
|         ) | ||||
|         tm.assert_categorical_equal(sn2, xp2) | ||||
|         tm.assert_categorical_equal(cat[2:], sn2[:-2]) | ||||
|  | ||||
|         # shift by zero | ||||
|         tm.assert_categorical_equal(cat, cat.shift(0)) | ||||
|  | ||||
|     def test_nbytes(self): | ||||
|         cat = Categorical([1, 2, 3]) | ||||
|         exp = 3 + 3 * 8  # 3 int8s for values + 3 int64s for categories | ||||
|         assert cat.nbytes == exp | ||||
|  | ||||
|     def test_memory_usage(self, using_infer_string): | ||||
|         cat = Categorical([1, 2, 3]) | ||||
|  | ||||
|         # .categories is an index, so we include the hashtable | ||||
|         assert 0 < cat.nbytes <= cat.memory_usage() | ||||
|         assert 0 < cat.nbytes <= cat.memory_usage(deep=True) | ||||
|  | ||||
|         cat = Categorical(["foo", "foo", "bar"]) | ||||
|         if using_infer_string: | ||||
|             if cat.categories.dtype.storage == "python": | ||||
|                 assert cat.memory_usage(deep=True) > cat.nbytes | ||||
|             else: | ||||
|                 assert cat.memory_usage(deep=True) >= cat.nbytes | ||||
|         else: | ||||
|             assert cat.memory_usage(deep=True) > cat.nbytes | ||||
|  | ||||
|         if not PYPY: | ||||
|             # sys.getsizeof will call the .memory_usage with | ||||
|             # deep=True, and add on some GC overhead | ||||
|             diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) | ||||
|             assert abs(diff) < 100 | ||||
|  | ||||
|     def test_map(self): | ||||
|         c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) | ||||
|         result = c.map(lambda x: x.lower(), na_action=None) | ||||
|         exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) | ||||
|         tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|         c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False) | ||||
|         result = c.map(lambda x: x.lower(), na_action=None) | ||||
|         exp = Categorical(list("ababc"), categories=list("abc"), ordered=False) | ||||
|         tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|         result = c.map(lambda x: 1, na_action=None) | ||||
|         # GH 12766: Return an index not an array | ||||
|         tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) | ||||
|  | ||||
|     @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) | ||||
|     def test_validate_inplace_raises(self, value): | ||||
|         cat = Categorical(["A", "B", "B", "C", "A"]) | ||||
|         msg = ( | ||||
|             'For argument "inplace" expected type bool, ' | ||||
|             f"received type {type(value).__name__}" | ||||
|         ) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.sort_values(inplace=value) | ||||
|  | ||||
|     def test_quantile_empty(self): | ||||
|         # make sure we have correct itemsize on resulting codes | ||||
|         cat = Categorical(["A", "B"]) | ||||
|         idx = Index([0.0, 0.5]) | ||||
|         result = cat[:0]._quantile(idx, interpolation="linear") | ||||
|         assert result._codes.dtype == np.int8 | ||||
|  | ||||
|         expected = cat.take([-1, -1], allow_fill=True) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
| @ -0,0 +1,501 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import PY311 | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     StringDtype, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.categorical import recode_for_categories | ||||
|  | ||||
|  | ||||
| class TestCategoricalAPI: | ||||
|     def test_to_list_deprecated(self): | ||||
|         # GH#51254 | ||||
|         cat1 = Categorical(list("acb"), ordered=False) | ||||
|         msg = "Categorical.to_list is deprecated and will be removed" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             cat1.to_list() | ||||
|  | ||||
|     def test_ordered_api(self): | ||||
|         # GH 9347 | ||||
|         cat1 = Categorical(list("acb"), ordered=False) | ||||
|         tm.assert_index_equal(cat1.categories, Index(["a", "b", "c"])) | ||||
|         assert not cat1.ordered | ||||
|  | ||||
|         cat2 = Categorical(list("acb"), categories=list("bca"), ordered=False) | ||||
|         tm.assert_index_equal(cat2.categories, Index(["b", "c", "a"])) | ||||
|         assert not cat2.ordered | ||||
|  | ||||
|         cat3 = Categorical(list("acb"), ordered=True) | ||||
|         tm.assert_index_equal(cat3.categories, Index(["a", "b", "c"])) | ||||
|         assert cat3.ordered | ||||
|  | ||||
|         cat4 = Categorical(list("acb"), categories=list("bca"), ordered=True) | ||||
|         tm.assert_index_equal(cat4.categories, Index(["b", "c", "a"])) | ||||
|         assert cat4.ordered | ||||
|  | ||||
|     def test_set_ordered(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         cat2 = cat.as_unordered() | ||||
|         assert not cat2.ordered | ||||
|         cat2 = cat.as_ordered() | ||||
|         assert cat2.ordered | ||||
|  | ||||
|         assert cat2.set_ordered(True).ordered | ||||
|         assert not cat2.set_ordered(False).ordered | ||||
|  | ||||
|         # removed in 0.19.0 | ||||
|         msg = ( | ||||
|             "property 'ordered' of 'Categorical' object has no setter" | ||||
|             if PY311 | ||||
|             else "can't set attribute" | ||||
|         ) | ||||
|         with pytest.raises(AttributeError, match=msg): | ||||
|             cat.ordered = True | ||||
|         with pytest.raises(AttributeError, match=msg): | ||||
|             cat.ordered = False | ||||
|  | ||||
|     def test_rename_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"]) | ||||
|  | ||||
|         # inplace=False: the old one must not be changed | ||||
|         res = cat.rename_categories([1, 2, 3]) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) | ||||
|         ) | ||||
|         tm.assert_index_equal(res.categories, Index([1, 2, 3])) | ||||
|  | ||||
|         exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) | ||||
|         tm.assert_numpy_array_equal(cat.__array__(), exp_cat) | ||||
|  | ||||
|         exp_cat = Index(["a", "b", "c"]) | ||||
|         tm.assert_index_equal(cat.categories, exp_cat) | ||||
|  | ||||
|         # GH18862 (let rename_categories take callables) | ||||
|         result = cat.rename_categories(lambda x: x.upper()) | ||||
|         expected = Categorical(["A", "B", "C", "A"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) | ||||
|     def test_rename_categories_wrong_length_raises(self, new_categories): | ||||
|         cat = Categorical(["a", "b", "c", "a"]) | ||||
|         msg = ( | ||||
|             "new categories need to have the same number of items as the " | ||||
|             "old categories!" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.rename_categories(new_categories) | ||||
|  | ||||
|     def test_rename_categories_series(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/17981 | ||||
|         c = Categorical(["a", "b"]) | ||||
|         result = c.rename_categories(Series([0, 1], index=["a", "b"])) | ||||
|         expected = Categorical([0, 1]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_rename_categories_dict(self): | ||||
|         # GH 17336 | ||||
|         cat = Categorical(["a", "b", "c", "d"]) | ||||
|         res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}) | ||||
|         expected = Index([4, 3, 2, 1]) | ||||
|         tm.assert_index_equal(res.categories, expected) | ||||
|  | ||||
|         # Test for dicts of smaller length | ||||
|         cat = Categorical(["a", "b", "c", "d"]) | ||||
|         res = cat.rename_categories({"a": 1, "c": 3}) | ||||
|  | ||||
|         expected = Index([1, "b", 3, "d"]) | ||||
|         tm.assert_index_equal(res.categories, expected) | ||||
|  | ||||
|         # Test for dicts with bigger length | ||||
|         cat = Categorical(["a", "b", "c", "d"]) | ||||
|         res = cat.rename_categories({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}) | ||||
|         expected = Index([1, 2, 3, 4]) | ||||
|         tm.assert_index_equal(res.categories, expected) | ||||
|  | ||||
|         # Test for dicts with no items from old categories | ||||
|         cat = Categorical(["a", "b", "c", "d"]) | ||||
|         res = cat.rename_categories({"f": 1, "g": 3}) | ||||
|  | ||||
|         expected = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_index_equal(res.categories, expected) | ||||
|  | ||||
|     def test_reorder_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         old = cat.copy() | ||||
|         new = Categorical( | ||||
|             ["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True | ||||
|         ) | ||||
|  | ||||
|         res = cat.reorder_categories(["c", "b", "a"]) | ||||
|         # cat must be the same as before | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         # only res is changed | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "new_categories", | ||||
|         [ | ||||
|             ["a"],  # not all "old" included in "new" | ||||
|             ["a", "b", "d"],  # still not all "old" in "new" | ||||
|             ["a", "b", "c", "d"],  # all "old" included in "new", but too long | ||||
|         ], | ||||
|     ) | ||||
|     def test_reorder_categories_raises(self, new_categories): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         msg = "items in new_categories are not the same as in old categories" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.reorder_categories(new_categories) | ||||
|  | ||||
|     def test_add_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         old = cat.copy() | ||||
|         new = Categorical( | ||||
|             ["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True | ||||
|         ) | ||||
|  | ||||
|         res = cat.add_categories("d") | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|         res = cat.add_categories(["d"]) | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|         # GH 9927 | ||||
|         cat = Categorical(list("abc"), ordered=True) | ||||
|         expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) | ||||
|         # test with Series, np.array, index, list | ||||
|         res = cat.add_categories(Series(["d", "e"])) | ||||
|         tm.assert_categorical_equal(res, expected) | ||||
|         res = cat.add_categories(np.array(["d", "e"])) | ||||
|         tm.assert_categorical_equal(res, expected) | ||||
|         res = cat.add_categories(Index(["d", "e"])) | ||||
|         tm.assert_categorical_equal(res, expected) | ||||
|         res = cat.add_categories(["d", "e"]) | ||||
|         tm.assert_categorical_equal(res, expected) | ||||
|  | ||||
|     def test_add_categories_existing_raises(self): | ||||
|         # new is in old categories | ||||
|         cat = Categorical(["a", "b", "c", "d"], ordered=True) | ||||
|         msg = re.escape("new categories must not include old categories: {'d'}") | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.add_categories(["d"]) | ||||
|  | ||||
|     def test_add_categories_losing_dtype_information(self): | ||||
|         # GH#48812 | ||||
|         cat = Categorical(Series([1, 2], dtype="Int64")) | ||||
|         ser = Series([4], dtype="Int64") | ||||
|         result = cat.add_categories(ser) | ||||
|         expected = Categorical( | ||||
|             Series([1, 2], dtype="Int64"), categories=Series([1, 2, 4], dtype="Int64") | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         cat = Categorical(Series(["a", "b", "a"], dtype=StringDtype())) | ||||
|         ser = Series(["d"], dtype=StringDtype()) | ||||
|         result = cat.add_categories(ser) | ||||
|         expected = Categorical( | ||||
|             Series(["a", "b", "a"], dtype=StringDtype()), | ||||
|             categories=Series(["a", "b", "d"], dtype=StringDtype()), | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_set_categories(self): | ||||
|         """Exercise ``set_categories``: reordering, dropping, adding, ordering. | ||||
|  | ||||
|         The checks run in sequence and rebind ``cat``, ``res``, ``c`` and ``c2`` | ||||
|         along the way, so the statement order matters. | ||||
|         """ | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         exp_categories = Index(["c", "b", "a"]) | ||||
|         exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) | ||||
|  | ||||
|         # cat is rebound to the reordered result; res is derived from that | ||||
|         cat = cat.set_categories(["c", "b", "a"]) | ||||
|         res = cat.set_categories(["a", "b", "c"]) | ||||
|         # cat must be the same as before the second set_categories call | ||||
|         tm.assert_index_equal(cat.categories, exp_categories) | ||||
|         tm.assert_numpy_array_equal(cat.__array__(), exp_values) | ||||
|         # only res is changed | ||||
|         exp_categories_back = Index(["a", "b", "c"]) | ||||
|         tm.assert_index_equal(res.categories, exp_categories_back) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp_values) | ||||
|  | ||||
|         # not all "old" included in "new" -> all not included ones are now | ||||
|         # np.nan (code -1) | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         res = cat.set_categories(["a"]) | ||||
|         tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8)) | ||||
|  | ||||
|         # still not all "old" in "new" | ||||
|         res = cat.set_categories(["a", "b", "d"]) | ||||
|         tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8)) | ||||
|         tm.assert_index_equal(res.categories, Index(["a", "b", "d"])) | ||||
|  | ||||
|         # all "old" included in "new" | ||||
|         cat = cat.set_categories(["a", "b", "c", "d"]) | ||||
|         exp_categories = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_index_equal(cat.categories, exp_categories) | ||||
|  | ||||
|         # internals... | ||||
|         c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8)) | ||||
|         tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) | ||||
|  | ||||
|         exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) | ||||
|         tm.assert_numpy_array_equal(np.asarray(c), exp) | ||||
|  | ||||
|         # all "pointers" to '4' must be changed from 3 to 0,... | ||||
|         c = c.set_categories([4, 3, 2, 1]) | ||||
|  | ||||
|         # positions are changed | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8)) | ||||
|  | ||||
|         # categories are now in new order | ||||
|         tm.assert_index_equal(c.categories, Index([4, 3, 2, 1])) | ||||
|  | ||||
|         # output is the same | ||||
|         exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) | ||||
|         tm.assert_numpy_array_equal(np.asarray(c), exp) | ||||
|         # c is ordered with categories [4, 3, 2, 1]: min/max follow that order | ||||
|         assert c.min() == 4 | ||||
|         assert c.max() == 1 | ||||
|  | ||||
|         # set_categories should set the ordering if specified | ||||
|         c2 = c.set_categories([4, 3, 2, 1], ordered=False) | ||||
|         assert not c2.ordered | ||||
|  | ||||
|         tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) | ||||
|  | ||||
|         # set_categories should pass thru the ordering | ||||
|         c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) | ||||
|         assert not c2.ordered | ||||
|  | ||||
|         tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2)) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, categories, new_categories", | ||||
|         [ | ||||
|             # No NaNs, same cats, same order | ||||
|             (["a", "b", "a"], ["a", "b"], ["a", "b"]), | ||||
|             # No NaNs, same cats, different order | ||||
|             (["a", "b", "a"], ["a", "b"], ["b", "a"]), | ||||
|             # Same, unsorted | ||||
|             (["b", "a", "a"], ["a", "b"], ["a", "b"]), | ||||
|             # No NaNs, same cats, different order | ||||
|             (["b", "a", "a"], ["a", "b"], ["b", "a"]), | ||||
|             # NaNs | ||||
|             (["a", "b", "c"], ["a", "b"], ["a", "b"]), | ||||
|             (["a", "b", "c"], ["a", "b"], ["b", "a"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a", "b"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a", "b"]), | ||||
|             # Introduce NaNs | ||||
|             (["a", "b", "c"], ["a", "b"], ["a"]), | ||||
|             (["a", "b", "c"], ["a", "b"], ["b"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a"]), | ||||
|             # No overlap | ||||
|             (["a", "b", "c"], ["a", "b"], ["d", "e"]), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_set_categories_many(self, values, categories, new_categories, ordered): | ||||
|         c = Categorical(values, categories) | ||||
|         expected = Categorical(values, new_categories, ordered) | ||||
|         result = c.set_categories(new_categories, ordered=ordered) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_set_categories_rename_less(self): | ||||
|         # GH 24675 | ||||
|         cat = Categorical(["A", "B"]) | ||||
|         result = cat.set_categories(["A"], rename=True) | ||||
|         expected = Categorical(["A", np.nan]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_set_categories_private(self): | ||||
|         cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) | ||||
|         cat._set_categories(["a", "c", "d", "e"]) | ||||
|         expected = Categorical(["a", "c", "d"], categories=list("acde")) | ||||
|         tm.assert_categorical_equal(cat, expected) | ||||
|  | ||||
|         # fastpath | ||||
|         cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) | ||||
|         cat._set_categories(["a", "c", "d", "e"], fastpath=True) | ||||
|         expected = Categorical(["a", "c", "d"], categories=list("acde")) | ||||
|         tm.assert_categorical_equal(cat, expected) | ||||
|  | ||||
|     def test_remove_categories(self): | ||||
|         cat = Categorical(["a", "b", "c", "a"], ordered=True) | ||||
|         old = cat.copy() | ||||
|         new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True) | ||||
|  | ||||
|         res = cat.remove_categories("c") | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|         res = cat.remove_categories(["c"]) | ||||
|         tm.assert_categorical_equal(cat, old) | ||||
|         tm.assert_categorical_equal(res, new) | ||||
|  | ||||
|     @pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]]) | ||||
|     def test_remove_categories_raises(self, removals): | ||||
|         cat = Categorical(["a", "b", "a"]) | ||||
|         message = re.escape("removals must all be in old categories: {'c'}") | ||||
|  | ||||
|         with pytest.raises(ValueError, match=message): | ||||
|             cat.remove_categories(removals) | ||||
|  | ||||
|     def test_remove_unused_categories(self): | ||||
|         """``remove_unused_categories`` drops categories that no value uses. | ||||
|  | ||||
|         Checked on a plain case, on data containing NaN, on a case where the | ||||
|         codes have to be renumbered, and on a large randomly generated array. | ||||
|         """ | ||||
|         c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) | ||||
|         exp_categories_all = Index(["a", "b", "c", "d", "e"]) | ||||
|         exp_categories_dropped = Index(["a", "b", "c", "d"]) | ||||
|  | ||||
|         tm.assert_index_equal(c.categories, exp_categories_all) | ||||
|  | ||||
|         # the result loses "e"; the original keeps all of its categories | ||||
|         res = c.remove_unused_categories() | ||||
|         tm.assert_index_equal(res.categories, exp_categories_dropped) | ||||
|         tm.assert_index_equal(c.categories, exp_categories_all) | ||||
|  | ||||
|         # with NaN values (GH11599) | ||||
|         c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"]) | ||||
|         res = c.remove_unused_categories() | ||||
|         tm.assert_index_equal(res.categories, Index(np.array(["a", "b", "c"]))) | ||||
|         exp_codes = np.array([0, 1, 2, -1], dtype=np.int8) | ||||
|         tm.assert_numpy_array_equal(res.codes, exp_codes) | ||||
|         tm.assert_index_equal(c.categories, exp_categories_all) | ||||
|  | ||||
|         # only B, D and F are used, so the codes are renumbered against them | ||||
|         val = ["F", np.nan, "D", "B", "D", "F", np.nan] | ||||
|         cat = Categorical(values=val, categories=list("ABCDEFG")) | ||||
|         out = cat.remove_unused_categories() | ||||
|         tm.assert_index_equal(out.categories, Index(["B", "D", "F"])) | ||||
|         exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) | ||||
|         tm.assert_numpy_array_equal(out.codes, exp_codes) | ||||
|         assert out.tolist() == val | ||||
|  | ||||
|         # large input: values drawn from every second letter, then 100 randomly | ||||
|         # chosen positions (drawn with replacement) are overwritten with NaN | ||||
|         alpha = list("abcdefghijklmnopqrstuvwxyz") | ||||
|         val = np.random.default_rng(2).choice(alpha[::2], 10000).astype("object") | ||||
|         val[np.random.default_rng(2).choice(len(val), 100)] = np.nan | ||||
|  | ||||
|         cat = Categorical(values=val, categories=alpha) | ||||
|         out = cat.remove_unused_categories() | ||||
|         assert out.tolist() == val.tolist() | ||||
|  | ||||
|  | ||||
| class TestCategoricalAPIWithFactor: | ||||
|     def test_describe(self): | ||||
|         factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|         # string type | ||||
|         desc = factor.describe() | ||||
|         assert factor.ordered | ||||
|         exp_index = CategoricalIndex( | ||||
|             ["a", "b", "c"], name="categories", ordered=factor.ordered | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             {"counts": [3, 2, 3], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0]}, index=exp_index | ||||
|         ) | ||||
|         tm.assert_frame_equal(desc, expected) | ||||
|  | ||||
|         # check unused categories | ||||
|         cat = factor.copy() | ||||
|         cat = cat.set_categories(["a", "b", "c", "d"]) | ||||
|         desc = cat.describe() | ||||
|  | ||||
|         exp_index = CategoricalIndex( | ||||
|             list("abcd"), ordered=factor.ordered, name="categories" | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             {"counts": [3, 2, 3, 0], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0, 0]}, | ||||
|             index=exp_index, | ||||
|         ) | ||||
|         tm.assert_frame_equal(desc, expected) | ||||
|  | ||||
|         # check an integer one | ||||
|         cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) | ||||
|         desc = cat.describe() | ||||
|         exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered, name="categories") | ||||
|         expected = DataFrame( | ||||
|             {"counts": [5, 3, 3], "freqs": [5 / 11.0, 3 / 11.0, 3 / 11.0]}, | ||||
|             index=exp_index, | ||||
|         ) | ||||
|         tm.assert_frame_equal(desc, expected) | ||||
|  | ||||
|         # https://github.com/pandas-dev/pandas/issues/3678 | ||||
|         # describe should work with NaN | ||||
|         cat = Categorical([np.nan, 1, 2, 2]) | ||||
|         desc = cat.describe() | ||||
|         expected = DataFrame( | ||||
|             {"counts": [1, 2, 1], "freqs": [1 / 4.0, 2 / 4.0, 1 / 4.0]}, | ||||
|             index=CategoricalIndex( | ||||
|                 [1, 2, np.nan], categories=[1, 2], name="categories" | ||||
|             ), | ||||
|         ) | ||||
|         tm.assert_frame_equal(desc, expected) | ||||
|  | ||||
|  | ||||
| class TestPrivateCategoricalAPI: | ||||
|     """Tests for ``Categorical`` codes and the ``recode_for_categories`` helper.""" | ||||
|  | ||||
|     def test_codes_immutable(self): | ||||
|         """``codes`` is read-only, while the Categorical itself stays writeable.""" | ||||
|         # Codes should be read only | ||||
|         c = Categorical(["a", "b", "c", "a", np.nan]) | ||||
|         exp = np.array([0, 1, 2, 0, -1], dtype="int8") | ||||
|         tm.assert_numpy_array_equal(c.codes, exp) | ||||
|  | ||||
|         # Assignments to codes should raise; the expected text is keyed on PY311 | ||||
|         msg = ( | ||||
|             "property 'codes' of 'Categorical' object has no setter" | ||||
|             if PY311 | ||||
|             else "can't set attribute" | ||||
|         ) | ||||
|         with pytest.raises(AttributeError, match=msg): | ||||
|             c.codes = np.array([0, 1, 2, 0, 1], dtype="int8") | ||||
|  | ||||
|         # changes in the codes array should raise | ||||
|         codes = c.codes | ||||
|  | ||||
|         with pytest.raises(ValueError, match="assignment destination is read-only"): | ||||
|             codes[4] = 1 | ||||
|  | ||||
|         # But even after getting the codes, the original array should still be | ||||
|         # writeable! | ||||
|         c[4] = "a" | ||||
|         exp = np.array([0, 1, 2, 0, 0], dtype="int8") | ||||
|         tm.assert_numpy_array_equal(c.codes, exp) | ||||
|         # writing through the private ``_codes`` array is reflected in ``codes`` | ||||
|         c._codes[4] = 2 | ||||
|         exp = np.array([0, 1, 2, 0, 2], dtype="int8") | ||||
|         tm.assert_numpy_array_equal(c.codes, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "codes, old, new, expected", | ||||
|         [ | ||||
|             ([0, 1], ["a", "b"], ["a", "b"], [0, 1]), | ||||
|             ([0, 1], ["b", "a"], ["b", "a"], [0, 1]), | ||||
|             ([0, 1], ["a", "b"], ["b", "a"], [1, 0]), | ||||
|             ([0, 1], ["b", "a"], ["a", "b"], [1, 0]), | ||||
|             ([0, 1, 0, 1], ["a", "b"], ["a", "b", "c"], [0, 1, 0, 1]), | ||||
|             ([0, 1, 2, 2], ["a", "b", "c"], ["a", "b"], [0, 1, -1, -1]), | ||||
|             ([0, 1, -1], ["a", "b", "c"], ["a", "b", "c"], [0, 1, -1]), | ||||
|             ([0, 1, -1], ["a", "b", "c"], ["b"], [-1, 0, -1]), | ||||
|             ([0, 1, -1], ["a", "b", "c"], ["d"], [-1, -1, -1]), | ||||
|             ([0, 1, -1], ["a", "b", "c"], [], [-1, -1, -1]), | ||||
|             ([-1, -1], [], ["a", "b"], [-1, -1]), | ||||
|             ([1, 0], ["b", "a"], ["a", "b"], [0, 1]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_recode_to_categories(self, codes, old, new, expected): | ||||
|         """``recode_for_categories`` maps int8 codes from ``old`` onto ``new``. | ||||
|  | ||||
|         In the cases above, -1 is the expected code wherever the old label is | ||||
|         absent from ``new`` and wherever the input code was already -1. | ||||
|         """ | ||||
|         codes = np.asanyarray(codes, dtype=np.int8) | ||||
|         expected = np.asanyarray(expected, dtype=np.int8) | ||||
|         old = Index(old) | ||||
|         new = Index(new) | ||||
|         result = recode_for_categories(codes, old, new) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_recode_to_categories_large(self): | ||||
|         """Recoding 1000 categories onto their reversed order gives int16 codes.""" | ||||
|         N = 1000 | ||||
|         codes = np.arange(N) | ||||
|         old = Index(codes) | ||||
|         # reversed sequence N-1 .. 0, used both as new categories and as the | ||||
|         # expected codes | ||||
|         expected = np.arange(N - 1, -1, -1, dtype=np.int16) | ||||
|         new = Index(expected) | ||||
|         result = recode_for_categories(codes, old, new) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
| @ -0,0 +1,155 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     DatetimeIndex, | ||||
|     Interval, | ||||
|     NaT, | ||||
|     Period, | ||||
|     Timestamp, | ||||
|     array, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestAstype: | ||||
|     @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) | ||||
|     @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), NaT]]) | ||||
|     def test_astype_nan_to_int(self, cls, values): | ||||
|         # GH#28406 | ||||
|         obj = cls(values) | ||||
|  | ||||
|         msg = "Cannot (cast|convert)" | ||||
|         with pytest.raises((ValueError, TypeError), match=msg): | ||||
|             obj.astype(int) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "expected", | ||||
|         [ | ||||
|             array(["2019", "2020"], dtype="datetime64[ns, UTC]"), | ||||
|             array([0, 0], dtype="timedelta64[ns]"), | ||||
|             array([Period("2019"), Period("2020")], dtype="period[Y-DEC]"), | ||||
|             array([Interval(0, 1), Interval(1, 2)], dtype="interval"), | ||||
|             array([1, np.nan], dtype="Int64"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_astype_category_to_extension_dtype(self, expected): | ||||
|         # GH#28668 | ||||
|         result = expected.astype("category").astype(expected.dtype) | ||||
|  | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype, expected", | ||||
|         [ | ||||
|             ( | ||||
|                 "datetime64[ns]", | ||||
|                 np.array(["2015-01-01T00:00:00.000000000"], dtype="datetime64[ns]"), | ||||
|             ), | ||||
|             ( | ||||
|                 "datetime64[ns, MET]", | ||||
|                 DatetimeIndex([Timestamp("2015-01-01 00:00:00+0100", tz="MET")]).array, | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_astype_to_datetime64(self, dtype, expected): | ||||
|         # GH#28448 | ||||
|         result = Categorical(["2015-01-01"]).astype(dtype) | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_astype_str_int_categories_to_nullable_int(self): | ||||
|         # GH#39616 | ||||
|         dtype = CategoricalDtype([str(i) for i in range(5)]) | ||||
|         codes = np.random.default_rng(2).integers(5, size=20) | ||||
|         arr = Categorical.from_codes(codes, dtype=dtype) | ||||
|  | ||||
|         res = arr.astype("Int64") | ||||
|         expected = array(codes, dtype="Int64") | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|     def test_astype_str_int_categories_to_nullable_float(self): | ||||
|         # GH#39616 | ||||
|         dtype = CategoricalDtype([str(i / 2) for i in range(5)]) | ||||
|         codes = np.random.default_rng(2).integers(5, size=20) | ||||
|         arr = Categorical.from_codes(codes, dtype=dtype) | ||||
|  | ||||
|         res = arr.astype("Float64") | ||||
|         expected = array(codes, dtype="Float64") / 2 | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_astype(self, ordered): | ||||
|         # string | ||||
|         cat = Categorical(list("abbaaccc"), ordered=ordered) | ||||
|         result = cat.astype(object) | ||||
|         expected = np.array(cat) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         msg = r"Cannot cast object|str dtype to float64" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.astype(float) | ||||
|  | ||||
|         # numeric | ||||
|         cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered) | ||||
|         result = cat.astype(object) | ||||
|         expected = np.array(cat, dtype=object) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = cat.astype(int) | ||||
|         expected = np.array(cat, dtype="int") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = cat.astype(float) | ||||
|         expected = np.array(cat, dtype=float) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype_ordered", [True, False]) | ||||
|     @pytest.mark.parametrize("cat_ordered", [True, False]) | ||||
|     def test_astype_category(self, dtype_ordered, cat_ordered): | ||||
|         # GH#10696/GH#18593 | ||||
|         data = list("abcaacbab") | ||||
|         cat = Categorical(data, categories=list("bac"), ordered=cat_ordered) | ||||
|  | ||||
|         # standard categories | ||||
|         dtype = CategoricalDtype(ordered=dtype_ordered) | ||||
|         result = cat.astype(dtype) | ||||
|         expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         # non-standard categories | ||||
|         dtype = CategoricalDtype(list("adc"), dtype_ordered) | ||||
|         result = cat.astype(dtype) | ||||
|         expected = Categorical(data, dtype=dtype) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         if dtype_ordered is False: | ||||
|             # dtype='category' can't specify ordered, so only test once | ||||
|             result = cat.astype("category") | ||||
|             expected = cat | ||||
|             tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_astype_object_datetime_categories(self): | ||||
|         # GH#40754 | ||||
|         cat = Categorical(to_datetime(["2021-03-27", NaT])) | ||||
|         result = cat.astype(object) | ||||
|         expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_astype_object_timestamp_categories(self): | ||||
|         # GH#18024 | ||||
|         cat = Categorical([Timestamp("2014-01-01")]) | ||||
|         result = cat.astype(object) | ||||
|         expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_astype_category_readonly_mask_values(self): | ||||
|         # GH#53658 | ||||
|         arr = array([0, 1, 2], dtype="Int64") | ||||
|         arr._mask.flags["WRITEABLE"] = False | ||||
|         result = arr.astype("category") | ||||
|         expected = array([0, 1, 2], dtype="Int64").astype("category") | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
| @ -0,0 +1,787 @@ | ||||
| from datetime import ( | ||||
|     date, | ||||
|     datetime, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| from pandas.compat import HAS_PYARROW | ||||
|  | ||||
| from pandas.core.dtypes.common import ( | ||||
|     is_float_dtype, | ||||
|     is_integer_dtype, | ||||
| ) | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     DatetimeIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     NaT, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalConstructors: | ||||
|     def test_fastpath_deprecated(self): | ||||
|         codes = np.array([1, 2, 3]) | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c", "d"], ordered=False) | ||||
|         msg = "The 'fastpath' keyword in Categorical is deprecated" | ||||
|         with tm.assert_produces_warning(DeprecationWarning, match=msg): | ||||
|             Categorical(codes, dtype=dtype, fastpath=True) | ||||
|  | ||||
|     def test_categorical_from_cat_and_dtype_str_preserve_ordered(self): | ||||
|         # GH#49309 we should preserve orderedness in `res` | ||||
|         cat = Categorical([3, 1], categories=[3, 2, 1], ordered=True) | ||||
|  | ||||
|         res = Categorical(cat, dtype="category") | ||||
|         assert res.dtype.ordered | ||||
|  | ||||
|     def test_categorical_disallows_scalar(self): | ||||
|         # GH#38433 | ||||
|         with pytest.raises(TypeError, match="Categorical input must be list-like"): | ||||
|             Categorical("A", categories=["A", "B"]) | ||||
|  | ||||
|     def test_categorical_1d_only(self): | ||||
|         # ndim > 1 | ||||
|         msg = "> 1 ndim Categorical are not supported at this time" | ||||
|         with pytest.raises(NotImplementedError, match=msg): | ||||
|             Categorical(np.array([list("abcd")])) | ||||
|  | ||||
|     def test_validate_ordered(self): | ||||
|         # see gh-14058 | ||||
|         exp_msg = "'ordered' must either be 'True' or 'False'" | ||||
|         exp_err = TypeError | ||||
|  | ||||
|         # This should be a boolean. | ||||
|         ordered = np.array([0, 1, 2]) | ||||
|  | ||||
|         with pytest.raises(exp_err, match=exp_msg): | ||||
|             Categorical([1, 2, 3], ordered=ordered) | ||||
|  | ||||
|         with pytest.raises(exp_err, match=exp_msg): | ||||
|             Categorical.from_codes( | ||||
|                 [0, 0, 1], categories=["a", "b", "c"], ordered=ordered | ||||
|             ) | ||||
|  | ||||
|     def test_constructor_empty(self): | ||||
|         # GH 17248 | ||||
|         c = Categorical([]) | ||||
|         expected = Index([]) | ||||
|         tm.assert_index_equal(c.categories, expected) | ||||
|  | ||||
|         c = Categorical([], categories=[1, 2, 3]) | ||||
|         expected = Index([1, 2, 3], dtype=np.int64) | ||||
|         tm.assert_index_equal(c.categories, expected) | ||||
|  | ||||
|     def test_constructor_empty_boolean(self): | ||||
|         # see gh-22702 | ||||
|         cat = Categorical([], categories=[True, False]) | ||||
|         categories = sorted(cat.categories.tolist()) | ||||
|         assert categories == [False, True] | ||||
|  | ||||
|     def test_constructor_tuples(self): | ||||
|         values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object) | ||||
|         result = Categorical(values) | ||||
|         expected = Index([(1,), (1, 2)], tupleize_cols=False) | ||||
|         tm.assert_index_equal(result.categories, expected) | ||||
|         assert result.ordered is False | ||||
|  | ||||
|     def test_constructor_tuples_datetimes(self): | ||||
|         # numpy will auto reshape when all of the tuples are the | ||||
|         # same len, so add an extra one with 2 items and slice it off | ||||
|         values = np.array( | ||||
|             [ | ||||
|                 (Timestamp("2010-01-01"),), | ||||
|                 (Timestamp("2010-01-02"),), | ||||
|                 (Timestamp("2010-01-01"),), | ||||
|                 (Timestamp("2010-01-02"),), | ||||
|                 ("a", "b"), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         )[:-1] | ||||
|         result = Categorical(values) | ||||
|         expected = Index( | ||||
|             [(Timestamp("2010-01-01"),), (Timestamp("2010-01-02"),)], | ||||
|             tupleize_cols=False, | ||||
|         ) | ||||
|         tm.assert_index_equal(result.categories, expected) | ||||
|  | ||||
|     def test_constructor_unsortable(self): | ||||
|         # it works! | ||||
|         arr = np.array([1, 2, 3, datetime.now()], dtype="O") | ||||
|         factor = Categorical(arr, ordered=False) | ||||
|         assert not factor.ordered | ||||
|  | ||||
|         # this however will raise as cannot be sorted | ||||
|         msg = ( | ||||
|             "'values' is not ordered, please explicitly specify the " | ||||
|             "categories order by passing in a categories argument." | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             Categorical(arr, ordered=True) | ||||
|  | ||||
|     def test_constructor_interval(self): | ||||
|         result = Categorical( | ||||
|             [Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True | ||||
|         ) | ||||
|         ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) | ||||
|         exp = Categorical(ii, ordered=True) | ||||
|         tm.assert_categorical_equal(result, exp) | ||||
|         tm.assert_index_equal(result.categories, ii) | ||||
|  | ||||
|     def test_constructor(self): | ||||
|         """Exercise Categorical construction from arrays, Categoricals and Series. | ||||
|  | ||||
|         Also checks that duplicate categories raise, that the default is | ||||
|         unordered, which dtype the categories get when values contain NaN, | ||||
|         single-element inputs, and several calls that must not warn. | ||||
|         """ | ||||
|         exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) | ||||
|         c1 = Categorical(exp_arr) | ||||
|         tm.assert_numpy_array_equal(c1.__array__(), exp_arr) | ||||
|         c2 = Categorical(exp_arr, categories=["a", "b", "c"]) | ||||
|         tm.assert_numpy_array_equal(c2.__array__(), exp_arr) | ||||
|         c2 = Categorical(exp_arr, categories=["c", "b", "a"]) | ||||
|         tm.assert_numpy_array_equal(c2.__array__(), exp_arr) | ||||
|  | ||||
|         # categories must be unique | ||||
|         msg = "Categorical categories must be unique" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical([1, 2], [1, 2, 2]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical(["a", "b"], ["a", "b", "b"]) | ||||
|  | ||||
|         # The default should be unordered | ||||
|         c1 = Categorical(["a", "b", "c", "a"]) | ||||
|         assert not c1.ordered | ||||
|  | ||||
|         # Categorical as input | ||||
|         c1 = Categorical(["a", "b", "c", "a"]) | ||||
|         c2 = Categorical(c1) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) | ||||
|         c2 = Categorical(c1) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) | ||||
|         c2 = Categorical(c1) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) | ||||
|         c2 = Categorical(c1, categories=["a", "b", "c"]) | ||||
|         tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) | ||||
|         tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) | ||||
|  | ||||
|         # Series of dtype category | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) | ||||
|         c2 = Categorical(Series(c1)) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) | ||||
|         c2 = Categorical(Series(c1)) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         # Series | ||||
|         c1 = Categorical(["a", "b", "c", "a"]) | ||||
|         c2 = Categorical(Series(["a", "b", "c", "a"])) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) | ||||
|         c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         # This should result in integer categories, not float! | ||||
|         cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) | ||||
|         assert is_integer_dtype(cat.categories) | ||||
|  | ||||
|         # https://github.com/pandas-dev/pandas/issues/3678 | ||||
|         cat = Categorical([np.nan, 1, 2, 3]) | ||||
|         assert is_integer_dtype(cat.categories) | ||||
|  | ||||
|         # this should result in floats | ||||
|         cat = Categorical([np.nan, 1, 2.0, 3]) | ||||
|         assert is_float_dtype(cat.categories) | ||||
|  | ||||
|         cat = Categorical([np.nan, 1.0, 2.0, 3.0]) | ||||
|         assert is_float_dtype(cat.categories) | ||||
|  | ||||
|         # This doesn't work -> this would probably need some kind of "remember | ||||
|         # the original type" feature to try to cast the array interface result | ||||
|         # to... | ||||
|  | ||||
|         # vals = np.asarray(cat[cat.notna()]) | ||||
|         # assert is_integer_dtype(vals) | ||||
|  | ||||
|         # corner cases | ||||
|         cat = Categorical([1]) | ||||
|         assert len(cat.categories) == 1 | ||||
|         assert cat.categories[0] == 1 | ||||
|         assert len(cat.codes) == 1 | ||||
|         assert cat.codes[0] == 0 | ||||
|  | ||||
|         cat = Categorical(["a"]) | ||||
|         assert len(cat.categories) == 1 | ||||
|         assert cat.categories[0] == "a" | ||||
|         assert len(cat.codes) == 1 | ||||
|         assert cat.codes[0] == 0 | ||||
|  | ||||
|         # two arrays | ||||
|         #  - when the first is an integer dtype and the second is not | ||||
|         #  - when the resulting codes are all -1/NaN | ||||
|         with tm.assert_produces_warning(None): | ||||
|             Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) | ||||
|  | ||||
|         with tm.assert_produces_warning(None): | ||||
|             Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5]) | ||||
|  | ||||
|         # the next one are from the old docs | ||||
|         with tm.assert_produces_warning(None): | ||||
|             Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) | ||||
|             # `cat` is not read again below; only the call itself is checked | ||||
|             cat = Categorical([1, 2], categories=[1, 2, 3]) | ||||
|  | ||||
|         # this is a legitimate constructor | ||||
|         with tm.assert_produces_warning(None): | ||||
|             Categorical(np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True) | ||||
|  | ||||
|     def test_constructor_with_existing_categories(self): | ||||
|         # GH25318: constructing with pd.Series used to bogusly skip recoding | ||||
|         # categories | ||||
|         c0 = Categorical(["a", "b", "c", "a"]) | ||||
|         c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"]) | ||||
|  | ||||
|         c2 = Categorical(c0, categories=c1.categories) | ||||
|         tm.assert_categorical_equal(c1, c2) | ||||
|  | ||||
|         c3 = Categorical(Series(c0), categories=c1.categories) | ||||
|         tm.assert_categorical_equal(c1, c3) | ||||
|  | ||||
|     def test_constructor_not_sequence(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/16022 | ||||
|         msg = r"^Parameter 'categories' must be list-like, was" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             Categorical(["a", "b"], categories="a") | ||||
|  | ||||
|     def test_constructor_with_null(self): | ||||
|         # Cannot have NaN in categories | ||||
|         msg = "Categorical categories cannot be null" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical( | ||||
|                 DatetimeIndex(["nat", "20160101"]), | ||||
|                 categories=[NaT, Timestamp("20160101")], | ||||
|             ) | ||||
|  | ||||
|     def test_constructor_with_index(self): | ||||
|         ci = CategoricalIndex(list("aabbca"), categories=list("cab")) | ||||
|         tm.assert_categorical_equal(ci.values, Categorical(ci)) | ||||
|  | ||||
|         ci = CategoricalIndex(list("aabbca"), categories=list("cab")) | ||||
|         tm.assert_categorical_equal( | ||||
|             ci.values, Categorical(ci.astype(object), categories=ci.categories) | ||||
|         ) | ||||
|  | ||||
|     def test_constructor_with_generator(self): | ||||
|         # This was raising an Error in isna(single_val).any() because isna | ||||
|         # returned a scalar for a generator | ||||
|  | ||||
|         exp = Categorical([0, 1, 2]) | ||||
|         cat = Categorical(x for x in [0, 1, 2]) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|         cat = Categorical(range(3)) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|  | ||||
|         MultiIndex.from_product([range(5), ["a", "b", "c"]]) | ||||
|  | ||||
|         # check that categories accept generators and sequences | ||||
|         cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|         cat = Categorical([0, 1, 2], categories=range(3)) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|  | ||||
|     def test_constructor_with_rangeindex(self): | ||||
|         # RangeIndex is preserved in Categories | ||||
|         rng = Index(range(3)) | ||||
|  | ||||
|         cat = Categorical(rng) | ||||
|         tm.assert_index_equal(cat.categories, rng, exact=True) | ||||
|  | ||||
|         cat = Categorical([1, 2, 0], categories=rng) | ||||
|         tm.assert_index_equal(cat.categories, rng, exact=True) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtl", | ||||
|         [ | ||||
|             date_range("1995-01-01 00:00:00", periods=5, freq="s"), | ||||
|             date_range("1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"), | ||||
|             timedelta_range("1 day", periods=5, freq="s"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_constructor_with_datetimelike(self, dtl): | ||||
|         # see gh-12077 | ||||
|         # constructor with a datetimelike and NaT | ||||
|  | ||||
|         s = Series(dtl) | ||||
|         c = Categorical(s) | ||||
|  | ||||
|         expected = type(dtl)(s) | ||||
|         expected._data.freq = None | ||||
|  | ||||
|         tm.assert_index_equal(c.categories, expected) | ||||
|         tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8")) | ||||
|  | ||||
|         # with NaT | ||||
|         s2 = s.copy() | ||||
|         s2.iloc[-1] = NaT | ||||
|         c = Categorical(s2) | ||||
|  | ||||
|         expected = type(dtl)(s2.dropna()) | ||||
|         expected._data.freq = None | ||||
|  | ||||
|         tm.assert_index_equal(c.categories, expected) | ||||
|  | ||||
|         exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) | ||||
|         tm.assert_numpy_array_equal(c.codes, exp) | ||||
|  | ||||
|         result = repr(c) | ||||
|         assert "NaT" in result | ||||
|  | ||||
|     def test_constructor_from_index_series_datetimetz(self): | ||||
|         idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") | ||||
|         idx = idx._with_freq(None)  # freq not preserved in result.categories | ||||
|         result = Categorical(idx) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|         result = Categorical(Series(idx)) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|     def test_constructor_date_objects(self): | ||||
|         # we dont cast date objects to timestamps, matching Index constructor | ||||
|         v = date.today() | ||||
|  | ||||
|         cat = Categorical([v, v]) | ||||
|         assert cat.categories.dtype == object | ||||
|         assert type(cat.categories[0]) is date | ||||
|  | ||||
|     def test_constructor_from_index_series_timedelta(self): | ||||
|         idx = timedelta_range("1 days", freq="D", periods=3) | ||||
|         idx = idx._with_freq(None)  # freq not preserved in result.categories | ||||
|         result = Categorical(idx) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|         result = Categorical(Series(idx)) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|     def test_constructor_from_index_series_period(self): | ||||
|         idx = period_range("2015-01-01", freq="D", periods=3) | ||||
|         result = Categorical(idx) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|         result = Categorical(Series(idx)) | ||||
|         tm.assert_index_equal(result.categories, idx) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values", | ||||
|         [ | ||||
|             np.array([1.0, 1.2, 1.8, np.nan]), | ||||
|             np.array([1, 2, 3], dtype="int64"), | ||||
|             ["a", "b", "c", np.nan], | ||||
|             [pd.Period("2014-01"), pd.Period("2014-02"), NaT], | ||||
|             [Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT], | ||||
|             [ | ||||
|                 Timestamp("2014-01-01", tz="US/Eastern"), | ||||
|                 Timestamp("2014-01-02", tz="US/Eastern"), | ||||
|                 NaT, | ||||
|             ], | ||||
|         ], | ||||
|     ) | ||||
|     def test_constructor_invariant(self, values): | ||||
|         # GH 14190 | ||||
|         c = Categorical(values) | ||||
|         c2 = Categorical(c) | ||||
|         tm.assert_categorical_equal(c, c2) | ||||
|  | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_constructor_with_dtype(self, ordered): | ||||
|         categories = ["b", "a", "c"] | ||||
|         dtype = CategoricalDtype(categories, ordered=ordered) | ||||
|         result = Categorical(["a", "b", "a", "c"], dtype=dtype) | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "a", "c"], categories=categories, ordered=ordered | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|         assert result.ordered is ordered | ||||
|  | ||||
|     def test_constructor_dtype_and_others_raises(self): | ||||
|         dtype = CategoricalDtype(["a", "b"], ordered=True) | ||||
|         msg = "Cannot specify `categories` or `ordered` together with `dtype`." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical(["a", "b"], categories=["a", "b"], dtype=dtype) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical(["a", "b"], ordered=True, dtype=dtype) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical(["a", "b"], ordered=False, dtype=dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]]) | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_constructor_str_category(self, categories, ordered): | ||||
|         result = Categorical( | ||||
|             ["a", "b"], categories=categories, ordered=ordered, dtype="category" | ||||
|         ) | ||||
|         expected = Categorical(["a", "b"], categories=categories, ordered=ordered) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_str_unknown(self): | ||||
|         with pytest.raises(ValueError, match="Unknown dtype"): | ||||
|             Categorical([1, 2], dtype="foo") | ||||
|  | ||||
|     @pytest.mark.xfail( | ||||
|         using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings" | ||||
|     ) | ||||
|     def test_constructor_np_strs(self): | ||||
|         # GH#31499 Hashtable.map_locations needs to work on np.str_ objects | ||||
|         cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) | ||||
|         assert all(isinstance(x, np.str_) for x in cat.categories) | ||||
|  | ||||
|     def test_constructor_from_categorical_with_dtype(self): | ||||
|         dtype = CategoricalDtype(["a", "b", "c"], ordered=True) | ||||
|         values = Categorical(["a", "b", "d"]) | ||||
|         result = Categorical(values, dtype=dtype) | ||||
|         # We use dtype.categories, not values.categories | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "d"], categories=["a", "b", "c"], ordered=True | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_from_categorical_with_unknown_dtype(self): | ||||
|         dtype = CategoricalDtype(None, ordered=True) | ||||
|         values = Categorical(["a", "b", "d"]) | ||||
|         result = Categorical(values, dtype=dtype) | ||||
|         # We use values.categories, not dtype.categories | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "d"], categories=["a", "b", "d"], ordered=True | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_from_categorical_string(self): | ||||
|         values = Categorical(["a", "b", "d"]) | ||||
|         # use categories, ordered | ||||
|         result = Categorical( | ||||
|             values, categories=["a", "b", "c"], ordered=True, dtype="category" | ||||
|         ) | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "d"], categories=["a", "b", "c"], ordered=True | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         # No string | ||||
|         result = Categorical(values, categories=["a", "b", "c"], ordered=True) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_with_categorical_categories(self): | ||||
|         # GH17884 | ||||
|         expected = Categorical(["a", "b"], categories=["a", "b", "c"]) | ||||
|  | ||||
|         result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"])) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"])) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list]) | ||||
|     def test_construction_with_null(self, klass, nulls_fixture): | ||||
|         # https://github.com/pandas-dev/pandas/issues/31927 | ||||
|         values = klass(["a", nulls_fixture, "b"]) | ||||
|         result = Categorical(values) | ||||
|  | ||||
|         dtype = CategoricalDtype(["a", "b"]) | ||||
|         codes = [0, -1, 1] | ||||
|         expected = Categorical.from_codes(codes=codes, dtype=dtype) | ||||
|  | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("validate", [True, False]) | ||||
|     def test_from_codes_nullable_int_categories(self, any_numeric_ea_dtype, validate): | ||||
|         # GH#39649 | ||||
|         cats = pd.array(range(5), dtype=any_numeric_ea_dtype) | ||||
|         codes = np.random.default_rng(2).integers(5, size=3) | ||||
|         dtype = CategoricalDtype(cats) | ||||
|         arr = Categorical.from_codes(codes, dtype=dtype, validate=validate) | ||||
|         assert arr.categories.dtype == cats.dtype | ||||
|         tm.assert_index_equal(arr.categories, Index(cats)) | ||||
|  | ||||
|     def test_from_codes_empty(self): | ||||
|         cat = ["a", "b", "c"] | ||||
|         result = Categorical.from_codes([], categories=cat) | ||||
|         expected = Categorical([], categories=cat) | ||||
|  | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("validate", [True, False]) | ||||
|     def test_from_codes_validate(self, validate): | ||||
|         # GH53122 | ||||
|         dtype = CategoricalDtype(["a", "b"]) | ||||
|         if validate: | ||||
|             with pytest.raises(ValueError, match="codes need to be between "): | ||||
|                 Categorical.from_codes([4, 5], dtype=dtype, validate=validate) | ||||
|         else: | ||||
|             # passes, though has incorrect codes, but that's the user responsibility | ||||
|             Categorical.from_codes([4, 5], dtype=dtype, validate=validate) | ||||
|  | ||||
|     def test_from_codes_too_few_categories(self): | ||||
|         dtype = CategoricalDtype(categories=[1, 2]) | ||||
|         msg = "codes need to be between " | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([1, 2], categories=dtype.categories) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([1, 2], dtype=dtype) | ||||
|  | ||||
|     def test_from_codes_non_int_codes(self): | ||||
|         dtype = CategoricalDtype(categories=[1, 2]) | ||||
|         msg = "codes need to be array-like integers" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(["a"], categories=dtype.categories) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(["a"], dtype=dtype) | ||||
|  | ||||
|     def test_from_codes_non_unique_categories(self): | ||||
|         with pytest.raises(ValueError, match="Categorical categories must be unique"): | ||||
|             Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) | ||||
|  | ||||
|     def test_from_codes_nan_cat_included(self): | ||||
|         with pytest.raises(ValueError, match="Categorical categories cannot be null"): | ||||
|             Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) | ||||
|  | ||||
|     def test_from_codes_too_negative(self): | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||||
|         msg = r"codes need to be between -1 and len\(categories\)-1" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([-2, 1, 2], categories=dtype.categories) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([-2, 1, 2], dtype=dtype) | ||||
|  | ||||
|     def test_from_codes(self): | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||||
|         exp = Categorical(["a", "b", "c"], ordered=False) | ||||
|         res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) | ||||
|         tm.assert_categorical_equal(exp, res) | ||||
|  | ||||
|         res = Categorical.from_codes([0, 1, 2], dtype=dtype) | ||||
|         tm.assert_categorical_equal(exp, res) | ||||
|  | ||||
|     @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) | ||||
|     def test_from_codes_with_categorical_categories(self, klass): | ||||
|         # GH17884 | ||||
|         expected = Categorical(["a", "b"], categories=["a", "b", "c"]) | ||||
|  | ||||
|         result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) | ||||
|     def test_from_codes_with_non_unique_categorical_categories(self, klass): | ||||
|         with pytest.raises(ValueError, match="Categorical categories must be unique"): | ||||
|             Categorical.from_codes([0, 1], klass(["a", "b", "a"])) | ||||
|  | ||||
|     def test_from_codes_with_nan_code(self): | ||||
|         # GH21767 | ||||
|         codes = [1, 2, np.nan] | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||||
|         with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||||
|             Categorical.from_codes(codes, categories=dtype.categories) | ||||
|         with pytest.raises(ValueError, match="codes need to be array-like integers"): | ||||
|             Categorical.from_codes(codes, dtype=dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) | ||||
|     def test_from_codes_with_float(self, codes): | ||||
|         # GH21767 | ||||
|         # float codes should raise even if values are equal to integers | ||||
|         dtype = CategoricalDtype(categories=["a", "b", "c"]) | ||||
|  | ||||
|         msg = "codes need to be array-like integers" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(codes, dtype.categories) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(codes, dtype=dtype) | ||||
|  | ||||
|     def test_from_codes_with_dtype_raises(self): | ||||
|         msg = "Cannot specify" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes( | ||||
|                 [0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"]) | ||||
|             ) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes( | ||||
|                 [0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"]) | ||||
|             ) | ||||
|  | ||||
|     def test_from_codes_neither(self): | ||||
|         msg = "Both were None" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes([0, 1]) | ||||
|  | ||||
|     def test_from_codes_with_nullable_int(self): | ||||
|         codes = pd.array([0, 1], dtype="Int64") | ||||
|         categories = ["a", "b"] | ||||
|  | ||||
|         result = Categorical.from_codes(codes, categories=categories) | ||||
|         expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) | ||||
|  | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_from_codes_with_nullable_int_na_raises(self): | ||||
|         codes = pd.array([0, None], dtype="Int64") | ||||
|         categories = ["a", "b"] | ||||
|  | ||||
|         msg = "codes cannot contain NA values" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Categorical.from_codes(codes, categories=categories) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [None, "category"]) | ||||
|     def test_from_inferred_categories(self, dtype): | ||||
|         cats = ["a", "b"] | ||||
|         codes = np.array([0, 0, 1, 1], dtype="i8") | ||||
|         result = Categorical._from_inferred_categories(cats, codes, dtype) | ||||
|         expected = Categorical.from_codes(codes, cats) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [None, "category"]) | ||||
|     def test_from_inferred_categories_sorts(self, dtype): | ||||
|         cats = ["b", "a"] | ||||
|         codes = np.array([0, 1, 1, 1], dtype="i8") | ||||
|         result = Categorical._from_inferred_categories(cats, codes, dtype) | ||||
|         expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_from_inferred_categories_dtype(self): | ||||
|         cats = ["a", "b", "d"] | ||||
|         codes = np.array([0, 1, 0, 2], dtype="i8") | ||||
|         dtype = CategoricalDtype(["c", "b", "a"], ordered=True) | ||||
|         result = Categorical._from_inferred_categories(cats, codes, dtype) | ||||
|         expected = Categorical( | ||||
|             ["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_from_inferred_categories_coerces(self): | ||||
|         cats = ["1", "2", "bad"] | ||||
|         codes = np.array([0, 0, 1, 2], dtype="i8") | ||||
|         dtype = CategoricalDtype([1, 2]) | ||||
|         result = Categorical._from_inferred_categories(cats, codes, dtype) | ||||
|         expected = Categorical([1, 1, 2, np.nan]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("ordered", [None, True, False]) | ||||
|     def test_construction_with_ordered(self, ordered): | ||||
|         # GH 9347, 9190 | ||||
|         cat = Categorical([0, 1, 2], ordered=ordered) | ||||
|         assert cat.ordered == bool(ordered) | ||||
|  | ||||
|     def test_constructor_imaginary(self): | ||||
|         values = [1, 2, 3 + 1j] | ||||
|         c1 = Categorical(values) | ||||
|         tm.assert_index_equal(c1.categories, Index(values)) | ||||
|         tm.assert_numpy_array_equal(np.array(c1), np.array(values)) | ||||
|  | ||||
|     def test_constructor_string_and_tuples(self): | ||||
|         # GH 21416 | ||||
|         c = Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object)) | ||||
|         expected_index = Index([("a", "b"), ("b", "a"), "c"]) | ||||
|         assert c.categories.equals(expected_index) | ||||
|  | ||||
|     def test_interval(self): | ||||
|         idx = pd.interval_range(0, 10, periods=10) | ||||
|         cat = Categorical(idx, categories=idx) | ||||
|         expected_codes = np.arange(10, dtype="int8") | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # infer categories | ||||
|         cat = Categorical(idx) | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # list values | ||||
|         cat = Categorical(list(idx)) | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # list values, categories | ||||
|         cat = Categorical(list(idx), categories=list(idx)) | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # shuffled | ||||
|         values = idx.take([1, 2, 0]) | ||||
|         cat = Categorical(values, categories=idx) | ||||
|         tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="int8")) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # extra | ||||
|         values = pd.interval_range(8, 11, periods=3) | ||||
|         cat = Categorical(values, categories=idx) | ||||
|         expected_codes = np.array([8, 9, -1], dtype="int8") | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|         # overlapping | ||||
|         idx = IntervalIndex([Interval(0, 2), Interval(0, 1)]) | ||||
|         cat = Categorical(idx, categories=idx) | ||||
|         expected_codes = np.array([0, 1], dtype="int8") | ||||
|         tm.assert_numpy_array_equal(cat.codes, expected_codes) | ||||
|         tm.assert_index_equal(cat.categories, idx) | ||||
|  | ||||
|     def test_categorical_extension_array_nullable(self, nulls_fixture): | ||||
|         # GH: | ||||
|         arr = pd.arrays.StringArray._from_sequence( | ||||
|             [nulls_fixture] * 2, dtype=pd.StringDtype() | ||||
|         ) | ||||
|         result = Categorical(arr) | ||||
|         assert arr.dtype == result.categories.dtype | ||||
|         expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype)) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_from_sequence_copy(self): | ||||
|         cat = Categorical(np.arange(5).repeat(2)) | ||||
|         result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=False) | ||||
|  | ||||
|         # more generally, we'd be OK with a view | ||||
|         assert result._codes is cat._codes | ||||
|  | ||||
|         result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=True) | ||||
|  | ||||
|         assert not tm.shares_memory(result, cat) | ||||
|  | ||||
|     def test_constructor_datetime64_non_nano(self): | ||||
|         categories = np.arange(10).view("M8[D]") | ||||
|         values = categories[::2].copy() | ||||
|  | ||||
|         cat = Categorical(values, categories=categories) | ||||
|         assert (cat == values).all() | ||||
|  | ||||
|     def test_constructor_preserves_freq(self): | ||||
|         # GH33830 freq retention in categorical | ||||
|         dti = date_range("2016-01-01", periods=5) | ||||
|  | ||||
|         expected = dti.freq | ||||
|  | ||||
|         cat = Categorical(dti) | ||||
|         result = cat.categories.freq | ||||
|  | ||||
|         assert expected == result | ||||
| @ -0,0 +1,139 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalDtypes: | ||||
|     def test_categories_match_up_to_permutation(self): | ||||
|         # test dtype comparisons between cats | ||||
|  | ||||
|         c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False) | ||||
|         c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False) | ||||
|         c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True) | ||||
|         assert c1._categories_match_up_to_permutation(c1) | ||||
|         assert c2._categories_match_up_to_permutation(c2) | ||||
|         assert c3._categories_match_up_to_permutation(c3) | ||||
|         assert c1._categories_match_up_to_permutation(c2) | ||||
|         assert not c1._categories_match_up_to_permutation(c3) | ||||
|         assert not c1._categories_match_up_to_permutation(Index(list("aabca"))) | ||||
|         assert not c1._categories_match_up_to_permutation(c1.astype(object)) | ||||
|         assert c1._categories_match_up_to_permutation(CategoricalIndex(c1)) | ||||
|         assert c1._categories_match_up_to_permutation( | ||||
|             CategoricalIndex(c1, categories=list("cab")) | ||||
|         ) | ||||
|         assert not c1._categories_match_up_to_permutation( | ||||
|             CategoricalIndex(c1, ordered=True) | ||||
|         ) | ||||
|  | ||||
|         # GH 16659 | ||||
|         s1 = Series(c1) | ||||
|         s2 = Series(c2) | ||||
|         s3 = Series(c3) | ||||
|         assert c1._categories_match_up_to_permutation(s1) | ||||
|         assert c2._categories_match_up_to_permutation(s2) | ||||
|         assert c3._categories_match_up_to_permutation(s3) | ||||
|         assert c1._categories_match_up_to_permutation(s2) | ||||
|         assert not c1._categories_match_up_to_permutation(s3) | ||||
|         assert not c1._categories_match_up_to_permutation(s1.astype(object)) | ||||
|  | ||||
|     def test_set_dtype_same(self): | ||||
|         c = Categorical(["a", "b", "c"]) | ||||
|         result = c._set_dtype(CategoricalDtype(["a", "b", "c"])) | ||||
|         tm.assert_categorical_equal(result, c) | ||||
|  | ||||
|     def test_set_dtype_new_categories(self): | ||||
|         c = Categorical(["a", "b", "c"]) | ||||
|         result = c._set_dtype(CategoricalDtype(list("abcd"))) | ||||
|         tm.assert_numpy_array_equal(result.codes, c.codes) | ||||
|         tm.assert_index_equal(result.dtype.categories, Index(list("abcd"))) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, categories, new_categories", | ||||
|         [ | ||||
|             # No NaNs, same cats, same order | ||||
|             (["a", "b", "a"], ["a", "b"], ["a", "b"]), | ||||
|             # No NaNs, same cats, different order | ||||
|             (["a", "b", "a"], ["a", "b"], ["b", "a"]), | ||||
|             # Same, unsorted | ||||
|             (["b", "a", "a"], ["a", "b"], ["a", "b"]), | ||||
|             # No NaNs, same cats, different order | ||||
|             (["b", "a", "a"], ["a", "b"], ["b", "a"]), | ||||
|             # NaNs | ||||
|             (["a", "b", "c"], ["a", "b"], ["a", "b"]), | ||||
|             (["a", "b", "c"], ["a", "b"], ["b", "a"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a", "b"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a", "b"]), | ||||
|             # Introduce NaNs | ||||
|             (["a", "b", "c"], ["a", "b"], ["a"]), | ||||
|             (["a", "b", "c"], ["a", "b"], ["b"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a"]), | ||||
|             (["b", "a", "c"], ["a", "b"], ["a"]), | ||||
|             # No overlap | ||||
|             (["a", "b", "c"], ["a", "b"], ["d", "e"]), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_set_dtype_many(self, values, categories, new_categories, ordered): | ||||
|         c = Categorical(values, categories) | ||||
|         expected = Categorical(values, new_categories, ordered) | ||||
|         result = c._set_dtype(expected.dtype) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_set_dtype_no_overlap(self): | ||||
|         c = Categorical(["a", "b", "c"], ["d", "e"]) | ||||
|         result = c._set_dtype(CategoricalDtype(["a", "b"])) | ||||
|         expected = Categorical([None, None, None], categories=["a", "b"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_codes_dtypes(self): | ||||
|         # GH 8453 | ||||
|         result = Categorical(["foo", "bar", "baz"]) | ||||
|         assert result.codes.dtype == "int8" | ||||
|  | ||||
|         result = Categorical([f"foo{i:05d}" for i in range(400)]) | ||||
|         assert result.codes.dtype == "int16" | ||||
|  | ||||
|         result = Categorical([f"foo{i:05d}" for i in range(40000)]) | ||||
|         assert result.codes.dtype == "int32" | ||||
|  | ||||
|         # adding cats | ||||
|         result = Categorical(["foo", "bar", "baz"]) | ||||
|         assert result.codes.dtype == "int8" | ||||
|         result = result.add_categories([f"foo{i:05d}" for i in range(400)]) | ||||
|         assert result.codes.dtype == "int16" | ||||
|  | ||||
|         # removing cats | ||||
|         result = result.remove_categories([f"foo{i:05d}" for i in range(300)]) | ||||
|         assert result.codes.dtype == "int8" | ||||
|  | ||||
|     def test_iter_python_types(self): | ||||
|         # GH-19909 | ||||
|         cat = Categorical([1, 2]) | ||||
|         assert isinstance(next(iter(cat)), int) | ||||
|         assert isinstance(cat.tolist()[0], int) | ||||
|  | ||||
|     def test_iter_python_types_datetime(self): | ||||
|         cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")]) | ||||
|         assert isinstance(next(iter(cat)), Timestamp) | ||||
|         assert isinstance(cat.tolist()[0], Timestamp) | ||||
|  | ||||
|     def test_interval_index_category(self): | ||||
|         # GH 38316 | ||||
|         index = IntervalIndex.from_breaks(np.arange(3, dtype="uint64")) | ||||
|  | ||||
|         result = CategoricalIndex(index).dtype.categories | ||||
|         expected = IntervalIndex.from_arrays( | ||||
|             [0, 1], [1, 2], dtype="interval[uint64, right]" | ||||
|         ) | ||||
|         tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,388 @@ | ||||
| import math | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     NaT, | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| import pandas.core.common as com | ||||
|  | ||||
|  | ||||
| class TestCategoricalIndexingWithFactor: | ||||
|     def test_getitem(self): | ||||
|         factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|         assert factor[0] == "a" | ||||
|         assert factor[-1] == "c" | ||||
|  | ||||
|         subf = factor[[0, 1, 2]] | ||||
|         tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8)) | ||||
|  | ||||
|         subf = factor[np.asarray(factor) == "c"] | ||||
|         tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8)) | ||||
|  | ||||
|     def test_setitem(self): | ||||
|         factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|         # int/positional | ||||
|         c = factor.copy() | ||||
|         c[0] = "b" | ||||
|         assert c[0] == "b" | ||||
|         c[-1] = "a" | ||||
|         assert c[-1] == "a" | ||||
|  | ||||
|         # boolean | ||||
|         c = factor.copy() | ||||
|         indexer = np.zeros(len(c), dtype="bool") | ||||
|         indexer[0] = True | ||||
|         indexer[-1] = True | ||||
|         c[indexer] = "c" | ||||
|         expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|  | ||||
|         tm.assert_categorical_equal(c, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [Categorical(["b", "a"]), Categorical(["b", "a"], categories=["b", "a"])], | ||||
|     ) | ||||
|     def test_setitem_same_but_unordered(self, other): | ||||
|         # GH-24142 | ||||
|         target = Categorical(["a", "b"], categories=["a", "b"]) | ||||
|         mask = np.array([True, False]) | ||||
|         target[mask] = other[mask] | ||||
|         expected = Categorical(["b", "b"], categories=["a", "b"]) | ||||
|         tm.assert_categorical_equal(target, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [ | ||||
|             Categorical(["b", "a"], categories=["b", "a", "c"]), | ||||
|             Categorical(["b", "a"], categories=["a", "b", "c"]), | ||||
|             Categorical(["a", "a"], categories=["a"]), | ||||
|             Categorical(["b", "b"], categories=["b"]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_setitem_different_unordered_raises(self, other): | ||||
|         # GH-24142 | ||||
|         target = Categorical(["a", "b"], categories=["a", "b"]) | ||||
|         mask = np.array([True, False]) | ||||
|         msg = "Cannot set a Categorical with another, without identical categories" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             target[mask] = other[mask] | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [ | ||||
|             Categorical(["b", "a"]), | ||||
|             Categorical(["b", "a"], categories=["b", "a"], ordered=True), | ||||
|             Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True), | ||||
|         ], | ||||
|     ) | ||||
|     def test_setitem_same_ordered_raises(self, other): | ||||
|         # Gh-24142 | ||||
|         target = Categorical(["a", "b"], categories=["a", "b"], ordered=True) | ||||
|         mask = np.array([True, False]) | ||||
|         msg = "Cannot set a Categorical with another, without identical categories" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             target[mask] = other[mask] | ||||
|  | ||||
|     def test_setitem_tuple(self): | ||||
|         # GH#20439 | ||||
|         cat = Categorical([(0, 1), (0, 2), (0, 1)]) | ||||
|  | ||||
|         # This should not raise | ||||
|         cat[1] = cat[0] | ||||
|         assert cat[1] == (0, 1) | ||||
|  | ||||
|     def test_setitem_listlike(self): | ||||
|         # GH#9469 | ||||
|         # properly coerce the input indexers | ||||
|  | ||||
|         cat = Categorical( | ||||
|             np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8) | ||||
|         ).add_categories([-1000]) | ||||
|         indexer = np.array([100000]).astype(np.int64) | ||||
|         cat[indexer] = -1000 | ||||
|  | ||||
|         # we are asserting the code result here | ||||
|         # which maps to the -1000 category | ||||
|         result = cat.codes[np.array([100000]).astype(np.int64)] | ||||
|         tm.assert_numpy_array_equal(result, np.array([5], dtype="int8")) | ||||
|  | ||||
|  | ||||
| class TestCategoricalIndexing: | ||||
|     def test_getitem_slice(self): | ||||
|         cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) | ||||
|         sliced = cat[3] | ||||
|         assert sliced == "d" | ||||
|  | ||||
|         sliced = cat[3:5] | ||||
|         expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"]) | ||||
|         tm.assert_categorical_equal(sliced, expected) | ||||
|  | ||||
|     def test_getitem_listlike(self): | ||||
|         # GH 9469 | ||||
|         # properly coerce the input indexers | ||||
|  | ||||
|         c = Categorical( | ||||
|             np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8) | ||||
|         ) | ||||
|         result = c.codes[np.array([100000]).astype(np.int64)] | ||||
|         expected = c[np.array([100000]).astype(np.int64)].codes | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_periodindex(self): | ||||
|         idx1 = PeriodIndex( | ||||
|             ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], | ||||
|             freq="M", | ||||
|         ) | ||||
|  | ||||
|         cat1 = Categorical(idx1) | ||||
|         str(cat1) | ||||
|         exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8) | ||||
|         exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") | ||||
|         tm.assert_numpy_array_equal(cat1._codes, exp_arr) | ||||
|         tm.assert_index_equal(cat1.categories, exp_idx) | ||||
|  | ||||
|         idx2 = PeriodIndex( | ||||
|             ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], | ||||
|             freq="M", | ||||
|         ) | ||||
|         cat2 = Categorical(idx2, ordered=True) | ||||
|         str(cat2) | ||||
|         exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8) | ||||
|         exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") | ||||
|         tm.assert_numpy_array_equal(cat2._codes, exp_arr) | ||||
|         tm.assert_index_equal(cat2.categories, exp_idx2) | ||||
|  | ||||
|         idx3 = PeriodIndex( | ||||
|             [ | ||||
|                 "2013-12", | ||||
|                 "2013-11", | ||||
|                 "2013-10", | ||||
|                 "2013-09", | ||||
|                 "2013-08", | ||||
|                 "2013-07", | ||||
|                 "2013-05", | ||||
|             ], | ||||
|             freq="M", | ||||
|         ) | ||||
|         cat3 = Categorical(idx3, ordered=True) | ||||
|         exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8) | ||||
|         exp_idx = PeriodIndex( | ||||
|             [ | ||||
|                 "2013-05", | ||||
|                 "2013-07", | ||||
|                 "2013-08", | ||||
|                 "2013-09", | ||||
|                 "2013-10", | ||||
|                 "2013-11", | ||||
|                 "2013-12", | ||||
|             ], | ||||
|             freq="M", | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal(cat3._codes, exp_arr) | ||||
|         tm.assert_index_equal(cat3.categories, exp_idx) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "null_val", | ||||
|         [None, np.nan, NaT, NA, math.nan, "NaT", "nat", "NAT", "nan", "NaN", "NAN"], | ||||
|     ) | ||||
|     def test_periodindex_on_null_types(self, null_val): | ||||
|         # GH 46673 | ||||
|         result = PeriodIndex(["2022-04-06", "2022-04-07", null_val], freq="D") | ||||
|         expected = PeriodIndex(["2022-04-06", "2022-04-07", "NaT"], dtype="period[D]") | ||||
|         assert result[2] is NaT | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) | ||||
|     def test_categories_assignments_wrong_length_raises(self, new_categories): | ||||
|         cat = Categorical(["a", "b", "c", "a"]) | ||||
|         msg = ( | ||||
|             "new categories need to have the same number of items " | ||||
|             "as the old categories!" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             cat.rename_categories(new_categories) | ||||
|  | ||||
|     # Combinations of sorted/unique: | ||||
|     @pytest.mark.parametrize( | ||||
|         "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]] | ||||
|     ) | ||||
|     # Combinations of missing/unique | ||||
|     @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]]) | ||||
|     @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex]) | ||||
|     @pytest.mark.parametrize("dtype", [None, "category", "key"]) | ||||
|     def test_get_indexer_non_unique(self, idx_values, key_values, key_class, dtype): | ||||
|         # GH 21448 | ||||
|         key = key_class(key_values, categories=range(1, 5)) | ||||
|  | ||||
|         if dtype == "key": | ||||
|             dtype = key.dtype | ||||
|  | ||||
|         # Test for flat index and CategoricalIndex with same/different cats: | ||||
|         idx = Index(idx_values, dtype=dtype) | ||||
|         expected, exp_miss = idx.get_indexer_non_unique(key_values) | ||||
|         result, res_miss = idx.get_indexer_non_unique(key) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(expected, result) | ||||
|         tm.assert_numpy_array_equal(exp_miss, res_miss) | ||||
|  | ||||
|         exp_unique = idx.unique().get_indexer(key_values) | ||||
|         res_unique = idx.unique().get_indexer(key) | ||||
|         tm.assert_numpy_array_equal(res_unique, exp_unique) | ||||
|  | ||||
|     def test_where_unobserved_nan(self): | ||||
|         ser = Series(Categorical(["a", "b"])) | ||||
|         result = ser.where([True, False]) | ||||
|         expected = Series(Categorical(["a", None], categories=["a", "b"])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # all NA | ||||
|         ser = Series(Categorical(["a", "b"])) | ||||
|         result = ser.where([False, False]) | ||||
|         expected = Series(Categorical([None, None], categories=["a", "b"])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_where_unobserved_categories(self): | ||||
|         ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])) | ||||
|         result = ser.where([True, True, False], other="b") | ||||
|         expected = Series(Categorical(["a", "b", "b"], categories=ser.cat.categories)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_where_other_categorical(self): | ||||
|         ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])) | ||||
|         other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"]) | ||||
|         result = ser.where([True, False, True], other) | ||||
|         expected = Series(Categorical(["a", "c", "c"], dtype=ser.dtype)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_where_new_category_raises(self): | ||||
|         ser = Series(Categorical(["a", "b", "c"])) | ||||
|         msg = "Cannot setitem on a Categorical with a new category" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser.where([True, False, True], "d") | ||||
|  | ||||
|     def test_where_ordered_differs_rasies(self): | ||||
|         ser = Series( | ||||
|             Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True) | ||||
|         ) | ||||
|         other = Categorical( | ||||
|             ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match="without identical categories"): | ||||
|             ser.where([True, False, True], other) | ||||
|  | ||||
|  | ||||
| class TestContains: | ||||
|     def test_contains(self): | ||||
|         # GH#21508 | ||||
|         cat = Categorical(list("aabbca"), categories=list("cab")) | ||||
|  | ||||
|         assert "b" in cat | ||||
|         assert "z" not in cat | ||||
|         assert np.nan not in cat | ||||
|         with pytest.raises(TypeError, match="unhashable type: 'list'"): | ||||
|             assert [1] in cat | ||||
|  | ||||
|         # assert codes NOT in index | ||||
|         assert 0 not in cat | ||||
|         assert 1 not in cat | ||||
|  | ||||
|         cat = Categorical(list("aabbca") + [np.nan], categories=list("cab")) | ||||
|         assert np.nan in cat | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "item, expected", | ||||
|         [ | ||||
|             (Interval(0, 1), True), | ||||
|             (1.5, True), | ||||
|             (Interval(0.5, 1.5), False), | ||||
|             ("a", False), | ||||
|             (Timestamp(1), False), | ||||
|             (Timedelta(1), False), | ||||
|         ], | ||||
|         ids=str, | ||||
|     ) | ||||
|     def test_contains_interval(self, item, expected): | ||||
|         # GH#23705 | ||||
|         cat = Categorical(IntervalIndex.from_breaks(range(3))) | ||||
|         result = item in cat | ||||
|         assert result is expected | ||||
|  | ||||
|     def test_contains_list(self): | ||||
|         # GH#21729 | ||||
|         cat = Categorical([1, 2, 3]) | ||||
|  | ||||
|         assert "a" not in cat | ||||
|  | ||||
|         with pytest.raises(TypeError, match="unhashable type"): | ||||
|             ["a"] in cat | ||||
|  | ||||
|         with pytest.raises(TypeError, match="unhashable type"): | ||||
|             ["a", "b"] in cat | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("index", [True, False]) | ||||
| def test_mask_with_boolean(index): | ||||
|     ser = Series(range(3)) | ||||
|     idx = Categorical([True, False, True]) | ||||
|     if index: | ||||
|         idx = CategoricalIndex(idx) | ||||
|  | ||||
|     assert com.is_bool_indexer(idx) | ||||
|     result = ser[idx] | ||||
|     expected = ser[idx.astype("object")] | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("index", [True, False]) | ||||
| def test_mask_with_boolean_na_treated_as_false(index): | ||||
|     # https://github.com/pandas-dev/pandas/issues/31503 | ||||
|     ser = Series(range(3)) | ||||
|     idx = Categorical([True, False, None]) | ||||
|     if index: | ||||
|         idx = CategoricalIndex(idx) | ||||
|  | ||||
|     result = ser[idx] | ||||
|     expected = ser[idx.fillna(False)] | ||||
|  | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.fixture
def non_coercible_categorical(monkeypatch):
    """
    Make implicit conversion of a Categorical through ``__array__`` fail.

    While the requesting test runs, ``Categorical.__array__`` is replaced by
    a stub that always raises.

    Raises
    ------
    ValueError
        When Categorical.__array__ is called.
    """

    # TODO(Categorical): identify other places where this may be
    # useful and move to a conftest.py
    def array(self, dtype=None):
        raise ValueError("I cannot be converted.")

    with monkeypatch.context() as patcher:
        patcher.setattr(Categorical, "__array__", array)
        # Hand control to the test while the patch is active.
        yield
|  | ||||
|  | ||||
def test_series_at():
    """``Series.at`` on categorical data returns the scalar at that label."""
    ser = Series(Categorical(["a", "b", "c"]))
    assert ser.at[0] == "a"
| @ -0,0 +1,154 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[None, "ignore"])
def na_action(request):
    """Fixture returning each parametrized ``na_action`` value: None and "ignore"."""
    return request.param
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, categories", | ||||
|     [ | ||||
|         (list("abcbca"), list("cab")), | ||||
|         (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), | ||||
|     ], | ||||
|     ids=["string", "interval"], | ||||
| ) | ||||
| def test_map_str(data, categories, ordered, na_action): | ||||
|     # GH 31202 - override base class since we want to maintain categorical/ordered | ||||
|     cat = Categorical(data, categories=categories, ordered=ordered) | ||||
|     result = cat.map(str, na_action=na_action) | ||||
|     expected = Categorical( | ||||
|         map(str, data), categories=map(str, categories), ordered=ordered | ||||
|     ) | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map(na_action): | ||||
|     cat = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) | ||||
|     result = cat.map(lambda x: x.lower(), na_action=na_action) | ||||
|     exp = Categorical(list("ababc"), categories=list("cba"), ordered=True) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|     cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) | ||||
|     result = cat.map(lambda x: x.lower(), na_action=na_action) | ||||
|     exp = Categorical(list("ababc"), categories=list("bac"), ordered=False) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|     # GH 12766: Return an index not an array | ||||
|     result = cat.map(lambda x: 1, na_action=na_action) | ||||
|     exp = Index(np.array([1] * 5, dtype=np.int64)) | ||||
|     tm.assert_index_equal(result, exp) | ||||
|  | ||||
|     # change categories dtype | ||||
|     cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False) | ||||
|  | ||||
|     def f(x): | ||||
|         return {"A": 10, "B": 20, "C": 30}.get(x) | ||||
|  | ||||
|     result = cat.map(f, na_action=na_action) | ||||
|     exp = Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|     mapper = Series([10, 20, 30], index=["A", "B", "C"]) | ||||
|     result = cat.map(mapper, na_action=na_action) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|     result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action) | ||||
|     tm.assert_categorical_equal(result, exp) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("data", "f", "expected"), | ||||
|     ( | ||||
|         ([1, 1, np.nan], pd.isna, Index([False, False, True])), | ||||
|         ([1, 2, np.nan], pd.isna, Index([False, False, True])), | ||||
|         ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), | ||||
|         ( | ||||
|             [1, 1, np.nan], | ||||
|             Series([False, False]), | ||||
|             Categorical([False, False, np.nan]), | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2, np.nan], | ||||
|             Series([False] * 3), | ||||
|             Index([False, False, np.nan]), | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_map_with_nan_none(data, f, expected):  # GH 24241 | ||||
|     values = Categorical(data) | ||||
|     result = values.map(f, na_action=None) | ||||
|     if isinstance(expected, Categorical): | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|     else: | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     ("data", "f", "expected"), | ||||
|     ( | ||||
|         ([1, 1, np.nan], pd.isna, Categorical([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])), | ||||
|         ([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])), | ||||
|         ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])), | ||||
|         ( | ||||
|             [1, 1, np.nan], | ||||
|             Series([False, False]), | ||||
|             Categorical([False, False, np.nan]), | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2, np.nan], | ||||
|             Series([False, False, False]), | ||||
|             Index([False, False, np.nan]), | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_map_with_nan_ignore(data, f, expected):  # GH 24241 | ||||
|     values = Categorical(data) | ||||
|     result = values.map(f, na_action="ignore") | ||||
|     if data[1] == 1: | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|     else: | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_with_dict_or_series(na_action): | ||||
|     orig_values = ["a", "B", 1, "a"] | ||||
|     new_values = ["one", 2, 3.0, "one"] | ||||
|     cat = Categorical(orig_values) | ||||
|  | ||||
|     mapper = Series(new_values[:-1], index=orig_values[:-1]) | ||||
|     result = cat.map(mapper, na_action=na_action) | ||||
|  | ||||
|     # Order of categories in result can be different | ||||
|     expected = Categorical(new_values, categories=[3.0, 2, "one"]) | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     mapper = dict(zip(orig_values[:-1], new_values[:-1])) | ||||
|     result = cat.map(mapper, na_action=na_action) | ||||
|     # Order of categories in result can be different | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_na_action_no_default_deprecated(): | ||||
|     # GH51645 | ||||
|     cat = Categorical(["a", "b", "c"]) | ||||
|     msg = ( | ||||
|         "The default value of 'ignore' for the `na_action` parameter in " | ||||
|         "pandas.Categorical.map is deprecated and will be " | ||||
|         "changed to 'None' in a future version. Please set na_action to the " | ||||
|         "desired value to avoid seeing this warning" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         cat.map(lambda x: x) | ||||
| @ -0,0 +1,216 @@ | ||||
| import collections | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import CategoricalDtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalMissing: | ||||
|     def test_isna(self): | ||||
|         exp = np.array([False, False, True]) | ||||
|         cat = Categorical(["a", "b", np.nan]) | ||||
|         res = cat.isna() | ||||
|  | ||||
|         tm.assert_numpy_array_equal(res, exp) | ||||
|  | ||||
|     def test_na_flags_int_categories(self): | ||||
|         # #1457 | ||||
|  | ||||
|         categories = list(range(10)) | ||||
|         labels = np.random.default_rng(2).integers(0, 10, 20) | ||||
|         labels[::5] = -1 | ||||
|  | ||||
|         cat = Categorical(labels, categories) | ||||
|         repr(cat) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(isna(cat), labels == -1) | ||||
|  | ||||
|     def test_nan_handling(self): | ||||
|         # Nans are represented as -1 in codes | ||||
|         c = Categorical(["a", "b", np.nan, "a"]) | ||||
|         tm.assert_index_equal(c.categories, Index(["a", "b"])) | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) | ||||
|         c[1] = np.nan | ||||
|         tm.assert_index_equal(c.categories, Index(["a", "b"])) | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8)) | ||||
|  | ||||
|         # Adding nan to categories should make assigned nan point to the | ||||
|         # category! | ||||
|         c = Categorical(["a", "b", np.nan, "a"]) | ||||
|         tm.assert_index_equal(c.categories, Index(["a", "b"])) | ||||
|         tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) | ||||
|  | ||||
|     def test_set_dtype_nans(self): | ||||
|         c = Categorical(["a", "b", np.nan]) | ||||
|         result = c._set_dtype(CategoricalDtype(["a", "c"])) | ||||
|         tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8")) | ||||
|  | ||||
|     def test_set_item_nan(self): | ||||
|         cat = Categorical([1, 2, 3]) | ||||
|         cat[1] = np.nan | ||||
|  | ||||
|         exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) | ||||
|         tm.assert_categorical_equal(cat, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "fillna_kwargs, msg", | ||||
|         [ | ||||
|             ( | ||||
|                 {"value": 1, "method": "ffill"}, | ||||
|                 "Cannot specify both 'value' and 'method'.", | ||||
|             ), | ||||
|             ({}, "Must specify a fill 'value' or 'method'."), | ||||
|             ({"method": "bad"}, "Invalid fill method. Expecting .* bad"), | ||||
|             ( | ||||
|                 {"value": Series([1, 2, 3, 4, "a"])}, | ||||
|                 "Cannot setitem on a Categorical with a new category", | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_fillna_raises(self, fillna_kwargs, msg): | ||||
|         # https://github.com/pandas-dev/pandas/issues/19682 | ||||
|         # https://github.com/pandas-dev/pandas/issues/13628 | ||||
|         cat = Categorical([1, 2, 3, None, None]) | ||||
|  | ||||
|         if len(fillna_kwargs) == 1 and "value" in fillna_kwargs: | ||||
|             err = TypeError | ||||
|         else: | ||||
|             err = ValueError | ||||
|  | ||||
|         with pytest.raises(err, match=msg): | ||||
|             cat.fillna(**fillna_kwargs) | ||||
|  | ||||
|     @pytest.mark.parametrize("named", [True, False]) | ||||
|     def test_fillna_iterable_category(self, named): | ||||
|         # https://github.com/pandas-dev/pandas/issues/21097 | ||||
|         if named: | ||||
|             Point = collections.namedtuple("Point", "x y") | ||||
|         else: | ||||
|             Point = lambda *args: args  # tuple | ||||
|         cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object)) | ||||
|         result = cat.fillna(Point(0, 0)) | ||||
|         expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)]) | ||||
|  | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         # Case where the Point is not among our categories; we want ValueError, | ||||
|         #  not NotImplementedError GH#41914 | ||||
|         cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object)) | ||||
|         msg = "Cannot setitem on a Categorical with a new category" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat.fillna(Point(0, 0)) | ||||
|  | ||||
|     def test_fillna_array(self): | ||||
|         # accept Categorical or ndarray value if it holds appropriate values | ||||
|         cat = Categorical(["A", "B", "C", None, None]) | ||||
|  | ||||
|         other = cat.fillna("C") | ||||
|         result = cat.fillna(other) | ||||
|         tm.assert_categorical_equal(result, other) | ||||
|         assert isna(cat[-1])  # didn't modify original inplace | ||||
|  | ||||
|         other = np.array(["A", "B", "C", "B", "A"]) | ||||
|         result = cat.fillna(other) | ||||
|         expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|         assert isna(cat[-1])  # didn't modify original inplace | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, expected", | ||||
|         [ | ||||
|             ([1, 2, 3], np.array([False, False, False])), | ||||
|             ([1, 2, np.nan], np.array([False, False, True])), | ||||
|             ([1, 2, np.inf], np.array([False, False, True])), | ||||
|             ([1, 2, pd.NA], np.array([False, False, True])), | ||||
|         ], | ||||
|     ) | ||||
|     def test_use_inf_as_na(self, values, expected): | ||||
|         # https://github.com/pandas-dev/pandas/issues/33594 | ||||
|         msg = "use_inf_as_na option is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             with pd.option_context("mode.use_inf_as_na", True): | ||||
|                 cat = Categorical(values) | ||||
|                 result = cat.isna() | ||||
|                 tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|                 result = Series(cat).isna() | ||||
|                 expected = Series(expected) | ||||
|                 tm.assert_series_equal(result, expected) | ||||
|  | ||||
|                 result = DataFrame(cat).isna() | ||||
|                 expected = DataFrame(expected) | ||||
|                 tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "values, expected", | ||||
|         [ | ||||
|             ([1, 2, 3], np.array([False, False, False])), | ||||
|             ([1, 2, np.nan], np.array([False, False, True])), | ||||
|             ([1, 2, np.inf], np.array([False, False, True])), | ||||
|             ([1, 2, pd.NA], np.array([False, False, True])), | ||||
|         ], | ||||
|     ) | ||||
|     def test_use_inf_as_na_outside_context(self, values, expected): | ||||
|         # https://github.com/pandas-dev/pandas/issues/33594 | ||||
|         # Using isna directly for Categorical will fail in general here | ||||
|         cat = Categorical(values) | ||||
|  | ||||
|         msg = "use_inf_as_na option is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             with pd.option_context("mode.use_inf_as_na", True): | ||||
|                 result = isna(cat) | ||||
|                 tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|                 result = isna(Series(cat)) | ||||
|                 expected = Series(expected) | ||||
|                 tm.assert_series_equal(result, expected) | ||||
|  | ||||
|                 result = isna(DataFrame(cat)) | ||||
|                 expected = DataFrame(expected) | ||||
|                 tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "a1, a2, categories", | ||||
|         [ | ||||
|             (["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]), | ||||
|             ([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_compare_categorical_with_missing(self, a1, a2, categories): | ||||
|         # GH 28384 | ||||
|         cat_type = CategoricalDtype(categories) | ||||
|  | ||||
|         # != | ||||
|         result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type) | ||||
|         expected = Series(a1) != Series(a2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # == | ||||
|         result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type) | ||||
|         expected = Series(a1) == Series(a2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "na_value, dtype", | ||||
|         [ | ||||
|             (pd.NaT, "datetime64[ns]"), | ||||
|             (None, "float64"), | ||||
|             (np.nan, "float64"), | ||||
|             (pd.NA, "float64"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_categorical_only_missing_values_no_cast(self, na_value, dtype): | ||||
|         # GH#44900 | ||||
|         result = Categorical([na_value, na_value]) | ||||
|         tm.assert_index_equal(result.categories, Index([], dtype=dtype)) | ||||
| @ -0,0 +1,414 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestCategoricalOpsWithFactor:
    """Comparison operators on an ordered string ``Categorical``."""

    def test_categories_none_comparisons(self):
        """A Categorical built without explicit categories equals itself."""
        factor = Categorical(list("abbaaccc"), ordered=True)
        tm.assert_categorical_equal(factor, factor)

    def test_comparisons(self):
        """Scalar, categorical and array comparisons on ordered categoricals."""
        factor = Categorical(list("abbaaccc"), ordered=True)
        as_array = np.asarray(factor)

        # Masks from comparing the Categorical with a scalar must select the
        # same elements as masks from comparing its ndarray form.
        mask_pairs = [
            (factor == "a", as_array == "a"),
            (factor != "a", as_array != "a"),
            (factor < "c", as_array < "c"),
            (factor > "a", as_array > "a"),
            (factor >= "b", as_array >= "b"),
            (factor <= "b", as_array <= "b"),
        ]
        for cat_mask, array_mask in mask_pairs:
            tm.assert_categorical_equal(factor[cat_mask], factor[array_mask])

        # Element-wise equality against a shuffled copy of itself.
        size = len(factor)
        shuffled = factor[np.random.default_rng(2).permutation(size)]
        tm.assert_numpy_array_equal(
            factor == shuffled, np.asarray(factor) == np.asarray(shuffled)
        )

        # Equality with a value that is not a category is False everywhere.
        tm.assert_numpy_array_equal(factor == "d", np.zeros(size, dtype=bool))

        # comparisons with categoricals
        cat_rev = Categorical(["a", "b", "c"], categories=["c", "b", "a"], ordered=True)
        cat_rev_base = Categorical(
            ["b", "b", "b"], categories=["c", "b", "a"], ordered=True
        )
        cat = Categorical(["a", "b", "c"], ordered=True)
        cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True)

        # comparisons need to take categories ordering into account
        tm.assert_numpy_array_equal(
            cat_rev > cat_rev_base, np.array([True, False, False])
        )
        tm.assert_numpy_array_equal(
            cat_rev < cat_rev_base, np.array([False, False, True])
        )
        tm.assert_numpy_array_equal(cat > cat_base, np.array([False, False, True]))

        # Only categories with same categories can be compared
        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            cat > cat_rev

        cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"])
        with pytest.raises(TypeError, match=msg):
            cat_rev > cat_rev_base2

        # Only categories with same ordering information can be compared
        cat_unordered = cat.set_ordered(False)
        assert not (cat > cat).any()
        with pytest.raises(TypeError, match=msg):
            cat > cat_unordered

        # comparison (in both directions) with Series will raise
        s = Series(["b", "b", "b"], dtype=object)
        msg = (
            "Cannot compare a Categorical for op __gt__ with type "
            r"<class 'numpy\.ndarray'>"
        )
        for categorical in (cat, cat_rev):
            with pytest.raises(TypeError, match=msg):
                categorical > s
        for categorical in (cat, cat_rev):
            with pytest.raises(TypeError, match=msg):
                s < categorical

        # comparison with numpy.array will raise in both direction, but only on
        # newer numpy versions
        a = np.array(["b", "b", "b"], dtype=object)
        for categorical in (cat, cat_rev):
            with pytest.raises(TypeError, match=msg):
                categorical > a

        # Make sure that unequal comparison take the categories order in
        # account
        cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True)
        greater_than_b = np.array([True, False, False])
        # The second operand checks that a zero-dim array gets unboxed.
        for operand in ("b", np.array("b")):
            tm.assert_numpy_array_equal(cat_rev > operand, greater_than_b)
|  | ||||
|  | ||||
| class TestCategoricalOps: | ||||
|     @pytest.mark.parametrize( | ||||
|         "categories", | ||||
|         [["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]], | ||||
|     ) | ||||
|     def test_not_equal_with_na(self, categories): | ||||
|         # https://github.com/pandas-dev/pandas/issues/32276 | ||||
|         c1 = Categorical.from_codes([-1, 0], categories=categories) | ||||
|         c2 = Categorical.from_codes([0, 1], categories=categories) | ||||
|  | ||||
|         result = c1 != c2 | ||||
|  | ||||
|         assert result.all() | ||||
|  | ||||
|     def test_compare_frame(self): | ||||
|         # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame | ||||
|         data = ["a", "b", 2, "a"] | ||||
|         cat = Categorical(data) | ||||
|  | ||||
|         df = DataFrame(cat) | ||||
|  | ||||
|         result = cat == df.T | ||||
|         expected = DataFrame([[True, True, True, True]]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = cat[::-1] != df.T | ||||
|         expected = DataFrame([[False, True, True, False]]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_compare_frame_raises(self, comparison_op): | ||||
|         # alignment raises unless we transpose | ||||
|         op = comparison_op | ||||
|         cat = Categorical(["a", "b", 2, "a"]) | ||||
|         df = DataFrame(cat) | ||||
|         msg = "Unable to coerce to Series, length must be 1: given 4" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             op(cat, df) | ||||
|  | ||||
|     def test_datetime_categorical_comparison(self): | ||||
|         dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True) | ||||
|         tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True])) | ||||
|         tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True])) | ||||
|  | ||||
|     def test_reflected_comparison_with_scalars(self): | ||||
|         # GH8658 | ||||
|         cat = Categorical([1, 2, 3], ordered=True) | ||||
|         tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True])) | ||||
|         tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True])) | ||||
|  | ||||
|     def test_comparison_with_unknown_scalars(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 | ||||
|         # and following comparisons with scalars not in categories should raise | ||||
|         # for unequal comps, but not for equal/not equal | ||||
|         cat = Categorical([1, 2, 3], ordered=True) | ||||
|  | ||||
|         msg = "Invalid comparison between dtype=category and int" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat < 4 | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat > 4 | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             4 < cat | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             4 > cat | ||||
|  | ||||
|         tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False])) | ||||
|         tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) | ||||
|  | ||||
|     def test_comparison_with_tuple(self): | ||||
|         cat = Categorical(np.array(["foo", (0, 1), 3, (0, 1)], dtype=object)) | ||||
|  | ||||
|         result = cat == "foo" | ||||
|         expected = np.array([True, False, False, False], dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = cat == (0, 1) | ||||
|         expected = np.array([False, True, False, True], dtype=bool) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = cat != (0, 1) | ||||
|         tm.assert_numpy_array_equal(result, ~expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore::RuntimeWarning") | ||||
|     def test_comparison_of_ordered_categorical_with_nan_to_scalar( | ||||
|         self, compare_operators_no_eq_ne | ||||
|     ): | ||||
|         # https://github.com/pandas-dev/pandas/issues/26504 | ||||
|         # BUG: fix ordered categorical comparison with missing values (#26504 ) | ||||
|         # and following comparisons with scalars in categories with missing | ||||
|         # values should be evaluated as False | ||||
|  | ||||
|         cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) | ||||
|         scalar = 2 | ||||
|         expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) | ||||
|         actual = getattr(cat, compare_operators_no_eq_ne)(scalar) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore::RuntimeWarning") | ||||
|     def test_comparison_of_ordered_categorical_with_nan_to_listlike( | ||||
|         self, compare_operators_no_eq_ne | ||||
|     ): | ||||
|         # https://github.com/pandas-dev/pandas/issues/26504 | ||||
|         # and following comparisons of missing values in ordered Categorical | ||||
|         # with listlike should be evaluated as False | ||||
|  | ||||
|         cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) | ||||
|         other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) | ||||
|         expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) | ||||
|         actual = getattr(cat, compare_operators_no_eq_ne)(other) | ||||
|         tm.assert_numpy_array_equal(actual, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data,reverse,base", | ||||
|         [(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])], | ||||
|     ) | ||||
|     def test_comparisons(self, data, reverse, base): | ||||
|         cat_rev = Series(Categorical(data, categories=reverse, ordered=True)) | ||||
|         cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True)) | ||||
|         cat = Series(Categorical(data, ordered=True)) | ||||
|         cat_base = Series( | ||||
|             Categorical(base, categories=cat.cat.categories, ordered=True) | ||||
|         ) | ||||
|         s = Series(base, dtype=object if base == list("bbb") else None) | ||||
|         a = np.array(base) | ||||
|  | ||||
|         # comparisons need to take categories ordering into account | ||||
|         res_rev = cat_rev > cat_rev_base | ||||
|         exp_rev = Series([True, False, False]) | ||||
|         tm.assert_series_equal(res_rev, exp_rev) | ||||
|  | ||||
|         res_rev = cat_rev < cat_rev_base | ||||
|         exp_rev = Series([False, False, True]) | ||||
|         tm.assert_series_equal(res_rev, exp_rev) | ||||
|  | ||||
|         res = cat > cat_base | ||||
|         exp = Series([False, False, True]) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         scalar = base[1] | ||||
|         res = cat > scalar | ||||
|         exp = Series([False, False, True]) | ||||
|         exp2 = cat.values > scalar | ||||
|         tm.assert_series_equal(res, exp) | ||||
|         tm.assert_numpy_array_equal(res.values, exp2) | ||||
|         res_rev = cat_rev > scalar | ||||
|         exp_rev = Series([True, False, False]) | ||||
|         exp_rev2 = cat_rev.values > scalar | ||||
|         tm.assert_series_equal(res_rev, exp_rev) | ||||
|         tm.assert_numpy_array_equal(res_rev.values, exp_rev2) | ||||
|  | ||||
|         # Only categories with same categories can be compared | ||||
|         msg = "Categoricals can only be compared if 'categories' are the same" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat > cat_rev | ||||
|  | ||||
|         # categorical cannot be compared to Series or numpy array, and also | ||||
|         # not the other way around | ||||
|         msg = ( | ||||
|             "Cannot compare a Categorical for op __gt__ with type " | ||||
|             r"<class 'numpy\.ndarray'>" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat > s | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat_rev > s | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat > a | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             cat_rev > a | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s < cat | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s < cat_rev | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             a < cat | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             a < cat_rev | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ctor", | ||||
|         [ | ||||
|             lambda *args, **kwargs: Categorical(*args, **kwargs), | ||||
|             lambda *args, **kwargs: Series(Categorical(*args, **kwargs)), | ||||
|         ], | ||||
|     ) | ||||
|     def test_unordered_different_order_equal(self, ctor): | ||||
|         # https://github.com/pandas-dev/pandas/issues/16014 | ||||
|         c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) | ||||
|         c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) | ||||
|         assert (c1 == c2).all() | ||||
|  | ||||
|         c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) | ||||
|         c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False) | ||||
|         assert (c1 != c2).all() | ||||
|  | ||||
|         c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) | ||||
|         c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False) | ||||
|         assert (c1 != c2).all() | ||||
|  | ||||
|         c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) | ||||
|         c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) | ||||
|         result = c1 == c2 | ||||
|         tm.assert_numpy_array_equal(np.array(result), np.array([True, False])) | ||||
|  | ||||
|     def test_unordered_different_categories_raises(self): | ||||
|         c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False) | ||||
|         c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=("Categoricals can only be compared")): | ||||
|             c1 == c2 | ||||
|  | ||||
|     def test_compare_different_lengths(self): | ||||
|         c1 = Categorical([], categories=["a", "b"]) | ||||
|         c2 = Categorical([], categories=["a"]) | ||||
|  | ||||
|         msg = "Categoricals can only be compared if 'categories' are the same." | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             c1 == c2 | ||||
|  | ||||
|     def test_compare_unordered_different_order(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- | ||||
|         # 349290078 | ||||
|         a = Categorical(["a"], categories=["a", "b"]) | ||||
|         b = Categorical(["b"], categories=["b", "a"]) | ||||
|         assert not a.equals(b) | ||||
|  | ||||
|     def test_numeric_like_ops(self): | ||||
|         df = DataFrame({"value": np.random.default_rng(2).integers(0, 10000, 100)}) | ||||
|         labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] | ||||
|         cat_labels = Categorical(labels, labels) | ||||
|  | ||||
|         df = df.sort_values(by=["value"], ascending=True) | ||||
|         df["value_group"] = pd.cut( | ||||
|             df.value, range(0, 10500, 500), right=False, labels=cat_labels | ||||
|         ) | ||||
|  | ||||
|         # numeric ops should not succeed | ||||
|         for op, str_rep in [ | ||||
|             ("__add__", r"\+"), | ||||
|             ("__sub__", "-"), | ||||
|             ("__mul__", r"\*"), | ||||
|             ("__truediv__", "/"), | ||||
|         ]: | ||||
|             msg = f"Series cannot perform the operation {str_rep}|unsupported operand" | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 getattr(df, op)(df) | ||||
|  | ||||
|         # reduction ops should not succeed (unless specifically defined, e.g. | ||||
|         # min/max) | ||||
|         s = df["value_group"] | ||||
|         for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]: | ||||
|             msg = f"does not support reduction '{op}'" | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 getattr(s, op)(numeric_only=False) | ||||
|  | ||||
|     def test_numeric_like_ops_series(self): | ||||
|         # numpy ops | ||||
|         s = Series(Categorical([1, 2, 3, 4])) | ||||
|         with pytest.raises(TypeError, match="does not support reduction 'sum'"): | ||||
|             np.sum(s) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "op, str_rep", | ||||
|         [ | ||||
|             ("__add__", r"\+"), | ||||
|             ("__sub__", "-"), | ||||
|             ("__mul__", r"\*"), | ||||
|             ("__truediv__", "/"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_numeric_like_ops_series_arith(self, op, str_rep): | ||||
|         # numeric ops on a Series | ||||
|         s = Series(Categorical([1, 2, 3, 4])) | ||||
|         msg = f"Series cannot perform the operation {str_rep}|unsupported operand" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             getattr(s, op)(2) | ||||
|  | ||||
|     def test_numeric_like_ops_series_invalid(self): | ||||
|         # invalid ufunc | ||||
|         s = Series(Categorical([1, 2, 3, 4])) | ||||
|         msg = "Object with dtype category cannot perform the numpy op log" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             np.log(s) | ||||
| @ -0,0 +1,111 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import Categorical | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "to_replace,value,expected,flip_categories", | ||||
|     [ | ||||
|         # one-to-one | ||||
|         (1, 2, [2, 2, 3], False), | ||||
|         (1, 4, [4, 2, 3], False), | ||||
|         (4, 1, [1, 2, 3], False), | ||||
|         (5, 6, [1, 2, 3], False), | ||||
|         # many-to-one | ||||
|         ([1], 2, [2, 2, 3], False), | ||||
|         ([1, 2], 3, [3, 3, 3], False), | ||||
|         ([1, 2], 4, [4, 4, 3], False), | ||||
|         ((1, 2, 4), 5, [5, 5, 3], False), | ||||
|         ((5, 6), 2, [1, 2, 3], False), | ||||
|         ([1], [2], [2, 2, 3], False), | ||||
|         ([1, 4], [5, 2], [5, 2, 3], False), | ||||
|         # GH49404: overlap between to_replace and value | ||||
|         ([1, 2, 3], [2, 3, 4], [2, 3, 4], False), | ||||
|         # GH50872, GH46884: replace with null | ||||
|         (1, None, [None, 2, 3], False), | ||||
|         (1, pd.NA, [None, 2, 3], False), | ||||
|         # check_categorical sorts categories, which crashes on mixed dtypes | ||||
|         (3, "4", [1, 2, "4"], False), | ||||
|         ([1, 2, "3"], "5", ["5", "5", 3], True), | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.filterwarnings( | ||||
|     "ignore:.*with CategoricalDtype is deprecated:FutureWarning" | ||||
| ) | ||||
| def test_replace_categorical_series(to_replace, value, expected, flip_categories): | ||||
|     """Check ``Series.replace`` on a categorical Series, non-inplace and inplace. | ||||
|  | ||||
|     Each case replaces ``to_replace`` with ``value`` in ``[1, 2, 3]`` and compares | ||||
|     both the returned Series and the Series modified in place against | ||||
|     ``expected``, without checking category order. | ||||
|     """ | ||||
|     # GH 31720 | ||||
|  | ||||
|     ser = pd.Series([1, 2, 3], dtype="category") | ||||
|     result = ser.replace(to_replace, value) | ||||
|     expected = pd.Series(expected, dtype="category") | ||||
|     # run the same replacement in place; ``ser`` itself is compared below | ||||
|     ser.replace(to_replace, value, inplace=True) | ||||
|  | ||||
|     if flip_categories: | ||||
|         # flagged cases expect the categories in reversed order | ||||
|         expected = expected.cat.set_categories(expected.cat.categories[::-1]) | ||||
|  | ||||
|     tm.assert_series_equal(expected, result, check_category_order=False) | ||||
|     tm.assert_series_equal(expected, ser, check_category_order=False) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "to_replace, value, result, expected_error_msg", | ||||
|     [ | ||||
|         ("b", "c", ["a", "c"], "Categorical.categories are different"), | ||||
|         ("c", "d", ["a", "b"], None), | ||||
|         # https://github.com/pandas-dev/pandas/issues/33288 | ||||
|         ("a", "a", ["a", "b"], None), | ||||
|         ("b", None, ["a", None], "Categorical.categories length are different"), | ||||
|     ], | ||||
| ) | ||||
| def test_replace_categorical(to_replace, value, result, expected_error_msg): | ||||
|     """Check ``Series.replace`` on a two-element Categorical, non-inplace and inplace. | ||||
|  | ||||
|     ``result`` is the list of values expected after the replacement; | ||||
|     ``expected_error_msg`` is the assertion message expected when the untouched | ||||
|     original is compared with that result, or None. | ||||
|     """ | ||||
|     # GH#26988 | ||||
|     cat = Categorical(["a", "b"]) | ||||
|     expected = Categorical(result) | ||||
|     msg = ( | ||||
|         r"The behavior of Series\.replace \(and DataFrame.replace\) " | ||||
|         "with CategoricalDtype" | ||||
|     ) | ||||
|     # a FutureWarning is only expected for the cases that carry an error message | ||||
|     warn = FutureWarning if expected_error_msg is not None else None | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         result = pd.Series(cat, copy=False).replace(to_replace, value)._values | ||||
|  | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|     if to_replace == "b":  # the "c" test is supposed to be unchanged | ||||
|         with pytest.raises(AssertionError, match=expected_error_msg): | ||||
|             # ensure non-inplace call does not affect original | ||||
|             tm.assert_categorical_equal(cat, expected) | ||||
|  | ||||
|     # the Series is built with copy=False and ``cat`` itself is checked after | ||||
|     # the in-place replace | ||||
|     ser = pd.Series(cat, copy=False) | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         ser.replace(to_replace, value, inplace=True) | ||||
|     tm.assert_categorical_equal(cat, expected) | ||||
|  | ||||
|  | ||||
| def test_replace_categorical_ea_dtype(): | ||||
|     # GH49404 | ||||
|     cat = Categorical(pd.array(["a", "b"], dtype="string")) | ||||
|     msg = ( | ||||
|         r"The behavior of Series\.replace \(and DataFrame.replace\) " | ||||
|         "with CategoricalDtype" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values | ||||
|     expected = Categorical(pd.array(["c", pd.NA], dtype="string")) | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_replace_maintain_ordering(): | ||||
|     # GH51016 | ||||
|     dtype = pd.CategoricalDtype([0, 1, 2], ordered=True) | ||||
|     ser = pd.Series([0, 1, 2], dtype=dtype) | ||||
|     msg = ( | ||||
|         r"The behavior of Series\.replace \(and DataFrame.replace\) " | ||||
|         "with CategoricalDtype" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = ser.replace(0, 2) | ||||
|     expected_dtype = pd.CategoricalDtype([1, 2], ordered=True) | ||||
|     expected = pd.Series([2, 1, 2], dtype=expected_dtype) | ||||
|     tm.assert_series_equal(expected, result, check_category_order=True) | ||||
| @ -0,0 +1,545 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Series, | ||||
|     date_range, | ||||
|     option_context, | ||||
|     period_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestCategoricalReprWithFactor: | ||||
|     def test_print(self, using_infer_string): | ||||
|         factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) | ||||
|         dtype = "str" if using_infer_string else "object" | ||||
|         expected = [ | ||||
|             "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", | ||||
|             f"Categories (3, {dtype}): ['a' < 'b' < 'c']", | ||||
|         ] | ||||
|         expected = "\n".join(expected) | ||||
|         actual = repr(factor) | ||||
|         assert actual == expected | ||||
|  | ||||
|  | ||||
| class TestCategoricalRepr: | ||||
|     def test_big_print(self): | ||||
|         codes = np.array([0, 1, 2, 0, 1, 2] * 100) | ||||
|         dtype = CategoricalDtype(categories=Index(["a", "b", "c"], dtype=object)) | ||||
|         factor = Categorical.from_codes(codes, dtype=dtype) | ||||
|         expected = [ | ||||
|             "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']", | ||||
|             "Length: 600", | ||||
|             "Categories (3, object): ['a', 'b', 'c']", | ||||
|         ] | ||||
|         expected = "\n".join(expected) | ||||
|  | ||||
|         actual = repr(factor) | ||||
|  | ||||
|         assert actual == expected | ||||
|  | ||||
|     def test_empty_print(self): | ||||
|         factor = Categorical([], Index(["a", "b", "c"], dtype=object)) | ||||
|         expected = "[], Categories (3, object): ['a', 'b', 'c']" | ||||
|         actual = repr(factor) | ||||
|         assert actual == expected | ||||
|  | ||||
|         assert expected == actual | ||||
|         factor = Categorical([], Index(["a", "b", "c"], dtype=object), ordered=True) | ||||
|         expected = "[], Categories (3, object): ['a' < 'b' < 'c']" | ||||
|         actual = repr(factor) | ||||
|         assert expected == actual | ||||
|  | ||||
|         factor = Categorical([], []) | ||||
|         expected = "[], Categories (0, object): []" | ||||
|         assert expected == repr(factor) | ||||
|  | ||||
|     def test_print_none_width(self): | ||||
|         # GH10087 | ||||
|         a = Series(Categorical([1, 2, 3, 4])) | ||||
|         exp = ( | ||||
|             "0    1\n1    2\n2    3\n3    4\n" | ||||
|             "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" | ||||
|         ) | ||||
|  | ||||
|         with option_context("display.width", None): | ||||
|             assert exp == repr(a) | ||||
|  | ||||
|     @pytest.mark.skipif( | ||||
|         using_string_dtype(), | ||||
|         reason="Change once infer_string is set to True by default", | ||||
|     ) | ||||
|     def test_unicode_print(self): | ||||
|         """Check the repr of 60-element string Categoricals, ASCII and Japanese. | ||||
|  | ||||
|         The Japanese case is checked twice: with default options and with | ||||
|         ``display.unicode.east_asian_width`` enabled. | ||||
|         """ | ||||
|         c = Categorical(["aaaaa", "bb", "cccc"] * 20) | ||||
|         expected = """\ | ||||
| ['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc'] | ||||
| Length: 60 | ||||
| Categories (3, object): ['aaaaa', 'bb', 'cccc']""" | ||||
|  | ||||
|         assert repr(c) == expected | ||||
|  | ||||
|         c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) | ||||
|         expected = """\ | ||||
| ['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] | ||||
| Length: 60 | ||||
| Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == expected | ||||
|  | ||||
|         # the east_asian_width option should not affect the Categorical repr, | ||||
|         # as it does not take the repr width into account | ||||
|         with option_context("display.unicode.east_asian_width", True): | ||||
|             c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) | ||||
|             expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] | ||||
| Length: 60 | ||||
| Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']"""  # noqa: E501 | ||||
|  | ||||
|             assert repr(c) == expected | ||||
|  | ||||
|     def test_categorical_repr(self): | ||||
|         """Check the repr of unordered integer Categoricals of increasing size. | ||||
|  | ||||
|         The 50- and 20-element cases expect elided values plus a ``Length`` line; | ||||
|         with 20 categories the Categories line is expected to be elided too. | ||||
|         """ | ||||
|         c = Categorical([1, 2, 3]) | ||||
|         exp = """[1, 2, 3] | ||||
| Categories (3, int64): [1, 2, 3]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # repeated values with explicit categories | ||||
|         c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) | ||||
|         exp = """[1, 2, 3, 1, 2, 3] | ||||
| Categories (3, int64): [1, 2, 3]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical([1, 2, 3, 4, 5] * 10) | ||||
|         exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] | ||||
| Length: 50 | ||||
| Categories (5, int64): [1, 2, 3, 4, 5]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(np.arange(20, dtype=np.int64)) | ||||
|         exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] | ||||
| Length: 20 | ||||
| Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_ordered(self): | ||||
|         """Check the repr of ordered integer Categoricals of increasing size. | ||||
|  | ||||
|         Same inputs as the unordered test; the expected Categories line joins | ||||
|         the categories with ``<`` instead of commas. | ||||
|         """ | ||||
|         c = Categorical([1, 2, 3], ordered=True) | ||||
|         exp = """[1, 2, 3] | ||||
| Categories (3, int64): [1 < 2 < 3]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) | ||||
|         exp = """[1, 2, 3, 1, 2, 3] | ||||
| Categories (3, int64): [1 < 2 < 3]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True) | ||||
|         exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] | ||||
| Length: 50 | ||||
| Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # with 20 categories the middle of the Categories line is elided | ||||
|         c = Categorical(np.arange(20, dtype=np.int64), ordered=True) | ||||
|         exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] | ||||
| Length: 20 | ||||
| Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_datetime(self): | ||||
|         """Check the repr of unordered Categoricals built from an hourly DatetimeIndex. | ||||
|  | ||||
|         Covers a naive and a US/Eastern index, each once as-is and once with the | ||||
|         values repeated while the categories stay the original five timestamps. | ||||
|         """ | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         c = Categorical(idx) | ||||
|  | ||||
|         exp = ( | ||||
|             "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " | ||||
|             "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" | ||||
|             "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " | ||||
|             "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" | ||||
|             "                                 2011-01-01 12:00:00, " | ||||
|             "2011-01-01 13:00:00]" | ||||
|             ""  # empty literal; adds nothing to the expected text | ||||
|         ) | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # values repeated, categories unchanged | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = ( | ||||
|             "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " | ||||
|             "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " | ||||
|             "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " | ||||
|             "2011-01-01 13:00:00]\n" | ||||
|             "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " | ||||
|             "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" | ||||
|             "                                 2011-01-01 12:00:00, " | ||||
|             "2011-01-01 13:00:00]" | ||||
|         ) | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # timezone-aware index | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern") | ||||
|         c = Categorical(idx) | ||||
|         exp = ( | ||||
|             "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " | ||||
|             "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " | ||||
|             "2011-01-01 13:00:00-05:00]\n" | ||||
|             "Categories (5, datetime64[ns, US/Eastern]): " | ||||
|             "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" | ||||
|             "                                             " | ||||
|             "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" | ||||
|             "                                             " | ||||
|             "2011-01-01 13:00:00-05:00]" | ||||
|         ) | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = ( | ||||
|             "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " | ||||
|             "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " | ||||
|             "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, " | ||||
|             "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " | ||||
|             "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" | ||||
|             "Categories (5, datetime64[ns, US/Eastern]): " | ||||
|             "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" | ||||
|             "                                             " | ||||
|             "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" | ||||
|             "                                             " | ||||
|             "2011-01-01 13:00:00-05:00]" | ||||
|         ) | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_datetime_ordered(self): | ||||
|         """Check the repr of ordered Categoricals built from an hourly DatetimeIndex. | ||||
|  | ||||
|         Covers a naive and a US/Eastern index, each once as-is and once with the | ||||
|         values repeated; the expected Categories line joins timestamps with ``<``. | ||||
|         """ | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] | ||||
| Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < | ||||
|                                  2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] | ||||
| Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < | ||||
|                                  2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # timezone-aware index | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern") | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] | ||||
| Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < | ||||
|                                              2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < | ||||
|                                              2011-01-01 13:00:00-05:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] | ||||
| Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < | ||||
|                                              2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < | ||||
|                                              2011-01-01 13:00:00-05:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_int_with_nan(self): | ||||
|         """Check the repr of integer categorical data containing a missing value. | ||||
|  | ||||
|         Both the Categorical and the category Series are expected to show the | ||||
|         missing entry as ``NaN`` while the Categories line lists only 1 and 2. | ||||
|         """ | ||||
|         c = Categorical([1, 2, np.nan]) | ||||
|         c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]""" | ||||
|         assert repr(c) == c_exp | ||||
|  | ||||
|         # same values, starting from an object Series converted to category | ||||
|         s = Series([1, 2, np.nan], dtype="object").astype("category") | ||||
|         s_exp = """0      1\n1      2\n2    NaN | ||||
| dtype: category | ||||
| Categories (2, int64): [1, 2]""" | ||||
|         assert repr(s) == s_exp | ||||
|  | ||||
|     def test_categorical_repr_period(self): | ||||
|         """Check the repr of unordered Categoricals built from a PeriodIndex. | ||||
|  | ||||
|         Covers an hourly and a monthly index, each once as-is and once with the | ||||
|         values repeated while the categories stay the original five periods. | ||||
|         """ | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         c = Categorical(idx) | ||||
|         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] | ||||
| Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, | ||||
|                             2011-01-01 13:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] | ||||
| Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, | ||||
|                             2011-01-01 13:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # monthly periods | ||||
|         idx = period_range("2011-01", freq="M", periods=5) | ||||
|         c = Categorical(idx) | ||||
|         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] | ||||
| Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] | ||||
| Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_period_ordered(self): | ||||
|         """Check the repr of ordered Categoricals built from a PeriodIndex. | ||||
|  | ||||
|         Covers an hourly and a monthly index, each once as-is and once with the | ||||
|         values repeated; the expected Categories line joins periods with ``<``. | ||||
|         """ | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] | ||||
| Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < | ||||
|                             2011-01-01 13:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] | ||||
| Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < | ||||
|                             2011-01-01 13:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # monthly periods | ||||
|         idx = period_range("2011-01", freq="M", periods=5) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] | ||||
| Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] | ||||
| Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_timedelta(self): | ||||
|         """Check the repr of unordered Categoricals built from a TimedeltaIndex. | ||||
|  | ||||
|         Covers a 5-element and a 20-element index, each once as-is and once with | ||||
|         the values repeated. The 20-element cases expect elided output plus a | ||||
|         ``Length`` line. | ||||
|         """ | ||||
|         idx = timedelta_range("1 days", periods=5) | ||||
|         c = Categorical(idx) | ||||
|         exp = """[1 days, 2 days, 3 days, 4 days, 5 days] | ||||
| Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] | ||||
| Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # 20 values: both the values and the categories are elided | ||||
|         idx = timedelta_range("1 hours", periods=20) | ||||
|         c = Categorical(idx) | ||||
|         exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] | ||||
| Length: 20 | ||||
| Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, | ||||
|                                    3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, | ||||
|                                    18 days 01:00:00, 19 days 01:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx) | ||||
|         exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] | ||||
| Length: 40 | ||||
| Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, | ||||
|                                    3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, | ||||
|                                    18 days 01:00:00, 19 days 01:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_repr_timedelta_ordered(self): | ||||
|         """Check the repr of ordered Categoricals built from a TimedeltaIndex. | ||||
|  | ||||
|         Covers a 5-element and a 20-element index, each once as-is and once with | ||||
|         the values repeated; the expected Categories line joins values with ``<``. | ||||
|         """ | ||||
|         idx = timedelta_range("1 days", periods=5) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[1 days, 2 days, 3 days, 4 days, 5 days] | ||||
| Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] | ||||
| Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         # 20 values: both the values and the categories are elided | ||||
|         idx = timedelta_range("1 hours", periods=20) | ||||
|         c = Categorical(idx, ordered=True) | ||||
|         exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] | ||||
| Length: 20 | ||||
| Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < | ||||
|                                    3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < | ||||
|                                    18 days 01:00:00 < 19 days 01:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|         c = Categorical(idx.append(idx), categories=idx, ordered=True) | ||||
|         exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] | ||||
| Length: 40 | ||||
| Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < | ||||
|                                    3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < | ||||
|                                    18 days 01:00:00 < 19 days 01:00:00]"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(c) == exp | ||||
|  | ||||
|     def test_categorical_index_repr(self): | ||||
|         idx = CategoricalIndex(Categorical([1, 2, 3])) | ||||
|         exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(idx) == exp | ||||
|  | ||||
|         i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64))) | ||||
|         exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_ordered(self): | ||||
|         i = CategoricalIndex(Categorical([1, 2, 3], ordered=True)) | ||||
|         exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64), ordered=True)) | ||||
|         exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_datetime(self): | ||||
|         """Check the repr of an unordered CategoricalIndex of hourly timestamps. | ||||
|  | ||||
|         Covers a naive and a US/Eastern DatetimeIndex of five values each. | ||||
|         """ | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', | ||||
|                   '2011-01-01 11:00:00', '2011-01-01 12:00:00', | ||||
|                   '2011-01-01 13:00:00'], | ||||
|                  categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # timezone-aware index | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern") | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', | ||||
|                   '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', | ||||
|                   '2011-01-01 13:00:00-05:00'], | ||||
|                  categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_datetime_ordered(self): | ||||
|         """Check the repr of ordered CategoricalIndex objects holding datetimes.""" | ||||
|         # five tz-naive hourly timestamps | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', | ||||
|                   '2011-01-01 11:00:00', '2011-01-01 12:00:00', | ||||
|                   '2011-01-01 13:00:00'], | ||||
|                  categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # same hours in US/Eastern: the expected repr carries the -05:00 offset | ||||
|         idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern") | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', | ||||
|                   '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', | ||||
|                   '2011-01-01 13:00:00-05:00'], | ||||
|                  categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # the tz-aware index appended to itself: ten values, still five categories | ||||
|         i = CategoricalIndex(Categorical(idx.append(idx), ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', | ||||
|                   '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', | ||||
|                   '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', | ||||
|                   '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', | ||||
|                   '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], | ||||
|                  categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_period(self): | ||||
|         """Check the repr of unordered CategoricalIndex objects holding periods.""" | ||||
|         # exercise several index lengths: 1, 2, 3 and 5 hourly periods, then the | ||||
|         # 5-period index appended to itself, then 5 monthly periods | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=1) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=2) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=3) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # from five periods on, the expected repr wraps over several lines | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', | ||||
|                   '2011-01-01 12:00', '2011-01-01 13:00'], | ||||
|                  categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # ten values, still five categories | ||||
|         i = CategoricalIndex(Categorical(idx.append(idx))) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', | ||||
|                   '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', | ||||
|                   '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', | ||||
|                   '2011-01-01 13:00'], | ||||
|                  categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # monthly frequency: the expected values are printed as year-month only | ||||
|         idx = period_range("2011-01", freq="M", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_period_ordered(self): | ||||
|         """Check the repr of ordered CategoricalIndex objects holding periods.""" | ||||
|         # five hourly periods | ||||
|         idx = period_range("2011-01-01 09:00", freq="h", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', | ||||
|                   '2011-01-01 12:00', '2011-01-01 13:00'], | ||||
|                  categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # five monthly periods | ||||
|         idx = period_range("2011-01", freq="M", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_timedelta(self): | ||||
|         """Check the repr of unordered CategoricalIndex objects holding timedeltas.""" | ||||
|         # five whole-day values: the expected repr prints them as "N days" | ||||
|         idx = timedelta_range("1 days", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # ten values starting at one hour: the expected categories listing is | ||||
|         # abbreviated with "..." | ||||
|         idx = timedelta_range("1 hours", periods=10) | ||||
|         i = CategoricalIndex(Categorical(idx)) | ||||
|         exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', | ||||
|                   '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', | ||||
|                   '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', | ||||
|                   '9 days 01:00:00'], | ||||
|                  categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=False, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_index_repr_timedelta_ordered(self): | ||||
|         """Check the repr of ordered CategoricalIndex objects holding timedeltas.""" | ||||
|         # five whole-day values: the expected repr prints them as "N days" | ||||
|         idx = timedelta_range("1 days", periods=5) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|         # ten values starting at one hour: the expected categories listing is | ||||
|         # abbreviated with "..." | ||||
|         idx = timedelta_range("1 hours", periods=10) | ||||
|         i = CategoricalIndex(Categorical(idx, ordered=True)) | ||||
|         exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', | ||||
|                   '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', | ||||
|                   '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', | ||||
|                   '9 days 01:00:00'], | ||||
|                  categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=True, dtype='category')"""  # noqa: E501 | ||||
|  | ||||
|         assert repr(i) == exp | ||||
|  | ||||
|     def test_categorical_str_repr(self): | ||||
|         """Check the repr of a Categorical mixing ints with a str value.""" | ||||
|         # GH 33676 | ||||
|         result = repr(Categorical([1, "2", 3, 4])) | ||||
|         # the str value is expected quoted, both in the values and the categories | ||||
|         expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" | ||||
|         assert result == expected | ||||
| @ -0,0 +1,128 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalSort: | ||||
|     def test_argsort(self): | ||||
|         c = Categorical([5, 3, 1, 4, 2], ordered=True) | ||||
|  | ||||
|         expected = np.array([2, 4, 1, 3, 0]) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             c.argsort(ascending=True), expected, check_dtype=False | ||||
|         ) | ||||
|  | ||||
|         expected = expected[::-1] | ||||
|         tm.assert_numpy_array_equal( | ||||
|             c.argsort(ascending=False), expected, check_dtype=False | ||||
|         ) | ||||
|  | ||||
|     def test_numpy_argsort(self): | ||||
|         c = Categorical([5, 3, 1, 4, 2], ordered=True) | ||||
|  | ||||
|         expected = np.array([2, 4, 1, 3, 0]) | ||||
|         tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) | ||||
|  | ||||
|         tm.assert_numpy_array_equal( | ||||
|             np.argsort(c, kind="mergesort"), expected, check_dtype=False | ||||
|         ) | ||||
|  | ||||
|         msg = "the 'axis' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.argsort(c, axis=0) | ||||
|  | ||||
|         msg = "the 'order' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.argsort(c, order="C") | ||||
|  | ||||
|     def test_sort_values(self): | ||||
|         # unordered cats are sortable | ||||
|         cat = Categorical(["a", "b", "b", "a"], ordered=False) | ||||
|         cat.sort_values() | ||||
|  | ||||
|         cat = Categorical(["a", "c", "b", "d"], ordered=True) | ||||
|  | ||||
|         # sort_values | ||||
|         res = cat.sort_values() | ||||
|         exp = np.array(["a", "b", "c", "d"], dtype=object) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, cat.categories) | ||||
|  | ||||
|         cat = Categorical( | ||||
|             ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True | ||||
|         ) | ||||
|         res = cat.sort_values() | ||||
|         exp = np.array(["a", "b", "c", "d"], dtype=object) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, cat.categories) | ||||
|  | ||||
|         res = cat.sort_values(ascending=False) | ||||
|         exp = np.array(["d", "c", "b", "a"], dtype=object) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, cat.categories) | ||||
|  | ||||
|         # sort (inplace order) | ||||
|         cat1 = cat.copy() | ||||
|         orig_codes = cat1._codes | ||||
|         cat1.sort_values(inplace=True) | ||||
|         assert cat1._codes is orig_codes | ||||
|         exp = np.array(["a", "b", "c", "d"], dtype=object) | ||||
|         tm.assert_numpy_array_equal(cat1.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, cat.categories) | ||||
|  | ||||
|         # reverse | ||||
|         cat = Categorical(["a", "c", "c", "b", "d"], ordered=True) | ||||
|         res = cat.sort_values(ascending=False) | ||||
|         exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object) | ||||
|         exp_categories = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp_val) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|     def test_sort_values_na_position(self): | ||||
|         # see gh-12882 | ||||
|         cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True) | ||||
|         exp_categories = Index([2, 5]) | ||||
|  | ||||
|         exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) | ||||
|         res = cat.sort_values()  # default arguments | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0]) | ||||
|         res = cat.sort_values(ascending=True, na_position="first") | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0]) | ||||
|         res = cat.sort_values(ascending=False, na_position="first") | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) | ||||
|         res = cat.sort_values(ascending=True, na_position="last") | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan]) | ||||
|         res = cat.sort_values(ascending=False, na_position="last") | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) | ||||
|         res = cat.sort_values(ascending=False, na_position="last") | ||||
|         exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) | ||||
|         exp_categories = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp_val) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
|  | ||||
|         cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) | ||||
|         res = cat.sort_values(ascending=False, na_position="first") | ||||
|         exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) | ||||
|         exp_categories = Index(["a", "b", "c", "d"]) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp_val) | ||||
|         tm.assert_index_equal(res.categories, exp_categories) | ||||
| @ -0,0 +1,26 @@ | ||||
| from pandas import Categorical | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class SubclassedCategorical(Categorical): | ||||
|     # Categorical subclass that adds nothing of its own; the tests in this | ||||
|     # module use it to check that results keep the subclass type. | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class TestCategoricalSubclassing: | ||||
|     """Check that Categorical operations return the subclass they were called on.""" | ||||
|  | ||||
|     def test_constructor(self): | ||||
|         """The constructor returns the subclass, equal to a plain Categorical.""" | ||||
|         sc = SubclassedCategorical(["a", "b", "c"]) | ||||
|         assert isinstance(sc, SubclassedCategorical) | ||||
|         tm.assert_categorical_equal(sc, Categorical(["a", "b", "c"])) | ||||
|  | ||||
|     def test_from_codes(self): | ||||
|         """``from_codes`` called on the subclass returns the subclass.""" | ||||
|         sc = SubclassedCategorical.from_codes([1, 0, 2], ["a", "b", "c"]) | ||||
|         assert isinstance(sc, SubclassedCategorical) | ||||
|         exp = Categorical.from_codes([1, 0, 2], ["a", "b", "c"]) | ||||
|         tm.assert_categorical_equal(sc, exp) | ||||
|  | ||||
|     def test_map(self): | ||||
|         """``map`` on a subclass instance returns the subclass.""" | ||||
|         sc = SubclassedCategorical(["a", "b", "c"]) | ||||
|         res = sc.map(lambda x: x.upper(), na_action=None) | ||||
|         assert isinstance(res, SubclassedCategorical) | ||||
|         exp = Categorical(["A", "B", "C"]) | ||||
|         tm.assert_categorical_equal(res, exp) | ||||
| @ -0,0 +1,89 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import Categorical | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=[True, False]) | ||||
| def allow_fill(request): | ||||
|     """ | ||||
|     Boolean 'allow_fill' parameter for Categorical.take. | ||||
|  | ||||
|     Parametrized over True and False, so a test using this fixture runs once | ||||
|     per value. | ||||
|     """ | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| class TestTake: | ||||
|     """Tests for ``Categorical.take``.""" | ||||
|  | ||||
|     # https://github.com/pandas-dev/pandas/issues/20664 | ||||
|  | ||||
|     def test_take_default_allow_fill(self): | ||||
|         """Without ``allow_fill``, -1 selects the last element and nothing warns.""" | ||||
|         cat = Categorical(["a", "b"]) | ||||
|         with tm.assert_produces_warning(None): | ||||
|             result = cat.take([0, -1]) | ||||
|  | ||||
|         assert result.equals(cat) | ||||
|  | ||||
|     def test_take_positive_no_warning(self): | ||||
|         """Taking with non-negative indices emits no warning.""" | ||||
|         cat = Categorical(["a", "b"]) | ||||
|         with tm.assert_produces_warning(None): | ||||
|             cat.take([0, 0]) | ||||
|  | ||||
|     def test_take_bounds(self, allow_fill): | ||||
|         """Out-of-bounds indices raise IndexError for both ``allow_fill`` values.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/20664 | ||||
|         cat = Categorical(["a", "b", "a"]) | ||||
|         # the expected error message differs between the two code paths | ||||
|         if allow_fill: | ||||
|             msg = "indices are out-of-bounds" | ||||
|         else: | ||||
|             msg = "index 4 is out of bounds for( axis 0 with)? size 3" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             cat.take([4, 5], allow_fill=allow_fill) | ||||
|  | ||||
|     def test_take_empty(self, allow_fill): | ||||
|         """Taking from an empty Categorical raises IndexError.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/20664 | ||||
|         cat = Categorical([], categories=["a", "b"]) | ||||
|         if allow_fill: | ||||
|             msg = "indices are out-of-bounds" | ||||
|         else: | ||||
|             msg = "cannot do a non-empty take from an empty axes" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             cat.take([0], allow_fill=allow_fill) | ||||
|  | ||||
|     def test_positional_take(self, ordered): | ||||
|         """``take`` selects by position and keeps categories and orderedness.""" | ||||
|         # NOTE(review): ``ordered`` is a fixture not defined in this module; | ||||
|         # presumably it comes from a conftest - confirm | ||||
|         cat = Categorical(["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered) | ||||
|         result = cat.take([0, 1, 2], allow_fill=False) | ||||
|         expected = Categorical( | ||||
|             ["a", "a", "b"], categories=cat.categories, ordered=ordered | ||||
|         ) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_positional_take_unobserved(self, ordered): | ||||
|         """A category with no values ("c") is kept in the result's categories.""" | ||||
|         cat = Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered) | ||||
|         result = cat.take([1, 0], allow_fill=False) | ||||
|         expected = Categorical(["b", "a"], categories=cat.categories, ordered=ordered) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_take_allow_fill(self): | ||||
|         """With ``allow_fill=True``, -1 produces a missing value.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/23296 | ||||
|         cat = Categorical(["a", "a", "b"]) | ||||
|         result = cat.take([0, -1, -1], allow_fill=True) | ||||
|         expected = Categorical(["a", np.nan, np.nan], categories=["a", "b"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_take_fill_with_negative_one(self): | ||||
|         """The value -1 works as ``fill_value`` when it is itself a category.""" | ||||
|         # -1 was a category | ||||
|         cat = Categorical([-1, 0, 1]) | ||||
|         result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) | ||||
|         expected = Categorical([-1, -1, 0], categories=[-1, 0, 1]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_take_fill_value(self): | ||||
|         """A ``fill_value`` that is an existing category fills the -1 positions.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/23296 | ||||
|         cat = Categorical(["a", "b", "c"]) | ||||
|         result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) | ||||
|         expected = Categorical(["a", "b", "a"], categories=["a", "b", "c"]) | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|     def test_take_fill_value_new_raises(self): | ||||
|         """A ``fill_value`` that is not an existing category raises TypeError.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/23296 | ||||
|         cat = Categorical(["a", "b", "c"]) | ||||
|         xpr = r"Cannot setitem on a Categorical with a new category \(d\)" | ||||
|         with pytest.raises(TypeError, match=xpr): | ||||
|             cat.take([0, 1, -1], fill_value="d", allow_fill=True) | ||||
| @ -0,0 +1,19 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCategoricalWarnings: | ||||
|     """Warning-related tests for Categorical.""" | ||||
|  | ||||
|     def test_tab_complete_warning(self, ip): | ||||
|         """Run IPython tab completion on a Categorical under a warning check.""" | ||||
|         # NOTE(review): ``ip`` is a fixture not defined in this module; | ||||
|         # presumably an IPython shell instance supplied by a conftest - confirm | ||||
|         # https://github.com/pandas-dev/pandas/issues/16409 | ||||
|         pytest.importorskip("IPython", minversion="6.0.0") | ||||
|         from IPython.core.completer import provisionalcompleter | ||||
|  | ||||
|         code = "import pandas as pd; c = pd.Categorical([])" | ||||
|         ip.run_cell(code) | ||||
|  | ||||
|         # GH 31324 newer jedi version raises Deprecation warning; | ||||
|         #  appears resolved 2021-02-02 | ||||
|         with tm.assert_produces_warning(None, raise_on_extra_warnings=False): | ||||
|             with provisionalcompleter("ignore"): | ||||
|                 list(ip.Completer.completions("c.", 1)) | ||||
| @ -0,0 +1,284 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs import iNaT | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import DatetimeArray | ||||
|  | ||||
|  | ||||
| class TestDatetimeArrayConstructor: | ||||
|     """Tests for building DatetimeArray via ``__init__`` and ``_from_sequence``.""" | ||||
|  | ||||
|     def test_from_sequence_invalid_type(self): | ||||
|         """``_from_sequence`` on a MultiIndex raises TypeError.""" | ||||
|         mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) | ||||
|         with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): | ||||
|             DatetimeArray._from_sequence(mi, dtype="M8[ns]") | ||||
|  | ||||
|     def test_only_1dim_accepted(self): | ||||
|         """3-dim and 0-dim input to the constructor raises ValueError.""" | ||||
|         arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") | ||||
|  | ||||
|         # each constructor call is also expected to emit the deprecation warning | ||||
|         depr_msg = "DatetimeArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="Only 1-dimensional"): | ||||
|                 # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 | ||||
|                 DatetimeArray(arr.reshape(2, 2, 1)) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="Only 1-dimensional"): | ||||
|                 # 0-dim | ||||
|                 DatetimeArray(arr[[0]].squeeze()) | ||||
|  | ||||
|     def test_freq_validation(self): | ||||
|         """A ``freq`` that conflicts with the values raises ValueError.""" | ||||
|         # GH#24623 check that invalid instances cannot be created with the | ||||
|         #  public constructor | ||||
|         arr = np.arange(5, dtype=np.int64) * 3600 * 10**9 | ||||
|  | ||||
|         msg = ( | ||||
|             "Inferred frequency h from passed values does not " | ||||
|             "conform to passed frequency W-SUN" | ||||
|         ) | ||||
|         depr_msg = "DatetimeArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 DatetimeArray(arr, freq="W") | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "meth", | ||||
|         [ | ||||
|             DatetimeArray._from_sequence, | ||||
|             pd.to_datetime, | ||||
|             pd.DatetimeIndex, | ||||
|         ], | ||||
|     ) | ||||
|     def test_mixing_naive_tzaware_raises(self, meth): | ||||
|         """Mixing tz-naive and tz-aware Timestamps raises ValueError.""" | ||||
|         # GH#24569 | ||||
|         arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]) | ||||
|  | ||||
|         # either of the two messages is accepted (regex alternation) | ||||
|         msg = ( | ||||
|             "Cannot mix tz-aware with tz-naive values|" | ||||
|             "Tz-aware datetime.datetime cannot be converted " | ||||
|             "to datetime64 unless utc=True" | ||||
|         ) | ||||
|  | ||||
|         for obj in [arr, arr[::-1]]: | ||||
|             # check that we raise regardless of whether naive is found | ||||
|             #  before aware or vice-versa | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 meth(obj) | ||||
|  | ||||
|     def test_from_pandas_array(self): | ||||
|         """Int64 values in a pandas array convert to the matching datetimes.""" | ||||
|         # multiples of 3600 * 10**9, i.e. hourly steps when read as nanoseconds | ||||
|         arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9 | ||||
|  | ||||
|         result = DatetimeArray._from_sequence(arr, dtype="M8[ns]")._with_freq("infer") | ||||
|  | ||||
|         expected = pd.date_range("1970-01-01", periods=5, freq="h")._data | ||||
|         tm.assert_datetime_array_equal(result, expected) | ||||
|  | ||||
|     def test_mismatched_timezone_raises(self): | ||||
|         """A ``dtype`` that disagrees with the data's timezone raises TypeError.""" | ||||
|         depr_msg = "DatetimeArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             arr = DatetimeArray( | ||||
|                 np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"), | ||||
|                 dtype=DatetimeTZDtype(tz="US/Central"), | ||||
|             ) | ||||
|         # US/Central data paired with a US/Eastern dtype | ||||
|         dtype = DatetimeTZDtype(tz="US/Eastern") | ||||
|         msg = r"dtype=datetime64\[ns.*\] does not match data dtype datetime64\[ns.*\]" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 DatetimeArray(arr, dtype=dtype) | ||||
|  | ||||
|         # also with mismatched tzawareness | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 DatetimeArray(arr, dtype=np.dtype("M8[ns]")) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 DatetimeArray(arr.tz_localize(None), dtype=arr.dtype) | ||||
|  | ||||
|     def test_non_array_raises(self): | ||||
|         """Passing a plain list to the constructor raises ValueError.""" | ||||
|         depr_msg = "DatetimeArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="list"): | ||||
|                 DatetimeArray([1, 2, 3]) | ||||
|  | ||||
|     def test_bool_dtype_raises(self): | ||||
|         """Boolean input is rejected by every construction path tried here.""" | ||||
|         arr = np.array([1, 2, 3], dtype="bool") | ||||
|  | ||||
|         # the constructor raises ValueError ... | ||||
|         depr_msg = "DatetimeArray.__init__ is deprecated" | ||||
|         msg = "Unexpected value for 'dtype': 'bool'. Must be" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 DatetimeArray(arr) | ||||
|  | ||||
|         # ... while the other entry points raise TypeError with a shared message | ||||
|         msg = r"dtype bool cannot be converted to datetime64\[ns\]" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             DatetimeArray._from_sequence(arr, dtype="M8[ns]") | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             pd.DatetimeIndex(arr) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             pd.to_datetime(arr) | ||||
|  | ||||
|     def test_incorrect_dtype_raises(self): | ||||
|         """The dtypes "category", "m8[s]" and "M8[D]" each raise ValueError.""" | ||||
|         depr_msg = "DatetimeArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): | ||||
|                 DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category") | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): | ||||
|                 DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="m8[s]") | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): | ||||
|                 DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="M8[D]") | ||||
|  | ||||
|     def test_mismatched_values_dtype_units(self): | ||||
|         """Values in "M8[s]" with a nanosecond dtype raise ValueError.""" | ||||
|         arr = np.array([1, 2, 3], dtype="M8[s]") | ||||
|         dtype = np.dtype("M8[ns]") | ||||
|         msg = "Values resolution does not match dtype." | ||||
|         depr_msg = "DatetimeArray.__init__ is deprecated" | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 DatetimeArray(arr, dtype=dtype) | ||||
|  | ||||
|         # same check with a tz-aware nanosecond dtype | ||||
|         dtype2 = DatetimeTZDtype(tz="UTC", unit="ns") | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 DatetimeArray(arr, dtype=dtype2) | ||||
|  | ||||
|     def test_freq_infer_raises(self): | ||||
|         """``freq="infer"`` in the constructor raises ValueError.""" | ||||
|         depr_msg = "DatetimeArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="Frequency inference"): | ||||
|                 DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer") | ||||
|  | ||||
|     def test_copy(self): | ||||
|         """``copy=False`` reuses the input ndarray; ``copy=True`` does not.""" | ||||
|         data = np.array([1, 2, 3], dtype="M8[ns]") | ||||
|         arr = DatetimeArray._from_sequence(data, copy=False) | ||||
|         assert arr._ndarray is data | ||||
|  | ||||
|         arr = DatetimeArray._from_sequence(data, copy=True) | ||||
|         assert arr._ndarray is not data | ||||
|  | ||||
|     @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) | ||||
|     def test_numpy_datetime_unit(self, unit): | ||||
|         """The unit of the input datetime64 data is kept on array and elements.""" | ||||
|         data = np.array([1, 2, 3], dtype=f"M8[{unit}]") | ||||
|         arr = DatetimeArray._from_sequence(data) | ||||
|         assert arr.unit == unit | ||||
|         assert arr[0].unit == unit | ||||
|  | ||||
|  | ||||
| class TestSequenceToDT64NS: | ||||
|     """Tests for ``DatetimeArray._from_sequence`` with tz-aware dtypes.""" | ||||
|  | ||||
|     def test_tz_dtype_mismatch_raises(self): | ||||
|         """Tz-aware data with a dtype in a different timezone raises TypeError.""" | ||||
|         arr = DatetimeArray._from_sequence( | ||||
|             ["2000"], dtype=DatetimeTZDtype(tz="US/Central") | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match="data is already tz-aware"): | ||||
|             DatetimeArray._from_sequence(arr, dtype=DatetimeTZDtype(tz="UTC")) | ||||
|  | ||||
|     def test_tz_dtype_matches(self): | ||||
|         """Tz-aware data with its own dtype round-trips to an equal array.""" | ||||
|         dtype = DatetimeTZDtype(tz="US/Central") | ||||
|         arr = DatetimeArray._from_sequence(["2000"], dtype=dtype) | ||||
|         result = DatetimeArray._from_sequence(arr, dtype=dtype) | ||||
|         tm.assert_equal(arr, result) | ||||
|  | ||||
|     @pytest.mark.parametrize("order", ["F", "C"]) | ||||
|     def test_2d(self, order): | ||||
|         """A 2D object array converts like its raveled form, reshaped back.""" | ||||
|         dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") | ||||
|         arr = np.array(dti, dtype=object).reshape(3, 2) | ||||
|         # the "F" case uses the transposed array | ||||
|         if order == "F": | ||||
|             arr = arr.T | ||||
|  | ||||
|         res = DatetimeArray._from_sequence(arr, dtype=dti.dtype) | ||||
|         expected = DatetimeArray._from_sequence(arr.ravel(), dtype=dti.dtype).reshape( | ||||
|             arr.shape | ||||
|         ) | ||||
|         tm.assert_datetime_array_equal(res, expected) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Arrow interaction | ||||
|  | ||||
|  | ||||
| # Integer timestamp values covering zero, a missing value, iNaT and values next | ||||
| # to the int64 limits; used below where the pyarrow and pandas units are equal. | ||||
| EXTREME_VALUES = [0, 123456789, None, iNaT, 2**63 - 1, -(2**63) + 1] | ||||
| # Used below for the ns -> s and us -> ms cases; presumably chosen as a multiple | ||||
| # of 10**9 so that the coarser unit represents the values exactly - confirm. | ||||
| FINE_TO_COARSE_SAFE = [123_000_000_000, None, -123_000_000_000] | ||||
| # Used below for the ms -> us and s -> ns cases; presumably small enough that | ||||
| # converting to the finer unit cannot overflow - confirm. | ||||
| COARSE_TO_FINE_SAFE = [123, None, -123] | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    ("pa_unit", "pd_unit", "pa_tz", "pd_tz", "data"),
    [
        ("s", "s", "UTC", "UTC", EXTREME_VALUES),
        ("ms", "ms", "UTC", "Europe/Berlin", EXTREME_VALUES),
        ("us", "us", "US/Eastern", "UTC", EXTREME_VALUES),
        ("ns", "ns", "US/Central", "Asia/Kolkata", EXTREME_VALUES),
        ("ns", "s", "UTC", "UTC", FINE_TO_COARSE_SAFE),
        ("us", "ms", "UTC", "Europe/Berlin", FINE_TO_COARSE_SAFE),
        ("ms", "us", "US/Eastern", "UTC", COARSE_TO_FINE_SAFE),
        ("s", "ns", "US/Central", "Asia/Kolkata", COARSE_TO_FINE_SAFE),
    ],
)
def test_from_arrow_with_different_units_and_timezones_with(
    pa_unit, pd_unit, pa_tz, pd_tz, data
):
    """
    __from_arrow__ on a pyarrow timestamp array matches building the values in
    the arrow unit (UTC) and casting to the requested pandas dtype.
    """
    pa = pytest.importorskip("pyarrow")

    arrow_type = pa.timestamp(pa_unit, tz=pa_tz)
    arrow_arr = pa.array(data, type=arrow_type)
    target = DatetimeTZDtype(unit=pd_unit, tz=pd_tz)

    result = target.__from_arrow__(arrow_arr)
    in_arrow_unit = DatetimeArray._from_sequence(data, dtype=f"M8[{pa_unit}, UTC]")
    expected = in_arrow_unit.astype(target, copy=False)
    tm.assert_extension_array_equal(result, expected)

    # a chunked array wrapping the same data gives the same result
    result = target.__from_arrow__(pa.chunked_array([arrow_arr]))
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    ("unit", "tz"),
    [
        ("s", "UTC"),
        ("ms", "Europe/Berlin"),
        ("us", "US/Eastern"),
        ("ns", "Asia/Kolkata"),
        ("ns", "UTC"),
    ],
)
def test_from_arrow_from_empty(unit, tz):
    """__from_arrow__ on an empty pyarrow array gives an empty tz-aware array."""
    pa = pytest.importorskip("pyarrow")

    data = []
    arrow_arr = pa.array(data)
    target = DatetimeTZDtype(unit=unit, tz=tz)

    result = target.__from_arrow__(arrow_arr)
    naive = DatetimeArray._from_sequence(np.array(data, dtype=f"datetime64[{unit}]"))
    expected = naive.tz_localize(tz=tz)
    tm.assert_extension_array_equal(result, expected)

    # same expectation when the empty array is wrapped in a chunked array
    result = target.__from_arrow__(pa.chunked_array([arrow_arr]))
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_from_arrow_from_integers():
    """__from_arrow__ accepts a pyarrow array built from plain Python integers."""
    pa = pytest.importorskip("pyarrow")

    data = [0, 123456789, None, 2**63 - 1, iNaT, -123456789]
    arrow_arr = pa.array(data)
    target = DatetimeTZDtype(unit="ns", tz="UTC")

    result = target.__from_arrow__(arrow_arr)
    naive = DatetimeArray._from_sequence(np.array(data, dtype="datetime64[ns]"))
    expected = naive.tz_localize("UTC")
    tm.assert_extension_array_equal(result, expected)

    # same expectation for the chunked-array form
    result = target.__from_arrow__(pa.chunked_array([arrow_arr]))
    tm.assert_extension_array_equal(result, expected)
| @ -0,0 +1,44 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import DatetimeArray | ||||
|  | ||||
|  | ||||
class TestAccumulator:
    """Tests of DatetimeArray._accumulate."""

    def test_accumulators_freq(self):
        """cummin and cummax on an array that carries an inferred freq."""
        # GH#50297
        dates = ["2000-01-01", "2000-01-02", "2000-01-03"]
        arr = DatetimeArray._from_sequence(dates, dtype="M8[ns]")
        arr = arr._with_freq("infer")

        result = arr._accumulate("cummin")
        expected = DatetimeArray._from_sequence(["2000-01-01"] * 3, dtype="M8[ns]")
        tm.assert_datetime_array_equal(result, expected)

        # the input is increasing, so cummax has the same values as the input
        result = arr._accumulate("cummax")
        expected = DatetimeArray._from_sequence(dates, dtype="M8[ns]")
        tm.assert_datetime_array_equal(result, expected)

    @pytest.mark.parametrize("func", ["cumsum", "cumprod"])
    def test_accumulators_disallowed(self, func):
        """cumsum and cumprod raise TypeError."""
        # GH#50297
        dates = ["2000-01-01", "2000-01-02"]
        arr = DatetimeArray._from_sequence(dates, dtype="M8[ns]")
        arr = arr._with_freq("infer")
        with pytest.raises(TypeError, match=f"Accumulation {func}"):
            arr._accumulate(func)
| @ -0,0 +1,183 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import NaT | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import DatetimeArray | ||||
|  | ||||
|  | ||||
class TestReductions:
    """Tests of the min/max/median/mean reductions on DatetimeArray."""

    @staticmethod
    def _dtype_for(tz):
        # tz-aware dtype when a timezone is given, otherwise plain M8[ns]
        if tz is None:
            return np.dtype("M8[ns]")
        return DatetimeTZDtype(tz=tz)

    @pytest.fixture(params=["s", "ms", "us", "ns"])
    def unit(self, request):
        """Fixture returning each of the unit strings 's', 'ms', 'us', 'ns'."""
        return request.param

    @pytest.fixture
    def arr1d(self, tz_naive_fixture):
        """Fixture returning DatetimeArray with parametrized timezones"""
        dates = [
            "2000-01-03",
            "2000-01-03",
            "NaT",
            "2000-01-02",
            "2000-01-05",
            "2000-01-04",
        ]
        return DatetimeArray._from_sequence(
            dates, dtype=self._dtype_for(tz_naive_fixture)
        )

    def test_min_max(self, arr1d, unit):
        """min/max return Timestamps in the array's unit; NaT with skipna=False."""
        arr = arr1d.as_unit(unit)
        tz = arr.tz

        for reduction, date in (("min", "2000-01-02"), ("max", "2000-01-05")):
            result = getattr(arr, reduction)()
            expected = pd.Timestamp(date, tz=tz).as_unit(unit)
            assert result == expected
            assert result.unit == expected.unit

        # the array holds a NaT, which is returned once skipna is disabled
        for reduction in ("min", "max"):
            assert getattr(arr, reduction)(skipna=False) is NaT

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    @pytest.mark.parametrize("skipna", [True, False])
    def test_min_max_empty(self, skipna, tz):
        """min/max of an empty array are NaT."""
        empty = DatetimeArray._from_sequence([], dtype=self._dtype_for(tz))
        assert empty.min(skipna=skipna) is NaT
        assert empty.max(skipna=skipna) is NaT

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    @pytest.mark.parametrize("skipna", [True, False])
    def test_median_empty(self, skipna, tz):
        """median of empty 1D and (0, 3)-shaped arrays."""
        empty = DatetimeArray._from_sequence([], dtype=self._dtype_for(tz))
        assert empty.median(skipna=skipna) is NaT

        empty2d = empty.reshape(0, 3)
        build = type(empty2d)._from_sequence

        result = empty2d.median(axis=0, skipna=skipna)
        expected = build([NaT, NaT, NaT], dtype=empty2d.dtype)
        tm.assert_equal(result, expected)

        result = empty2d.median(axis=1, skipna=skipna)
        expected = build([], dtype=empty2d.dtype)
        tm.assert_equal(result, expected)

    def test_median(self, arr1d):
        """median skips NaT by default and equals the first element here."""
        first = arr1d[0]

        assert arr1d.median() == first
        assert arr1d.median(skipna=False) is NaT

        # with the NaT dropped, skipna=False no longer matters
        assert arr1d.dropna().median(skipna=False) == first

        assert arr1d.median(axis=0) == first

    def test_median_axis(self, arr1d):
        """axis=0 matches the default; axis=1 is rejected for a 1D array."""
        assert arr1d.median(axis=0) == arr1d.median()
        assert arr1d.median(axis=0, skipna=False) is NaT

        msg = r"abs\(axis\) must be less than ndim"
        with pytest.raises(ValueError, match=msg):
            arr1d.median(axis=1)

    @pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
    def test_median_2d(self, arr1d):
        """median over each axis of the fixture reshaped to a single row."""
        row = arr1d.reshape(1, -1)
        build = type(row)._from_sequence

        # axis = None
        assert row.median() == arr1d.median()
        assert row.median(skipna=False) is NaT

        # axis = 0
        result = row.median(axis=0)
        tm.assert_equal(result, arr1d)

        # Since column 3 is all-NaT, we get NaT there with or without skipna
        result = row.median(axis=0, skipna=False)
        tm.assert_equal(result, arr1d)

        # axis = 1
        result = row.median(axis=1)
        expected = build([arr1d.median()], dtype=row.dtype)
        tm.assert_equal(result, expected)

        result = row.median(axis=1, skipna=False)
        expected = build([NaT], dtype=row.dtype)
        tm.assert_equal(result, expected)

    def test_mean(self, arr1d):
        """mean skips NaT by default and returns NaT with skipna=False."""
        # manually verified result
        expected = arr1d[0] + 0.4 * pd.Timedelta(days=1)

        assert arr1d.mean() == expected
        assert arr1d.mean(skipna=False) is NaT

        assert arr1d.dropna().mean(skipna=False) == expected

        assert arr1d.mean(axis=0) == expected

    def test_mean_2d(self):
        """mean along axis 0, axis 1 and over everything for a (3, 2) array."""
        dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
        dta = dti._data.reshape(3, 2)

        by_column = dta.mean(axis=0)
        tm.assert_datetime_array_equal(by_column, dta[1])

        by_row = dta.mean(axis=1)
        expected = dta[:, 0] + pd.Timedelta(hours=12)
        tm.assert_datetime_array_equal(by_row, expected)

        overall = dta.mean(axis=None)
        assert overall == dti.mean()

    @pytest.mark.parametrize("skipna", [True, False])
    def test_mean_empty(self, arr1d, skipna):
        """mean of empty 1D and (0, 3)-shaped arrays."""
        empty = arr1d[:0]

        assert empty.mean(skipna=skipna) is NaT

        empty2d = empty.reshape(0, 3)
        result = empty2d.mean(axis=0, skipna=skipna)
        expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=empty.dtype)
        tm.assert_datetime_array_equal(result, expected)

        result = empty2d.mean(axis=1, skipna=skipna)
        # i.e. 1D, empty
        tm.assert_datetime_array_equal(result, empty)

        assert empty2d.mean(axis=None, skipna=skipna) is NaT
| @ -0,0 +1,48 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas.core.arrays.floating import ( | ||||
|     Float32Dtype, | ||||
|     Float64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[Float32Dtype, Float64Dtype])
def dtype(request):
    """Parametrized fixture returning an instance of each float 'dtype' class"""
    dtype_cls = request.param
    return dtype_cls()
|  | ||||
|  | ||||
@pytest.fixture
def data(dtype):
    """Fixture returning a float array with two pd.NA entries for 'dtype'"""
    values = list(np.arange(0.1, 0.9, 0.1))
    values.append(pd.NA)
    values.extend(np.arange(1, 9.8, 0.1))
    values.append(pd.NA)
    values.extend([9.9, 10.0])
    return pd.array(values, dtype=dtype)
|  | ||||
|  | ||||
@pytest.fixture
def data_missing(dtype):
    """
    Fixture returning a two-element array built from [np.nan, 0.1] with the
    parametrized float 'dtype'.
    """
    values = [np.nan, 0.1]
    return pd.array(values, dtype=dtype)
|  | ||||
|  | ||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture returning 'data' or 'data_missing' float arrays.

    Used to test dtype conversion with and without missing values.
    """
    # look the array up by the name of the current parameter
    by_name = {"data": data, "data_missing": data_missing}
    return by_name.get(request.param)
| @ -0,0 +1,240 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import FloatingArray | ||||
|  | ||||
| # Basic test for the arithmetic array ops | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "opname, exp",
    [
        ("add", [1.1, 2.2, None, None, 5.5]),
        ("mul", [0.1, 0.4, None, None, 2.5]),
        ("sub", [0.9, 1.8, None, None, 4.5]),
        ("truediv", [10.0, 10.0, None, None, 10.0]),
        ("floordiv", [9.0, 9.0, None, None, 10.0]),
        ("mod", [0.1, 0.2, None, None, 0.0]),
    ],
    ids=["add", "mul", "sub", "div", "floordiv", "mod"],
)
def test_array_op(dtype, opname, exp):
    """Binary operators between two float arrays; a missing operand gives NA."""
    left = pd.array([1.0, 2.0, None, 4.0, 5.0], dtype=dtype)
    right = pd.array([0.1, 0.2, 0.3, None, 0.5], dtype=dtype)

    binary_op = getattr(operator, opname)

    result = binary_op(left, right)
    expected = pd.array(exp, dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
def test_divide_by_zero(dtype, zero, negative):
    """Dividing by zero gives nan/inf/-inf; the sign flips for negative zero."""
    # TODO pending NA/NaN discussion
    # https://github.com/pandas-dev/pandas/issues/32265/
    numerator = pd.array([0, 1, -1, None], dtype=dtype)
    result = numerator / zero

    values = np.array([np.nan, np.inf, -np.inf, np.nan], dtype=dtype.numpy_dtype)
    mask = np.array([False, False, False, True])
    expected = FloatingArray(values, mask)
    if negative:
        expected *= -1
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_pow_scalar(dtype):
    """Power with a scalar on either side, including pd.NA and np.nan."""
    a = pd.array([-1, 0, 1, None, 2], dtype=dtype)

    # array ** scalar
    forward_cases = [
        (0, [1, 1, 1, 1, 1]),
        (1, [-1, 0, 1, None, 2]),
        (pd.NA, [None, None, 1, None, None]),
    ]
    for exponent, values in forward_cases:
        result = a**exponent
        expected = pd.array(values, dtype=dtype)
        tm.assert_extension_array_equal(result, expected)

    result = a**np.nan
    # TODO np.nan should be converted to pd.NA / missing before operation?
    nan_values = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype)
    expected = FloatingArray(nan_values, mask=a._mask)
    tm.assert_extension_array_equal(result, expected)

    # reversed
    a = a[1:]  # Can't raise integers to negative powers.

    # scalar ** array
    reversed_cases = [
        (0, [1, 0, None, 0]),
        (1, [1, 1, 1, 1]),
        (pd.NA, [1, None, None, None]),
    ]
    for base, values in reversed_cases:
        result = base**a
        expected = pd.array(values, dtype=dtype)
        tm.assert_extension_array_equal(result, expected)

    result = np.nan**a
    nan_values = np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype)
    expected = FloatingArray(nan_values, mask=a._mask)
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_pow_array(dtype):
    """Elementwise power over all pairings of 0, 1 and missing."""
    base = pd.array([0, 0, 0, 1, 1, 1, None, None, None], dtype=dtype)
    exponent = pd.array([0, 1, None, 0, 1, None, 0, 1, None], dtype=dtype)
    result = base**exponent
    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None], dtype=dtype)
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_rpow_one_to_na():
    """A numpy base of 1.0 raised to a missing exponent gives 1.0, not NA."""
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    exponents = pd.array([np.nan, np.nan], dtype="Float64")
    bases = np.array([1.0, 2.0])
    result = bases**exponents
    expected = pd.array([1.0, np.nan], dtype="Float64")
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("other", [0, 0.5])
def test_arith_zero_dim_ndarray(other):
    """Adding a zero-dimensional ndarray matches adding the plain scalar."""
    arr = pd.array([1, None, 2], dtype="Float64")
    zero_dim = np.array(other)
    result = arr + zero_dim
    expected = arr + other
    tm.assert_equal(result, expected)
|  | ||||
|  | ||||
| # Test generic characteristics / errors | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_error_invalid_values(data, all_arithmetic_operators):
    """Arithmetic with strings, Timestamps or datetime Series raises TypeError."""
    ser = pd.Series(data)
    ops = getattr(ser, all_arithmetic_operators)

    # invalid scalars
    scalar_patterns = [
        r"can only perform ops with numeric values",
        r"FloatingArray cannot perform the operation mod",
        "unsupported operand type",
        "not all arguments converted during string formatting",
        "can't multiply sequence by non-int of type 'float'",
        "ufunc 'subtract' cannot use operands with types dtype",
        r"can only concatenate str \(not \"float\"\) to str",
        "ufunc '.*' not supported for the input types, and the inputs could not",
        "ufunc '.*' did not contain a loop with signature matching types",
        "Concatenation operation is not implemented for NumPy arrays",
        "has no kernel",
        "not implemented",
        "not supported for dtype",
        "Can only string multiply by an integer",
    ]
    msg = "|".join(scalar_patterns)
    with pytest.raises(TypeError, match=msg):
        ops("foo")
    with pytest.raises(TypeError, match=msg):
        ops(pd.Timestamp("20180101"))

    # invalid array-likes
    with pytest.raises(TypeError, match=msg):
        ops(pd.Series("foo", index=ser.index))

    # a datetime Series operand is matched against its own set of messages
    datetime_patterns = [
        "can only perform ops with numeric values",
        "cannot perform .* with this index type: DatetimeArray",
        "Addition/subtraction of integers and integer-arrays "
        "with DatetimeArray is no longer supported. *",
        "unsupported operand type",
        "not all arguments converted during string formatting",
        "can't multiply sequence by non-int of type 'float'",
        "ufunc 'subtract' cannot use operands with types dtype",
        (
            "ufunc 'add' cannot use operands with types "
            rf"dtype\('{tm.ENDIAN}M8\[ns\]'\)"
        ),
        r"ufunc 'add' cannot use operands with types dtype\('float\d{2}'\)",
        "cannot subtract DatetimeArray from ndarray",
        "has no kernel",
        "not implemented",
        "not supported for dtype",
    ]
    msg = "|".join(datetime_patterns)
    with pytest.raises(TypeError, match=msg):
        ops(pd.Series(pd.date_range("20180101", periods=len(ser))))
|  | ||||
|  | ||||
| # Various | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_cross_type_arithmetic():
    """Arithmetic between Float64, Float32 and numpy float64 columns."""
    columns = {
        "A": pd.array([1, 2, np.nan], dtype="Float64"),
        "B": pd.array([1, np.nan, 3], dtype="Float32"),
        "C": np.array([1, 2, 3], dtype="float64"),
    }
    df = pd.DataFrame(columns)

    # Float64 + numpy float64
    result = df["A"] + df["C"]
    expected = pd.Series([2, 4, np.nan], dtype="Float64")
    tm.assert_series_equal(result, expected)

    # comparing the result gives a nullable boolean Series
    result = (df["A"] + df["C"]) * 3 == 12
    expected = pd.Series([False, True, None], dtype="boolean")
    tm.assert_series_equal(result, expected)

    # Float64 + Float32
    result = df["A"] + df["B"]
    expected = pd.Series([2, np.nan, np.nan], dtype="Float64")
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "source, neg_target, abs_target",
    [
        ([1.1, 2.2, 3.3], [-1.1, -2.2, -3.3], [1.1, 2.2, 3.3]),
        ([1.1, 2.2, None], [-1.1, -2.2, None], [1.1, 2.2, None]),
        ([-1.1, 0.0, 1.1], [1.1, 0.0, -1.1], [1.1, 0.0, 1.1]),
    ],
)
def test_unary_float_operators(float_ea_dtype, source, neg_target, abs_target):
    """Unary -, + and abs() on a float array; unary + must not share memory."""
    # GH38794
    arr = pd.array(source, dtype=float_ea_dtype)

    neg_result = -arr
    pos_result = +arr
    abs_result = abs(arr)

    expected_neg = pd.array(neg_target, dtype=float_ea_dtype)
    expected_abs = pd.array(abs_target, dtype=float_ea_dtype)

    tm.assert_extension_array_equal(neg_result, expected_neg)
    tm.assert_extension_array_equal(pos_result, arr)
    assert not tm.shares_memory(pos_result, arr)
    tm.assert_extension_array_equal(abs_result, expected_abs)
|  | ||||
|  | ||||
def test_bitwise(dtype):
    """The |, & and ^ operators raise TypeError for float arrays."""
    lhs = pd.array([1, None, 3, 4], dtype=dtype)
    rhs = pd.array([None, 3, 5, 4], dtype=dtype)
    msg = "unsupported operand type"

    with pytest.raises(TypeError, match=msg):
        lhs | rhs
    with pytest.raises(TypeError, match=msg):
        lhs & rhs
    with pytest.raises(TypeError, match=msg):
        lhs ^ rhs
| @ -0,0 +1,135 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_astype():
    """astype from Float64 to numpy dtypes, with and without missing values."""
    # with missing values
    with_na = pd.array([0.1, 0.2, None], dtype="Float64")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        with_na.astype("int64")

    with pytest.raises(ValueError, match="cannot convert float NaN to bool"):
        with_na.astype("bool")

    as_float = with_na.astype("float64")
    tm.assert_numpy_array_equal(
        as_float, np.array([0.1, 0.2, np.nan], dtype="float64")
    )

    # no missing values
    no_na = pd.array([0.0, 1.0, 0.5], dtype="Float64")
    as_int = no_na.astype("int64")
    tm.assert_numpy_array_equal(as_int, np.array([0, 1, 0], dtype="int64"))

    as_bool = no_na.astype("bool")
    tm.assert_numpy_array_equal(
        as_bool, np.array([False, True, True], dtype="bool")
    )
|  | ||||
|  | ||||
def test_astype_to_floating_array():
    """astype to Float64 (by name or dtype instance) and to Float32."""
    # astype to FloatingArray
    arr = pd.array([0.0, 1.0, None], dtype="Float64")

    same_by_name = arr.astype("Float64")
    tm.assert_extension_array_equal(same_by_name, arr)

    same_by_instance = arr.astype(pd.Float64Dtype())
    tm.assert_extension_array_equal(same_by_instance, arr)

    narrowed = arr.astype("Float32")
    expected = pd.array([0.0, 1.0, None], dtype="Float32")
    tm.assert_extension_array_equal(narrowed, expected)
|  | ||||
|  | ||||
def test_astype_to_boolean_array():
    """astype to the nullable boolean dtype, by name and by dtype instance."""
    # astype to BooleanArray
    arr = pd.array([0.0, 1.0, None], dtype="Float64")

    by_name = arr.astype("boolean")
    expected = pd.array([False, True, None], dtype="boolean")
    tm.assert_extension_array_equal(by_name, expected)

    by_instance = arr.astype(pd.BooleanDtype())
    tm.assert_extension_array_equal(by_instance, expected)
|  | ||||
|  | ||||
def test_astype_to_integer_array():
    """astype to Int64: 1.5 is expected to become 1 and the NA to stay NA."""
    # astype to IntegerArray
    floats = pd.array([0.0, 1.5, None], dtype="Float64")

    as_int = floats.astype("Int64")
    expected = pd.array([0, 1, None], dtype="Int64")
    tm.assert_extension_array_equal(as_int, expected)
|  | ||||
|  | ||||
def test_astype_str(using_infer_string):
    """astype(str) / astype("str"), with and without string inference."""
    arr = pd.array([0.1, 0.2, None], dtype="Float64")

    if not using_infer_string:
        # expected to be a numpy unicode array holding the text "<NA>"
        expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")

        tm.assert_numpy_array_equal(arr.astype(str), expected)
        tm.assert_numpy_array_equal(arr.astype("str"), expected)
        return

    # expected to be a StringDtype array whose na_value is np.nan
    expected = pd.array(["0.1", "0.2", None], dtype=pd.StringDtype(na_value=np.nan))

    tm.assert_extension_array_equal(arr.astype(str), expected)
    tm.assert_extension_array_equal(arr.astype("str"), expected)
|  | ||||
|  | ||||
def test_astype_copy():
    """The copy keyword of astype, for the same dtype and a different one."""
    values = [0.1, 0.2, None]
    arr = pd.array(values, dtype="Float64")
    orig = pd.array(values, dtype="Float64")

    # copy=True -> ensure both data and mask are actual copies
    copied = arr.astype("Float64", copy=True)
    assert copied is not arr
    assert not tm.shares_memory(copied, arr)
    copied[0] = 10
    tm.assert_extension_array_equal(arr, orig)
    copied[0] = pd.NA
    tm.assert_extension_array_equal(arr, orig)

    # copy=False
    same = arr.astype("Float64", copy=False)
    assert same is arr
    assert np.shares_memory(same._data, arr._data)
    assert np.shares_memory(same._mask, arr._mask)
    same[0] = 10
    assert arr[0] == 10
    same[0] = pd.NA
    assert arr[0] is pd.NA

    # astype to different dtype -> always needs a copy -> even with copy=False
    # we need to ensure that also the mask is actually copied
    arr = pd.array(values, dtype="Float64")
    orig = pd.array(values, dtype="Float64")

    converted = arr.astype("Float32", copy=False)
    assert not tm.shares_memory(converted, arr)
    converted[0] = 10
    tm.assert_extension_array_equal(arr, orig)
    converted[0] = pd.NA
    tm.assert_extension_array_equal(arr, orig)
|  | ||||
|  | ||||
def test_astype_object(dtype):
    """astype(object) yields a Python float and the pd.NA singleton."""
    arr = pd.array([1.0, pd.NA], dtype=dtype)

    as_object = arr.astype(object)
    expected = np.array([1.0, pd.NA], dtype=object)
    tm.assert_numpy_array_equal(as_object, expected)

    # check exact element types
    value, missing = as_object[0], as_object[1]
    assert isinstance(value, float)
    assert missing is pd.NA
|  | ||||
|  | ||||
def test_Float64_conversion():
    """An object Series of numeric strings converts to Float64 via astype."""
    # GH#40729
    float64 = pd.Float64Dtype()
    strings = pd.Series(["1", "2", "3", "4"], dtype="object")
    result = strings.astype(float64)

    expected = pd.Series([1.0, 2.0, 3.0, 4.0], dtype=pd.Float64Dtype())

    tm.assert_series_equal(result, expected)
| @ -0,0 +1,65 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import FloatingArray | ||||
| from pandas.tests.arrays.masked_shared import ( | ||||
|     ComparisonOps, | ||||
|     NumericOps, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestComparisonOps(NumericOps, ComparisonOps):
    """Comparison tests for float arrays, built on the shared masked-array tests."""

    @pytest.mark.parametrize("other", [True, False, pd.NA, -1.0, 0.0, 1])
    def test_scalar(self, other, comparison_op, dtype):
        # run the shared scalar test with this set of scalars
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_compare_with_integerarray(self, comparison_op):
        """Comparing Int64 with Float64 matches comparing Int64 with Int64."""
        ints = pd.array([0, 1, None] * 3, dtype="Int64")
        floats = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype="Float64")
        floats_as_int = floats.astype("Int64")

        # integer array on the left
        expected = comparison_op(ints, floats_as_int)
        result = comparison_op(ints, floats)
        tm.assert_extension_array_equal(result, expected)

        # integer array on the right
        expected = comparison_op(floats_as_int, ints)
        result = comparison_op(floats, ints)
        tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_equals():
    """Arrays with the same values but different float dtypes are not equal."""
    # GH-30652
    # equals is generally tested in /tests/extension/base/methods, but this
    # specifically tests that two arrays of the same class but different dtype
    # do not evaluate equal
    values = [1, 2, None]
    as_float64 = pd.array(values, dtype="Float64")
    as_float32 = pd.array(values, dtype="Float32")
    assert as_float64.equals(as_float32) is False
|  | ||||
|  | ||||
def test_equals_nan_vs_na():
    """equals tells an unmasked NaN apart from a masked (NA) entry."""
    # GH#44382

    # left holds a NaN at position 1 that is not masked
    mask = np.zeros(3, dtype=bool)
    data = np.array([1.0, np.nan, 3.0], dtype=np.float64)
    left = FloatingArray(data, mask)

    assert left.equals(left)
    tm.assert_extension_array_equal(left, left)

    assert left.equals(left.copy())
    rebuilt = FloatingArray(data.copy(), mask.copy())
    assert left.equals(rebuilt)

    # right has position 1 masked instead
    mask2 = np.array([False, True, False], dtype=bool)
    data2 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
    right = FloatingArray(data2, mask2)

    assert right.equals(right)
    tm.assert_extension_array_equal(right, right)

    assert not left.equals(right)

    # with mask[1] = True, the only difference is data[1], which should
    #  not matter for equals
    mask[1] = True
    assert left.equals(right)
| @ -0,0 +1,20 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Float64", "Float64"], "Float64"),
        (["Float32", "Float64"], "Float64"),
        (["Float32", "Float32"], "Float32"),
    ],
)
def test_concat_series(to_concat_dtypes, result_dtype):
    """Concatenating masked-float Series gives the dtype listed for each case."""
    pieces = [pd.Series([1, 2, pd.NA], dtype=name) for name in to_concat_dtypes]
    result = pd.concat(pieces)

    # Reference: concatenate as object dtype, then cast to the expected dtype.
    object_piece = pd.Series([1, 2, pd.NA], dtype=object)
    expected = pd.concat([object_piece, object_piece]).astype(result_dtype)
    tm.assert_series_equal(result, expected)
| @ -0,0 +1,204 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import FloatingArray | ||||
| from pandas.core.arrays.floating import ( | ||||
|     Float32Dtype, | ||||
|     Float64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_uses_pandas_na():
    """A missing entry of a Float64 array is returned as the ``pd.NA`` object."""
    arr = pd.array([1, None], dtype=Float64Dtype())
    missing = arr[1]
    assert missing is pd.NA
|  | ||||
|  | ||||
def test_floating_array_constructor():
    """FloatingArray accepts matching ndarray data/mask and rejects other input."""
    mask = np.array([False, False, False, True], dtype="bool")
    values = np.array([1, 2, 3, 4], dtype="float64")

    result = FloatingArray(values, mask)
    tm.assert_extension_array_equal(
        result, pd.array([1, 2, 3, np.nan], dtype="Float64")
    )
    tm.assert_numpy_array_equal(result._data, values)
    tm.assert_numpy_array_equal(result._mask, mask)

    # Lists, and an ndarray of integer dtype, are refused with a pointer
    # towards ``pd.array``.
    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    invalid_inputs = [
        (values.tolist(), mask),
        (values, mask.tolist()),
        (values.astype(int), mask),
    ]
    for bad_values, bad_mask in invalid_inputs:
        with pytest.raises(TypeError, match=msg):
            FloatingArray(bad_values, bad_mask)

    # The mask is a required positional argument.
    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        FloatingArray(values)
|  | ||||
|  | ||||
def test_floating_array_disallows_float16():
    """Constructing a FloatingArray from float16 data raises TypeError."""
    # GH#44715
    half_values = np.array([1, 2], dtype=np.float16)
    nothing_masked = np.array([False, False])

    with pytest.raises(
        TypeError, match="FloatingArray does not support np.float16 dtype"
    ):
        FloatingArray(half_values, nothing_masked)
|  | ||||
|  | ||||
def test_floating_array_disallows_Float16_dtype(request):
    """The dtype string "Float16" is rejected by ``pd.array`` with a TypeError."""
    # GH#44715
    # NOTE(review): ``request`` is not read anywhere in this body; it is kept
    # only so the test's signature stays as it was.
    expected_msg = "data type 'Float16' not understood"
    with pytest.raises(TypeError, match=expected_msg):
        pd.array([1.0, 2.0], dtype="Float16")
|  | ||||
|  | ||||
def test_floating_array_constructor_copy():
    """By default the inputs are kept by reference; ``copy=True`` duplicates them."""
    mask = np.array([False, False, False, True], dtype="bool")
    values = np.array([1, 2, 3, 4], dtype="float64")

    by_reference = FloatingArray(values, mask)
    assert by_reference._data is values
    assert by_reference._mask is mask

    duplicated = FloatingArray(values, mask, copy=True)
    assert duplicated._data is not values
    assert duplicated._mask is not mask
|  | ||||
|  | ||||
def test_to_array():
    """``pd.array`` on a list of floats matches an explicit ``dtype="Float64"``."""
    numbers = [0.1, 0.2, 0.3, 0.4]
    inferred = pd.array(numbers)
    explicit = pd.array(numbers, dtype="Float64")
    tm.assert_extension_array_equal(inferred, explicit)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, pd.NA]),
        ([None], [pd.NA]),
        ([None, np.nan], [pd.NA, pd.NA]),
        ([1, np.nan], [1, pd.NA]),
        ([np.nan], [pd.NA]),
    ],
)
def test_to_array_none_is_nan(a, b):
    """``None`` and ``np.nan`` inputs produce the same Float64 array as ``pd.NA``."""
    tm.assert_extension_array_equal(
        pd.array(a, dtype="Float64"), pd.array(b, dtype="Float64")
    )
|  | ||||
|  | ||||
def test_to_array_mixed_integer_float():
    """Mixed int/float lists (with or without None) are inferred as Float64."""
    cases = [
        ([1, 2.0], [1.0, 2.0]),
        ([1, None, 2.0], [1.0, None, 2.0]),
    ]
    for raw, as_floats in cases:
        result = pd.array(raw)
        expected = pd.array(as_floats, dtype="Float64")
        tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
        # GH#44514 all-NA case used to get quietly swapped out before checking ndim
        np.array([pd.NA] * 6, dtype=object).reshape(3, 2),
    ],
)
def test_to_array_error(values):
    """Each listed input makes ``pd.array(..., dtype="Float64")`` raise."""
    # error in converting existing arrays to FloatingArray
    # Any one of these messages is accepted; they are OR-ed into a single regex.
    accepted_messages = (
        "cannot be converted to FloatingDtype",
        "values must be a 1D list-like",
        "Cannot pass scalar",
        r"float\(\) argument must be a string or a (real )?number, not 'dict'",
        "could not convert string to float: 'foo'",
        r"could not convert string to float: np\.str_\('foo'\)",
    )
    pattern = "|".join(accepted_messages)
    with pytest.raises((TypeError, ValueError), match=pattern):
        pd.array(values, dtype="Float64")
|  | ||||
|  | ||||
@pytest.mark.parametrize("values", [["1", "2", None], ["1.5", "2", None]])
def test_construct_from_float_strings(values):
    """Numeric strings are parsed to floats by both construction routes."""
    # see also test_to_integer_array_str
    first = float(values[0])
    expected = pd.array([first, 2, None], dtype="Float64")

    via_pd_array = pd.array(values, dtype="Float64")
    tm.assert_extension_array_equal(via_pd_array, expected)

    via_from_sequence = FloatingArray._from_sequence(values)
    tm.assert_extension_array_equal(via_from_sequence, expected)
|  | ||||
|  | ||||
def test_to_array_inferred_dtype():
    """Without a ``dtype`` argument, the input's own float width decides."""
    # if values has dtype -> respect it
    float32_input = np.array([1, 2], dtype="float32")
    assert pd.array(float32_input).dtype == Float32Dtype()

    # if values have no dtype -> always float64
    plain_list = [1.0, 2.0]
    assert pd.array(plain_list).dtype == Float64Dtype()
|  | ||||
|  | ||||
def test_to_array_dtype_keyword():
    """An explicit ``dtype`` argument decides the resulting dtype."""
    from_list = pd.array([1, 2], dtype="Float32")
    assert from_list.dtype == Float32Dtype()

    # if values has dtype -> override it
    float32_input = np.array([1, 2], dtype="float32")
    overridden = pd.array(float32_input, dtype="Float64")
    assert overridden.dtype == Float64Dtype()
|  | ||||
|  | ||||
def test_to_array_integer():
    """Integer input is accepted when a Float64 dtype is requested."""
    from_ints = pd.array([1, 2], dtype="Float64")
    from_floats = pd.array([1.0, 2.0], dtype="Float64")
    tm.assert_extension_array_equal(from_ints, from_floats)

    # for integer dtypes, the itemsize is not preserved
    # TODO can we specify "floating" in general?
    int32_values = np.array([1, 2], dtype="int32")
    converted = pd.array(int32_values, dtype="Float64")
    assert converted.dtype == Float64Dtype()
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "bool_values, values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Float64Dtype(), Float64Dtype()),
        ([False, True], [0, 1], "Float64", Float64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Float64Dtype(), Float64Dtype()),
    ],
)
def test_to_array_bool(bool_values, values, target_dtype, expected_dtype):
    """Booleans convert to the float dtype exactly like their 0/1 counterparts."""
    from_bools = pd.array(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_numbers = pd.array(values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_numbers)
|  | ||||
|  | ||||
def test_series_from_float(data):
    """A Series rebuilt from floats or from a list equals ``pd.Series(data)``."""
    # construct from our dtype & string dtype
    dtype_name = str(data.dtype)
    expected = pd.Series(data)

    # from float
    as_floats = data.to_numpy(na_value=np.nan, dtype="float")
    tm.assert_series_equal(pd.Series(as_floats, dtype=dtype_name), expected)

    # from list
    as_list = np.array(data).tolist()
    tm.assert_series_equal(pd.Series(as_list, dtype=dtype_name), expected)
| @ -0,0 +1,12 @@ | ||||
| import numpy as np | ||||
|  | ||||
| import pandas as pd | ||||
|  | ||||
|  | ||||
| def test_contains_nan(): | ||||
|     # GH#52840 | ||||
|     arr = pd.array(range(5)) / 0 | ||||
|  | ||||
|     assert np.isnan(arr._data[0]) | ||||
|     assert not arr.isna()[0] | ||||
|     assert np.nan in arr | ||||
| @ -0,0 +1,194 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import IS64 | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
def test_ufuncs_single(ufunc):
    """Unary ufuncs on a Float64 array/Series match the plain-float computation."""
    arr = pd.array([1, 2, -3, np.nan], dtype="Float64")
    arr_result = ufunc(arr)
    arr_expected = pd.array(ufunc(arr.astype(float)), dtype="Float64")
    tm.assert_extension_array_equal(arr_result, arr_expected)

    # Same check with the array wrapped in a Series.
    ser = pd.Series(arr)
    ser_result = ufunc(ser)
    tm.assert_series_equal(ser_result, pd.Series(arr_expected))
|  | ||||
|  | ||||
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """Float-valued unary ufuncs agree with the result on plain float data."""
    arr = pd.array([1.0, 0.2, 3.0, np.nan], dtype="Float64")
    with np.errstate(invalid="ignore"):
        arr_result = ufunc(arr)
        arr_expected = pd.array(ufunc(arr.astype(float)), dtype="Float64")
    tm.assert_extension_array_equal(arr_result, arr_expected)

    # Same check with the array wrapped in a Series.
    ser = pd.Series(arr)
    with np.errstate(invalid="ignore"):
        ser_result = ufunc(ser)
        ser_expected = pd.Series(ufunc(ser.astype(float)), dtype="Float64")
    tm.assert_series_equal(ser_result, ser_expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_float(ufunc):
    """Binary ufuncs with a FloatingArray operand match the plain-float result."""
    a = pd.array([1, 0.2, -3, np.nan], dtype="Float64")
    a_float = a.astype(float)
    arr = np.array([1, 2, 3, 4])

    # Each entry pairs the operands given to the ufunc with the same operands
    # where the FloatingArray is replaced by its plain-float conversion.
    cases = [
        # two FloatingArrays
        ((a, a), (a_float, a_float)),
        # FloatingArray with numpy array
        ((a, arr), (a_float, arr)),
        ((arr, a), (arr, a_float)),
        # FloatingArray with scalar
        ((a, 1), (a_float, 1)),
        ((1, a), (1, a_float)),
    ]
    for masked_operands, plain_operands in cases:
        result = ufunc(*masked_operands)
        expected = pd.array(ufunc(*plain_operands), dtype="Float64")
        tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` on a Float64 array equals ``sum(skipna=False)``."""
    # NOTE(review): despite the name, nothing in this test expects an
    # exception; it only compares the two reduction results.
    arr = pd.array(values, dtype="Float64")

    reduced = np.add.reduce(arr)
    tm.assert_almost_equal(reduced, arr.sum(skipna=False))
|  | ||||
|  | ||||
@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system")
@pytest.mark.parametrize(
    "pandasmethname, kwargs",
    [
        ("var", {"ddof": 0}),
        ("var", {"ddof": 1}),
        ("std", {"ddof": 0}),
        ("std", {"ddof": 1}),
        ("kurtosis", {}),
        ("skew", {}),
        ("sem", {}),
    ],
)
def test_stat_method(pandasmethname, kwargs):
    """A Float64 Series with NA gives the same statistic as float64 without it."""
    valid = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    masked = pd.Series(data=valid + [np.nan, np.nan], dtype="Float64")
    plain = pd.Series(data=valid, dtype="float64")

    result = getattr(masked, pandasmethname)(**kwargs)
    expected = getattr(plain, pandasmethname)(**kwargs)
    assert expected == result
|  | ||||
|  | ||||
def test_value_counts_na():
    """``value_counts`` counts NA as its own entry only when ``dropna=False``."""
    arr = pd.array([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
    idx = pd.Index([0.1, 0.2, pd.NA], dtype=arr.dtype)
    assert idx.dtype == arr.dtype

    with_na = arr.value_counts(dropna=False)
    tm.assert_series_equal(
        with_na, pd.Series([2, 1, 1], index=idx, dtype="Int64", name="count")
    )

    without_na = arr.value_counts(dropna=True)
    tm.assert_series_equal(
        without_na, pd.Series([2, 1], index=idx[:-1], dtype="Int64", name="count")
    )
|  | ||||
|  | ||||
def test_value_counts_empty():
    """``value_counts`` of an empty Float64 Series is an empty Int64 Series."""
    empty = pd.Series([], dtype="Float64")
    empty_index = pd.Index([], dtype="Float64")
    assert empty_index.dtype == "Float64"

    expected = pd.Series([], index=empty_index, dtype="Int64", name="count")
    tm.assert_series_equal(empty.value_counts(), expected)
|  | ||||
|  | ||||
def test_value_counts_with_normalize():
    """``normalize=True`` yields Float64 proportions of the non-NA values."""
    ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64")

    counts = pd.Series([2, 1], index=ser[:2], dtype="Float64", name="proportion")
    expected = counts / 3
    assert expected.index.dtype == ser.dtype

    tm.assert_series_equal(ser.value_counts(normalize=True), expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 4])
def test_floating_array_sum(skipna, min_count, dtype):
    """``sum`` is 6.0 only when NA is skipped and no minimum count is set."""
    arr = pd.array([1, 2, 3, None], dtype=dtype)
    total = arr.sum(skipna=skipna, min_count=min_count)

    if not skipna or min_count != 0:
        # Either the NA takes part, or min_count (4) exceeds the 3 valid values.
        assert total is pd.NA
    else:
        assert total == 6.0
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values, expected", [([1, 2, 3], 6.0), ([1, 2, 3, None], 6.0), ([None], 0.0)]
)
def test_floating_array_numpy_sum(values, expected):
    """``np.sum`` on a Float64 array returns the value listed for each case."""
    total = np.sum(pd.array(values, dtype="Float64"))
    assert total == expected
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """Reductions on a Float64 column keep float results, alone and grouped."""
    frame = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([0.1, None, 3.0], dtype="Float64"),
        }
    )

    # op
    column_result = getattr(frame.C, op)()
    assert isinstance(column_result, np.float64)

    # groupby
    grouped_result = getattr(frame.groupby("A"), op)()
    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([0.1, 3], dtype="Float64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(grouped_result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("method", ["min", "max"])
def test_floating_array_min_max(skipna, method, dtype):
    """``min``/``max`` return NA unless the missing value is skipped."""
    arr = pd.array([0.0, 1.0, None], dtype=dtype)
    outcome = getattr(arr, method)(skipna=skipna)

    if not skipna:
        assert outcome is pd.NA
        return
    expected = 0 if method == "min" else 1
    assert outcome == expected
|  | ||||
|  | ||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 9])
def test_floating_array_prod(skipna, min_count, dtype):
    """``prod`` is 2 only when NA is skipped and no minimum count is set."""
    arr = pd.array([1.0, 2.0, None], dtype=dtype)
    product = arr.prod(skipna=skipna, min_count=min_count)

    if not skipna or min_count != 0:
        # Either the NA takes part, or min_count (9) exceeds the 2 valid values.
        assert product is pd.NA
    else:
        assert product == 2
| @ -0,0 +1,47 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas.core.arrays.floating import ( | ||||
|     Float32Dtype, | ||||
|     Float64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_dtypes(dtype):
    """Smoke-test a dtype produced by the ``dtype`` fixture.

    The fixture is defined outside this module (presumably it yields the
    masked float dtypes -- confirm against the conftest).
    """
    # smoke tests on auto dtype construction

    # The scalar type behind the dtype must be a NumPy float (kind "f").
    # This comparison used to be a bare expression, so its result was
    # discarded and a wrong kind could never fail the test.
    assert np.dtype(dtype.type).kind == "f"
    assert dtype.name is not None
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype, expected",
    [(Float32Dtype(), "Float32Dtype()"), (Float64Dtype(), "Float64Dtype()")],
)
def test_repr_dtype(dtype, expected):
    """The repr of a masked float dtype is its constructor call."""
    rendered = repr(dtype)
    assert rendered == expected
|  | ||||
|  | ||||
def test_repr_array():
    """A short FloatingArray repr shows the class, values, length and dtype."""
    arr = pd.array([1.0, None, 3.0])
    assert repr(arr) == "<FloatingArray>\n[1.0, <NA>, 3.0]\nLength: 3, dtype: Float64"
|  | ||||
|  | ||||
def test_repr_array_long():
    """A 3000-element array repr shows one leading row, ``...``, one trailing row."""
    expected = """<FloatingArray>
[ 1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,
 ...
 <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>]
Length: 3000, dtype: Float64"""
    long_array = pd.array([1.0, 2.0, None] * 1000)
    assert repr(long_array) == expected
|  | ||||
|  | ||||
def test_frame_repr(data_missing):
    """A DataFrame holding ``data_missing`` prints the NA entry as ``<NA>``."""
    frame = pd.DataFrame({"A": data_missing})
    assert repr(frame) == "      A\n0  <NA>\n1   0.1"
| @ -0,0 +1,132 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import FloatingArray | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) | ||||
| def test_to_numpy(box): | ||||
|     con = pd.Series if box else pd.array | ||||
|  | ||||
|     # default (with or without missing values) -> object dtype | ||||
|     arr = con([0.1, 0.2, 0.3], dtype="Float64") | ||||
|     result = arr.to_numpy() | ||||
|     expected = np.array([0.1, 0.2, 0.3], dtype="float64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     arr = con([0.1, 0.2, None], dtype="Float64") | ||||
|     result = arr.to_numpy() | ||||
|     expected = np.array([0.1, 0.2, np.nan], dtype="float64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) | ||||
| def test_to_numpy_float(box): | ||||
|     con = pd.Series if box else pd.array | ||||
|  | ||||
|     # no missing values -> can convert to float, otherwise raises | ||||
|     arr = con([0.1, 0.2, 0.3], dtype="Float64") | ||||
|     result = arr.to_numpy(dtype="float64") | ||||
|     expected = np.array([0.1, 0.2, 0.3], dtype="float64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     arr = con([0.1, 0.2, None], dtype="Float64") | ||||
|     result = arr.to_numpy(dtype="float64") | ||||
|     expected = np.array([0.1, 0.2, np.nan], dtype="float64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = arr.to_numpy(dtype="float64", na_value=np.nan) | ||||
|     expected = np.array([0.1, 0.2, np.nan], dtype="float64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_int(box):
    """``to_numpy(dtype="int64")`` converts complete data and rejects NA."""
    con = pd.Series if box else pd.array

    # no missing values -> can convert to int, otherwise raises
    arr = con([1.0, 2.0, 3.0], dtype="Float64")
    result = arr.to_numpy(dtype="int64")
    expected = np.array([1, 2, 3], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)

    arr = con([1.0, 2.0, None], dtype="Float64")
    with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
        # The call is expected to raise, so its return value is not bound.
        arr.to_numpy(dtype="int64")

    # automatic casting (the fractional part is dropped)
    arr = con([0.1, 0.9, 1.1], dtype="Float64")
    result = arr.to_numpy(dtype="int64")
    expected = np.array([0, 0, 1], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_na_value(box):
    """``na_value`` replaces the missing entry for object, bool and int targets."""
    con = pd.Series if box else pd.array
    arr = con([0.0, 1.0, None], dtype="Float64")

    # (target dtype, fill value, expected ndarray)
    cases = [
        (object, None, np.array([0.0, 1.0, None], dtype="object")),
        (bool, False, np.array([False, True, False], dtype="bool")),
        ("int64", -99, np.array([0, 1, -99], dtype="int64")),
    ]
    for target, fill, expected in cases:
        result = arr.to_numpy(dtype=target, na_value=fill)
        tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
def test_to_numpy_na_value_with_nan():
    """``na_value`` fills the masked slot and leaves an unmasked NaN alone."""
    # array with both NaN and NA -> only fill NA with `na_value`
    raw_values = np.array([0.0, np.nan, 0.0])
    raw_mask = np.array([False, False, True])
    arr = FloatingArray(raw_values, raw_mask)

    filled = arr.to_numpy(dtype="float64", na_value=-1)
    tm.assert_numpy_array_equal(
        filled, np.array([0.0, np.nan, -1.0], dtype="float64")
    )
|  | ||||
|  | ||||
@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"])
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_dtype(box, dtype):
    """Complete data converts to each requested NumPy dtype."""
    con = pd.Series if box else pd.array
    complete = con([0.0, 1.0], dtype="Float64")

    tm.assert_numpy_array_equal(
        complete.to_numpy(dtype=dtype), np.array([0, 1], dtype=dtype)
    )
|  | ||||
|  | ||||
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_na_raises(box, dtype):
    """Converting data with NA to int/bool raises, naming the target dtype."""
    con = pd.Series if box else pd.array
    with_missing = con([0.0, 1.0, None], dtype="Float64")

    with pytest.raises(ValueError, match=dtype):
        with_missing.to_numpy(dtype=dtype)
|  | ||||
|  | ||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_string(box, dtype):
    """``to_numpy(dtype="str")`` gives a 32-character unicode array."""
    # NOTE(review): the ``dtype`` argument is never read in this body; the
    # array is always built as "Float64".
    con = pd.Series if box else pd.array
    arr = con([0.0, 1.0, None], dtype="Float64")

    expected = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32")
    tm.assert_numpy_array_equal(arr.to_numpy(dtype="str"), expected)
|  | ||||
|  | ||||
def test_to_numpy_copy():
    """Writes through ``to_numpy`` reach the array unless ``copy=True``."""
    # to_numpy can be zero-copy if no missing values
    shared = pd.array([0.1, 0.2, 0.3], dtype="Float64")
    view = shared.to_numpy(dtype="float64")
    view[0] = 10
    tm.assert_extension_array_equal(
        shared, pd.array([10, 0.2, 0.3], dtype="Float64")
    )

    # With copy=True the write must not reach the source array.
    untouched = pd.array([0.1, 0.2, 0.3], dtype="Float64")
    duplicate = untouched.to_numpy(dtype="float64", copy=True)
    duplicate[0] = 10
    tm.assert_extension_array_equal(
        untouched, pd.array([0.1, 0.2, 0.3], dtype="Float64")
    )
| @ -0,0 +1,68 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas.core.arrays.integer import ( | ||||
|     Int8Dtype, | ||||
|     Int16Dtype, | ||||
|     Int32Dtype, | ||||
|     Int64Dtype, | ||||
|     UInt8Dtype, | ||||
|     UInt16Dtype, | ||||
|     UInt32Dtype, | ||||
|     UInt64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    ]
)
def dtype(request):
    """Parametrized fixture yielding an instance of each masked integer dtype."""
    # ``params`` holds the dtype classes; instantiate the one for this run.
    dtype_cls = request.param
    return dtype_cls()
|  | ||||
|  | ||||
@pytest.fixture
def data(dtype):
    """
    Fixture returning a 'data' array of the parametrized integer 'dtype'.

    The array holds the integers 0-7, a missing value, 10-97, another missing
    value, then 99 and 100.

    Used to test dtype conversion with and without missing values.
    """
    values = [*range(8), np.nan, *range(10, 98), np.nan, 99, 100]
    return pd.array(values, dtype=dtype)
|  | ||||
|  | ||||
@pytest.fixture
def data_missing(dtype):
    """
    Fixture returning a two-element array, a NaN followed by the integer 1,
    of the parametrized integer 'dtype'.

    Used to test dtype conversion with and without missing values.
    """
    values = [np.nan, 1]
    return pd.array(values, dtype=dtype)
|  | ||||
|  | ||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture returning the 'data' or 'data_missing' integer array.

    Used to test dtype conversion with and without missing values.
    """
    # Map the parameter name onto the fixture value it stands for; any other
    # name yields None, as before.
    by_name = {"data": data, "data_missing": data_missing}
    return by_name.get(request.param)
| @ -0,0 +1,345 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core import ops | ||||
| from pandas.core.arrays import FloatingArray | ||||
|  | ||||
| # Basic test for the arithmetic array ops | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "opname, exp",
    [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])],
    ids=["add", "mul"],
)
def test_add_mul(dtype, opname, exp):
    """Addition/multiplication of two masked arrays propagates NA elementwise."""
    a = pd.array([0, 1, None, 3, 4], dtype=dtype)
    b = pd.array([1, 2, 3, None, 5], dtype=dtype)

    # array / array
    expected = pd.array(exp, dtype=dtype)

    # The forward operator comes from ``operator``, the reflected one
    # ("r" + opname) from ``pandas.core.ops``; both must give ``expected``.
    for func in (getattr(operator, opname), getattr(ops, "r" + opname)):
        tm.assert_extension_array_equal(func(a, b), expected)
|  | ||||
|  | ||||
def test_sub(dtype):
    """Subtraction keeps the dtype and yields NA where either operand is NA."""
    minuend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    subtrahend = pd.array([0, 1, None, 3, 4], dtype=dtype)

    difference = minuend - subtrahend
    tm.assert_extension_array_equal(
        difference, pd.array([1, 1, None, None, 1], dtype=dtype)
    )
|  | ||||
|  | ||||
def test_div(dtype):
    """True division gives a Float64 result, with ``inf`` for 1/0."""
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)

    quotient = numerator / denominator
    tm.assert_extension_array_equal(
        quotient, pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")
    )
|  | ||||
|  | ||||
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
def test_divide_by_zero(zero, negative):
    """Dividing Int64 by a zero scalar gives NaN/inf/-inf and keeps NA masked."""
    # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
    numerators = pd.array([0, 1, -1, None], dtype="Int64")
    quotient = numerators / zero

    # The last slot is masked, so the 1 stored there is only a placeholder.
    expected_values = np.array([np.nan, np.inf, -np.inf, 1], dtype="float64")
    expected_mask = np.array([False, False, False, True])
    expected = FloatingArray(expected_values, expected_mask)
    if negative:
        # For the negative-zero divisor the expected values are negated.
        expected *= -1
    tm.assert_extension_array_equal(quotient, expected)
|  | ||||
|  | ||||
def test_floordiv(dtype):
    """Floor division of two masked integer arrays keeps the integer dtype."""
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)

    quotient = numerator // denominator
    # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
    tm.assert_extension_array_equal(
        quotient, pd.array([0, 2, None, None, 1], dtype=dtype)
    )
|  | ||||
|  | ||||
def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype):
    """``1 // Series`` with a zero entry gives ``inf``, masked or not."""
    # GH 48223: Aligns with non-masked floordiv
    # but differs from numpy
    # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740
    masked = pd.Series([0, 1], dtype=any_int_ea_dtype)
    masked_expected = pd.Series([np.inf, 1.0], dtype="Float64")
    tm.assert_series_equal(1 // masked, masked_expected)

    # Same operation on the equivalent NumPy-dtype Series.
    plain = masked.astype(masked.dtype.numpy_dtype)
    plain_expected = masked_expected.astype(np.float64)
    tm.assert_series_equal(1 // plain, plain_expected)
|  | ||||
|  | ||||
def test_mod(dtype):
    """Modulo keeps the dtype and yields NA where either operand is NA."""
    dividend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    divisor = pd.array([0, 1, None, 3, 4], dtype=dtype)

    remainder = dividend % divisor
    tm.assert_extension_array_equal(
        remainder, pd.array([0, 0, None, None, 1], dtype=dtype)
    )
|  | ||||
|  | ||||
def test_pow_scalar():
    """Check ``**`` between an Int64 array and scalars, in both operand orders."""
    base = pd.array([-1, 0, 1, None, 2], dtype="Int64")

    # array ** scalar, integer-valued results
    int_cases = [
        (base**0, [1, 1, 1, 1, 1]),
        (base**1, [-1, 0, 1, None, 2]),
        (base**pd.NA, [None, None, 1, None, None]),
    ]
    for powered, values in int_cases:
        tm.assert_extension_array_equal(powered, pd.array(values, dtype="Int64"))

    # array ** nan is compared against a FloatingArray
    nan_expected = FloatingArray(
        np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
        np.array([False, False, False, True, False]),
    )
    tm.assert_extension_array_equal(base**np.nan, nan_expected)

    # reversed
    exponents = base[1:]  # Can't raise integers to negative powers.

    reversed_cases = [
        (0**exponents, [1, 0, None, 0]),
        (1**exponents, [1, 1, 1, 1]),
        (pd.NA**exponents, [1, None, None, None]),
    ]
    for powered, values in reversed_cases:
        tm.assert_extension_array_equal(powered, pd.array(values, dtype="Int64"))

    nan_expected = FloatingArray(
        np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
        np.array([False, False, True, False]),
    )
    tm.assert_extension_array_equal(np.nan**exponents, nan_expected)
|  | ||||
|  | ||||
def test_pow_array():
    """Element-wise ``**`` over every combination of 0, 1 and missing."""
    bases = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
    exponents = pd.array([0, 1, None, 0, 1, None, 0, 1, None])

    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])
    tm.assert_extension_array_equal(bases**exponents, expected)
|  | ||||
|  | ||||
def test_rpow_one_to_na():
    """Raise a float ndarray to an all-missing Int64 array; ``1.0 ** NA`` is 1.0."""
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    exponents = pd.array([np.nan, np.nan], dtype="Int64")
    bases = np.array([1.0, 2.0])

    expected = pd.array([1.0, np.nan], dtype="Float64")
    tm.assert_extension_array_equal(bases**exponents, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
    """Adding a zero-dimensional ndarray matches adding the bare scalar."""
    masked = pd.array([1, None, 2])
    via_ndarray = masked + np.array(other)
    via_scalar = masked + other
    tm.assert_equal(via_ndarray, via_scalar)
|  | ||||
|  | ||||
| # Test generic characteristics / errors | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_error_invalid_values(data, all_arithmetic_operators):
    """Arithmetic with strings or datetimes raises TypeError.

    The one exception is ``__mul__`` / ``__rmul__`` with a string Series,
    which is compared against element-wise string repetition.
    """
    ser = pd.Series(data)
    bound_op = getattr(ser, all_arithmetic_operators)

    # invalid scalars
    for bad_scalar in ("foo", pd.Timestamp("20180101")):
        with tm.external_error_raised(TypeError):
            bound_op(bad_scalar)

    # invalid array-likes
    strings = pd.Series("foo", index=ser.index)
    if all_arithmetic_operators not in ("__mul__", "__rmul__"):
        with tm.external_error_raised(TypeError):
            bound_op(strings)
    else:
        repeated = bound_op(strings)
        expected = pd.Series(["foo" * x for x in data], index=ser.index)
        # TODO: doing this fillna to keep tests passing as we make
        #  assert_almost_equal stricter, but the expected with pd.NA seems
        #  more-correct than np.nan here.
        expected = expected.fillna(np.nan)
        tm.assert_series_equal(repeated, expected)

    datetimes = pd.Series(pd.date_range("20180101", periods=len(ser)))
    with tm.external_error_raised(TypeError):
        bound_op(datetimes)
|  | ||||
|  | ||||
| # Various | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
| # TODO test unsigned overflow | ||||
|  | ||||
|  | ||||
def test_arith_coerce_scalar(data, all_arithmetic_operators):
    """An op with a float scalar matches the float-Series op cast to Float64."""
    op = tm.get_op_from_name(all_arithmetic_operators)
    ser = pd.Series(data)
    scalar = 0.01

    result = op(ser, scalar)
    expected = op(ser.astype(float), scalar).astype("Float64")

    # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
    if all_arithmetic_operators == "__rmod__":
        zero_positions = (ser == 0).fillna(False).to_numpy(bool)
        expected.array._mask[zero_positions] = False

    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("other", [1.0, np.array(1.0)])
def test_arithmetic_conversion(all_arithmetic_operators, other):
    """A float operand gives a Float64 result, even when it equals an integer."""
    op = tm.get_op_from_name(all_arithmetic_operators)
    ints = pd.Series([1, 2, 3], dtype="Int64")

    assert op(ints, other).dtype == "Float64"
|  | ||||
|  | ||||
def test_cross_type_arithmetic():
    """Add Int64 to a plain int column and to UInt8; also compare a result to a scalar."""
    columns = {
        "A": pd.Series([1, 2, np.nan], dtype="Int64"),
        "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
        "C": [1, 2, 3],
    }
    frame = pd.DataFrame(columns)

    # Int64 + plain int column
    total = frame.A + frame.C
    tm.assert_series_equal(total, pd.Series([2, 4, np.nan], dtype="Int64"))

    # Comparison of the scaled sum
    is_twelve = (frame.A + frame.C) * 3 == 12
    tm.assert_series_equal(
        is_twelve, pd.Series([False, True, None], dtype="boolean")
    )

    # Int64 + UInt8
    mixed = frame.A + frame.B
    tm.assert_series_equal(mixed, pd.Series([2, np.nan, np.nan], dtype="Int64"))
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["mean"])
def test_reduce_to_float(op):
    """Reductions that always return float do so even for whole-number results."""
    frame = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # Reduction on the masked column itself
    scalar = getattr(frame.C, op)()
    assert isinstance(scalar, float)

    # Same reduction through groupby
    grouped = getattr(frame.groupby("A"), op)()
    expected_index = pd.Index(["a", "b"], name="A")
    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")},
        index=expected_index,
    )
    tm.assert_frame_equal(grouped, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "source, neg_target, abs_target",
    [
        ([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
        ([1, 2, None], [-1, -2, None], [1, 2, None]),
        ([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
    ],
)
def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
    """Check unary ``-``, ``+`` and ``abs`` on a signed masked integer array."""
    dtype = any_signed_int_ea_dtype
    arr = pd.array(source, dtype=dtype)

    negated = -arr
    positive = +arr
    absolute = abs(arr)

    expected_neg = pd.array(neg_target, dtype=dtype)
    expected_abs = pd.array(abs_target, dtype=dtype)

    tm.assert_extension_array_equal(negated, expected_neg)
    tm.assert_extension_array_equal(positive, arr)
    # Unary plus must not share memory with its operand.
    assert not tm.shares_memory(positive, arr)
    tm.assert_extension_array_equal(absolute, expected_abs)
|  | ||||
|  | ||||
def test_values_multiplying_large_series_by_NA():
    """Multiplying a 10001-element Series by ``pd.NA`` gives all-NA values."""
    # GH#33701
    length = 10001
    product = pd.NA * pd.Series(np.zeros(length))
    all_missing = pd.Series([pd.NA] * length)

    tm.assert_series_equal(product, all_missing)
|  | ||||
|  | ||||
def test_bitwise(dtype):
    """Check ``|``, ``&`` and ``^`` between masked arrays, and rejection of Float64."""
    lhs = pd.array([1, None, 3, 4], dtype=dtype)
    rhs = pd.array([None, 3, 5, 4], dtype=dtype)

    ored = lhs | rhs
    tm.assert_extension_array_equal(
        ored, pd.array([None, None, 3 | 5, 4 | 4], dtype=dtype)
    )

    anded = lhs & rhs
    tm.assert_extension_array_equal(
        anded, pd.array([None, None, 3 & 5, 4 & 4], dtype=dtype)
    )

    xored = lhs ^ rhs
    tm.assert_extension_array_equal(
        xored, pd.array([None, None, 3 ^ 5, 4 ^ 4], dtype=dtype)
    )

    # TODO: desired behavior when operating with boolean?  defer?

    # Bitwise ops against a Float64 array must raise.
    as_float = rhs.astype("Float64")
    err = "unsupported operand type"
    with pytest.raises(TypeError, match=err):
        lhs | as_float
    with pytest.raises(TypeError, match=err):
        lhs & as_float
    with pytest.raises(TypeError, match=err):
        lhs ^ as_float
| @ -0,0 +1,39 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.arrays.masked_shared import ( | ||||
|     ComparisonOps, | ||||
|     NumericOps, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestComparisonOps(NumericOps, ComparisonOps):
    """Comparison tests for masked integer arrays, built on the shared base classes."""

    @pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1])
    def test_scalar(self, other, comparison_op, dtype):
        """Run the shared scalar-comparison test over this set of scalars."""
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_compare_to_int(self, dtype, comparison_op):
        """Comparing with an int matches the float Series result, with NA kept."""
        # GH 28930
        dunder = f"__{comparison_op.__name__}__"
        masked = pd.Series([1, None, 3], dtype=dtype)
        floats = pd.Series([1, None, 3], dtype="float")

        result = getattr(masked, dunder)(2)

        expected = getattr(floats, dunder)(2).astype("boolean")
        # The float comparison has no NA; restore it at the missing positions.
        expected[floats.isna()] = pd.NA

        tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_equals():
    """Arrays with equal values but different integer dtypes are not ``equals``."""
    # GH-30652
    # equals is generally tested in /tests/extension/base/methods, but this
    # specifically tests that two arrays of the same class but different dtype
    # do not evaluate equal
    values = [1, 2, None]
    wide = pd.array(values, dtype="Int64")
    narrow = pd.array(values, dtype="Int32")
    assert wide.equals(narrow) is False
| @ -0,0 +1,69 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "Int64"], "Int64"),
        (["UInt64", "UInt64"], "UInt64"),
        (["Int8", "Int8"], "Int8"),
        (["Int8", "Int16"], "Int16"),
        (["UInt8", "Int8"], "Int16"),
        (["Int32", "UInt32"], "Int64"),
        (["Int64", "UInt64"], "Float64"),
        (["Int64", "boolean"], "object"),
        (["UInt8", "boolean"], "object"),
    ],
)
def test_concat_series(to_concat_dtypes, result_dtype):
    """Concatenating masked Series yields ``result_dtype`` in either order."""
    # we expect the same dtypes as we would get with non-masked inputs,
    #  just masked where available.
    values = [0, 1, pd.NA]

    # order doesn't matter for result
    for dtypes in (to_concat_dtypes, to_concat_dtypes[::-1]):
        pieces = [pd.Series(values, dtype=t) for t in dtypes]
        result = pd.concat(pieces)

        object_pieces = [pd.Series(values, dtype=object)] * 2
        expected = pd.concat(object_pieces).astype(result_dtype)
        tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "int64"], "Int64"),
        (["UInt64", "uint64"], "UInt64"),
        (["Int8", "int8"], "Int8"),
        (["Int8", "int16"], "Int16"),
        (["UInt8", "int8"], "Int16"),
        (["Int32", "uint32"], "Int64"),
        (["Int64", "uint64"], "Float64"),
        (["Int64", "bool"], "object"),
        (["UInt8", "bool"], "object"),
    ],
)
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
    """Concatenating a masked Series with a numpy-backed one, in both orders."""
    # we expect the same dtypes as we would get with non-masked inputs,
    #  just masked where available.
    masked_dtype, numpy_dtype = to_concat_dtypes[0], to_concat_dtypes[1]
    masked = pd.Series([0, 1, pd.NA], dtype=masked_dtype)
    plain = pd.Series(np.array([0, 1], dtype=numpy_dtype))

    masked_first = pd.concat([masked, plain], ignore_index=True)
    expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(result_dtype)
    tm.assert_series_equal(masked_first, expected)

    # order doesn't matter for result
    plain_first = pd.concat([plain, masked], ignore_index=True)
    expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(result_dtype)
    tm.assert_series_equal(plain_first, expected)
| @ -0,0 +1,245 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import is_integer | ||||
| from pandas.core.arrays import IntegerArray | ||||
| from pandas.core.arrays.integer import ( | ||||
|     Int8Dtype, | ||||
|     Int32Dtype, | ||||
|     Int64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[pd.array, IntegerArray._from_sequence])
def constructor(request):
    """Fixture yielding each way of building an IntegerArray from a sequence.

    Parametrized over ``pd.array`` and ``IntegerArray._from_sequence``;
    used to test dtype conversions.
    """
    build = request.param
    return build
|  | ||||
|  | ||||
def test_uses_pandas_na():
    """A missing entry in an Int64 array is returned as ``pd.NA``."""
    arr = pd.array([1, None], dtype=Int64Dtype())
    missing = arr[1]
    assert missing is pd.NA
|  | ||||
|  | ||||
def test_from_dtype_from_float(data):
    """Rebuild a Series from float, list and ndarray forms of ``data``."""
    # construct from our dtype & string dtype
    dtype = data.dtype
    dtype_name = str(dtype)

    # from float
    as_float = data.to_numpy(na_value=np.nan, dtype="float")
    tm.assert_series_equal(pd.Series(as_float, dtype=dtype_name), pd.Series(data))

    # from int / list
    as_list = np.array(data).tolist()
    tm.assert_series_equal(pd.Series(as_list, dtype=dtype_name), pd.Series(data))

    # from int / array
    expected = pd.Series(data).dropna().reset_index(drop=True)
    without_na = np.array(data.dropna()).astype(np.dtype(dtype.type))
    tm.assert_series_equal(pd.Series(without_na, dtype=dtype_name), expected)
|  | ||||
|  | ||||
def test_conversions(data_missing):
    """Cast a masked column to object and check both values and scalar types."""
    frame = pd.DataFrame({"A": data_missing})

    # astype to object series
    as_object = frame["A"].astype("object")
    expected_series = pd.Series(np.array([pd.NA, 1], dtype=object), name="A")
    tm.assert_series_equal(as_object, expected_series)

    # convert to object ndarray
    # we assert that we are exactly equal
    # including type conversions of scalars
    result = frame["A"].astype("object").values
    expected = np.array([pd.NA, 1], dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    for actual, wanted in zip(result, expected):
        if pd.isnull(actual):
            assert pd.isnull(wanted)
            continue
        assert actual == wanted
        if is_integer(actual):
            assert is_integer(wanted)
        else:
            assert type(actual) == type(wanted)
|  | ||||
|  | ||||
def test_integer_array_constructor():
    """``IntegerArray(values, mask)`` needs numpy int values and a numpy bool mask."""
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    built = IntegerArray(values, mask)
    tm.assert_extension_array_equal(
        built, pd.array([1, 2, 3, np.nan], dtype="Int64")
    )

    # Lists, or float values, are rejected with a pointer to pd.array.
    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    bad_inputs = [
        (values.tolist(), mask),
        (values, mask.tolist()),
        (values.astype(float), mask),
    ]
    for bad_values, bad_mask in bad_inputs:
        with pytest.raises(TypeError, match=msg):
            IntegerArray(bad_values, bad_mask)

    # The mask is a required argument.
    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        IntegerArray(values)
|  | ||||
|  | ||||
def test_integer_array_constructor_copy():
    """The constructor keeps its inputs unless ``copy=True`` is passed."""
    values = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, False, False, True], dtype="bool")

    # Default: the very same ndarray objects are stored.
    shared = IntegerArray(values, mask)
    assert shared._data is values
    assert shared._mask is mask

    # copy=True: different objects are stored.
    copied = IntegerArray(values, mask, copy=True)
    assert copied._data is not values
    assert copied._mask is not mask
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, np.nan]),
        ([None], [np.nan]),
        ([None, np.nan], [np.nan, np.nan]),
        ([np.nan, np.nan], [np.nan, np.nan]),
    ],
)
def test_to_integer_array_none_is_nan(a, b):
    """``None`` and ``np.nan`` produce equal Int64 arrays."""
    from_a = pd.array(a, dtype="Int64")
    from_b = pd.array(b, dtype="Int64")
    tm.assert_extension_array_equal(from_a, from_b)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
    ],
)
def test_to_integer_array_error(values):
    """Inputs that cannot become an IntegerArray raise ValueError or TypeError."""
    # error in converting existing arrays to IntegerArrays
    accepted_messages = (
        r"cannot be converted to IntegerDtype",
        r"invalid literal for int\(\) with base 10:",
        r"values must be a 1D list-like",
        r"Cannot pass scalar",
        r"int\(\) argument must be a string",
    )
    msg = "|".join(accepted_messages)
    errors = (ValueError, TypeError)

    with pytest.raises(errors, match=msg):
        pd.array(values, dtype="Int64")

    with pytest.raises(errors, match=msg):
        IntegerArray._from_sequence(values)
|  | ||||
|  | ||||
def test_to_integer_array_inferred_dtype(constructor):
    """The masked dtype follows the input's numpy dtype, defaulting to Int64."""
    # if values has dtype -> respect it
    from_int8 = constructor(np.array([1, 2], dtype="int8"))
    assert from_int8.dtype == Int8Dtype()
    from_int32 = constructor(np.array([1, 2], dtype="int32"))
    assert from_int32.dtype == Int32Dtype()

    # if values have no dtype -> always int64
    from_list = constructor([1, 2])
    assert from_list.dtype == Int64Dtype()
|  | ||||
|  | ||||
def test_to_integer_array_dtype_keyword(constructor):
    """An explicit ``dtype`` keyword decides the resulting masked dtype."""
    from_list = constructor([1, 2], dtype="Int8")
    assert from_list.dtype == Int8Dtype()

    # if values has dtype -> override it
    int8_values = np.array([1, 2], dtype="int8")
    overridden = constructor(int8_values, dtype="Int32")
    assert overridden.dtype == Int32Dtype()
|  | ||||
|  | ||||
def test_to_integer_array_float():
    """Whole-number floats convert to Int64; fractional floats raise TypeError."""
    converted = IntegerArray._from_sequence([1.0, 2.0], dtype="Int64")
    tm.assert_extension_array_equal(converted, pd.array([1, 2], dtype="Int64"))

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        IntegerArray._from_sequence([1.5, 2.0], dtype="Int64")

    # for float dtypes, the itemsize is not preserved
    float32_values = np.array([1.0, 2.0], dtype="float32")
    from_float32 = IntegerArray._from_sequence(float32_values, dtype="Int64")
    assert from_float32.dtype == Int64Dtype()
|  | ||||
|  | ||||
def test_to_integer_array_str():
    """Integer-like strings parse to Int64; empty or decimal strings raise."""
    parsed = IntegerArray._from_sequence(["1", "2", None], dtype="Int64")
    tm.assert_extension_array_equal(parsed, pd.array([1, 2, np.nan], dtype="Int64"))

    bad_literal = r"invalid literal for int\(\) with base 10: .*"
    for unparsable in (["1", "2", ""], ["1.5", "2.0"]):
        with pytest.raises(ValueError, match=bad_literal):
            IntegerArray._from_sequence(unparsable, dtype="Int64")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "bool_values, int_values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Int64Dtype(), Int64Dtype()),
        ([False, True], [0, 1], "Int64", Int64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),
    ],
)
def test_to_integer_array_bool(
    constructor, bool_values, int_values, target_dtype, expected_dtype
):
    """Boolean input converts to the same array as the matching 0/1 integers."""
    from_bools = constructor(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_ints = pd.array(int_values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_ints)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values, to_dtype, result_dtype",
    [
        (np.array([1], dtype="int64"), None, Int64Dtype),
        (np.array([1, np.nan]), None, Int64Dtype),
        (np.array([1, np.nan]), "int8", Int8Dtype),
    ],
)
def test_to_integer_array(values, to_dtype, result_dtype):
    """``_from_sequence`` on an ndarray gives the expected masked dtype and values."""
    # convert existing arrays to IntegerArrays
    target = result_dtype()
    converted = IntegerArray._from_sequence(values, dtype=to_dtype)
    assert converted.dtype == target

    tm.assert_extension_array_equal(converted, pd.array(values, dtype=target))
|  | ||||
|  | ||||
def test_integer_array_from_boolean():
    """Object-dtype booleans convert to Int64 the same way as bool-dtype ones."""
    # GH31104
    from_bool_dtype = pd.array(np.array([True, False]), dtype="Int64")
    from_object_dtype = pd.array(np.array([True, False], dtype=object), dtype="Int64")
    tm.assert_extension_array_equal(from_object_dtype, from_bool_dtype)
| @ -0,0 +1,301 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.generic import ABCIndex | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.integer import ( | ||||
|     Int8Dtype, | ||||
|     UInt32Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_dtypes(dtype):
    """Smoke test: the numpy kind matches signedness and the dtype has a name."""
    # smoke tests on auto dtype construction
    expected_kind = "i" if dtype.is_signed_integer else "u"
    assert np.dtype(dtype.type).kind == expected_kind
    assert dtype.name is not None
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """Reductions that preserve the integer dtype on an Int64 column.

    Parameters
    ----------
    op : str
        Name of the reduction method, called both on the Series and on
        the groupby object.

    Notes
    -----
    ``mean`` would also work here, but generally it is a float return
    value, so it is not part of the parametrization.
    """
    df = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # Reduction on the masked column itself: every parametrized op is
    # expected to return a numpy integer scalar.
    result = getattr(df.C, op)()
    assert isinstance(result, np.int64)

    # Same reduction through groupby: column C stays Int64.
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_astype_nansafe():
    """Casting an array containing NA to a numpy integer dtype raises ValueError."""
    # see gh-22343
    with_na = pd.array([np.nan, 1, 2], dtype="Int8")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        with_na.astype("uint32")
|  | ||||
|  | ||||
@pytest.mark.parametrize("dropna", [True, False])
def test_construct_index(all_data, dropna):
    """Building an Index from masked data keeps the masked dtype."""
    # ensure that we do not coerce to different Index dtype or non-index
    head = all_data[:10]
    other = np.array(head[~head.isna()]) if dropna else head

    result = pd.Index(pd.array(other, dtype=head.dtype))
    expected = pd.Index(other, dtype=head.dtype)
    assert head.dtype == expected.dtype  # dont coerce to object

    tm.assert_index_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("dropna", [True, False])
def test_astype_index(all_data, dropna):
    """``Index.astype`` to a masked dtype matches the route through object."""
    # as an int/uint index to Index
    head = all_data[:10]
    other = head[~head.isna()] if dropna else head

    masked_dtype = head.dtype
    idx = pd.Index(np.array(other))
    assert isinstance(idx, ABCIndex)

    direct = idx.astype(masked_dtype)
    via_object = idx.astype(object).astype(masked_dtype)
    tm.assert_index_equal(direct, via_object)
|  | ||||
|  | ||||
def test_astype(all_data):
    """Cast Series of masked data to masked, numpy and object dtypes."""
    head = all_data[:10]
    own_dtype = head.dtype
    numpy_dtype = own_dtype.numpy_dtype
    other_dtype = Int8Dtype()

    without_na = head[~head.isna()]
    with_na = head

    # --- data without missing values ---

    # cast to its own masked dtype
    result = pd.Series(without_na).astype(own_dtype)
    tm.assert_series_equal(result, pd.Series(without_na))

    # cast to a different masked dtype
    result = pd.Series(without_na).astype(other_dtype)
    tm.assert_series_equal(result, pd.Series(without_na, dtype=other_dtype))

    # cast to the matching numpy dtype
    result = pd.Series(without_na).astype(numpy_dtype)
    expected = pd.Series(without_na._data.astype(numpy_dtype))
    tm.assert_series_equal(result, expected)

    # --- data that may contain missing values ---

    # cast to its own masked dtype
    result = pd.Series(with_na).astype(own_dtype)
    tm.assert_series_equal(result, pd.Series(with_na))

    # cast to a different masked dtype
    result = pd.Series(with_na).astype(other_dtype)
    tm.assert_series_equal(result, pd.Series(with_na, dtype=other_dtype))

    # cast to the matching numpy dtype is expected to raise
    ser = pd.Series(with_na)
    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        ser.astype(numpy_dtype)

    # cast to object
    result = pd.Series(with_na).astype("object")
    expected = pd.Series(np.asarray(with_na, dtype=object))
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_astype_copy():
    """Check the ``copy`` flag of ``astype`` for same-dtype and cross-dtype casts."""
    values = [1, 2, 3, None]
    arr = pd.array(values, dtype="Int64")
    pristine = pd.array(values, dtype="Int64")

    # copy=True -> ensure both data and mask are actual copies
    copied = arr.astype("Int64", copy=True)
    assert copied is not arr
    assert not tm.shares_memory(copied, arr)
    copied[0] = 10
    tm.assert_extension_array_equal(arr, pristine)
    copied[0] = pd.NA
    tm.assert_extension_array_equal(arr, pristine)

    # copy=False -> the same object comes back, so writes are visible in arr
    same = arr.astype("Int64", copy=False)
    assert same is arr
    assert np.shares_memory(same._data, arr._data)
    assert np.shares_memory(same._mask, arr._mask)
    same[0] = 10
    assert arr[0] == 10
    same[0] = pd.NA
    assert arr[0] is pd.NA

    # astype to different dtype -> always needs a copy -> even with copy=False
    # we need to ensure that also the mask is actually copied
    arr = pd.array(values, dtype="Int64")
    pristine = pd.array(values, dtype="Int64")

    converted = arr.astype("Int32", copy=False)
    assert not tm.shares_memory(converted, arr)
    converted[0] = 10
    tm.assert_extension_array_equal(arr, pristine)
    converted[0] = pd.NA
    tm.assert_extension_array_equal(arr, pristine)
|  | ||||
|  | ||||
def test_astype_to_larger_numpy():
    """Casting a 32-bit masked array to the 64-bit numpy dtype gives an ndarray."""
    for masked_dtype, numpy_dtype in (("Int32", "int64"), ("UInt32", "uint64")):
        masked = pd.array([1, 2], dtype=masked_dtype)
        widened = masked.astype(numpy_dtype)
        tm.assert_numpy_array_equal(widened, np.array([1, 2], dtype=numpy_dtype))
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"]) | ||||
| def test_astype_specific_casting(dtype): | ||||
|     s = pd.Series([1, 2, 3], dtype="Int64") | ||||
|     result = s.astype(dtype) | ||||
|     expected = pd.Series([1, 2, 3], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     s = pd.Series([1, 2, 3, None], dtype="Int64") | ||||
|     result = s.astype(dtype) | ||||
|     expected = pd.Series([1, 2, 3, None], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_astype_floating(): | ||||
|     arr = pd.array([1, 2, None], dtype="Int64") | ||||
|     result = arr.astype("Float64") | ||||
|     expected = pd.array([1.0, 2.0, None], dtype="Float64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_astype_dt64(): | ||||
|     # GH#32435 | ||||
|     arr = pd.array([1, 2, 3, pd.NA]) * 10**9 | ||||
|  | ||||
|     result = arr.astype("datetime64[ns]") | ||||
|  | ||||
|     expected = np.array([1, 2, 3, "NaT"], dtype="M8[s]").astype("M8[ns]") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_construct_cast_invalid(dtype): | ||||
|     msg = "cannot safely" | ||||
|     arr = [1.2, 2.3, 3.7] | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         pd.array(arr, dtype=dtype) | ||||
|  | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         pd.Series(arr).astype(dtype) | ||||
|  | ||||
|     arr = [1.2, 2.3, 3.7, np.nan] | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         pd.array(arr, dtype=dtype) | ||||
|  | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         pd.Series(arr).astype(dtype) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("in_series", [True, False]) | ||||
| def test_to_numpy_na_nan(in_series): | ||||
|     a = pd.array([0, 1, None], dtype="Int64") | ||||
|     if in_series: | ||||
|         a = pd.Series(a) | ||||
|  | ||||
|     result = a.to_numpy(dtype="float64", na_value=np.nan) | ||||
|     expected = np.array([0.0, 1.0, np.nan], dtype="float64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = a.to_numpy(dtype="int64", na_value=-1) | ||||
|     expected = np.array([0, 1, -1], dtype="int64") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     result = a.to_numpy(dtype="bool", na_value=False) | ||||
|     expected = np.array([False, True, False], dtype="bool") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("in_series", [True, False]) | ||||
| @pytest.mark.parametrize("dtype", ["int32", "int64", "bool"]) | ||||
| def test_to_numpy_dtype(dtype, in_series): | ||||
|     a = pd.array([0, 1], dtype="Int64") | ||||
|     if in_series: | ||||
|         a = pd.Series(a) | ||||
|  | ||||
|     result = a.to_numpy(dtype=dtype) | ||||
|     expected = np.array([0, 1], dtype=dtype) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", ["int64", "bool"]) | ||||
| def test_to_numpy_na_raises(dtype): | ||||
|     a = pd.array([0, 1, None], dtype="Int64") | ||||
|     with pytest.raises(ValueError, match=dtype): | ||||
|         a.to_numpy(dtype=dtype) | ||||
|  | ||||
|  | ||||
| def test_astype_str(using_infer_string): | ||||
|     a = pd.array([1, 2, None], dtype="Int64") | ||||
|  | ||||
|     if using_infer_string: | ||||
|         expected = pd.array(["1", "2", None], dtype=pd.StringDtype(na_value=np.nan)) | ||||
|  | ||||
|         tm.assert_extension_array_equal(a.astype(str), expected) | ||||
|         tm.assert_extension_array_equal(a.astype("str"), expected) | ||||
|     else: | ||||
|         expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21") | ||||
|  | ||||
|         tm.assert_numpy_array_equal(a.astype(str), expected) | ||||
|         tm.assert_numpy_array_equal(a.astype("str"), expected) | ||||
|  | ||||
|  | ||||
| def test_astype_boolean(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/31102 | ||||
|     a = pd.array([1, 0, -1, 2, None], dtype="Int64") | ||||
|     result = a.astype("boolean") | ||||
|     expected = pd.array([True, False, True, True, None], dtype="boolean") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
| @ -0,0 +1,203 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import FloatingArray | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ufunc", [np.abs, np.sign]) | ||||
| # np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127> | ||||
| @pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning") | ||||
| def test_ufuncs_single_int(ufunc): | ||||
|     a = pd.array([1, 2, -3, np.nan]) | ||||
|     result = ufunc(a) | ||||
|     expected = pd.array(ufunc(a.astype(float)), dtype="Int64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     s = pd.Series(a) | ||||
|     result = ufunc(s) | ||||
|     expected = pd.Series(pd.array(ufunc(a.astype(float)), dtype="Int64")) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt]) | ||||
| def test_ufuncs_single_float(ufunc): | ||||
|     a = pd.array([1, 2, -3, np.nan]) | ||||
|     with np.errstate(invalid="ignore"): | ||||
|         result = ufunc(a) | ||||
|         expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     s = pd.Series(a) | ||||
|     with np.errstate(invalid="ignore"): | ||||
|         result = ufunc(s) | ||||
|     expected = pd.Series(expected) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ufunc", [np.add, np.subtract]) | ||||
| def test_ufuncs_binary_int(ufunc): | ||||
|     # two IntegerArrays | ||||
|     a = pd.array([1, 2, -3, np.nan]) | ||||
|     result = ufunc(a, a) | ||||
|     expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Int64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # IntegerArray with numpy array | ||||
|     arr = np.array([1, 2, 3, 4]) | ||||
|     result = ufunc(a, arr) | ||||
|     expected = pd.array(ufunc(a.astype(float), arr), dtype="Int64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     result = ufunc(arr, a) | ||||
|     expected = pd.array(ufunc(arr, a.astype(float)), dtype="Int64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # IntegerArray with scalar | ||||
|     result = ufunc(a, 1) | ||||
|     expected = pd.array(ufunc(a.astype(float), 1), dtype="Int64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     result = ufunc(1, a) | ||||
|     expected = pd.array(ufunc(1, a.astype(float)), dtype="Int64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_ufunc_binary_output(): | ||||
|     a = pd.array([1, 2, np.nan]) | ||||
|     result = np.modf(a) | ||||
|     expected = np.modf(a.to_numpy(na_value=np.nan, dtype="float")) | ||||
|     expected = (pd.array(expected[0]), pd.array(expected[1])) | ||||
|  | ||||
|     assert isinstance(result, tuple) | ||||
|     assert len(result) == 2 | ||||
|  | ||||
|     for x, y in zip(result, expected): | ||||
|         tm.assert_extension_array_equal(x, y) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("values", [[0, 1], [0, None]]) | ||||
| def test_ufunc_reduce_raises(values): | ||||
|     arr = pd.array(values) | ||||
|  | ||||
|     res = np.add.reduce(arr) | ||||
|     expected = arr.sum(skipna=False) | ||||
|     tm.assert_almost_equal(res, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "pandasmethname, kwargs", | ||||
|     [ | ||||
|         ("var", {"ddof": 0}), | ||||
|         ("var", {"ddof": 1}), | ||||
|         ("std", {"ddof": 0}), | ||||
|         ("std", {"ddof": 1}), | ||||
|         ("kurtosis", {}), | ||||
|         ("skew", {}), | ||||
|         ("sem", {}), | ||||
|     ], | ||||
| ) | ||||
| def test_stat_method(pandasmethname, kwargs): | ||||
|     s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") | ||||
|     pandasmeth = getattr(s, pandasmethname) | ||||
|     result = pandasmeth(**kwargs) | ||||
|     s2 = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64") | ||||
|     pandasmeth = getattr(s2, pandasmethname) | ||||
|     expected = pandasmeth(**kwargs) | ||||
|     assert expected == result | ||||
|  | ||||
|  | ||||
| def test_value_counts_na(): | ||||
|     arr = pd.array([1, 2, 1, pd.NA], dtype="Int64") | ||||
|     result = arr.value_counts(dropna=False) | ||||
|     ex_index = pd.Index([1, 2, pd.NA], dtype="Int64") | ||||
|     assert ex_index.dtype == "Int64" | ||||
|     expected = pd.Series([2, 1, 1], index=ex_index, dtype="Int64", name="count") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = arr.value_counts(dropna=True) | ||||
|     expected = pd.Series([2, 1], index=arr[:2], dtype="Int64", name="count") | ||||
|     assert expected.index.dtype == arr.dtype | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_value_counts_empty(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/33317 | ||||
|     ser = pd.Series([], dtype="Int64") | ||||
|     result = ser.value_counts() | ||||
|     idx = pd.Index([], dtype=ser.dtype) | ||||
|     assert idx.dtype == ser.dtype | ||||
|     expected = pd.Series([], index=idx, dtype="Int64", name="count") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_value_counts_with_normalize(): | ||||
|     # GH 33172 | ||||
|     ser = pd.Series([1, 2, 1, pd.NA], dtype="Int64") | ||||
|     result = ser.value_counts(normalize=True) | ||||
|     expected = pd.Series([2, 1], index=ser[:2], dtype="Float64", name="proportion") / 3 | ||||
|     assert expected.index.dtype == ser.dtype | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("skipna", [True, False]) | ||||
| @pytest.mark.parametrize("min_count", [0, 4]) | ||||
| def test_integer_array_sum(skipna, min_count, any_int_ea_dtype): | ||||
|     dtype = any_int_ea_dtype | ||||
|     arr = pd.array([1, 2, 3, None], dtype=dtype) | ||||
|     result = arr.sum(skipna=skipna, min_count=min_count) | ||||
|     if skipna and min_count == 0: | ||||
|         assert result == 6 | ||||
|     else: | ||||
|         assert result is pd.NA | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("skipna", [True, False]) | ||||
| @pytest.mark.parametrize("method", ["min", "max"]) | ||||
| def test_integer_array_min_max(skipna, method, any_int_ea_dtype): | ||||
|     dtype = any_int_ea_dtype | ||||
|     arr = pd.array([0, 1, None], dtype=dtype) | ||||
|     func = getattr(arr, method) | ||||
|     result = func(skipna=skipna) | ||||
|     if skipna: | ||||
|         assert result == (0 if method == "min" else 1) | ||||
|     else: | ||||
|         assert result is pd.NA | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("skipna", [True, False]) | ||||
| @pytest.mark.parametrize("min_count", [0, 9]) | ||||
| def test_integer_array_prod(skipna, min_count, any_int_ea_dtype): | ||||
|     dtype = any_int_ea_dtype | ||||
|     arr = pd.array([1, 2, None], dtype=dtype) | ||||
|     result = arr.prod(skipna=skipna, min_count=min_count) | ||||
|     if skipna and min_count == 0: | ||||
|         assert result == 2 | ||||
|     else: | ||||
|         assert result is pd.NA | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)] | ||||
| ) | ||||
| def test_integer_array_numpy_sum(values, expected): | ||||
|     arr = pd.array(values, dtype="Int64") | ||||
|     result = np.sum(arr) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", ["sum", "prod", "min", "max"]) | ||||
| def test_dataframe_reductions(op): | ||||
|     # https://github.com/pandas-dev/pandas/pull/32867 | ||||
|     # ensure the integers are not cast to float during reductions | ||||
|     df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")}) | ||||
|     result = df.max() | ||||
|     assert isinstance(result["a"], np.int64) | ||||
|  | ||||
|  | ||||
| # TODO(jreback) - these need testing / are broken | ||||
|  | ||||
| # shift | ||||
|  | ||||
| # set_index (destroys type) | ||||
| @ -0,0 +1,19 @@ | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_array_setitem_nullable_boolean_mask(): | ||||
|     # GH 31446 | ||||
|     ser = pd.Series([1, 2], dtype="Int64") | ||||
|     result = ser.where(ser > 1) | ||||
|     expected = pd.Series([pd.NA, 2], dtype="Int64") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_array_setitem(): | ||||
|     # GH 31446 | ||||
|     arr = pd.Series([1, 2], dtype="Int64").array | ||||
|     arr[arr > 1] = 1 | ||||
|  | ||||
|     expected = pd.array([1, 1], dtype="Int64") | ||||
|     tm.assert_extension_array_equal(arr, expected) | ||||
| @ -0,0 +1,123 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     array, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op, expected", | ||||
|     [ | ||||
|         ["sum", np.int64(3)], | ||||
|         ["prod", np.int64(2)], | ||||
|         ["min", np.int64(1)], | ||||
|         ["max", np.int64(2)], | ||||
|         ["mean", np.float64(1.5)], | ||||
|         ["median", np.float64(1.5)], | ||||
|         ["var", np.float64(0.5)], | ||||
|         ["std", np.float64(0.5**0.5)], | ||||
|         ["skew", pd.NA], | ||||
|         ["kurt", pd.NA], | ||||
|         ["any", True], | ||||
|         ["all", True], | ||||
|     ], | ||||
| ) | ||||
| def test_series_reductions(op, expected): | ||||
|     ser = Series([1, 2], dtype="Int64") | ||||
|     result = getattr(ser, op)() | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op, expected", | ||||
|     [ | ||||
|         ["sum", Series([3], index=["a"], dtype="Int64")], | ||||
|         ["prod", Series([2], index=["a"], dtype="Int64")], | ||||
|         ["min", Series([1], index=["a"], dtype="Int64")], | ||||
|         ["max", Series([2], index=["a"], dtype="Int64")], | ||||
|         ["mean", Series([1.5], index=["a"], dtype="Float64")], | ||||
|         ["median", Series([1.5], index=["a"], dtype="Float64")], | ||||
|         ["var", Series([0.5], index=["a"], dtype="Float64")], | ||||
|         ["std", Series([0.5**0.5], index=["a"], dtype="Float64")], | ||||
|         ["skew", Series([pd.NA], index=["a"], dtype="Float64")], | ||||
|         ["kurt", Series([pd.NA], index=["a"], dtype="Float64")], | ||||
|         ["any", Series([True], index=["a"], dtype="boolean")], | ||||
|         ["all", Series([True], index=["a"], dtype="boolean")], | ||||
|     ], | ||||
| ) | ||||
| def test_dataframe_reductions(op, expected): | ||||
|     df = DataFrame({"a": array([1, 2], dtype="Int64")}) | ||||
|     result = getattr(df, op)() | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op, expected", | ||||
|     [ | ||||
|         ["sum", array([1, 3], dtype="Int64")], | ||||
|         ["prod", array([1, 3], dtype="Int64")], | ||||
|         ["min", array([1, 3], dtype="Int64")], | ||||
|         ["max", array([1, 3], dtype="Int64")], | ||||
|         ["mean", array([1, 3], dtype="Float64")], | ||||
|         ["median", array([1, 3], dtype="Float64")], | ||||
|         ["var", array([pd.NA], dtype="Float64")], | ||||
|         ["std", array([pd.NA], dtype="Float64")], | ||||
|         ["skew", array([pd.NA], dtype="Float64")], | ||||
|         ["any", array([True, True], dtype="boolean")], | ||||
|         ["all", array([True, True], dtype="boolean")], | ||||
|     ], | ||||
| ) | ||||
| def test_groupby_reductions(op, expected): | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "A": ["a", "b", "b"], | ||||
|             "B": array([1, None, 3], dtype="Int64"), | ||||
|         } | ||||
|     ) | ||||
|     result = getattr(df.groupby("A"), op)() | ||||
|     expected = DataFrame(expected, index=pd.Index(["a", "b"], name="A"), columns=["B"]) | ||||
|  | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op, expected", | ||||
|     [ | ||||
|         ["sum", Series([4, 4], index=["B", "C"], dtype="Float64")], | ||||
|         ["prod", Series([3, 3], index=["B", "C"], dtype="Float64")], | ||||
|         ["min", Series([1, 1], index=["B", "C"], dtype="Float64")], | ||||
|         ["max", Series([3, 3], index=["B", "C"], dtype="Float64")], | ||||
|         ["mean", Series([2, 2], index=["B", "C"], dtype="Float64")], | ||||
|         ["median", Series([2, 2], index=["B", "C"], dtype="Float64")], | ||||
|         ["var", Series([2, 2], index=["B", "C"], dtype="Float64")], | ||||
|         ["std", Series([2**0.5, 2**0.5], index=["B", "C"], dtype="Float64")], | ||||
|         ["skew", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")], | ||||
|         ["kurt", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")], | ||||
|         ["any", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")], | ||||
|         ["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")], | ||||
|     ], | ||||
| ) | ||||
| def test_mixed_reductions(op, expected): | ||||
|     df = DataFrame( | ||||
|         { | ||||
|             "A": ["a", "b", "b"], | ||||
|             "B": [1, None, 3], | ||||
|             "C": array([1, None, 3], dtype="Int64"), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     # series | ||||
|     result = getattr(df.C, op)() | ||||
|     tm.assert_equal(result, expected["C"]) | ||||
|  | ||||
|     # frame | ||||
|     if op in ["any", "all"]: | ||||
|         result = getattr(df, op)() | ||||
|     else: | ||||
|         result = getattr(df, op)(numeric_only=True) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,67 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas.core.arrays.integer import ( | ||||
|     Int8Dtype, | ||||
|     Int16Dtype, | ||||
|     Int32Dtype, | ||||
|     Int64Dtype, | ||||
|     UInt8Dtype, | ||||
|     UInt16Dtype, | ||||
|     UInt32Dtype, | ||||
|     UInt64Dtype, | ||||
| ) | ||||
|  | ||||
|  | ||||
| def test_dtypes(dtype): | ||||
|     # smoke tests on auto dtype construction | ||||
|  | ||||
|     if dtype.is_signed_integer: | ||||
|         assert np.dtype(dtype.type).kind == "i" | ||||
|     else: | ||||
|         assert np.dtype(dtype.type).kind == "u" | ||||
|     assert dtype.name is not None | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype, expected", | ||||
|     [ | ||||
|         (Int8Dtype(), "Int8Dtype()"), | ||||
|         (Int16Dtype(), "Int16Dtype()"), | ||||
|         (Int32Dtype(), "Int32Dtype()"), | ||||
|         (Int64Dtype(), "Int64Dtype()"), | ||||
|         (UInt8Dtype(), "UInt8Dtype()"), | ||||
|         (UInt16Dtype(), "UInt16Dtype()"), | ||||
|         (UInt32Dtype(), "UInt32Dtype()"), | ||||
|         (UInt64Dtype(), "UInt64Dtype()"), | ||||
|     ], | ||||
| ) | ||||
| def test_repr_dtype(dtype, expected): | ||||
|     assert repr(dtype) == expected | ||||
|  | ||||
|  | ||||
| def test_repr_array(): | ||||
|     result = repr(pd.array([1, None, 3])) | ||||
|     expected = "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64" | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_repr_array_long(): | ||||
|     data = pd.array([1, 2, None] * 1000) | ||||
|     expected = ( | ||||
|         "<IntegerArray>\n" | ||||
|         "[   1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,\n" | ||||
|         " ...\n" | ||||
|         " <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>]\n" | ||||
|         "Length: 3000, dtype: Int64" | ||||
|     ) | ||||
|     result = repr(data) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_frame_repr(data_missing): | ||||
|     df = pd.DataFrame({"A": data_missing}) | ||||
|     result = repr(df) | ||||
|     expected = "      A\n0  <NA>\n1     1" | ||||
|     assert result == expected | ||||
| @ -0,0 +1,28 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     Index, | ||||
|     IntervalIndex, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestAstype: | ||||
|     @pytest.mark.parametrize("ordered", [True, False]) | ||||
|     def test_astype_categorical_retains_ordered(self, ordered): | ||||
|         index = IntervalIndex.from_breaks(range(5)) | ||||
|         arr = index._data | ||||
|  | ||||
|         dtype = CategoricalDtype(None, ordered=ordered) | ||||
|  | ||||
|         expected = Categorical(list(arr), ordered=ordered) | ||||
|         result = arr.astype(dtype) | ||||
|         assert result.ordered is ordered | ||||
|         tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|         # test IntervalIndex.astype while we're at it. | ||||
|         result = index.astype(dtype) | ||||
|         expected = Index(expected) | ||||
|         tm.assert_index_equal(result, expected) | ||||
| @ -0,0 +1,13 @@ | ||||
| from pandas.core.arrays import IntervalArray | ||||
|  | ||||
|  | ||||
| def test_repr(): | ||||
|     # GH#25022 | ||||
|     arr = IntervalArray.from_tuples([(0, 1), (1, 2)]) | ||||
|     result = repr(arr) | ||||
|     expected = ( | ||||
|         "<IntervalArray>\n" | ||||
|         "[(0, 1], (1, 2]]\n" | ||||
|         "Length: 2, dtype: interval[int64, right]" | ||||
|     ) | ||||
|     assert result == expected | ||||
| @ -0,0 +1,231 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import IntervalArray | ||||
|  | ||||
|  | ||||
| @pytest.fixture( | ||||
|     params=[ | ||||
|         (Index([0, 2, 4]), Index([1, 3, 5])), | ||||
|         (Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])), | ||||
|         (timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)), | ||||
|         (date_range("20170101", periods=3), date_range("20170102", periods=3)), | ||||
|         ( | ||||
|             date_range("20170101", periods=3, tz="US/Eastern"), | ||||
|             date_range("20170102", periods=3, tz="US/Eastern"), | ||||
|         ), | ||||
|     ], | ||||
|     ids=lambda x: str(x[0].dtype), | ||||
| ) | ||||
| def left_right_dtypes(request): | ||||
|     """ | ||||
|     Fixture for building an IntervalArray from various dtypes | ||||
|     """ | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| class TestAttributes: | ||||
|     @pytest.mark.parametrize( | ||||
|         "left, right", | ||||
|         [ | ||||
|             (0, 1), | ||||
|             (Timedelta("0 days"), Timedelta("1 day")), | ||||
|             (Timestamp("2018-01-01"), Timestamp("2018-01-02")), | ||||
|             ( | ||||
|                 Timestamp("2018-01-01", tz="US/Eastern"), | ||||
|                 Timestamp("2018-01-02", tz="US/Eastern"), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("constructor", [IntervalArray, IntervalIndex]) | ||||
|     def test_is_empty(self, constructor, left, right, closed): | ||||
|         # GH27219 | ||||
|         tuples = [(left, left), (left, right), np.nan] | ||||
|         expected = np.array([closed != "both", False, False]) | ||||
|         result = constructor.from_tuples(tuples, closed=closed).is_empty | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestMethods: | ||||
|     """Tests for IntervalArray methods: ``set_closed``, ``_where`` and ``shift``.""" | ||||
|  | ||||
|     @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) | ||||
|     def test_set_closed(self, closed, new_closed): | ||||
|         """``set_closed`` matches an array built directly with the new value.""" | ||||
|         # GH 21670 | ||||
|         array = IntervalArray.from_breaks(range(10), closed=closed) | ||||
|         result = array.set_closed(new_closed) | ||||
|         expected = IntervalArray.from_breaks(range(10), closed=new_closed) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [ | ||||
|             Interval(0, 1, closed="right"), | ||||
|             IntervalArray.from_breaks([1, 2, 3, 4], closed="right"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_where_raises(self, other): | ||||
|         """Mismatched ``closed``: the array method raises, ``Series.where`` works.""" | ||||
|         # GH#45768 The IntervalArray method raises; the Series method coerces | ||||
|         ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4], closed="left")) | ||||
|         mask = np.array([True, False, True]) | ||||
|         match = "'value.closed' is 'right', expected 'left'." | ||||
|         with pytest.raises(ValueError, match=match): | ||||
|             ser.array._where(mask, other) | ||||
|  | ||||
|         # the Series result equals doing the same ``where`` on an object Series | ||||
|         res = ser.where(mask, other=other) | ||||
|         expected = ser.astype(object).where(mask, other) | ||||
|         tm.assert_series_equal(res, expected) | ||||
|  | ||||
|     def test_shift(self): | ||||
|         """Default ``shift`` introduces a NaN interval; a NaT fill value raises.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502 | ||||
|         a = IntervalArray.from_breaks([1, 2, 3]) | ||||
|         result = a.shift() | ||||
|         # int -> float | ||||
|         expected = IntervalArray.from_tuples([(np.nan, np.nan), (1.0, 2.0)]) | ||||
|         tm.assert_interval_array_equal(result, expected) | ||||
|  | ||||
|         msg = "can only insert Interval objects and NA into an IntervalArray" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             a.shift(1, fill_value=pd.NaT) | ||||
|  | ||||
|     def test_shift_datetime(self): | ||||
|         """``shift`` on datetime intervals matches ``take`` with ``allow_fill``.""" | ||||
|         # GH#31502, GH#31504 | ||||
|         a = IntervalArray.from_breaks(date_range("2000", periods=4)) | ||||
|         result = a.shift(2) | ||||
|         # index -1 with allow_fill=True marks the positions filled by the shift | ||||
|         expected = a.take([-1, -1, 0], allow_fill=True) | ||||
|         tm.assert_interval_array_equal(result, expected) | ||||
|  | ||||
|         result = a.shift(-1) | ||||
|         expected = a.take([1, 2, -1], allow_fill=True) | ||||
|         tm.assert_interval_array_equal(result, expected) | ||||
|  | ||||
|         msg = "can only insert Interval objects and NA into an IntervalArray" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             a.shift(1, fill_value=np.timedelta64("NaT", "ns")) | ||||
|  | ||||
|  | ||||
| class TestSetitem: | ||||
|     """Tests for item assignment on IntervalArray.""" | ||||
|  | ||||
|     def test_set_na(self, left_right_dtypes): | ||||
|         """Assign NaT / NaN to the first element for each endpoint dtype.""" | ||||
|         left, right = left_right_dtypes | ||||
|         # work on copies of the fixture's index objects | ||||
|         left = left.copy(deep=True) | ||||
|         right = right.copy(deep=True) | ||||
|         result = IntervalArray.from_arrays(left, right) | ||||
|  | ||||
|         # subtypes other than timedelta ("m") / datetime ("M") must reject NaT | ||||
|         if result.dtype.subtype.kind not in ["m", "M"]: | ||||
|             msg = "'value' should be an interval type, got <.*NaTType'> instead." | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 result[0] = pd.NaT | ||||
|         # integer subtypes must reject NaN; nothing further is checked for them | ||||
|         if result.dtype.subtype.kind in ["i", "u"]: | ||||
|             msg = "Cannot set float NaN to integer-backed IntervalArray" | ||||
|             # GH#45484 TypeError, not ValueError, matches what we get with | ||||
|             # non-NA un-holdable value. | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 result[0] = np.nan | ||||
|             return | ||||
|  | ||||
|         result[0] = np.nan | ||||
|  | ||||
|         # expected: first element replaced by each side's own NA value | ||||
|         expected_left = Index([left._na_value] + list(left[1:])) | ||||
|         expected_right = Index([right._na_value] + list(right[1:])) | ||||
|         expected = IntervalArray.from_arrays(expected_left, expected_right) | ||||
|  | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_setitem_mismatched_closed(self): | ||||
|         """Assigning values with a different ``closed`` raises and changes nothing.""" | ||||
|         arr = IntervalArray.from_breaks(range(4)) | ||||
|         # snapshot used for the final comparison | ||||
|         orig = arr.copy() | ||||
|         other = arr.set_closed("both") | ||||
|  | ||||
|         msg = "'value.closed' is 'both', expected 'right'" | ||||
|         # scalar, slice, empty slice, full slice, list, object and category values | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr[0] = other[0] | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr[:1] = other[:1] | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr[:0] = other[:0] | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr[:] = other[::-1] | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr[:] = list(other[::-1]) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr[:] = other[::-1].astype(object) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr[:] = other[::-1].astype("category") | ||||
|  | ||||
|         # empty list should be no-op | ||||
|         arr[:0] = [] | ||||
|         tm.assert_interval_array_equal(arr, orig) | ||||
|  | ||||
|  | ||||
| class TestReductions: | ||||
|     """Tests for ``min`` / ``max`` on interval data.""" | ||||
|  | ||||
|     def test_min_max_invalid_axis(self, left_right_dtypes): | ||||
|         """Out-of-range or non-integer ``axis`` values raise.""" | ||||
|         left, right = left_right_dtypes | ||||
|         left = left.copy(deep=True) | ||||
|         right = right.copy(deep=True) | ||||
|         arr = IntervalArray.from_arrays(left, right) | ||||
|  | ||||
|         # integer axes outside the array's dimensions -> ValueError | ||||
|         msg = "`axis` must be fewer than the number of dimensions" | ||||
|         for axis in [-2, 1]: | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 arr.min(axis=axis) | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 arr.max(axis=axis) | ||||
|  | ||||
|         # a string axis -> TypeError with a comparison message | ||||
|         msg = "'>=' not supported between" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr.min(axis="foo") | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr.max(axis="foo") | ||||
|  | ||||
|     def test_min_max(self, left_right_dtypes, index_or_series_or_array): | ||||
|         """``min``/``max`` return the first/last interval of the sorted data.""" | ||||
|         # GH#44746 | ||||
|         left, right = left_right_dtypes | ||||
|         left = left.copy(deep=True) | ||||
|         right = right.copy(deep=True) | ||||
|         arr = IntervalArray.from_arrays(left, right) | ||||
|  | ||||
|         # The expected results below are only valid if monotonic | ||||
|         assert left.is_monotonic_increasing | ||||
|         assert Index(arr).is_monotonic_increasing | ||||
|  | ||||
|         # taken before shuffling, while the data is still in increasing order | ||||
|         MIN = arr[0] | ||||
|         MAX = arr[-1] | ||||
|  | ||||
|         # reorder with a fixed seed before reducing | ||||
|         indexer = np.arange(len(arr)) | ||||
|         np.random.default_rng(2).shuffle(indexer) | ||||
|         arr = arr.take(indexer) | ||||
|  | ||||
|         # second array with a missing value inserted at position 2 | ||||
|         arr_na = arr.insert(2, np.nan) | ||||
|  | ||||
|         # wrap both in the container type supplied by the fixture | ||||
|         arr = index_or_series_or_array(arr) | ||||
|         arr_na = index_or_series_or_array(arr_na) | ||||
|  | ||||
|         # no missing values: ``skipna`` makes no difference | ||||
|         for skipna in [True, False]: | ||||
|             res = arr.min(skipna=skipna) | ||||
|             assert res == MIN | ||||
|             assert type(res) == type(MIN) | ||||
|  | ||||
|             res = arr.max(skipna=skipna) | ||||
|             assert res == MAX | ||||
|             assert type(res) == type(MAX) | ||||
|  | ||||
|         # with a missing value and skipna=False the result is NaN | ||||
|         res = arr_na.min(skipna=False) | ||||
|         assert np.isnan(res) | ||||
|         res = arr_na.max(skipna=False) | ||||
|         assert np.isnan(res) | ||||
|  | ||||
|         # with skipna=True the missing value is ignored | ||||
|         res = arr_na.min(skipna=True) | ||||
|         assert res == MIN | ||||
|         assert type(res) == type(MIN) | ||||
|         res = arr_na.max(skipna=True) | ||||
|         assert res == MAX | ||||
|         assert type(res) == type(MAX) | ||||
| @ -0,0 +1,160 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import IntervalArray | ||||
|  | ||||
|  | ||||
def test_arrow_extension_type():
    """
    ArrowIntervalType instances built alike compare and hash equal; ones that
    differ in ``closed`` do not.
    """
    pa = pytest.importorskip("pyarrow")

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    left_closed = ArrowIntervalType(pa.int64(), "left")
    left_closed_twin = ArrowIntervalType(pa.int64(), "left")
    right_closed = ArrowIntervalType(pa.int64(), "right")

    assert left_closed.closed == "left"

    # equality
    assert left_closed == left_closed_twin
    assert left_closed != right_closed

    # hashing
    assert hash(left_closed) == hash(left_closed_twin)
    assert hash(left_closed) != hash(right_closed)
|  | ||||
|  | ||||
def test_arrow_array():
    """
    Convert an integer IntervalArray with ``pa.array`` and inspect the
    resulting extension type, its struct storage, and rejected target types.
    """
    pa = pytest.importorskip("pyarrow")

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    intervals = pd.interval_range(1, 5, freq=1).array

    converted = pa.array(intervals)
    arrow_type = converted.type
    assert isinstance(arrow_type, ArrowIntervalType)
    assert arrow_type.closed == intervals.closed
    assert arrow_type.subtype == pa.int64()

    storage = converted.storage
    assert storage.field("left").equals(pa.array([1, 2, 3, 4], type="int64"))
    assert storage.field("right").equals(pa.array([2, 3, 4, 5], type="int64"))

    struct_expected = pa.array([{"left": lo, "right": lo + 1} for lo in range(1, 5)])
    assert storage.equals(struct_expected)

    # convert to its storage type
    as_storage = pa.array(intervals, type=struct_expected.type)
    assert as_storage.equals(struct_expected)

    # unsupported conversions
    unsupported = "Not supported to convert IntervalArray"
    bad_types = ["float64", ArrowIntervalType(pa.float64(), "left")]
    for bad_type in bad_types:
        with pytest.raises(TypeError, match=unsupported):
            pa.array(intervals, type=bad_type)
|  | ||||
|  | ||||
def test_arrow_array_missing():
    """
    A missing interval converts to a null in both struct fields and to a
    null entry of the struct array itself.
    """
    pa = pytest.importorskip("pyarrow")

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
    arr[1] = None

    converted = pa.array(arr)
    arrow_type = converted.type
    assert isinstance(arrow_type, ArrowIntervalType)
    assert arrow_type.closed == arr.closed
    assert arrow_type.subtype == pa.float64()

    storage = converted.storage

    # fields have missing values (not NaN)
    expected_fields = {
        "left": pa.array([0.0, None, 2.0], type="float64"),
        "right": pa.array([1.0, None, 3.0], type="float64"),
    }
    for field_name, expected_field in expected_fields.items():
        assert storage.field(field_name).equals(expected_field)

    # structarray itself also has missing values on the array level
    rows = [
        {"left": 0.0, "right": 1.0},
        {"left": None, "right": None},
        {"left": 2.0, "right": 3.0},
    ]
    null_rows = np.array([False, True, False])
    expected = pa.StructArray.from_pandas(rows, mask=null_rows)
    assert storage.equals(expected)
|  | ||||
|  | ||||
@pytest.mark.filterwarnings(
    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.mark.parametrize(
    "breaks",
    [[0.0, 1.0, 2.0, 3.0], pd.date_range("2017", periods=4, freq="D")],
    ids=["float", "datetime64[ns]"],
)
def test_arrow_table_roundtrip(breaks):
    """
    A frame holding an interval column (with one missing entry) survives
    ``pa.table(...).to_pandas()``, also after concatenation and when the
    column has zero chunks.
    """
    pa = pytest.importorskip("pyarrow")

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    intervals = IntervalArray.from_breaks(breaks)
    intervals[1] = None
    df = pd.DataFrame({"a": intervals})

    table = pa.table(df)
    assert isinstance(table.field("a").type, ArrowIntervalType)
    roundtripped = table.to_pandas()
    assert isinstance(roundtripped["a"].dtype, pd.IntervalDtype)
    tm.assert_frame_equal(roundtripped, df)

    # two copies of the table stacked on top of each other
    doubled = pa.concat_tables([table, table]).to_pandas()
    expected = pd.concat([df, df], ignore_index=True)
    tm.assert_frame_equal(doubled, expected)

    # GH#41040
    no_chunks = pa.chunked_array([], type=table.column(0).type)
    empty_table = pa.table([no_chunks], schema=table.schema)
    tm.assert_frame_equal(empty_table.to_pandas(), expected[0:0])
|  | ||||
|  | ||||
@pytest.mark.filterwarnings(
    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.mark.parametrize(
    "breaks",
    [[0.0, 1.0, 2.0, 3.0], pd.date_range("2017", periods=4, freq="D")],
    ids=["float", "datetime64[ns]"],
)
def test_arrow_table_roundtrip_without_metadata(breaks):
    """
    The interval column still comes back as IntervalDtype after the table's
    schema metadata has been removed.
    """
    pa = pytest.importorskip("pyarrow")

    intervals = IntervalArray.from_breaks(breaks)
    intervals[1] = None
    df = pd.DataFrame({"a": intervals})

    # remove the metadata
    stripped = pa.table(df).replace_schema_metadata()
    assert stripped.schema.metadata is None

    roundtripped = stripped.to_pandas()
    assert isinstance(roundtripped["a"].dtype, pd.IntervalDtype)
    tm.assert_frame_equal(roundtripped, df)
|  | ||||
|  | ||||
def test_from_arrow_from_raw_struct_array():
    """
    ``IntervalDtype.__from_arrow__`` accepts a plain struct array, both
    directly and wrapped in a chunked array.
    """
    # in case pyarrow lost the Interval extension type (eg on parquet roundtrip
    # with datetime64[ns] subtype, see GH-45881), still allow conversion
    # from arrow to IntervalArray
    pa = pytest.importorskip("pyarrow")

    struct_arr = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}])
    dtype = pd.IntervalDtype(np.dtype("int64"), closed="neither")

    breaks = np.array([0, 1, 2], dtype="int64")
    expected = IntervalArray.from_breaks(breaks, closed="neither")

    for source in (struct_arr, pa.chunked_array([struct_arr])):
        tm.assert_extension_array_equal(dtype.__from_arrow__(source), expected)
| @ -0,0 +1,93 @@ | ||||
| """Tests for Interval-Interval operations, such as overlaps, contains, etc.""" | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Interval, | ||||
|     IntervalIndex, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import IntervalArray | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[IntervalArray, IntervalIndex])
def constructor(request):
    """
    Parametrized fixture returning each interval container class
    (IntervalArray, then IntervalIndex).
    """
    container_cls = request.param
    return container_cls
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        (Timedelta("0 days"), Timedelta("1 day")),
        (Timestamp("2018-01-01"), Timedelta("1 day")),
        (0, 1),
    ],
    ids=lambda x: type(x[0]).__name__,
)
def start_shift(request):
    """
    Parametrized fixture returning a ``(start, shift)`` pair.

    ``shift`` can be added to ``start`` (any number of times) to produce
    interval endpoints of the matching type; the test id is the type name
    of ``start``.
    """
    start_and_shift = request.param
    return start_and_shift
|  | ||||
|  | ||||
class TestOverlaps:
    """Tests for ``overlaps`` on the interval container classes."""

    def test_overlaps_interval(self, constructor, start_shift, closed, other_closed):
        """Compare a container of six intervals against one scalar Interval."""
        start, shift = start_shift
        interval = Interval(start, start + 3 * shift, other_closed)

        # each candidate is named after its relation to ``interval``
        identical = (start, start + 3 * shift)
        nested = (start + shift, start + 2 * shift)
        spanning = (start - shift, start + 4 * shift)
        partial = (start + 2 * shift, start + 4 * shift)
        adjacent = (start + 3 * shift, start + 4 * shift)
        disjoint = (start + 4 * shift, start + 5 * shift)
        interval_container = constructor.from_tuples(
            [identical, nested, spanning, partial, adjacent, disjoint], closed
        )

        # the adjacent interval shares only the endpoint where ``interval``
        # ends, so it overlaps only when both sides include that endpoint
        touching = interval.closed_right and interval_container.closed_left
        expected = np.array([True, True, True, True, touching, False])
        result = interval_container.overlaps(interval)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("other_constructor", [IntervalArray, IntervalIndex])
    def test_overlaps_interval_container(self, constructor, other_constructor):
        """Container-vs-container ``overlaps`` raises NotImplementedError."""
        # TODO: modify this test when implemented
        breaks = range(5)
        interval_container = constructor.from_breaks(breaks)
        other_container = other_constructor.from_breaks(breaks)
        with pytest.raises(NotImplementedError, match="^$"):
            interval_container.overlaps(other_container)

    def test_overlaps_na(self, constructor, start_shift):
        """A missing entry in the container yields False."""
        start, shift = start_shift
        interval = Interval(start, start + shift)

        same = (start, start + shift)
        further = (start + 2 * shift, start + 3 * shift)
        interval_container = constructor.from_tuples([same, np.nan, further])

        result = interval_container.overlaps(interval)
        expected = np.array([True, False, False])
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")],
        ids=lambda x: type(x).__name__,
    )
    def test_overlaps_invalid_type(self, constructor, other):
        """Anything that is not Interval-like is rejected with a TypeError."""
        interval_container = constructor.from_breaks(range(5))
        type_name = type(other).__name__
        msg = f"`other` must be Interval-like, got {type_name}"
        with pytest.raises(TypeError, match=msg):
            interval_container.overlaps(other)
| @ -0,0 +1,248 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from typing import Any | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Module-level parametrization data. ``arrays`` and ``scalars`` are zipped
# together by the ``data`` fixture, so they must stay index-aligned and of
# equal length (``zip`` would silently drop trailing entries otherwise).

# integer dtypes
arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES]
scalars: list[Any] = [2] * len(arrays)
# floating dtypes
arrays += [pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES]
# one scalar per floating array; derived from the dtype list rather than
# hard-coded so the two lists cannot drift apart
scalars += [0.2] * len(tm.FLOAT_EA_DTYPES)
# boolean
arrays += [pd.array([True, False, True, None], dtype="boolean")]
scalars += [False]
|  | ||||
|  | ||||
@pytest.fixture(params=zip(arrays, scalars), ids=[arr.dtype.name for arr in arrays])
def data(request):
    """Parametrized fixture returning one ``(array, scalar)`` tuple per dtype.

    The tuples feed the tests that compare array ops against scalars and
    numpy arrays, and the tests that compare DataFrame and Series ops.
    """
    array_scalar_pair = request.param
    return array_scalar_pair
|  | ||||
|  | ||||
def check_skip(data, op_name):
    """Skip the running test when ``data`` is boolean and the op name contains "sub"."""
    if not isinstance(data.dtype, pd.BooleanDtype):
        return
    if "sub" in op_name:
        pytest.skip("subtract not implemented for boolean")
|  | ||||
|  | ||||
def is_bool_not_implemented(data, op_name):
    """
    Return True when ``data`` has a boolean dtype and ``op_name`` (dunder
    name, plain or reflected) is pow, truediv or floordiv.
    """
    # match non-masked behavior
    if data.dtype.kind != "b":
        return False
    # "__rpow__" -> "rpow" -> "pow"
    base_name = op_name.strip("_").lstrip("r")
    return base_name in ["pow", "truediv", "floordiv"]
|  | ||||
|  | ||||
| # Test equivalence of scalars, numpy arrays with array ops | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_array_scalar_like_equivalence(data, all_arithmetic_operators):
    """
    ``op(array, scalar)`` equals ``op(array, array_filled_with_scalar)``, for
    both the plain scalar and the scalar cast to the dtype's scalar type.
    """
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    scalar_array = pd.array([scalar] * len(arr), dtype=arr.dtype)
    bool_unsupported = is_bool_not_implemented(arr, op_name)

    # TODO also add len-1 array (np.array([scalar], dtype=data.dtype.numpy_dtype))
    candidates = (scalar, arr.dtype.type(scalar))
    for candidate in candidates:
        if not bool_unsupported:
            result = op(arr, candidate)
            expected = op(arr, scalar_array)
            tm.assert_extension_array_equal(result, expected)
            continue

        msg = "operator '.*' not implemented for bool dtypes"
        for other in (candidate, scalar_array):
            with pytest.raises(NotImplementedError, match=msg):
                op(arr, other)
|  | ||||
|  | ||||
def test_array_NA(data, all_arithmetic_operators):
    """
    ``op(array, pd.NA)`` equals ``op(array, all_NA_array)`` and never
    modifies the array's mask.
    """
    arr, _ = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    na_array = pd.array([pd.NA] * len(arr), dtype=arr.dtype)
    mask_before = arr._mask.copy()

    def assert_mask_untouched():
        # GH#45421 check op doesn't alter data._mask inplace
        tm.assert_numpy_array_equal(mask_before, arr._mask)

    if is_bool_not_implemented(arr, op_name):
        msg = "operator '.*' not implemented for bool dtypes"
        with pytest.raises(NotImplementedError, match=msg):
            op(arr, pd.NA)
        assert_mask_untouched()
        return

    result = op(arr, pd.NA)
    assert_mask_untouched()

    expected = op(arr, na_array)
    assert_mask_untouched()

    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_numpy_array_equivalence(data, all_arithmetic_operators):
    """
    Operating with a numpy array gives the same result as operating with
    the masked array built from that numpy array.
    """
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    numpy_array = np.array([scalar] * len(arr), dtype=arr.dtype.numpy_dtype)
    pd_array = pd.array(numpy_array, dtype=arr.dtype)

    if not is_bool_not_implemented(arr, op_name):
        result = op(arr, numpy_array)
        expected = op(arr, pd_array)
        tm.assert_extension_array_equal(result, expected)
        return

    msg = "operator '.*' not implemented for bool dtypes"
    for other in (numpy_array, pd_array):
        with pytest.raises(NotImplementedError, match=msg):
            op(arr, other)
|  | ||||
|  | ||||
| # Test equivalence with Series and DataFrame ops | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_frame(data, all_arithmetic_operators):
    """
    ``op(DataFrame, scalar)`` equals a DataFrame wrapping
    ``op(array, scalar)``.
    """
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    # DataFrame with scalar
    df = pd.DataFrame({"A": arr})

    if not is_bool_not_implemented(arr, op_name):
        result = op(df, scalar)
        expected = pd.DataFrame({"A": op(arr, scalar)})
        tm.assert_frame_equal(result, expected)
        return

    # both the frame and the bare array must refuse the operation
    msg = "operator '.*' not implemented for bool dtypes"
    for left in (df, arr):
        with pytest.raises(NotImplementedError, match=msg):
            op(left, scalar)
|  | ||||
|  | ||||
def test_series(data, all_arithmetic_operators):
    """
    ``op(Series, other)`` equals a Series wrapping ``op(array, other)`` for
    a scalar, numpy array, masked array and Series operand.
    """
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)
    check_skip(arr, op_name)

    ser = pd.Series(arr)

    repeated = [scalar] * len(arr)
    others = [
        scalar,
        np.array(repeated, dtype=arr.dtype.numpy_dtype),
        pd.array(repeated, dtype=arr.dtype),
        pd.Series(repeated, dtype=arr.dtype),
    ]

    bool_unsupported = is_bool_not_implemented(arr, op_name)
    for other in others:
        if bool_unsupported:
            msg = "operator '.*' not implemented for bool dtypes"
            with pytest.raises(NotImplementedError, match=msg):
                op(ser, other)
            continue

        result = op(ser, other)
        expected = pd.Series(op(arr, other))
        tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
| # Test generic characteristics / errors | ||||
| # ----------------------------------------------------------------------------- | ||||
|  | ||||
|  | ||||
def test_error_invalid_object(data, all_arithmetic_operators):
    """
    The array's dunder method returns NotImplemented for a DataFrame operand
    and raises NotImplementedError for a 2-d numpy array.
    """
    arr, _ = data
    bound_op = getattr(arr, all_arithmetic_operators)

    # 2d -> return NotImplemented
    frame = pd.DataFrame({"A": arr})
    assert bound_op(frame) is NotImplemented

    n = len(arr)
    two_dim = np.arange(n).reshape(-1, n)
    msg = r"can only perform ops with 1-d structures"
    with pytest.raises(NotImplementedError, match=msg):
        bound_op(two_dim)
|  | ||||
|  | ||||
def test_error_len_mismatch(data, all_arithmetic_operators):
    """
    Operating with a list or numpy array one element shorter than the data
    raises, for both the bare array and a Series wrapping it.
    """
    # operating with a list-like with non-matching length raises
    arr, scalar = data
    op_name = all_arithmetic_operators
    op = tm.get_op_from_name(op_name)

    short_list = [scalar] * (len(arr) - 1)

    # pick the error expected for this dtype/operator combination
    if arr.dtype.kind == "b" and op_name.strip("_") in ["sub", "rsub"]:
        err = TypeError
        msg = (
            r"numpy boolean subtract, the `\-` operator, is not supported, use "
            r"the bitwise_xor, the `\^` operator, or the logical_xor function instead"
        )
    elif is_bool_not_implemented(arr, op_name):
        err = NotImplementedError
        msg = "operator '.*' not implemented for bool dtypes"
    else:
        err = ValueError
        msg = "|".join(
            [
                r"operands could not be broadcast together with shapes \(3,\) \(4,\)",
                r"operands could not be broadcast together with shapes \(4,\) \(3,\)",
            ]
        )

    for mismatched in (short_list, np.array(short_list)):
        with pytest.raises(err, match=msg):
            op(arr, mismatched)

        ser = pd.Series(arr)
        with pytest.raises(err, match=msg):
            op(ser, mismatched)
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["__neg__", "__abs__", "__invert__"])
def test_unary_op_does_not_propagate_mask(data, op):
    """
    Setting a value on the source Series after a unary op must not change
    the op's earlier result.
    """
    # https://github.com/pandas-dev/pandas/issues/39943
    arr, _ = data
    ser = pd.Series(arr)

    if op == "__invert__" and arr.dtype.kind == "f":
        # we follow numpy in raising
        msg = "ufunc 'invert' not supported for the input types"
        # the raw ``_data`` target checks that this is still the numpy behavior
        for target in (ser, arr, arr._data):
            with pytest.raises(TypeError, match=msg):
                getattr(target, op)()
        return

    result = getattr(ser, op)()
    snapshot = result.copy(deep=True)
    ser[0] = None
    tm.assert_series_equal(result, snapshot)
| @ -0,0 +1,210 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Applied to every test in this module: ignore the
# "Passing a BlockManager to DataFrame" DeprecationWarning.
pytestmark = pytest.mark.filterwarnings(
    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)


# Skip the whole module when pyarrow is not importable.
pa = pytest.importorskip("pyarrow")

from pandas.core.arrays.arrow._arrow_utils import pyarrow_array_to_numpy_and_mask

# One masked array per integer, floating and boolean extension dtype, each
# ending in a missing value.
arrays = [
    *(pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES),
    *(pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES),
    pd.array([True, False, True, None], dtype="boolean"),
]
|  | ||||
|  | ||||
@pytest.fixture(params=arrays, ids=[arr.dtype.name for arr in arrays])
def data(request):
    """
    Parametrized fixture returning one masked array per dtype in ``arrays``
    (integer, float and boolean).
    """
    masked_array = request.param
    return masked_array
|  | ||||
|  | ||||
def test_arrow_array(data):
    """
    ``pa.array(data)`` equals the pyarrow array built from the object
    ndarray (missing values as None) with the matching arrow type.
    """
    expected_type = pa.from_numpy_dtype(data.dtype.numpy_dtype)
    as_objects = data.to_numpy(object, na_value=None)
    expected = pa.array(as_objects, type=expected_type)

    converted = pa.array(data)
    assert converted.equals(expected)
|  | ||||
|  | ||||
def test_arrow_roundtrip(data):
    """A frame with a masked column survives ``pa.table(...).to_pandas()``."""
    df = pd.DataFrame({"a": data})
    table = pa.table(df)

    arrow_type = table.field("a").type
    assert arrow_type == str(data.dtype.numpy_dtype)

    roundtripped = table.to_pandas()
    assert roundtripped["a"].dtype == data.dtype
    tm.assert_frame_equal(roundtripped, df)
|  | ||||
|  | ||||
def test_dataframe_from_arrow_types_mapper():
    """
    ``RecordBatch.to_pandas(types_mapper=...)`` produces the masked dtypes
    the mapper returns for boolean and integer arrow types.
    """

    def types_mapper(arrow_type):
        # booleans -> BooleanDtype, any integer width -> Int64Dtype
        if pa.types.is_boolean(arrow_type):
            return pd.BooleanDtype()
        if pa.types.is_integer(arrow_type):
            return pd.Int64Dtype()
        return None

    columns = {
        "bools": pa.array([True, None, False], type=pa.bool_()),
        "ints": pa.array([1, None, 2], type=pa.int64()),
        "small_ints": pa.array([-1, 0, 7], type=pa.int8()),
    }
    record_batch = pa.RecordBatch.from_arrays(list(columns.values()), list(columns))
    result = record_batch.to_pandas(types_mapper=types_mapper)

    expected = pd.DataFrame(
        {
            "bools": pd.Series([True, None, False], dtype="boolean"),
            "ints": pd.Series([1, None, 2], dtype="Int64"),
            "small_ints": pd.Series([-1, 0, 7], dtype="Int64"),
        }
    )
    tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
def test_arrow_load_from_zero_chunks(data):
    """A table whose column has zero chunks converts to an empty masked column."""
    # GH-41040

    df = pd.DataFrame({"a": data[0:0]})
    table = pa.table(df)
    column_type = table.field("a").type
    assert column_type == str(data.dtype.numpy_dtype)

    # rebuild the table around a chunked array that holds no chunks at all
    no_chunks = pa.chunked_array([], type=table.field("a").type)
    chunkless_table = pa.table([no_chunks], schema=table.schema)

    result = chunkless_table.to_pandas()
    assert result["a"].dtype == data.dtype
    tm.assert_frame_equal(result, df)
|  | ||||
|  | ||||
def test_arrow_from_arrow_uint():
    """``UInt32Dtype.__from_arrow__`` accepts an int64 arrow array."""
    # https://github.com/pandas-dev/pandas/issues/31896
    # possible mismatch in types

    values = [1, 2, 3, 4, None]
    int64_arrow = pa.array(values, type="int64")

    result = pd.UInt32Dtype().__from_arrow__(int64_arrow)
    expected = pd.array(values, dtype="UInt32")

    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
def test_arrow_sliced(data):
    """
    Converting a sliced table matches slicing the frame, with and without
    missing values in the column.
    """
    # https://github.com/pandas-dev/pandas/issues/38525

    df = pd.DataFrame({"a": data})
    # second frame: no missing values
    frames = [df, df.fillna(data[0])]

    for frame in frames:
        result = pa.table(frame).slice(2, None).to_pandas()
        expected = frame.iloc[2:].reset_index(drop=True)
        tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
@pytest.fixture
def np_dtype_to_arrays(any_real_numpy_dtype):
    """
    Fixture returning ``(np_dtype, pa_array, np_expected, mask_expected)``
    built from the given real numpy dtype.
    """
    np_dtype = np.dtype(any_real_numpy_dtype)

    # None ensures the creation of a bitmask buffer.
    pa_array = pa.array([0, 1, 2, None], type=pa.from_numpy_dtype(np_dtype))

    # Since masked Arrow buffer slots are not required to contain a specific
    # value, assert only the first three values of the created np.array
    np_expected = np.array([0, 1, 2], dtype=np_dtype)
    mask_expected = np.array([True, True, True, False])

    return np_dtype, pa_array, np_expected, mask_expected
|  | ||||
|  | ||||
def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
    """
    Check ``pyarrow_array_to_numpy_and_mask`` on the plain array and on
    variants rebuilt from its buffers.

    The variants add trailing padding or a leading offset to the data
    buffer; each is passed through pyarrow's ``validate()`` before use.

    An empty pyarrow array backed by the non-empty buffers is covered too.
    See https://github.com/pandas-dev/pandas/issues/40896
    """
    np_dtype, pa_array, np_expected, mask_expected = np_dtype_to_arrays

    def check(arrow_array, values_expected, validity_expected):
        # only the first three values are compared
        values, validity = pyarrow_array_to_numpy_and_mask(arrow_array, np_dtype)
        tm.assert_numpy_array_equal(values[:3], values_expected)
        tm.assert_numpy_array_equal(validity, validity_expected)

    def rebuild(length, buffers, offset):
        # assemble an array of the same type from raw buffers and validate it
        rebuilt = pa.Array.from_buffers(
            type=pa_array.type,
            length=length,
            buffers=buffers,
            offset=offset,
        )
        rebuilt.validate()
        return rebuilt

    check(pa_array, np_expected, mask_expected)

    arrow_buffers = pa_array.buffers()
    mask_buffer, data_buffer = arrow_buffers[0], arrow_buffers[1]
    raw = data_buffer.to_pybytes()

    # Add trailing padding to the buffer.
    padded = rebuild(
        len(pa_array),
        [mask_buffer, pa.py_buffer(raw + b"\x00")],
        pa_array.offset,
    )
    check(padded, np_expected, mask_expected)

    # Add offset to the buffer.
    # one zeroed item in front of the data; the bitmap 0b00001110 marks
    # slots 1-3 valid, which lines up with the offset of one
    lead = b"\x00" * (pa_array.type.bit_width // 8)
    shifted = rebuild(
        len(pa_array),
        [pa.py_buffer(b"\x0E"), pa.py_buffer(lead + raw)],
        pa_array.offset + 1,
    )
    check(shifted, np_expected, mask_expected)

    # Empty array
    empty = rebuild(0, [mask_buffer, data_buffer], pa_array.offset)
    check(empty, np.array([], dtype=np_dtype), np.array([], dtype=np.bool_))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "arr", [pa.nulls(10), pa.chunked_array([pa.nulls(4), pa.nulls(6)])]
)
def test_from_arrow_null(data, arr):
    """An all-null arrow array (plain or chunked) converts to ten missing values."""
    converted = data.dtype.__from_arrow__(arr)
    assert converted.isna().all()
    assert len(converted) == 10
|  | ||||
|  | ||||
def test_from_arrow_type_error(data):
    """``__from_arrow__`` raises TypeError for a string-typed arrow array."""
    # ensure that __from_arrow__ returns a TypeError when getting a wrong
    # array type

    string_arr = pa.array(data).cast("string")
    from_arrow = data.dtype.__from_arrow__
    # we don't test the exact error message, only the fact that it raises
    # a TypeError is relevant
    with pytest.raises(TypeError, match=None):
        from_arrow(string_arr)
| @ -0,0 +1,74 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_integer_dtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import BaseMaskedArray | ||||
|  | ||||
# One masked array per integer and floating extension dtype, each ending in
# a missing value.
arrays = [
    *(pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES),
    *(
        pd.array([0.141, -0.268, 5.895, None], dtype=dtype)
        for dtype in tm.FLOAT_EA_DTYPES
    ),
]
|  | ||||
|  | ||||
@pytest.fixture(params=arrays, ids=[arr.dtype.name for arr in arrays])
def data(request):
    """
    Parametrized fixture returning one masked array per integer and
    floating point dtype in ``arrays``.
    """
    masked_array = request.param
    return masked_array
|  | ||||
|  | ||||
| @pytest.fixture() | ||||
| def numpy_dtype(data): | ||||
|     """ | ||||
|     Fixture returning numpy dtype from 'data' input array. | ||||
|     """ | ||||
|     # For integer dtype, the numpy conversion must be done to float | ||||
|     if is_integer_dtype(data): | ||||
|         numpy_dtype = float | ||||
|     else: | ||||
|         numpy_dtype = data.dtype.type | ||||
|     return numpy_dtype | ||||
|  | ||||
|  | ||||
| def test_round(data, numpy_dtype): | ||||
|     # No arguments | ||||
|     result = data.round() | ||||
|     expected = pd.array( | ||||
|         np.round(data.to_numpy(dtype=numpy_dtype, na_value=None)), dtype=data.dtype | ||||
|     ) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # Decimals argument | ||||
|     result = data.round(decimals=2) | ||||
|     expected = pd.array( | ||||
|         np.round(data.to_numpy(dtype=numpy_dtype, na_value=None), decimals=2), | ||||
|         dtype=data.dtype, | ||||
|     ) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_tolist(data): | ||||
|     result = data.tolist() | ||||
|     expected = list(data) | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_numpy(): | ||||
|     # GH#56991 | ||||
|  | ||||
|     class MyStringArray(BaseMaskedArray): | ||||
|         dtype = pd.StringDtype() | ||||
|         _dtype_cls = pd.StringDtype | ||||
|         _internal_fill_value = pd.NA | ||||
|  | ||||
|     arr = MyStringArray( | ||||
|         values=np.array(["a", "b", "c"]), mask=np.array([False, True, False]) | ||||
|     ) | ||||
|     result = arr.to_numpy() | ||||
|     expected = np.array(["a", pd.NA, "c"]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
| @ -0,0 +1,60 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
|  | ||||
|  | ||||
| class TestSetitemValidation: | ||||
|     def _check_setitem_invalid(self, arr, invalid): | ||||
|         msg = f"Invalid value '{invalid!s}' for dtype '{arr.dtype}'" | ||||
|         msg = re.escape(msg) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr[0] = invalid | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr[:] = invalid | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr[[0]] = invalid | ||||
|  | ||||
|         # FIXME: don't leave commented-out | ||||
|         # with pytest.raises(TypeError): | ||||
|         #    arr[[0]] = [invalid] | ||||
|  | ||||
|         # with pytest.raises(TypeError): | ||||
|         #    arr[[0]] = np.array([invalid], dtype=object) | ||||
|  | ||||
|         # Series non-coercion, behavior subject to change | ||||
|         ser = pd.Series(arr) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser[0] = invalid | ||||
|             # TODO: so, so many other variants of this... | ||||
|  | ||||
|     _invalid_scalars = [ | ||||
|         1 + 2j, | ||||
|         "True", | ||||
|         "1", | ||||
|         "1.0", | ||||
|         pd.NaT, | ||||
|         np.datetime64("NaT"), | ||||
|         np.timedelta64("NaT"), | ||||
|     ] | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)] | ||||
|     ) | ||||
|     def test_setitem_validation_scalar_bool(self, invalid): | ||||
|         arr = pd.array([True, False, None], dtype="boolean") | ||||
|         self._check_setitem_invalid(arr, invalid) | ||||
|  | ||||
|     @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)]) | ||||
|     def test_setitem_validation_scalar_int(self, invalid, any_int_ea_dtype): | ||||
|         arr = pd.array([1, 2, None], dtype=any_int_ea_dtype) | ||||
|         self._check_setitem_invalid(arr, invalid) | ||||
|  | ||||
|     @pytest.mark.parametrize("invalid", _invalid_scalars + [True]) | ||||
|     def test_setitem_validation_scalar_float(self, invalid, float_ea_dtype): | ||||
|         arr = pd.array([1, 2, None], dtype=float_ea_dtype) | ||||
|         self._check_setitem_invalid(arr, invalid) | ||||
| @ -0,0 +1,154 @@ | ||||
| """ | ||||
| Tests shared by MaskedArray subclasses. | ||||
| """ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.extension.base import BaseOpsUtil | ||||
|  | ||||
|  | ||||
| class ComparisonOps(BaseOpsUtil): | ||||
|     def _compare_other(self, data, op, other): | ||||
|         # array | ||||
|         result = pd.Series(op(data, other)) | ||||
|         expected = pd.Series(op(data._data, other), dtype="boolean") | ||||
|  | ||||
|         # fill the nan locations | ||||
|         expected[data._mask] = pd.NA | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # series | ||||
|         ser = pd.Series(data) | ||||
|         result = op(ser, other) | ||||
|  | ||||
|         # Set nullable dtype here to avoid upcasting when setting to pd.NA below | ||||
|         expected = op(pd.Series(data._data), other).astype("boolean") | ||||
|  | ||||
|         # fill the nan locations | ||||
|         expected[data._mask] = pd.NA | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # subclass will override to parametrize 'other' | ||||
|     def test_scalar(self, other, comparison_op, dtype): | ||||
|         op = comparison_op | ||||
|         left = pd.array([1, 0, None], dtype=dtype) | ||||
|  | ||||
|         result = op(left, other) | ||||
|  | ||||
|         if other is pd.NA: | ||||
|             expected = pd.array([None, None, None], dtype="boolean") | ||||
|         else: | ||||
|             values = op(left._data, other) | ||||
|             expected = pd.arrays.BooleanArray(values, left._mask, copy=True) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         result[0] = pd.NA | ||||
|         tm.assert_extension_array_equal(left, pd.array([1, 0, None], dtype=dtype)) | ||||
|  | ||||
|  | ||||
| class NumericOps: | ||||
|     # Shared by IntegerArray and FloatingArray, not BooleanArray | ||||
|  | ||||
|     def test_searchsorted_nan(self, dtype): | ||||
|         # The base class casts to object dtype, for which searchsorted returns | ||||
|         #  0 from the left and 10 from the right. | ||||
|         arr = pd.array(range(10), dtype=dtype) | ||||
|  | ||||
|         assert arr.searchsorted(np.nan, side="left") == 10 | ||||
|         assert arr.searchsorted(np.nan, side="right") == 10 | ||||
|  | ||||
|     def test_no_shared_mask(self, data): | ||||
|         result = data + 1 | ||||
|         assert not tm.shares_memory(result, data) | ||||
|  | ||||
|     def test_array(self, comparison_op, dtype): | ||||
|         op = comparison_op | ||||
|  | ||||
|         left = pd.array([0, 1, 2, None, None, None], dtype=dtype) | ||||
|         right = pd.array([0, 1, None, 0, 1, None], dtype=dtype) | ||||
|  | ||||
|         result = op(left, right) | ||||
|         values = op(left._data, right._data) | ||||
|         mask = left._mask | right._mask | ||||
|  | ||||
|         expected = pd.arrays.BooleanArray(values, mask) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # ensure we haven't mutated anything inplace | ||||
|         result[0] = pd.NA | ||||
|         tm.assert_extension_array_equal( | ||||
|             left, pd.array([0, 1, 2, None, None, None], dtype=dtype) | ||||
|         ) | ||||
|         tm.assert_extension_array_equal( | ||||
|             right, pd.array([0, 1, None, 0, 1, None], dtype=dtype) | ||||
|         ) | ||||
|  | ||||
|     def test_compare_with_booleanarray(self, comparison_op, dtype): | ||||
|         op = comparison_op | ||||
|  | ||||
|         left = pd.array([True, False, None] * 3, dtype="boolean") | ||||
|         right = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype=dtype) | ||||
|         other = pd.array([False] * 3 + [True] * 3 + [None] * 3, dtype="boolean") | ||||
|  | ||||
|         expected = op(left, other) | ||||
|         result = op(left, right) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # reversed op | ||||
|         expected = op(other, left) | ||||
|         result = op(right, left) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_compare_to_string(self, dtype): | ||||
|         # GH#28930 | ||||
|         ser = pd.Series([1, None], dtype=dtype) | ||||
|         result = ser == "a" | ||||
|         expected = pd.Series([False, pd.NA], dtype="boolean") | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_ufunc_with_out(self, dtype): | ||||
|         arr = pd.array([1, 2, 3], dtype=dtype) | ||||
|         arr2 = pd.array([1, 2, pd.NA], dtype=dtype) | ||||
|  | ||||
|         mask = arr == arr | ||||
|         mask2 = arr2 == arr2 | ||||
|  | ||||
|         result = np.zeros(3, dtype=bool) | ||||
|         result |= mask | ||||
|         # If MaskedArray.__array_ufunc__ handled "out" appropriately, | ||||
|         #  `result` should still be an ndarray. | ||||
|         assert isinstance(result, np.ndarray) | ||||
|         assert result.all() | ||||
|  | ||||
|         # result |= mask worked because mask could be cast losslessly to | ||||
|         #  boolean ndarray. mask2 can't, so this raises | ||||
|         result = np.zeros(3, dtype=bool) | ||||
|         msg = "Specify an appropriate 'na_value' for this dtype" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             result |= mask2 | ||||
|  | ||||
|         # addition | ||||
|         res = np.add(arr, arr2) | ||||
|         expected = pd.array([2, 4, pd.NA], dtype=dtype) | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|         # when passing out=arr, we will modify 'arr' inplace. | ||||
|         res = np.add(arr, arr2, out=arr) | ||||
|         assert res is arr | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|         tm.assert_extension_array_equal(arr, expected) | ||||
|  | ||||
|     def test_mul_td64_array(self, dtype): | ||||
|         # GH#45622 | ||||
|         arr = pd.array([1, 2, pd.NA], dtype=dtype) | ||||
|         other = np.arange(3, dtype=np.int64).view("m8[ns]") | ||||
|  | ||||
|         result = arr * other | ||||
|         expected = pd.array([pd.Timedelta(0), pd.Timedelta(2), pd.NaT]) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
| @ -0,0 +1,41 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas.core.dtypes.common import is_scalar | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSearchsorted: | ||||
|     def test_searchsorted_string(self, string_dtype): | ||||
|         arr = pd.array(["a", "b", "c"], dtype=string_dtype) | ||||
|  | ||||
|         result = arr.searchsorted("a", side="left") | ||||
|         assert is_scalar(result) | ||||
|         assert result == 0 | ||||
|  | ||||
|         result = arr.searchsorted("a", side="right") | ||||
|         assert is_scalar(result) | ||||
|         assert result == 1 | ||||
|  | ||||
|     def test_searchsorted_numeric_dtypes_scalar(self, any_real_numpy_dtype): | ||||
|         arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype) | ||||
|         result = arr.searchsorted(30) | ||||
|         assert is_scalar(result) | ||||
|         assert result == 2 | ||||
|  | ||||
|         result = arr.searchsorted([30]) | ||||
|         expected = np.array([2], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_searchsorted_numeric_dtypes_vector(self, any_real_numpy_dtype): | ||||
|         arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype) | ||||
|         result = arr.searchsorted([2, 30]) | ||||
|         expected = np.array([1, 2], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_searchsorted_sorter(self, any_real_numpy_dtype): | ||||
|         arr = pd.array([3, 1, 2], dtype=any_real_numpy_dtype) | ||||
|         result = arr.searchsorted([0, 3], sorter=np.argsort(arr)) | ||||
|         expected = np.array([0, 2], dtype=np.intp) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
| @ -0,0 +1,351 @@ | ||||
| """ | ||||
| Additional tests for NumpyExtensionArray that aren't covered by | ||||
| the interface tests. | ||||
| """ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import NumpyEADtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.arrays import NumpyExtensionArray | ||||
|  | ||||
|  | ||||
| @pytest.fixture( | ||||
|     params=[ | ||||
|         np.array(["a", "b"], dtype=object), | ||||
|         np.array([0, 1], dtype=float), | ||||
|         np.array([0, 1], dtype=int), | ||||
|         np.array([0, 1 + 2j], dtype=complex), | ||||
|         np.array([True, False], dtype=bool), | ||||
|         np.array([0, 1], dtype="datetime64[ns]"), | ||||
|         np.array([0, 1], dtype="timedelta64[ns]"), | ||||
|     ], | ||||
| ) | ||||
| def any_numpy_array(request): | ||||
|     """ | ||||
|     Parametrized fixture for NumPy arrays with different dtypes. | ||||
|  | ||||
|     This excludes string and bytes. | ||||
|     """ | ||||
|     return request.param.copy() | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # NumpyEADtype | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype, expected", | ||||
|     [ | ||||
|         ("bool", True), | ||||
|         ("int", True), | ||||
|         ("uint", True), | ||||
|         ("float", True), | ||||
|         ("complex", True), | ||||
|         ("str", False), | ||||
|         ("bytes", False), | ||||
|         ("datetime64[ns]", False), | ||||
|         ("object", False), | ||||
|         ("void", False), | ||||
|     ], | ||||
| ) | ||||
| def test_is_numeric(dtype, expected): | ||||
|     dtype = NumpyEADtype(dtype) | ||||
|     assert dtype._is_numeric is expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype, expected", | ||||
|     [ | ||||
|         ("bool", True), | ||||
|         ("int", False), | ||||
|         ("uint", False), | ||||
|         ("float", False), | ||||
|         ("complex", False), | ||||
|         ("str", False), | ||||
|         ("bytes", False), | ||||
|         ("datetime64[ns]", False), | ||||
|         ("object", False), | ||||
|         ("void", False), | ||||
|     ], | ||||
| ) | ||||
| def test_is_boolean(dtype, expected): | ||||
|     dtype = NumpyEADtype(dtype) | ||||
|     assert dtype._is_boolean is expected | ||||
|  | ||||
|  | ||||
| def test_repr(): | ||||
|     dtype = NumpyEADtype(np.dtype("int64")) | ||||
|     assert repr(dtype) == "NumpyEADtype('int64')" | ||||
|  | ||||
|  | ||||
| def test_constructor_from_string(): | ||||
|     result = NumpyEADtype.construct_from_string("int64") | ||||
|     expected = NumpyEADtype(np.dtype("int64")) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_dtype_idempotent(any_numpy_dtype): | ||||
|     dtype = NumpyEADtype(any_numpy_dtype) | ||||
|  | ||||
|     result = NumpyEADtype(dtype) | ||||
|     assert result == dtype | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Construction | ||||
|  | ||||
|  | ||||
| def test_constructor_no_coercion(): | ||||
|     with pytest.raises(ValueError, match="NumPy array"): | ||||
|         NumpyExtensionArray([1, 2, 3]) | ||||
|  | ||||
|  | ||||
| def test_series_constructor_with_copy(): | ||||
|     ndarray = np.array([1, 2, 3]) | ||||
|     ser = pd.Series(NumpyExtensionArray(ndarray), copy=True) | ||||
|  | ||||
|     assert ser.values is not ndarray | ||||
|  | ||||
|  | ||||
| def test_series_constructor_with_astype(): | ||||
|     ndarray = np.array([1, 2, 3]) | ||||
|     result = pd.Series(NumpyExtensionArray(ndarray), dtype="float64") | ||||
|     expected = pd.Series([1.0, 2.0, 3.0], dtype="float64") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_from_sequence_dtype(): | ||||
|     arr = np.array([1, 2, 3], dtype="int64") | ||||
|     result = NumpyExtensionArray._from_sequence(arr, dtype="uint64") | ||||
|     expected = NumpyExtensionArray(np.array([1, 2, 3], dtype="uint64")) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_constructor_copy(): | ||||
|     arr = np.array([0, 1]) | ||||
|     result = NumpyExtensionArray(arr, copy=True) | ||||
|  | ||||
|     assert not tm.shares_memory(result, arr) | ||||
|  | ||||
|  | ||||
| def test_constructor_with_data(any_numpy_array): | ||||
|     nparr = any_numpy_array | ||||
|     arr = NumpyExtensionArray(nparr) | ||||
|     assert arr.dtype.numpy_dtype == nparr.dtype | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Conversion | ||||
|  | ||||
|  | ||||
| def test_to_numpy(): | ||||
|     arr = NumpyExtensionArray(np.array([1, 2, 3])) | ||||
|     result = arr.to_numpy() | ||||
|     assert result is arr._ndarray | ||||
|  | ||||
|     result = arr.to_numpy(copy=True) | ||||
|     assert result is not arr._ndarray | ||||
|  | ||||
|     result = arr.to_numpy(dtype="f8") | ||||
|     expected = np.array([1, 2, 3], dtype="f8") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Setitem | ||||
|  | ||||
|  | ||||
| def test_setitem_series(): | ||||
|     ser = pd.Series([1, 2, 3]) | ||||
|     ser.array[0] = 10 | ||||
|     expected = pd.Series([10, 2, 3]) | ||||
|     tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|  | ||||
| def test_setitem(any_numpy_array): | ||||
|     nparr = any_numpy_array | ||||
|     arr = NumpyExtensionArray(nparr, copy=True) | ||||
|  | ||||
|     arr[0] = arr[1] | ||||
|     nparr[0] = nparr[1] | ||||
|  | ||||
|     tm.assert_numpy_array_equal(arr.to_numpy(), nparr) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Reductions | ||||
|  | ||||
|  | ||||
| def test_bad_reduce_raises(): | ||||
|     arr = np.array([1, 2, 3], dtype="int64") | ||||
|     arr = NumpyExtensionArray(arr) | ||||
|     msg = "cannot perform not_a_method with type int" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         arr._reduce(msg) | ||||
|  | ||||
|  | ||||
| def test_validate_reduction_keyword_args(): | ||||
|     arr = NumpyExtensionArray(np.array([1, 2, 3])) | ||||
|     msg = "the 'keepdims' parameter is not supported .*all" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         arr.all(keepdims=True) | ||||
|  | ||||
|  | ||||
| def test_np_max_nested_tuples(): | ||||
|     # case where checking in ufunc.nout works while checking for tuples | ||||
|     #  does not | ||||
|     vals = [ | ||||
|         (("j", "k"), ("l", "m")), | ||||
|         (("l", "m"), ("o", "p")), | ||||
|         (("o", "p"), ("j", "k")), | ||||
|     ] | ||||
|     ser = pd.Series(vals) | ||||
|     arr = ser.array | ||||
|  | ||||
|     assert arr.max() is arr[2] | ||||
|     assert ser.max() is arr[2] | ||||
|  | ||||
|     result = np.maximum.reduce(arr) | ||||
|     assert result == arr[2] | ||||
|  | ||||
|     result = np.maximum.reduce(ser) | ||||
|     assert result == arr[2] | ||||
|  | ||||
|  | ||||
| def test_np_reduce_2d(): | ||||
|     raw = np.arange(12).reshape(4, 3) | ||||
|     arr = NumpyExtensionArray(raw) | ||||
|  | ||||
|     res = np.maximum.reduce(arr, axis=0) | ||||
|     tm.assert_extension_array_equal(res, arr[-1]) | ||||
|  | ||||
|     alt = arr.max(axis=0) | ||||
|     tm.assert_extension_array_equal(alt, arr[-1]) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Ops | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ufunc", [np.abs, np.negative, np.positive]) | ||||
| def test_ufunc_unary(ufunc): | ||||
|     arr = NumpyExtensionArray(np.array([-1.0, 0.0, 1.0])) | ||||
|     result = ufunc(arr) | ||||
|     expected = NumpyExtensionArray(ufunc(arr._ndarray)) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     # same thing but with the 'out' keyword | ||||
|     out = NumpyExtensionArray(np.array([-9.0, -9.0, -9.0])) | ||||
|     ufunc(arr, out=out) | ||||
|     tm.assert_extension_array_equal(out, expected) | ||||
|  | ||||
|  | ||||
| def test_ufunc(): | ||||
|     arr = NumpyExtensionArray(np.array([-1.0, 0.0, 1.0])) | ||||
|  | ||||
|     r1, r2 = np.divmod(arr, np.add(arr, 2)) | ||||
|     e1, e2 = np.divmod(arr._ndarray, np.add(arr._ndarray, 2)) | ||||
|     e1 = NumpyExtensionArray(e1) | ||||
|     e2 = NumpyExtensionArray(e2) | ||||
|     tm.assert_extension_array_equal(r1, e1) | ||||
|     tm.assert_extension_array_equal(r2, e2) | ||||
|  | ||||
|  | ||||
| def test_basic_binop(): | ||||
|     # Just a basic smoke test. The EA interface tests exercise this | ||||
|     # more thoroughly. | ||||
|     x = NumpyExtensionArray(np.array([1, 2, 3])) | ||||
|     result = x + x | ||||
|     expected = NumpyExtensionArray(np.array([2, 4, 6])) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [None, object]) | ||||
| def test_setitem_object_typecode(dtype): | ||||
|     arr = NumpyExtensionArray(np.array(["a", "b", "c"], dtype=dtype)) | ||||
|     arr[0] = "t" | ||||
|     expected = NumpyExtensionArray(np.array(["t", "b", "c"], dtype=dtype)) | ||||
|     tm.assert_extension_array_equal(arr, expected) | ||||
|  | ||||
|  | ||||
| def test_setitem_no_coercion(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/28150 | ||||
|     arr = NumpyExtensionArray(np.array([1, 2, 3])) | ||||
|     with pytest.raises(ValueError, match="int"): | ||||
|         arr[0] = "a" | ||||
|  | ||||
|     # With a value that we do coerce, check that we coerce the value | ||||
|     #  and not the underlying array. | ||||
|     arr[0] = 2.5 | ||||
|     assert isinstance(arr[0], (int, np.integer)), type(arr[0]) | ||||
|  | ||||
|  | ||||
| def test_setitem_preserves_views(): | ||||
|     # GH#28150, see also extension test of the same name | ||||
|     arr = NumpyExtensionArray(np.array([1, 2, 3])) | ||||
|     view1 = arr.view() | ||||
|     view2 = arr[:] | ||||
|     view3 = np.asarray(arr) | ||||
|  | ||||
|     arr[0] = 9 | ||||
|     assert view1[0] == 9 | ||||
|     assert view2[0] == 9 | ||||
|     assert view3[0] == 9 | ||||
|  | ||||
|     arr[-1] = 2.5 | ||||
|     view1[-1] = 5 | ||||
|     assert arr[-1] == 5 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [np.int64, np.uint64]) | ||||
| def test_quantile_empty(dtype): | ||||
|     # we should get back np.nans, not -1s | ||||
|     arr = NumpyExtensionArray(np.array([], dtype=dtype)) | ||||
|     idx = pd.Index([0.0, 0.5]) | ||||
|  | ||||
|     result = arr._quantile(idx, interpolation="linear") | ||||
|     expected = NumpyExtensionArray(np.array([np.nan, np.nan])) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_factorize_unsigned(): | ||||
|     # don't raise when calling factorize on unsigned int NumpyExtensionArray | ||||
|     arr = np.array([1, 2, 3], dtype=np.uint64) | ||||
|     obj = NumpyExtensionArray(arr) | ||||
|  | ||||
|     res_codes, res_unique = obj.factorize() | ||||
|     exp_codes, exp_unique = pd.factorize(arr) | ||||
|  | ||||
|     tm.assert_numpy_array_equal(res_codes, exp_codes) | ||||
|  | ||||
|     tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique)) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Output formatting | ||||
|  | ||||
|  | ||||
| def test_array_repr(any_numpy_array): | ||||
|     # GH#61085 | ||||
|     nparray = any_numpy_array | ||||
|     arr = NumpyExtensionArray(nparray) | ||||
|     if nparray.dtype == "object": | ||||
|         values = "['a', 'b']" | ||||
|     elif nparray.dtype == "float64": | ||||
|         values = "[0.0, 1.0]" | ||||
|     elif str(nparray.dtype).startswith("int"): | ||||
|         values = "[0, 1]" | ||||
|     elif nparray.dtype == "complex128": | ||||
|         values = "[0j, (1+2j)]" | ||||
|     elif nparray.dtype == "bool": | ||||
|         values = "[True, False]" | ||||
|     elif nparray.dtype == "datetime64[ns]": | ||||
|         values = "[1970-01-01T00:00:00.000000000, 1970-01-01T00:00:00.000000001]" | ||||
|     elif nparray.dtype == "timedelta64[ns]": | ||||
|         values = "[0 nanoseconds, 1 nanoseconds]" | ||||
|     expected = f"<NumpyExtensionArray>\n{values}\nLength: 2, dtype: {nparray.dtype}" | ||||
|     result = repr(arr) | ||||
|     assert result == expected, f"{result} vs {expected}" | ||||
| @ -0,0 +1,130 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat.pyarrow import pa_version_under10p1 | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import PeriodDtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import ( | ||||
|     PeriodArray, | ||||
|     period_array, | ||||
| ) | ||||
|  | ||||
| pytestmark = pytest.mark.filterwarnings( | ||||
|     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" | ||||
| ) | ||||
|  | ||||
|  | ||||
| pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
|  | ||||
| def test_arrow_extension_type(): | ||||
|     from pandas.core.arrays.arrow.extension_types import ArrowPeriodType | ||||
|  | ||||
|     p1 = ArrowPeriodType("D") | ||||
|     p2 = ArrowPeriodType("D") | ||||
|     p3 = ArrowPeriodType("M") | ||||
|  | ||||
|     assert p1.freq == "D" | ||||
|     assert p1 == p2 | ||||
|     assert p1 != p3 | ||||
|     assert hash(p1) == hash(p2) | ||||
|     assert hash(p1) != hash(p3) | ||||
|  | ||||
|  | ||||
| @pytest.mark.xfail(not pa_version_under10p1, reason="Wrong behavior with pyarrow 10") | ||||
| @pytest.mark.parametrize( | ||||
|     "data, freq", | ||||
|     [ | ||||
|         (pd.date_range("2017", periods=3), "D"), | ||||
|         (pd.date_range("2017", periods=3, freq="YE"), "Y-DEC"), | ||||
|     ], | ||||
| ) | ||||
| def test_arrow_array(data, freq): | ||||
|     from pandas.core.arrays.arrow.extension_types import ArrowPeriodType | ||||
|  | ||||
|     periods = period_array(data, freq=freq) | ||||
|     result = pa.array(periods) | ||||
|     assert isinstance(result.type, ArrowPeriodType) | ||||
|     assert result.type.freq == freq | ||||
|     expected = pa.array(periods.asi8, type="int64") | ||||
|     assert result.storage.equals(expected) | ||||
|  | ||||
|     # convert to its storage type | ||||
|     result = pa.array(periods, type=pa.int64()) | ||||
|     assert result.equals(expected) | ||||
|  | ||||
|     # unsupported conversions | ||||
|     msg = "Not supported to convert PeriodArray to 'double' type" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         pa.array(periods, type="float64") | ||||
|  | ||||
|     with pytest.raises(TypeError, match="different 'freq'"): | ||||
|         pa.array(periods, type=ArrowPeriodType("T")) | ||||
|  | ||||
|  | ||||
| def test_arrow_array_missing(): | ||||
|     from pandas.core.arrays.arrow.extension_types import ArrowPeriodType | ||||
|  | ||||
|     arr = PeriodArray([1, 2, 3], dtype="period[D]") | ||||
|     arr[1] = pd.NaT | ||||
|  | ||||
|     result = pa.array(arr) | ||||
|     assert isinstance(result.type, ArrowPeriodType) | ||||
|     assert result.type.freq == "D" | ||||
|     expected = pa.array([1, None, 3], type="int64") | ||||
|     assert result.storage.equals(expected) | ||||
|  | ||||
|  | ||||
| def test_arrow_table_roundtrip(): | ||||
|     from pandas.core.arrays.arrow.extension_types import ArrowPeriodType | ||||
|  | ||||
|     arr = PeriodArray([1, 2, 3], dtype="period[D]") | ||||
|     arr[1] = pd.NaT | ||||
|     df = pd.DataFrame({"a": arr}) | ||||
|  | ||||
|     table = pa.table(df) | ||||
|     assert isinstance(table.field("a").type, ArrowPeriodType) | ||||
|     result = table.to_pandas() | ||||
|     assert isinstance(result["a"].dtype, PeriodDtype) | ||||
|     tm.assert_frame_equal(result, df) | ||||
|  | ||||
|     table2 = pa.concat_tables([table, table]) | ||||
|     result = table2.to_pandas() | ||||
|     expected = pd.concat([df, df], ignore_index=True) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_arrow_load_from_zero_chunks(): | ||||
|     # GH-41040 | ||||
|  | ||||
|     from pandas.core.arrays.arrow.extension_types import ArrowPeriodType | ||||
|  | ||||
|     arr = PeriodArray([], dtype="period[D]") | ||||
|     df = pd.DataFrame({"a": arr}) | ||||
|  | ||||
|     table = pa.table(df) | ||||
|     assert isinstance(table.field("a").type, ArrowPeriodType) | ||||
|     table = pa.table( | ||||
|         [pa.chunked_array([], type=table.column(0).type)], schema=table.schema | ||||
|     ) | ||||
|  | ||||
|     result = table.to_pandas() | ||||
|     assert isinstance(result["a"].dtype, PeriodDtype) | ||||
|     tm.assert_frame_equal(result, df) | ||||
|  | ||||
|  | ||||
| def test_arrow_table_roundtrip_without_metadata(): | ||||
|     arr = PeriodArray([1, 2, 3], dtype="period[h]") | ||||
|     arr[1] = pd.NaT | ||||
|     df = pd.DataFrame({"a": arr}) | ||||
|  | ||||
|     table = pa.table(df) | ||||
|     # remove the metadata | ||||
|     table = table.replace_schema_metadata() | ||||
|     assert table.schema.metadata is None | ||||
|  | ||||
|     result = table.to_pandas() | ||||
|     assert isinstance(result["a"].dtype, PeriodDtype) | ||||
|     tm.assert_frame_equal(result, df) | ||||
| @ -0,0 +1,67 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import PeriodDtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import period_array | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) | ||||
| def test_astype_int(dtype): | ||||
|     # We choose to ignore the sign and size of integers for | ||||
|     # Period/Datetime/Timedelta astype | ||||
|     arr = period_array(["2000", "2001", None], freq="D") | ||||
|  | ||||
|     if np.dtype(dtype) != np.int64: | ||||
|         with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"): | ||||
|             arr.astype(dtype) | ||||
|         return | ||||
|  | ||||
|     result = arr.astype(dtype) | ||||
|     expected = arr._ndarray.view("i8") | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_astype_copies(): | ||||
|     arr = period_array(["2000", "2001", None], freq="D") | ||||
|     result = arr.astype(np.int64, copy=False) | ||||
|  | ||||
|     # Add the `.base`, since we now use `.asi8` which returns a view. | ||||
|     # We could maybe override it in PeriodArray to return ._ndarray directly. | ||||
|     assert result.base is arr._ndarray | ||||
|  | ||||
|     result = arr.astype(np.int64, copy=True) | ||||
|     assert result is not arr._ndarray | ||||
|     tm.assert_numpy_array_equal(result, arr._ndarray.view("i8")) | ||||
|  | ||||
|  | ||||
| def test_astype_categorical(): | ||||
|     arr = period_array(["2000", "2001", "2001", None], freq="D") | ||||
|     result = arr.astype("category") | ||||
|     categories = pd.PeriodIndex(["2000", "2001"], freq="D") | ||||
|     expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories) | ||||
|     tm.assert_categorical_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_astype_period(): | ||||
|     arr = period_array(["2000", "2001", None], freq="D") | ||||
|     result = arr.astype(PeriodDtype("M")) | ||||
|     expected = period_array(["2000", "2001", None], freq="M") | ||||
|     tm.assert_period_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) | ||||
| def test_astype_datetime(dtype): | ||||
|     arr = period_array(["2000", "2001", None], freq="D") | ||||
|     # slice off the [ns] so that the regex matches. | ||||
|     if dtype == "timedelta64[ns]": | ||||
|         with pytest.raises(TypeError, match=dtype[:-4]): | ||||
|             arr.astype(dtype) | ||||
|  | ||||
|     else: | ||||
|         # GH#45038 allow period->dt64 because we allow dt64->period | ||||
|         result = arr.astype(dtype) | ||||
|         expected = pd.DatetimeIndex(["2000", "2001", pd.NaT], dtype=dtype)._data | ||||
|         tm.assert_datetime_array_equal(result, expected) | ||||
| @ -0,0 +1,156 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import iNaT | ||||
| from pandas._libs.tslibs.offsets import MonthEnd | ||||
| from pandas._libs.tslibs.period import IncompatibleFrequency | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import ( | ||||
|     PeriodArray, | ||||
|     period_array, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, freq, expected", | ||||
|     [ | ||||
|         ([pd.Period("2017", "D")], None, [17167]), | ||||
|         ([pd.Period("2017", "D")], "D", [17167]), | ||||
|         ([2017], "D", [17167]), | ||||
|         (["2017"], "D", [17167]), | ||||
|         ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]), | ||||
|         ([pd.Period("2017", "D"), None], None, [17167, iNaT]), | ||||
|         (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]), | ||||
|         (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]), | ||||
|         (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]), | ||||
|     ], | ||||
| ) | ||||
| def test_period_array_ok(data, freq, expected): | ||||
|     result = period_array(data, freq=freq).asi8 | ||||
|     expected = np.asarray(expected, dtype=np.int64) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_period_array_readonly_object(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/25403 | ||||
|     pa = period_array([pd.Period("2019-01-01")]) | ||||
|     arr = np.asarray(pa, dtype="object") | ||||
|     arr.setflags(write=False) | ||||
|  | ||||
|     result = period_array(arr) | ||||
|     tm.assert_period_array_equal(result, pa) | ||||
|  | ||||
|     result = pd.Series(arr) | ||||
|     tm.assert_series_equal(result, pd.Series(pa)) | ||||
|  | ||||
|     result = pd.DataFrame({"A": arr}) | ||||
|     tm.assert_frame_equal(result, pd.DataFrame({"A": pa})) | ||||
|  | ||||
|  | ||||
| def test_from_datetime64_freq_changes(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/23438 | ||||
|     arr = pd.date_range("2017", periods=3, freq="D") | ||||
|     result = PeriodArray._from_datetime64(arr, freq="M") | ||||
|     expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M") | ||||
|     tm.assert_period_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("freq", ["2M", MonthEnd(2)]) | ||||
| def test_from_datetime64_freq_2M(freq): | ||||
|     arr = np.array( | ||||
|         ["2020-01-01T00:00:00", "2020-01-02T00:00:00"], dtype="datetime64[ns]" | ||||
|     ) | ||||
|     result = PeriodArray._from_datetime64(arr, freq) | ||||
|     expected = period_array(["2020-01", "2020-01"], freq=freq) | ||||
|     tm.assert_period_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, freq, msg", | ||||
|     [ | ||||
|         ( | ||||
|             [pd.Period("2017", "D"), pd.Period("2017", "Y")], | ||||
|             None, | ||||
|             "Input has different freq", | ||||
|         ), | ||||
|         ([pd.Period("2017", "D")], "Y", "Input has different freq"), | ||||
|     ], | ||||
| ) | ||||
| def test_period_array_raises(data, freq, msg): | ||||
|     # Two cases: Periods with differing freqs and no explicit freq, and a | ||||
|     # daily Period combined with an explicit "Y" freq. Both are expected to | ||||
|     # raise IncompatibleFrequency with the parametrized message. | ||||
|     with pytest.raises(IncompatibleFrequency, match=msg): | ||||
|         period_array(data, freq) | ||||
|  | ||||
|  | ||||
| def test_period_array_non_period_series_raies(): | ||||
|     ser = pd.Series([1, 2, 3]) | ||||
|     with pytest.raises(TypeError, match="dtype"): | ||||
|         PeriodArray(ser, dtype="period[D]") | ||||
|  | ||||
|  | ||||
| def test_period_array_freq_mismatch(): | ||||
|     arr = period_array(["2000", "2001"], freq="D") | ||||
|     with pytest.raises(IncompatibleFrequency, match="freq"): | ||||
|         PeriodArray(arr, dtype="period[M]") | ||||
|  | ||||
|     dtype = pd.PeriodDtype(pd.tseries.offsets.MonthEnd()) | ||||
|     with pytest.raises(IncompatibleFrequency, match="freq"): | ||||
|         PeriodArray(arr, dtype=dtype) | ||||
|  | ||||
|  | ||||
| def test_from_sequence_disallows_i8(): | ||||
|     arr = period_array(["2000", "2001"], freq="D") | ||||
|  | ||||
|     msg = str(arr[0].ordinal) | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         PeriodArray._from_sequence(arr.asi8, dtype=arr.dtype) | ||||
|  | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         PeriodArray._from_sequence(list(arr.asi8), dtype=arr.dtype) | ||||
|  | ||||
|  | ||||
| def test_from_td64nat_sequence_raises(): | ||||
|     # GH#44507 | ||||
|     td = pd.NaT.to_numpy("m8[ns]") | ||||
|  | ||||
|     dtype = pd.period_range("2005-01-01", periods=3, freq="D").dtype | ||||
|  | ||||
|     arr = np.array([None], dtype=object) | ||||
|     arr[0] = td | ||||
|  | ||||
|     msg = "Value must be Period, string, integer, or datetime" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         PeriodArray._from_sequence(arr, dtype=dtype) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         pd.PeriodIndex(arr, dtype=dtype) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         pd.Index(arr, dtype=dtype) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         pd.array(arr, dtype=dtype) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         pd.Series(arr, dtype=dtype) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         pd.DataFrame(arr, dtype=dtype) | ||||
|  | ||||
|  | ||||
| def test_freq_deprecated(): | ||||
|     # GH#52462 | ||||
|     data = np.arange(5).astype(np.int64) | ||||
|     msg = "The 'freq' keyword in the PeriodArray constructor is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         res = PeriodArray(data, freq="M") | ||||
|  | ||||
|     expected = PeriodArray(data, dtype="period[M]") | ||||
|     tm.assert_equal(res, expected) | ||||
|  | ||||
|  | ||||
| def test_period_array_from_datetime64(): | ||||
|     arr = np.array( | ||||
|         ["2020-01-01T00:00:00", "2020-02-02T00:00:00"], dtype="datetime64[ns]" | ||||
|     ) | ||||
|     result = PeriodArray._from_datetime64(arr, freq=MonthEnd(2)) | ||||
|  | ||||
|     expected = period_array(["2020-01-01", "2020-02-01"], freq=MonthEnd(2)) | ||||
|     tm.assert_period_array_equal(result, expected) | ||||
| @ -0,0 +1,42 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas.core.arrays import period_array | ||||
|  | ||||
|  | ||||
| class TestReductions: | ||||
|     def test_min_max(self): | ||||
|         arr = period_array( | ||||
|             [ | ||||
|                 "2000-01-03", | ||||
|                 "2000-01-03", | ||||
|                 "NaT", | ||||
|                 "2000-01-02", | ||||
|                 "2000-01-05", | ||||
|                 "2000-01-04", | ||||
|             ], | ||||
|             freq="D", | ||||
|         ) | ||||
|  | ||||
|         result = arr.min() | ||||
|         expected = pd.Period("2000-01-02", freq="D") | ||||
|         assert result == expected | ||||
|  | ||||
|         result = arr.max() | ||||
|         expected = pd.Period("2000-01-05", freq="D") | ||||
|         assert result == expected | ||||
|  | ||||
|         result = arr.min(skipna=False) | ||||
|         assert result is pd.NaT | ||||
|  | ||||
|         result = arr.max(skipna=False) | ||||
|         assert result is pd.NaT | ||||
|  | ||||
|     @pytest.mark.parametrize("skipna", [True, False]) | ||||
|     def test_min_max_empty(self, skipna): | ||||
|         arr = period_array([], freq="D") | ||||
|         result = arr.min(skipna=skipna) | ||||
|         assert result is pd.NaT | ||||
|  | ||||
|         result = arr.max(skipna=skipna) | ||||
|         assert result is pd.NaT | ||||
| @ -0,0 +1,253 @@ | ||||
| import string | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import SparseDtype | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
| class TestSeriesAccessor: | ||||
|     def test_to_dense(self): | ||||
|         ser = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]") | ||||
|         result = ser.sparse.to_dense() | ||||
|         expected = pd.Series([0, 1, 0, 10]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("attr", ["npoints", "density", "fill_value", "sp_values"]) | ||||
|     def test_get_attributes(self, attr): | ||||
|         arr = SparseArray([0, 1]) | ||||
|         ser = pd.Series(arr) | ||||
|  | ||||
|         result = getattr(ser.sparse, attr) | ||||
|         expected = getattr(arr, attr) | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_from_coo(self): | ||||
|         scipy_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         row = [0, 3, 1, 0] | ||||
|         col = [0, 3, 1, 2] | ||||
|         data = [4, 5, 7, 9] | ||||
|  | ||||
|         sp_array = scipy_sparse.coo_matrix((data, (row, col))) | ||||
|         result = pd.Series.sparse.from_coo(sp_array) | ||||
|  | ||||
|         index = pd.MultiIndex.from_arrays( | ||||
|             [ | ||||
|                 np.array([0, 0, 1, 3], dtype=np.int32), | ||||
|                 np.array([0, 2, 1, 3], dtype=np.int32), | ||||
|             ], | ||||
|         ) | ||||
|         expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "sort_labels, expected_rows, expected_cols, expected_values_pos", | ||||
|         [ | ||||
|             ( | ||||
|                 False, | ||||
|                 [("b", 2), ("a", 2), ("b", 1), ("a", 1)], | ||||
|                 [("z", 1), ("z", 2), ("x", 2), ("z", 0)], | ||||
|                 {1: (1, 0), 3: (3, 3)}, | ||||
|             ), | ||||
|             ( | ||||
|                 True, | ||||
|                 [("a", 1), ("a", 2), ("b", 1), ("b", 2)], | ||||
|                 [("x", 2), ("z", 0), ("z", 1), ("z", 2)], | ||||
|                 {1: (1, 2), 3: (0, 1)}, | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_to_coo( | ||||
|         self, sort_labels, expected_rows, expected_cols, expected_values_pos | ||||
|     ): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         values = SparseArray([0, np.nan, 1, 0, None, 3], fill_value=0) | ||||
|         index = pd.MultiIndex.from_tuples( | ||||
|             [ | ||||
|                 ("b", 2, "z", 1), | ||||
|                 ("a", 2, "z", 2), | ||||
|                 ("a", 2, "z", 1), | ||||
|                 ("a", 2, "x", 2), | ||||
|                 ("b", 1, "z", 1), | ||||
|                 ("a", 1, "z", 0), | ||||
|             ] | ||||
|         ) | ||||
|         ss = pd.Series(values, index=index) | ||||
|  | ||||
|         expected_A = np.zeros((4, 4)) | ||||
|         for value, (row, col) in expected_values_pos.items(): | ||||
|             expected_A[row, col] = value | ||||
|  | ||||
|         A, rows, cols = ss.sparse.to_coo( | ||||
|             row_levels=(0, 1), column_levels=(2, 3), sort_labels=sort_labels | ||||
|         ) | ||||
|         assert isinstance(A, sp_sparse.coo_matrix) | ||||
|         tm.assert_numpy_array_equal(A.toarray(), expected_A) | ||||
|         assert rows == expected_rows | ||||
|         assert cols == expected_cols | ||||
|  | ||||
|     def test_non_sparse_raises(self): | ||||
|         ser = pd.Series([1, 2, 3]) | ||||
|         with pytest.raises(AttributeError, match=".sparse"): | ||||
|             ser.sparse.density | ||||
|  | ||||
|  | ||||
| class TestFrameAccessor: | ||||
|     def test_accessor_raises(self): | ||||
|         df = pd.DataFrame({"A": [0, 1]}) | ||||
|         with pytest.raises(AttributeError, match="sparse"): | ||||
|             df.sparse | ||||
|  | ||||
|     @pytest.mark.parametrize("format", ["csc", "csr", "coo"]) | ||||
|     @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])]) | ||||
|     @pytest.mark.parametrize("dtype", ["float64", "int64"]) | ||||
|     def test_from_spmatrix(self, format, labels, dtype): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item()) | ||||
|  | ||||
|         mat = sp_sparse.eye(10, format=format, dtype=dtype) | ||||
|         result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels) | ||||
|         expected = pd.DataFrame( | ||||
|             np.eye(10, dtype=dtype), index=labels, columns=labels | ||||
|         ).astype(sp_dtype) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("format", ["csc", "csr", "coo"]) | ||||
|     def test_from_spmatrix_including_explicit_zero(self, format): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         mat = sp_sparse.random(10, 2, density=0.5, format=format) | ||||
|         mat.data[0] = 0 | ||||
|         result = pd.DataFrame.sparse.from_spmatrix(mat) | ||||
|         dtype = SparseDtype("float64", 0.0) | ||||
|         expected = pd.DataFrame(mat.todense()).astype(dtype) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "columns", | ||||
|         [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]], | ||||
|     ) | ||||
|     def test_from_spmatrix_columns(self, columns): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         dtype = SparseDtype("float64", 0.0) | ||||
|  | ||||
|         mat = sp_sparse.random(10, 2, density=0.5) | ||||
|         result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns) | ||||
|         expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)] | ||||
|     ) | ||||
|     def test_to_coo(self, colnames): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         df = pd.DataFrame( | ||||
|             {colnames[0]: [0, 1, 0], colnames[1]: [1, 0, 0]}, dtype="Sparse[int64, 0]" | ||||
|         ) | ||||
|         result = df.sparse.to_coo() | ||||
|         expected = sp_sparse.coo_matrix(np.asarray(df)) | ||||
|         assert (result != expected).nnz == 0 | ||||
|  | ||||
|     @pytest.mark.parametrize("fill_value", [1, np.nan]) | ||||
|     def test_to_coo_nonzero_fill_val_raises(self, fill_value): | ||||
|         pytest.importorskip("scipy") | ||||
|         df = pd.DataFrame( | ||||
|             { | ||||
|                 "A": SparseArray( | ||||
|                     [fill_value, fill_value, fill_value, 2], fill_value=fill_value | ||||
|                 ), | ||||
|                 "B": SparseArray( | ||||
|                     [fill_value, 2, fill_value, fill_value], fill_value=fill_value | ||||
|                 ), | ||||
|             } | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match="fill value must be 0"): | ||||
|             df.sparse.to_coo() | ||||
|  | ||||
|     def test_to_coo_midx_categorical(self): | ||||
|         # GH#50996 | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         midx = pd.MultiIndex.from_arrays( | ||||
|             [ | ||||
|                 pd.CategoricalIndex(list("ab"), name="x"), | ||||
|                 pd.CategoricalIndex([0, 1], name="y"), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         ser = pd.Series(1, index=midx, dtype="Sparse[int]") | ||||
|         result = ser.sparse.to_coo(row_levels=["x"], column_levels=["y"])[0] | ||||
|         expected = sp_sparse.coo_matrix( | ||||
|             (np.array([1, 1]), (np.array([0, 1]), np.array([0, 1]))), shape=(2, 2) | ||||
|         ) | ||||
|         assert (result != expected).nnz == 0 | ||||
|  | ||||
|     def test_to_dense(self): | ||||
|         df = pd.DataFrame( | ||||
|             { | ||||
|                 "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)), | ||||
|                 "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)), | ||||
|                 "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)), | ||||
|             }, | ||||
|             index=["b", "a"], | ||||
|         ) | ||||
|         result = df.sparse.to_dense() | ||||
|         expected = pd.DataFrame( | ||||
|             {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"] | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_density(self): | ||||
|         df = pd.DataFrame( | ||||
|             { | ||||
|                 "A": SparseArray([1, 0, 2, 1], fill_value=0), | ||||
|                 "B": SparseArray([0, 1, 1, 1], fill_value=0), | ||||
|             } | ||||
|         ) | ||||
|         res = df.sparse.density | ||||
|         expected = 0.75 | ||||
|         assert res == expected | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["int64", "float64"]) | ||||
|     @pytest.mark.parametrize("dense_index", [True, False]) | ||||
|     def test_series_from_coo(self, dtype, dense_index): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         A = sp_sparse.eye(3, format="coo", dtype=dtype) | ||||
|         result = pd.Series.sparse.from_coo(A, dense_index=dense_index) | ||||
|  | ||||
|         index = pd.MultiIndex.from_tuples( | ||||
|             [ | ||||
|                 np.array([0, 0], dtype=np.int32), | ||||
|                 np.array([1, 1], dtype=np.int32), | ||||
|                 np.array([2, 2], dtype=np.int32), | ||||
|             ], | ||||
|         ) | ||||
|         expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) | ||||
|         if dense_index: | ||||
|             expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_series_from_coo_incorrect_format_raises(self): | ||||
|         # gh-26554 | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         m = sp_sparse.csr_matrix(np.array([[0, 1], [0, 0]])) | ||||
|         with pytest.raises( | ||||
|             TypeError, match="Expected coo_matrix. Got csr_matrix instead." | ||||
|         ): | ||||
|             pd.Series.sparse.from_coo(m) | ||||
|  | ||||
|     def test_with_column_named_sparse(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/30758 | ||||
|         df = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])}) | ||||
|         assert isinstance(df.sparse, pd.core.arrays.sparse.accessor.SparseFrameAccessor) | ||||
| @ -0,0 +1,514 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import SparseDtype | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=["integer", "block"]) | ||||
| def kind(request): | ||||
|     """ | ||||
|     Fixture returning "integer" or "block", the value passed as the | ||||
|     ``kind`` keyword argument to SparseArray. | ||||
|     """ | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=[True, False]) | ||||
| def mix(request): | ||||
|     """ | ||||
|     Fixture returning True or False. When True, tests evaluate | ||||
|     op(sparse, dense); when False, they evaluate op(sparse, sparse). | ||||
|     """ | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| class TestSparseArrayArithmetics: | ||||
|     def _assert(self, a, b): | ||||
|         # Shared equality helper for this class; delegates to | ||||
|         # tm.assert_numpy_array_equal. | ||||
|         # NOTE(review): the comment below names tm.assert_sp_array_equal while | ||||
|         # the code calls tm.assert_numpy_array_equal -- presumably a pending | ||||
|         # change tracked in the referenced issue; confirm. | ||||
|         # We have to use tm.assert_sp_array_equal. See GH #45126 | ||||
|         tm.assert_numpy_array_equal(a, b) | ||||
|  | ||||
|     def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op): | ||||
|         # Check that arithmetic behavior matches non-Sparse Series arithmetic | ||||
|  | ||||
|         if isinstance(a_dense, np.ndarray): | ||||
|             expected = op(pd.Series(a_dense), b_dense).values | ||||
|         elif isinstance(b_dense, np.ndarray): | ||||
|             expected = op(a_dense, pd.Series(b_dense)).values | ||||
|         else: | ||||
|             raise NotImplementedError | ||||
|  | ||||
|         with np.errstate(invalid="ignore", divide="ignore"): | ||||
|             if mix: | ||||
|                 result = op(a, b_dense).to_dense() | ||||
|             else: | ||||
|                 result = op(a, b).to_dense() | ||||
|  | ||||
|         self._assert(result, expected) | ||||
|  | ||||
|     def _check_bool_result(self, res): | ||||
|         # Assert that ``res`` is a SparseArray with a SparseDtype whose subtype | ||||
|         # is np.bool_ and whose fill_value is a Python bool. | ||||
|         assert isinstance(res, SparseArray) | ||||
|         assert isinstance(res.dtype, SparseDtype) | ||||
|         assert res.dtype.subtype == np.bool_ | ||||
|         assert isinstance(res.fill_value, bool) | ||||
|  | ||||
|     def _check_comparison_ops(self, a, b, a_dense, b_dense): | ||||
|         with np.errstate(invalid="ignore"): | ||||
|             # Unfortunately, trying to wrap the computation of each expected | ||||
|             # value is with np.errstate() is too tedious. | ||||
|             # | ||||
|             # sparse & sparse | ||||
|             self._check_bool_result(a == b) | ||||
|             self._assert((a == b).to_dense(), a_dense == b_dense) | ||||
|  | ||||
|             self._check_bool_result(a != b) | ||||
|             self._assert((a != b).to_dense(), a_dense != b_dense) | ||||
|  | ||||
|             self._check_bool_result(a >= b) | ||||
|             self._assert((a >= b).to_dense(), a_dense >= b_dense) | ||||
|  | ||||
|             self._check_bool_result(a <= b) | ||||
|             self._assert((a <= b).to_dense(), a_dense <= b_dense) | ||||
|  | ||||
|             self._check_bool_result(a > b) | ||||
|             self._assert((a > b).to_dense(), a_dense > b_dense) | ||||
|  | ||||
|             self._check_bool_result(a < b) | ||||
|             self._assert((a < b).to_dense(), a_dense < b_dense) | ||||
|  | ||||
|             # sparse & dense | ||||
|             self._check_bool_result(a == b_dense) | ||||
|             self._assert((a == b_dense).to_dense(), a_dense == b_dense) | ||||
|  | ||||
|             self._check_bool_result(a != b_dense) | ||||
|             self._assert((a != b_dense).to_dense(), a_dense != b_dense) | ||||
|  | ||||
|             self._check_bool_result(a >= b_dense) | ||||
|             self._assert((a >= b_dense).to_dense(), a_dense >= b_dense) | ||||
|  | ||||
|             self._check_bool_result(a <= b_dense) | ||||
|             self._assert((a <= b_dense).to_dense(), a_dense <= b_dense) | ||||
|  | ||||
|             self._check_bool_result(a > b_dense) | ||||
|             self._assert((a > b_dense).to_dense(), a_dense > b_dense) | ||||
|  | ||||
|             self._check_bool_result(a < b_dense) | ||||
|             self._assert((a < b_dense).to_dense(), a_dense < b_dense) | ||||
|  | ||||
|     def _check_logical_ops(self, a, b, a_dense, b_dense): | ||||
|         # sparse & sparse | ||||
|         self._check_bool_result(a & b) | ||||
|         self._assert((a & b).to_dense(), a_dense & b_dense) | ||||
|  | ||||
|         self._check_bool_result(a | b) | ||||
|         self._assert((a | b).to_dense(), a_dense | b_dense) | ||||
|         # sparse & dense | ||||
|         self._check_bool_result(a & b_dense) | ||||
|         self._assert((a & b_dense).to_dense(), a_dense & b_dense) | ||||
|  | ||||
|         self._check_bool_result(a | b_dense) | ||||
|         self._assert((a | b_dense).to_dense(), a_dense | b_dense) | ||||
|  | ||||
|     @pytest.mark.parametrize("scalar", [0, 1, 3]) | ||||
|     @pytest.mark.parametrize("fill_value", [None, 0, 2]) | ||||
|     def test_float_scalar( | ||||
|         self, kind, mix, all_arithmetic_functions, fill_value, scalar, request | ||||
|     ): | ||||
|         op = all_arithmetic_functions | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|         a = SparseArray(values, kind=kind, fill_value=fill_value) | ||||
|         self._check_numeric_ops(a, scalar, values, scalar, mix, op) | ||||
|  | ||||
|     def test_float_scalar_comparison(self, kind): | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind) | ||||
|         self._check_comparison_ops(a, 1, values, 1) | ||||
|         self._check_comparison_ops(a, 0, values, 0) | ||||
|         self._check_comparison_ops(a, 3, values, 3) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         self._check_comparison_ops(a, 1, values, 1) | ||||
|         self._check_comparison_ops(a, 0, values, 0) | ||||
|         self._check_comparison_ops(a, 3, values, 3) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=2) | ||||
|         self._check_comparison_ops(a, 1, values, 1) | ||||
|         self._check_comparison_ops(a, 0, values, 0) | ||||
|         self._check_comparison_ops(a, 3, values, 3) | ||||
|  | ||||
|     def test_float_same_index_without_nans(self, kind, mix, all_arithmetic_functions): | ||||
|         # when sp_index are the same | ||||
|         op = all_arithmetic_functions | ||||
|  | ||||
|         values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) | ||||
|         rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0]) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=0) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|     def test_float_same_index_with_nans( | ||||
|         self, kind, mix, all_arithmetic_functions, request | ||||
|     ): | ||||
|         # when sp_index are the same | ||||
|         op = all_arithmetic_functions | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|         rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|     def test_float_same_index_comparison(self, kind): | ||||
|         # when sp_index are the same | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|         rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|         values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) | ||||
|         rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0]) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=0) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|     def test_float_array(self, kind, mix, all_arithmetic_functions): | ||||
|         op = all_arithmetic_functions | ||||
|  | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|         rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|         self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=0) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=1) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=2) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|     def test_float_array_different_kind(self, mix, all_arithmetic_functions): | ||||
|         op = all_arithmetic_functions | ||||
|  | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|         rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) | ||||
|  | ||||
|         a = SparseArray(values, kind="integer") | ||||
|         b = SparseArray(rvalues, kind="block") | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|         self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind="integer", fill_value=0) | ||||
|         b = SparseArray(rvalues, kind="block") | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind="integer", fill_value=0) | ||||
|         b = SparseArray(rvalues, kind="block", fill_value=0) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind="integer", fill_value=1) | ||||
|         b = SparseArray(rvalues, kind="block", fill_value=2) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|     def test_float_array_comparison(self, kind): | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|         rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|         self._check_comparison_ops(a, b * 0, values, rvalues * 0) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=0) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=1) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=2) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|     def test_int_array(self, kind, mix, all_arithmetic_functions): | ||||
|         op = all_arithmetic_functions | ||||
|  | ||||
|         # have to specify dtype explicitly until fixing GH 667 | ||||
|         dtype = np.int64 | ||||
|  | ||||
|         values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) | ||||
|         rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) | ||||
|  | ||||
|         a = SparseArray(values, dtype=dtype, kind=kind) | ||||
|         assert a.dtype == SparseDtype(dtype) | ||||
|         b = SparseArray(rvalues, dtype=dtype, kind=kind) | ||||
|         assert b.dtype == SparseDtype(dtype) | ||||
|  | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|         self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind) | ||||
|         assert a.dtype == SparseDtype(dtype) | ||||
|         b = SparseArray(rvalues, dtype=dtype, kind=kind) | ||||
|         assert b.dtype == SparseDtype(dtype) | ||||
|  | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind) | ||||
|         assert a.dtype == SparseDtype(dtype) | ||||
|         b = SparseArray(rvalues, fill_value=0, dtype=dtype, kind=kind) | ||||
|         assert b.dtype == SparseDtype(dtype) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, fill_value=1, dtype=dtype, kind=kind) | ||||
|         assert a.dtype == SparseDtype(dtype, fill_value=1) | ||||
|         b = SparseArray(rvalues, fill_value=2, dtype=dtype, kind=kind) | ||||
|         assert b.dtype == SparseDtype(dtype, fill_value=2) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|     def test_int_array_comparison(self, kind): | ||||
|         dtype = "int64" | ||||
|         # int32 NI ATM | ||||
|  | ||||
|         values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) | ||||
|         rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) | ||||
|  | ||||
|         a = SparseArray(values, dtype=dtype, kind=kind) | ||||
|         b = SparseArray(rvalues, dtype=dtype, kind=kind) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|         self._check_comparison_ops(a, b * 0, values, rvalues * 0) | ||||
|  | ||||
|         a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, dtype=dtype, kind=kind) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|         a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=0) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|         a = SparseArray(values, dtype=dtype, kind=kind, fill_value=1) | ||||
|         b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=2) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|     @pytest.mark.parametrize("fill_value", [True, False, np.nan]) | ||||
|     def test_bool_same_index(self, kind, fill_value): | ||||
|         # GH 14000 | ||||
|         # when sp_index are the same | ||||
|         values = np.array([True, False, True, True], dtype=np.bool_) | ||||
|         rvalues = np.array([True, False, True, True], dtype=np.bool_) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value) | ||||
|         b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value) | ||||
|         self._check_logical_ops(a, b, values, rvalues) | ||||
|  | ||||
|     @pytest.mark.parametrize("fill_value", [True, False, np.nan]) | ||||
|     def test_bool_array_logical(self, kind, fill_value): | ||||
|         # GH 14000 | ||||
|         # when sp_index are the same | ||||
|         values = np.array([True, False, True, False, True, True], dtype=np.bool_) | ||||
|         rvalues = np.array([True, False, False, True, False, True], dtype=np.bool_) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value) | ||||
|         b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value) | ||||
|         self._check_logical_ops(a, b, values, rvalues) | ||||
|  | ||||
|     def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions, request): | ||||
|         op = all_arithmetic_functions | ||||
|         rdtype = "int64" | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|         rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         assert b.dtype == SparseDtype(rdtype) | ||||
|  | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|         self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         assert b.dtype == SparseDtype(rdtype) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=0) | ||||
|         assert b.dtype == SparseDtype(rdtype) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=1) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=2) | ||||
|         assert b.dtype == SparseDtype(rdtype, fill_value=2) | ||||
|         self._check_numeric_ops(a, b, values, rvalues, mix, op) | ||||
|  | ||||
|     def test_mixed_array_comparison(self, kind): | ||||
|         rdtype = "int64" | ||||
|         # int32 NI ATM | ||||
|  | ||||
|         values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) | ||||
|         rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         assert b.dtype == SparseDtype(rdtype) | ||||
|  | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|         self._check_comparison_ops(a, b * 0, values, rvalues * 0) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind) | ||||
|         assert b.dtype == SparseDtype(rdtype) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=0) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=0) | ||||
|         assert b.dtype == SparseDtype(rdtype) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|         a = SparseArray(values, kind=kind, fill_value=1) | ||||
|         b = SparseArray(rvalues, kind=kind, fill_value=2) | ||||
|         assert b.dtype == SparseDtype(rdtype, fill_value=2) | ||||
|         self._check_comparison_ops(a, b, values, rvalues) | ||||
|  | ||||
|     def test_xor(self): | ||||
|         s = SparseArray([True, True, False, False]) | ||||
|         t = SparseArray([True, False, True, False]) | ||||
|         result = s ^ t | ||||
|         sp_index = pd.core.arrays.sparse.IntIndex(4, np.array([0, 1, 2], dtype="int32")) | ||||
|         expected = SparseArray([False, True, True], sparse_index=sp_index) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", [operator.eq, operator.add]) | ||||
| def test_with_list(op): | ||||
|     arr = SparseArray([0, 1], fill_value=0) | ||||
|     result = op(arr, [0, 1]) | ||||
|     expected = op(arr, SparseArray([0, 1])) | ||||
|     tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_with_dataframe(): | ||||
|     # GH#27910 | ||||
|     arr = SparseArray([0, 1], fill_value=0) | ||||
|     df = pd.DataFrame([[1, 2], [3, 4]]) | ||||
|     result = arr.__add__(df) | ||||
|     assert result is NotImplemented | ||||
|  | ||||
|  | ||||
| def test_with_zerodim_ndarray(): | ||||
|     # GH#27910 | ||||
|     arr = SparseArray([0, 1], fill_value=0) | ||||
|  | ||||
|     result = arr * np.array(2) | ||||
|     expected = arr * 2 | ||||
|     tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ufunc", [np.abs, np.exp]) | ||||
| @pytest.mark.parametrize( | ||||
|     "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])] | ||||
| ) | ||||
| def test_ufuncs(ufunc, arr): | ||||
|     result = ufunc(arr) | ||||
|     fill_value = ufunc(arr.fill_value) | ||||
|     expected = SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value) | ||||
|     tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "a, b", | ||||
|     [ | ||||
|         (SparseArray([0, 0, 0]), np.array([0, 1, 2])), | ||||
|         (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), | ||||
|         (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), | ||||
|         (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), | ||||
|         (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize("ufunc", [np.add, np.greater]) | ||||
| def test_binary_ufuncs(ufunc, a, b): | ||||
|     # can't say anything about fill value here. | ||||
|     result = ufunc(a, b) | ||||
|     expected = ufunc(np.asarray(a), np.asarray(b)) | ||||
|     assert isinstance(result, SparseArray) | ||||
|     tm.assert_numpy_array_equal(np.asarray(result), expected) | ||||
|  | ||||
|  | ||||
| def test_ndarray_inplace(): | ||||
|     sparray = SparseArray([0, 2, 0, 0]) | ||||
|     ndarray = np.array([0, 1, 2, 3]) | ||||
|     ndarray += sparray | ||||
|     expected = np.array([0, 3, 2, 3]) | ||||
|     tm.assert_numpy_array_equal(ndarray, expected) | ||||
|  | ||||
|  | ||||
| def test_sparray_inplace(): | ||||
|     sparray = SparseArray([0, 2, 0, 0]) | ||||
|     ndarray = np.array([0, 1, 2, 3]) | ||||
|     sparray += ndarray | ||||
|     expected = SparseArray([0, 3, 2, 3], fill_value=0) | ||||
|     tm.assert_sp_array_equal(sparray, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("cons", [list, np.array, SparseArray]) | ||||
| def test_mismatched_length_cmp_op(cons): | ||||
|     left = SparseArray([True, True]) | ||||
|     right = cons([True, True, True]) | ||||
|     with pytest.raises(ValueError, match="operands have mismatched length"): | ||||
|         left & right | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "floordiv", "pow"]) | ||||
| @pytest.mark.parametrize("fill_value", [np.nan, 3]) | ||||
| def test_binary_operators(op, fill_value): | ||||
|     op = getattr(operator, op) | ||||
|     data1 = np.random.default_rng(2).standard_normal(20) | ||||
|     data2 = np.random.default_rng(2).standard_normal(20) | ||||
|  | ||||
|     data1[::2] = fill_value | ||||
|     data2[::3] = fill_value | ||||
|  | ||||
|     first = SparseArray(data1, fill_value=fill_value) | ||||
|     second = SparseArray(data2, fill_value=fill_value) | ||||
|  | ||||
|     with np.errstate(all="ignore"): | ||||
|         res = op(first, second) | ||||
|         exp = SparseArray( | ||||
|             op(first.to_dense(), second.to_dense()), fill_value=first.fill_value | ||||
|         ) | ||||
|         assert isinstance(res, SparseArray) | ||||
|         tm.assert_almost_equal(res.to_dense(), exp.to_dense()) | ||||
|  | ||||
|         res2 = op(first, second.to_dense()) | ||||
|         assert isinstance(res2, SparseArray) | ||||
|         tm.assert_sp_array_equal(res, res2) | ||||
|  | ||||
|         res3 = op(first.to_dense(), second) | ||||
|         assert isinstance(res3, SparseArray) | ||||
|         tm.assert_sp_array_equal(res, res3) | ||||
|  | ||||
|         res4 = op(first, 4) | ||||
|         assert isinstance(res4, SparseArray) | ||||
|  | ||||
|         # Ignore this if the actual op raises (e.g. pow). | ||||
|         try: | ||||
|             exp = op(first.to_dense(), 4) | ||||
|             exp_fv = op(first.fill_value, 4) | ||||
|         except ValueError: | ||||
|             pass | ||||
|         else: | ||||
|             tm.assert_almost_equal(res4.fill_value, exp_fv) | ||||
|             tm.assert_almost_equal(res4.to_dense(), exp) | ||||
| @ -0,0 +1,511 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.sparse import IntIndex | ||||
| from pandas.compat.numpy import np_version_gt2 | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     SparseDtype, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def arr_data(): | ||||
|     """Fixture returning a numpy array of ten entries, four of which are NaN.""" | ||||
|     return np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def arr(arr_data): | ||||
|     """Fixture returning a SparseArray built from the ``arr_data`` fixture.""" | ||||
|     return SparseArray(arr_data) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def zarr(): | ||||
|     """Fixture returning a ten-entry integer SparseArray with ``fill_value=0``. | ||||
|  | ||||
|     Four of the entries are equal to the fill value. | ||||
|     """ | ||||
|     return SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) | ||||
|  | ||||
|  | ||||
| class TestSparseArray: | ||||
|     """Tests for SparseArray basics: fill_value, copy, shape, pickle, fillna.""" | ||||
|  | ||||
|     @pytest.mark.parametrize("fill_value", [0, None, np.nan]) | ||||
|     def test_shift_fill_value(self, fill_value): | ||||
|         """shift() inserts ``fill_value`` and keeps the array's own fill value.""" | ||||
|         # GH #24128 | ||||
|         sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0) | ||||
|         res = sparse.shift(1, fill_value=fill_value) | ||||
|         # NA-like fill values are expected to show up as the dtype's na_value | ||||
|         if isna(fill_value): | ||||
|             fill_value = res.dtype.na_value | ||||
|         exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|     def test_set_fill_value(self): | ||||
|         """Assigning ``fill_value`` updates it; some assignments must warn.""" | ||||
|         arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan) | ||||
|         arr.fill_value = 2 | ||||
|         assert arr.fill_value == 2 | ||||
|  | ||||
|         arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64) | ||||
|         arr.fill_value = 2 | ||||
|         assert arr.fill_value == 2 | ||||
|  | ||||
|         # a float fill value on the int64 array must emit the FutureWarning | ||||
|         msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             arr.fill_value = 3.1 | ||||
|         assert arr.fill_value == 3.1 | ||||
|  | ||||
|         arr.fill_value = np.nan | ||||
|         assert np.isnan(arr.fill_value) | ||||
|  | ||||
|         arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_) | ||||
|         arr.fill_value = True | ||||
|         assert arr.fill_value is True | ||||
|  | ||||
|         # an integer fill value on the bool array must emit the FutureWarning | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             arr.fill_value = 0 | ||||
|  | ||||
|         arr.fill_value = np.nan | ||||
|         assert np.isnan(arr.fill_value) | ||||
|  | ||||
|     @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]) | ||||
|     def test_set_fill_invalid_non_scalar(self, val): | ||||
|         """Assigning a list, ndarray or tuple as fill_value raises ValueError.""" | ||||
|         arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_) | ||||
|         msg = "fill_value must be a scalar" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr.fill_value = val | ||||
|  | ||||
|     def test_copy(self, arr): | ||||
|         """copy() yields a new sp_values object but the same sp_index object.""" | ||||
|         arr2 = arr.copy() | ||||
|         assert arr2.sp_values is not arr.sp_values | ||||
|         assert arr2.sp_index is arr.sp_index | ||||
|  | ||||
|     def test_values_asarray(self, arr_data, arr): | ||||
|         """to_dense() reproduces the dense data the array was built from.""" | ||||
|         tm.assert_almost_equal(arr.to_dense(), arr_data) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data,shape,dtype", | ||||
|         [ | ||||
|             ([0, 0, 0, 0, 0], (5,), None), | ||||
|             ([], (0,), None), | ||||
|             ([0], (1,), None), | ||||
|             (["A", "A", np.nan, "B"], (4,), object), | ||||
|         ], | ||||
|     ) | ||||
|     def test_shape(self, data, shape, dtype): | ||||
|         """``shape`` is a 1-tuple holding the length of the input data.""" | ||||
|         # GH 21126 | ||||
|         out = SparseArray(data, dtype=dtype) | ||||
|         assert out.shape == shape | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "vals", | ||||
|         [ | ||||
|             [np.nan, np.nan, np.nan, np.nan, np.nan], | ||||
|             [1, np.nan, np.nan, 3, np.nan], | ||||
|             [1, np.nan, 0, 3, 0], | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("fill_value", [None, 0]) | ||||
|     def test_dense_repr(self, vals, fill_value): | ||||
|         """to_dense() round-trips the input for both tested fill values.""" | ||||
|         vals = np.array(vals) | ||||
|         arr = SparseArray(vals, fill_value=fill_value) | ||||
|  | ||||
|         res = arr.to_dense() | ||||
|         tm.assert_numpy_array_equal(res, vals) | ||||
|  | ||||
|     @pytest.mark.parametrize("fix", ["arr", "zarr"]) | ||||
|     def test_pickle(self, fix, request): | ||||
|         """A pickle round trip gives back an equal SparseArray.""" | ||||
|         # look the fixture up by name so both arrays share one test body | ||||
|         obj = request.getfixturevalue(fix) | ||||
|         unpickled = tm.round_trip_pickle(obj) | ||||
|         tm.assert_sp_array_equal(unpickled, obj) | ||||
|  | ||||
|     def test_generator_warnings(self): | ||||
|         """Iterating over a SparseArray must not emit any warning.""" | ||||
|         sp_arr = SparseArray([1, 2, 3]) | ||||
|         with tm.assert_produces_warning(None): | ||||
|             for _ in sp_arr: | ||||
|                 pass | ||||
|  | ||||
|     def test_where_retain_fill_value(self): | ||||
|         """``_where`` and ``Series.where`` keep the original fill_value.""" | ||||
|         # GH#45691 don't lose fill_value on _where | ||||
|         arr = SparseArray([np.nan, 1.0], fill_value=0) | ||||
|  | ||||
|         mask = np.array([True, False]) | ||||
|  | ||||
|         res = arr._where(~mask, 1) | ||||
|         exp = SparseArray([1, 1.0], fill_value=0) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         ser = pd.Series(arr) | ||||
|         res = ser.where(~mask, 1) | ||||
|         tm.assert_series_equal(res, pd.Series(exp)) | ||||
|  | ||||
|     def test_fillna(self): | ||||
|         """fillna() on float and int arrays with NaN and non-NaN fill values.""" | ||||
|         s = SparseArray([1, np.nan, np.nan, 3, np.nan]) | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         s = SparseArray([1, np.nan, 0, 3, 0]) | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0) | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         s = SparseArray([np.nan, np.nan, np.nan, np.nan]) | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0) | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         # float dtype's fill_value is np.nan, replaced by -1 | ||||
|         s = SparseArray([0.0, 0.0, 0.0, 0.0]) | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         # int dtype shouldn't have missing. No changes. | ||||
|         s = SparseArray([0, 0, 0, 0]) | ||||
|         assert s.dtype == SparseDtype(np.int64) | ||||
|         assert s.fill_value == 0 | ||||
|         res = s.fillna(-1) | ||||
|         tm.assert_sp_array_equal(res, s) | ||||
|  | ||||
|         s = SparseArray([0, 0, 0, 0], fill_value=0) | ||||
|         assert s.dtype == SparseDtype(np.int64) | ||||
|         assert s.fill_value == 0 | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([0, 0, 0, 0], fill_value=0) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         # fill_value can be nan if there is no missing hole. | ||||
|         # only fill_value will be changed | ||||
|         s = SparseArray([0, 0, 0, 0], fill_value=np.nan) | ||||
|         assert s.dtype == SparseDtype(np.int64, fill_value=np.nan) | ||||
|         assert np.isnan(s.fill_value) | ||||
|         res = s.fillna(-1) | ||||
|         exp = SparseArray([0, 0, 0, 0], fill_value=-1) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|     def test_fillna_overlap(self): | ||||
|         """fillna() with a value that already occurs among the stored values.""" | ||||
|         s = SparseArray([1, np.nan, np.nan, 3, np.nan]) | ||||
|         # filling with existing value doesn't replace existing value with | ||||
|         # fill_value, i.e. existing 3 remains in sp_values | ||||
|         res = s.fillna(3) | ||||
|         exp = np.array([1, 3, 3, 3, 3], dtype=np.float64) | ||||
|         tm.assert_numpy_array_equal(res.to_dense(), exp) | ||||
|  | ||||
|         s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) | ||||
|         res = s.fillna(3) | ||||
|         exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|     def test_nonzero(self): | ||||
|         """nonzero() returns int32 positions, skipping zero and NaN entries.""" | ||||
|         # Tests regression #21172. | ||||
|         sa = SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) | ||||
|         expected = np.array([2, 5, 9], dtype=np.int32) | ||||
|         (result,) = sa.nonzero() | ||||
|         tm.assert_numpy_array_equal(expected, result) | ||||
|  | ||||
|         # same positions when the leading NaNs are zeros instead | ||||
|         sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) | ||||
|         (result,) = sa.nonzero() | ||||
|         tm.assert_numpy_array_equal(expected, result) | ||||
|  | ||||
|  | ||||
| class TestSparseArrayAnalytics: | ||||
|     """Tests for cumsum, ufuncs, nbytes, density and npoints on SparseArray.""" | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data,expected", | ||||
|         [ | ||||
|             ( | ||||
|                 np.array([1, 2, 3, 4, 5], dtype=float),  # non-null data | ||||
|                 SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0])), | ||||
|             ), | ||||
|             ( | ||||
|                 np.array([1, 2, np.nan, 4, 5], dtype=float),  # null data | ||||
|                 SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("numpy", [True, False]) | ||||
|     def test_cumsum(self, data, expected, numpy): | ||||
|         """cumsum via ``np.cumsum`` or the method, for several fill values. | ||||
|  | ||||
|         Also checks that unsupported arguments raise ValueError. | ||||
|         """ | ||||
|         # pick the numpy function or the SparseArray method under test | ||||
|         cumsum = np.cumsum if numpy else lambda s: s.cumsum() | ||||
|  | ||||
|         out = cumsum(SparseArray(data)) | ||||
|         tm.assert_sp_array_equal(out, expected) | ||||
|  | ||||
|         out = cumsum(SparseArray(data, fill_value=np.nan)) | ||||
|         tm.assert_sp_array_equal(out, expected) | ||||
|  | ||||
|         out = cumsum(SparseArray(data, fill_value=2)) | ||||
|         tm.assert_sp_array_equal(out, expected) | ||||
|  | ||||
|         if numpy:  # numpy compatibility checks. | ||||
|             msg = "the 'dtype' parameter is not supported" | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 np.cumsum(SparseArray(data), dtype=np.int64) | ||||
|  | ||||
|             msg = "the 'out' parameter is not supported" | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 np.cumsum(SparseArray(data), out=out) | ||||
|         else: | ||||
|             axis = 1  # SparseArray currently 1-D, so only axis = 0 is valid. | ||||
|             msg = re.escape(f"axis(={axis}) out of bounds") | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 SparseArray(data).cumsum(axis=axis) | ||||
|  | ||||
|     def test_ufunc(self): | ||||
|         """``abs``/``np.abs``/``np.sin`` results, including the fill value.""" | ||||
|         # GH 13853 make sure ufunc is applied to fill_value | ||||
|         sparse = SparseArray([1, np.nan, 2, np.nan, -2]) | ||||
|         result = SparseArray([1, np.nan, 2, np.nan, 2]) | ||||
|         tm.assert_sp_array_equal(abs(sparse), result) | ||||
|         tm.assert_sp_array_equal(np.abs(sparse), result) | ||||
|  | ||||
|         sparse = SparseArray([1, -1, 2, -2], fill_value=1) | ||||
|         result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1) | ||||
|         tm.assert_sp_array_equal(abs(sparse), result) | ||||
|         tm.assert_sp_array_equal(np.abs(sparse), result) | ||||
|  | ||||
|         sparse = SparseArray([1, -1, 2, -2], fill_value=-1) | ||||
|         exp = SparseArray([1, 1, 2, 2], fill_value=1) | ||||
|         tm.assert_sp_array_equal(abs(sparse), exp) | ||||
|         tm.assert_sp_array_equal(np.abs(sparse), exp) | ||||
|  | ||||
|         sparse = SparseArray([1, np.nan, 2, np.nan, -2]) | ||||
|         result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2])) | ||||
|         tm.assert_sp_array_equal(np.sin(sparse), result) | ||||
|  | ||||
|         sparse = SparseArray([1, -1, 2, -2], fill_value=1) | ||||
|         result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1)) | ||||
|         tm.assert_sp_array_equal(np.sin(sparse), result) | ||||
|  | ||||
|         sparse = SparseArray([1, -1, 0, -2], fill_value=0) | ||||
|         result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0)) | ||||
|         tm.assert_sp_array_equal(np.sin(sparse), result) | ||||
|  | ||||
|     def test_ufunc_args(self): | ||||
|         """``np.add(sparse, 1)`` shifts both the values and the fill value.""" | ||||
|         # GH 13853 make sure ufunc is applied to fill_value, including its arg | ||||
|         sparse = SparseArray([1, np.nan, 2, np.nan, -2]) | ||||
|         result = SparseArray([2, np.nan, 3, np.nan, -1]) | ||||
|         tm.assert_sp_array_equal(np.add(sparse, 1), result) | ||||
|  | ||||
|         sparse = SparseArray([1, -1, 2, -2], fill_value=1) | ||||
|         result = SparseArray([2, 0, 3, -1], fill_value=2) | ||||
|         tm.assert_sp_array_equal(np.add(sparse, 1), result) | ||||
|  | ||||
|         sparse = SparseArray([1, -1, 0, -2], fill_value=0) | ||||
|         result = SparseArray([2, 0, 1, -1], fill_value=1) | ||||
|         tm.assert_sp_array_equal(np.add(sparse, 1), result) | ||||
|  | ||||
|     @pytest.mark.parametrize("fill_value", [0.0, np.nan]) | ||||
|     def test_modf(self, fill_value): | ||||
|         """Both outputs of ``np.modf`` match the dense computation.""" | ||||
|         # https://github.com/pandas-dev/pandas/issues/26946 | ||||
|         sparse = SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value) | ||||
|         r1, r2 = np.modf(sparse) | ||||
|         e1, e2 = np.modf(np.asarray(sparse)) | ||||
|         tm.assert_sp_array_equal(r1, SparseArray(e1, fill_value=fill_value)) | ||||
|         tm.assert_sp_array_equal(r2, SparseArray(e2, fill_value=fill_value)) | ||||
|  | ||||
|     def test_nbytes_integer(self): | ||||
|         """``nbytes`` of an "integer"-kind array with two stored values.""" | ||||
|         arr = SparseArray([1, 0, 0, 0, 2], kind="integer") | ||||
|         result = arr.nbytes | ||||
|         # (2 * 8) + 2 * 4 | ||||
|         assert result == 24 | ||||
|  | ||||
|     def test_nbytes_block(self): | ||||
|         """``nbytes`` of a "block"-kind array with two stored values.""" | ||||
|         arr = SparseArray([1, 2, 0, 0, 0], kind="block") | ||||
|         result = arr.nbytes | ||||
|         # (2 * 8) + 4 + 4 | ||||
|         # sp_values, blocs, blengths | ||||
|         assert result == 24 | ||||
|  | ||||
|     def test_asarray_datetime64(self): | ||||
|         """Smoke test: ``np.asarray`` on a datetime SparseArray must not raise.""" | ||||
|         s = SparseArray(pd.to_datetime(["2012", None, None, "2013"])) | ||||
|         # no assertion on the result; only the conversion itself is exercised | ||||
|         np.asarray(s) | ||||
|  | ||||
|     def test_density(self): | ||||
|         """``density`` is 0.5 for the two-element array ``[0, 1]``.""" | ||||
|         arr = SparseArray([0, 1]) | ||||
|         assert arr.density == 0.5 | ||||
|  | ||||
|     def test_npoints(self): | ||||
|         """``npoints`` is 1 for the two-element array ``[0, 1]``.""" | ||||
|         arr = SparseArray([0, 1]) | ||||
|         assert arr.npoints == 1 | ||||
|  | ||||
|  | ||||
| def test_setting_fill_value_fillna_still_works(): | ||||
|     # This is why letting users update fill_value / dtype is bad | ||||
|     # astype has the same problem. | ||||
|     arr = SparseArray([1.0, np.nan, 1.0], fill_value=0.0) | ||||
|     arr.fill_value = np.nan | ||||
|     result = arr.isna() | ||||
|     # Can't do direct comparison, since the sp_index will be different | ||||
|     # So let's convert to ndarray and check there. | ||||
|     result = np.asarray(result) | ||||
|  | ||||
|     expected = np.array([False, True, False]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_setting_fill_value_updates(): | ||||
|     arr = SparseArray([0.0, np.nan], fill_value=0) | ||||
|     arr.fill_value = np.nan | ||||
|     # use private constructor to get the index right | ||||
|     # otherwise both nans would be un-stored. | ||||
|     expected = SparseArray._simple_new( | ||||
|         sparse_array=np.array([np.nan]), | ||||
|         sparse_index=IntIndex(2, [1]), | ||||
|         dtype=SparseDtype(float, np.nan), | ||||
|     ) | ||||
|     tm.assert_sp_array_equal(arr, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "arr,fill_value,loc", | ||||
|     [ | ||||
|         ([None, 1, 2], None, 0), | ||||
|         ([0, None, 2], None, 1), | ||||
|         ([0, 1, None], None, 2), | ||||
|         ([0, 1, 1, None, None], None, 3), | ||||
|         ([1, 1, 1, 2], None, -1), | ||||
|         ([], None, -1), | ||||
|         ([None, 1, 0, 0, None, 2], None, 0), | ||||
|         ([None, 1, 0, 0, None, 2], 1, 1), | ||||
|         ([None, 1, 0, 0, None, 2], 2, 5), | ||||
|         ([None, 1, 0, 0, None, 2], 3, -1), | ||||
|         ([None, 0, 0, 1, 2, 1], 0, 1), | ||||
|         ([None, 0, 0, 1, 2, 1], 1, 3), | ||||
|     ], | ||||
| ) | ||||
| def test_first_fill_value_loc(arr, fill_value, loc): | ||||
|     result = SparseArray(arr, fill_value=fill_value)._first_fill_value_loc() | ||||
|     assert result == loc | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "arr",
    [
        [1, 2, np.nan, np.nan],
        [1, np.nan, 2, np.nan],
        [1, 2, np.nan],
        [np.nan, 1, 0, 0, np.nan, 2],
        [np.nan, 0, 0, 1, 2, 1],
    ],
)
@pytest.mark.parametrize("fill_value", [np.nan, 0, 1])
def test_unique_na_fill(arr, fill_value):
    """``SparseArray.unique`` matches ``Series.unique`` for every fill value."""
    sparse_uniques = SparseArray(arr, fill_value=fill_value).unique()
    series_uniques = pd.Series(arr).unique()

    # The result has to stay sparse; densify only for the value comparison.
    assert isinstance(sparse_uniques, SparseArray)
    tm.assert_numpy_array_equal(np.asarray(sparse_uniques), series_uniques)
|  | ||||
|  | ||||
def test_unique_all_sparse():
    """An array holding only fill values has exactly one unique entry."""
    # https://github.com/pandas-dev/pandas/issues/23168
    all_fill = SparseArray([0, 0])
    tm.assert_sp_array_equal(all_fill.unique(), SparseArray([0]))
|  | ||||
|  | ||||
def test_map():
    """``SparseArray.map`` accepts a dict, a Series or a callable mapper.

    All three mappers send ``0 -> 10``, ``1 -> 11`` and ``2 -> 12``, so each
    of them must produce the same sparse result with a fill value of ``10``.
    """
    arr = SparseArray([0, 1, 2])
    expected = SparseArray([10, 11, 12], fill_value=10)
    mapping = {0: 10, 1: 11, 2: 12}

    # dict
    result = arr.map(mapping)
    tm.assert_sp_array_equal(result, expected)

    # series
    result = arr.map(pd.Series(mapping))
    tm.assert_sp_array_equal(result, expected)

    # function: a real callable, so the non-mapping code path is exercised
    result = arr.map(lambda x: x + 10)
    tm.assert_sp_array_equal(result, expected)
|  | ||||
|  | ||||
def test_map_missing():
    """A value with no key in the dict mapper maps to a missing entry."""
    arr = SparseArray([0, 1, 2])
    expected = SparseArray([10, 11, None], fill_value=10)

    partial_mapping = {0: 10, 1: 11}  # deliberately has no entry for 2
    tm.assert_sp_array_equal(arr.map(partial_mapping), expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("fill_value", [np.nan, 1])
def test_dropna(fill_value):
    """``dropna`` removes the NaN entry on the array and on a frame holding it."""
    # GH-28287
    sparse = SparseArray([np.nan, 1], fill_value=fill_value)
    without_na = SparseArray([1.0], fill_value=fill_value)
    tm.assert_sp_array_equal(sparse.dropna(), without_na)

    # Same check at the DataFrame level: only the row labelled 1 survives.
    frame = pd.DataFrame({"a": [0, 1], "b": sparse})
    expected_frame = pd.DataFrame(
        {"a": [1], "b": without_na},
        index=pd.Index([1]),
    )
    tm.assert_equal(frame.dropna(), expected_frame)
|  | ||||
|  | ||||
def test_drop_duplicates_fill_value():
    """``drop_duplicates`` on an all-fill-value sparse frame keeps a single row."""
    # GH 11726
    dense_zeros = pd.DataFrame(np.zeros((5, 5)))
    sparse_frame = dense_zeros.apply(lambda col: SparseArray(col, fill_value=0))
    deduplicated = sparse_frame.drop_duplicates()

    expected = pd.DataFrame(
        {label: SparseArray([0.0], fill_value=0) for label in range(5)}
    )
    tm.assert_frame_equal(deduplicated, expected)
|  | ||||
|  | ||||
def test_zero_sparse_column():
    """Row selection works when the sparse column holds only fill values."""
    # GH 27781
    all_zero = pd.DataFrame({"A": SparseArray([0, 0, 0]), "B": [1, 2, 3]})
    one_nonzero = pd.DataFrame({"A": SparseArray([0, 1, 0]), "B": [1, 2, 3]})

    # The only row in which the two frames differ is the one filtered out.
    result = all_zero.loc[all_zero["B"] != 2]
    tm.assert_frame_equal(result, one_nonzero.loc[one_nonzero["B"] != 2])

    explicit = pd.DataFrame(
        {"A": SparseArray([0, 0]), "B": [1, 3]},
        index=[0, 2],
    )
    tm.assert_frame_equal(result, explicit)
|  | ||||
|  | ||||
def test_array_interface(arr_data, arr):
    """Check densification and ``copy`` handling through the NumPy array interface."""
    # https://github.com/pandas-dev/pandas/pull/60046
    dense = np.asarray(arr)
    tm.assert_numpy_array_equal(dense, arr_data)

    # it always gives a copy by default
    first = np.asarray(arr)
    second = np.asarray(arr)
    assert not np.may_share_memory(first, second)

    # or with explicit copy=True
    first = np.array(arr, copy=True)
    second = np.array(arr, copy=True)
    assert not np.may_share_memory(first, second)

    if not np_version_gt2:
        # copy=False semantics are only supported in NumPy>=2.
        return

    msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        np.array(arr, copy=False)

    # except when there are actually no sparse filled values
    fully_stored = SparseArray(np.array([1, 2, 3]))
    first = np.array(fully_stored, copy=False)
    second = np.array(fully_stored, copy=False)
    assert np.may_share_memory(first, second)
| @ -0,0 +1,133 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.sparse import IntIndex | ||||
|  | ||||
| from pandas import ( | ||||
|     SparseDtype, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
class TestAstype:
    """Tests for ``SparseArray.astype``."""

    def test_astype(self):
        """Cast a float sparse array to other sparse dtypes and fill values."""
        source = SparseArray([None, None, 0, 2])

        # float -> float
        as_float32 = source.astype("Sparse[float32]")
        tm.assert_sp_array_equal(
            as_float32,
            SparseArray([None, None, 0, 2], dtype=np.dtype("float32")),
        )

        # float64 with an explicit zero fill value
        float_zero_fill = SparseDtype("float64", fill_value=0)
        as_float_zero_fill = source.astype(float_zero_fill)
        expected = SparseArray._simple_new(
            np.array([0.0, 2.0], dtype=float_zero_fill.subtype),
            IntIndex(4, [2, 3]),
            float_zero_fill,
        )
        tm.assert_sp_array_equal(as_float_zero_fill, expected)

        # int64 with a zero fill value
        int_zero_fill = SparseDtype("int64", 0)
        as_int_zero_fill = source.astype(int_zero_fill)
        expected = SparseArray._simple_new(
            np.array([0, 2], dtype=np.int64),
            IntIndex(4, [2, 3]),
            int_zero_fill,
        )
        tm.assert_sp_array_equal(as_int_zero_fill, expected)

        # a stored NaN cannot be cast to an integer subtype
        with_stored_nan = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match="NA"):
            with_stored_nan.astype("Sparse[i8]")

    def test_astype_bool(self):
        """Plain ``bool`` gives a NumPy array; a sparse bool dtype stays sparse."""
        ints = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))

        dense_bool = ints.astype(bool)
        tm.assert_numpy_array_equal(
            dense_bool, np.array([1, 0, 0, 1], dtype=bool)
        )

        # update fill value
        bool_false_fill = SparseDtype(bool, False)
        sparse_bool = ints.astype(bool_false_fill)
        expected = SparseArray(
            [True, False, False, True], dtype=SparseDtype(bool, False)
        )
        tm.assert_sp_array_equal(sparse_bool, expected)

    def test_astype_all(self, any_real_numpy_dtype):
        """Casting to each real NumPy dtype matches casting the dense values."""
        dense = np.array([1, 2, 3])
        sparse = SparseArray(dense, fill_value=1)
        target = np.dtype(any_real_numpy_dtype)
        converted = sparse.astype(target)
        tm.assert_numpy_array_equal(converted, dense.astype(any_real_numpy_dtype))

    @pytest.mark.parametrize(
        "arr, dtype, expected",
        [
            # int -> float
            (
                SparseArray([0, 1]),
                "float",
                SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)),
            ),
            # int -> bool, default and non-default fill value
            (SparseArray([0, 1]), bool, SparseArray([False, True])),
            (
                SparseArray([0, 1], fill_value=1),
                bool,
                SparseArray([False, True], dtype=SparseDtype(bool, True)),
            ),
            # int -> datetime64
            pytest.param(
                SparseArray([0, 1]),
                "datetime64[ns]",
                SparseArray(
                    np.array([0, 1], dtype="datetime64[ns]"),
                    dtype=SparseDtype("datetime64[ns]", Timestamp("1970")),
                ),
            ),
            # int -> str
            (
                SparseArray([0, 1, 10]),
                np.str_,
                SparseArray(["0", "1", "10"], dtype=SparseDtype(np.str_, "0")),
            ),
            # str -> float
            (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
            # int -> object
            (
                SparseArray([0, 1, 0]),
                object,
                SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)),
            ),
        ],
    )
    def test_astype_more(self, arr, dtype, expected):
        """Cast via ``update_dtype`` so the target is derived from the source dtype."""
        target = arr.dtype.update_dtype(dtype)
        tm.assert_sp_array_equal(arr.astype(target), expected)

    def test_astype_nan_raises(self):
        """A stored NaN cannot be converted to ``int``."""
        with_nan = SparseArray([1.0, np.nan])
        with pytest.raises(ValueError, match="Cannot convert non-finite"):
            with_nan.astype(int)

    def test_astype_copy_false(self):
        """``copy=False`` must still convert the values."""
        # GH#34456 bug caused by using .view instead of .astype in astype_nansafe
        ints = SparseArray([1, 2, 3])
        float_zero_fill = SparseDtype(float, 0)

        converted = ints.astype(float_zero_fill, copy=False)
        tm.assert_sp_array_equal(
            converted, SparseArray([1.0, 2.0, 3.0], fill_value=0.0)
        )

    def test_astype_dt64_to_int64(self):
        """datetime64 -> int64 casts agree with the plain NumPy cast."""
        # GH#49631 match non-sparse behavior
        values = np.array(["NaT", "2016-01-02", "2016-01-03"], dtype="M8[ns]")
        expected = values.astype("int64")

        sparse = SparseArray(values)
        tm.assert_numpy_array_equal(sparse.astype("int64"), expected)

        # we should also be able to cast to equivalent Sparse[int64]
        sparse_int64 = SparseDtype("int64", np.iinfo(np.int64).min)
        as_sparse_int64 = sparse.astype(sparse_int64)
        tm.assert_numpy_array_equal(as_sparse_int64.to_numpy(), expected)

        # GH#50087 we should match the non-sparse behavior regardless of
        #  if we have a fill_value other than NaT
        non_nat_fill = SparseDtype("datetime64[ns]", values[1])
        sparse_non_nat_fill = SparseArray(values, dtype=non_nat_fill)
        tm.assert_numpy_array_equal(sparse_non_nat_fill.astype("int64"), expected)
| @ -0,0 +1,62 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
class TestSparseArrayConcat:
    """Tests for ``SparseArray._concat_same_type``."""

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_basic(self, kind):
        """Concatenating two arrays of the same kind keeps that kind."""
        left = SparseArray([1, 0, 0, 2], kind=kind)
        right = SparseArray([1, 0, 2, 2], kind=kind)

        combined = SparseArray._concat_same_type([left, right])

        # No assertions are made about the sparse index itself, because
        # sparse blocks are not merged across the arrays being concatenated.
        stored = np.array([1, 2, 1, 2, 2], dtype="int64")
        tm.assert_numpy_array_equal(combined.sp_values, stored)
        assert combined.kind == kind

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_uses_first_kind(self, kind):
        """With mixed kinds, the result takes the kind of the first array."""
        other_kind = "block" if kind != "block" else "integer"
        left = SparseArray([1, 0, 0, 2], kind=kind)
        right = SparseArray([1, 0, 2, 2], kind=other_kind)

        combined = SparseArray._concat_same_type([left, right])

        stored = np.array([1, 2, 1, 2, 2], dtype="int64")
        tm.assert_numpy_array_equal(combined.sp_values, stored)
        assert combined.kind == kind
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "other, expected_dtype",
    [
        # compatible dtype -> preserve sparse
        (pd.Series([3, 4, 5], dtype="int64"), pd.SparseDtype("int64", 0)),
        # NOTE(review): the nullable "Int64" case below is disabled; the reason
        # is not recorded here.
        # (pd.Series([3, 4, 5], dtype="Int64"), pd.SparseDtype("int64", 0)),
        # incompatible dtype -> Sparse[common dtype]
        (pd.Series([1.5, 2.5, 3.5], dtype="float64"), pd.SparseDtype("float64", 0)),
        # incompatible dtype -> Sparse[object] dtype
        (pd.Series(["a", "b", "c"], dtype=object), pd.SparseDtype(object, 0)),
        # categorical with compatible categories -> dtype of the categories
        (pd.Series([3, 4, 5], dtype="category"), np.dtype("int64")),
        (pd.Series([1.5, 2.5, 3.5], dtype="category"), np.dtype("float64")),
        # categorical with incompatible categories -> object dtype
        (pd.Series(["a", "b", "c"], dtype="category"), np.dtype(object)),
    ],
)
def test_concat_with_non_sparse(other, expected_dtype):
    """``pd.concat`` of a sparse and a non-sparse Series, in both orders."""
    # https://github.com/pandas-dev/pandas/issues/34336
    sparse_series = pd.Series([1, 0, 2], dtype=pd.SparseDtype("int64", 0))

    # sparse first
    combined = pd.concat([sparse_series, other], ignore_index=True)
    plain_values = list(sparse_series) + list(other)
    tm.assert_series_equal(combined, pd.Series(plain_values).astype(expected_dtype))

    # sparse second
    combined = pd.concat([other, sparse_series], ignore_index=True)
    plain_values = list(other) + list(sparse_series)
    tm.assert_series_equal(combined, pd.Series(plain_values).astype(expected_dtype))
| @ -0,0 +1,285 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.sparse import IntIndex | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     SparseDtype, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
class TestConstructors:
    """Tests for the ``SparseArray`` constructor and ``from_spmatrix``."""

    def test_constructor_dtype(self):
        """The resulting ``SparseDtype`` follows the data, ``dtype`` and ``fill_value``."""
        nan_default = SparseArray([np.nan, 1, 2, np.nan])
        assert nan_default.dtype == SparseDtype(np.float64, np.nan)
        assert nan_default.dtype.subtype == np.float64
        assert np.isnan(nan_default.fill_value)

        zero_fill = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
        assert zero_fill.dtype == SparseDtype(np.float64, 0)
        assert zero_fill.fill_value == 0

        ints_as_float = SparseArray([0, 1, 2, 4], dtype=np.float64)
        assert ints_as_float.dtype == SparseDtype(np.float64, np.nan)
        assert np.isnan(ints_as_float.fill_value)

        # Integer data gives int64 with a zero fill value for every one of
        # these keyword combinations.
        int_keyword_variants = (
            {"dtype": np.int64},
            {"fill_value": 0, "dtype": np.int64},
            {"dtype": None},
            {"fill_value": 0, "dtype": None},
        )
        for keywords in int_keyword_variants:
            ints = SparseArray([0, 1, 2, 4], **keywords)
            assert ints.dtype == SparseDtype(np.int64, 0)
            assert ints.fill_value == 0

    def test_constructor_dtype_str(self):
        """A string dtype behaves like the equivalent Python type."""
        from_string = SparseArray([1, 2, 3], dtype="int")
        from_type = SparseArray([1, 2, 3], dtype=int)
        tm.assert_sp_array_equal(from_string, from_type)

    def test_constructor_sparse_dtype(self):
        """A ``SparseDtype`` argument supplies both the subtype and the fill value."""
        from_sparse_dtype = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1))
        from_keywords = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
        tm.assert_sp_array_equal(from_sparse_dtype, from_keywords)
        assert from_sparse_dtype.sp_values.dtype == np.dtype("int64")

    def test_constructor_sparse_dtype_str(self):
        """A ``"Sparse[...]"`` string selects the subtype of the stored values."""
        from_string = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]")
        from_numpy_type = SparseArray([1, 0, 0, 1], dtype=np.int32)
        tm.assert_sp_array_equal(from_string, from_numpy_type)
        assert from_string.sp_values.dtype == np.dtype("int32")

    def test_constructor_object_dtype(self):
        """Object dtype defaults to a NaN fill value and accepts an explicit one."""
        # GH#11856
        values = ["A", "A", np.nan, "B"]

        default_fill = SparseArray(values, dtype=object)
        assert default_fill.dtype == SparseDtype(object)
        assert np.isnan(default_fill.fill_value)

        string_fill = SparseArray(values, dtype=object, fill_value="A")
        assert string_fill.dtype == SparseDtype(object, "A")
        assert string_fill.fill_value == "A"

    def test_constructor_object_dtype_bool_fill(self):
        """A ``False`` fill value must not swallow the equal-valued ``0`` and ``0.0``."""
        # GH#17574
        data = [False, 0, 100.0, 0.0]
        sparse = SparseArray(data, dtype=object, fill_value=False)
        assert sparse.dtype == SparseDtype(object, False)
        assert sparse.fill_value is False

        # Compare element by element on type as well as value, since
        # False == 0 == 0.0 would hide a lost type.
        reference = np.array(data, dtype=object)
        same_type_and_value = (
            type(got) is type(want) and got == want
            for got, want in zip(sparse, reference)
        )
        assert np.fromiter(same_type_and_value, dtype=np.bool_).all()

    @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
    def test_constructor_na_dtype(self, dtype):
        """NaN data combined with an integer dtype is rejected."""
        with pytest.raises(ValueError, match="Cannot convert"):
            SparseArray([0, 1, np.nan], dtype=dtype)

    def test_constructor_warns_when_losing_timezone(self):
        """Timezone-aware input warns and is stored as naive datetime64[ns]."""
        # GH#32501 warn when losing timezone information
        dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
        expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))

        # First the index itself, then the same data wrapped in a Series; the
        # wrapping happens inside the warning context.
        for wrap in (lambda index: index, pd.Series):
            with tm.assert_produces_warning(UserWarning):
                result = SparseArray(wrap(dti))

            tm.assert_sp_array_equal(result, expected)

    def test_constructor_spindex_dtype(self):
        """Passing ``sparse_index`` places the data without changing dtype or fill."""
        arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
        # TODO: actionable?
        # XXX: Behavior change: specifying SparseIndex no longer changes the
        # fill_value
        expected = SparseArray([0, 1, 2, 0], kind="integer")
        tm.assert_sp_array_equal(arr, expected)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        # every position but the first is stored, explicit dtype and fill value
        arr = SparseArray(
            data=[1, 2, 3],
            sparse_index=IntIndex(4, [1, 2, 3]),
            dtype=np.int64,
            fill_value=0,
        )
        expected = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
        tm.assert_sp_array_equal(arr, expected)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        # two stored positions, explicit dtype and fill value
        arr = SparseArray(
            data=[1, 2],
            sparse_index=IntIndex(4, [1, 2]),
            fill_value=0,
            dtype=np.int64,
        )
        expected = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
        tm.assert_sp_array_equal(arr, expected)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        # dtype left to inference
        arr = SparseArray(
            data=[1, 2, 3],
            sparse_index=IntIndex(4, [1, 2, 3]),
            dtype=None,
            fill_value=0,
        )
        expected = SparseArray([0, 1, 2, 3], dtype=None)
        tm.assert_sp_array_equal(arr, expected)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
    def test_constructor_spindex_dtype_scalar(self, sparse_index):
        """Scalar ``data`` is deprecated but still builds a one-element array."""
        # scalar input
        msg = "Constructing SparseArray with scalar data is deprecated"

        with tm.assert_produces_warning(FutureWarning, match=msg):
            from_param = SparseArray(data=1, sparse_index=sparse_index, dtype=None)
        tm.assert_sp_array_equal(from_param, SparseArray([1], dtype=None))
        assert from_param.dtype == SparseDtype(np.int64)
        assert from_param.fill_value == 0

        # NOTE(review): this repeats the check with a hard-coded index, which
        # duplicates the second parametrized case above.
        with tm.assert_produces_warning(FutureWarning, match=msg):
            from_fixed = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)
        tm.assert_sp_array_equal(from_fixed, SparseArray([1], dtype=None))
        assert from_fixed.dtype == SparseDtype(np.int64)
        assert from_fixed.fill_value == 0

    def test_constructor_spindex_dtype_scalar_broadcasts(self):
        """List data plus a sparse index, with the dtype left to inference."""
        built = SparseArray(
            data=[1, 2],
            sparse_index=IntIndex(4, [1, 2]),
            fill_value=0,
            dtype=None,
        )
        expected = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
        tm.assert_sp_array_equal(built, expected)
        assert built.dtype == SparseDtype(np.int64)
        assert built.fill_value == 0

    @pytest.mark.parametrize(
        "data, fill_value",
        [
            (np.array([1, 2]), 0),
            (np.array([1.0, 2.0]), np.nan),
            ([True, False], False),
            ([pd.Timestamp("2017-01-01")], pd.NaT),
        ],
    )
    def test_constructor_inferred_fill_value(self, data, fill_value):
        """The default fill value is inferred from the kind of data."""
        inferred = SparseArray(data).fill_value

        if not isna(fill_value):
            assert inferred == fill_value
        else:
            # missing values do not compare equal, so only check missingness
            assert isna(inferred)

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    @pytest.mark.parametrize("size", [0, 10])
    def test_from_spmatrix(self, size, format):
        """A single-column scipy matrix round-trips through ``from_spmatrix``."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        matrix = sp_sparse.random(size, 1, density=0.5, format=format)
        dense_result = np.asarray(SparseArray.from_spmatrix(matrix))

        tm.assert_numpy_array_equal(dense_result, matrix.toarray().ravel())

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    def test_from_spmatrix_including_explicit_zero(self, format):
        """An explicitly stored zero in the matrix is handled correctly."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        matrix = sp_sparse.random(10, 1, density=0.5, format=format)
        matrix.data[0] = 0  # turn one stored entry into an explicit zero
        dense_result = np.asarray(SparseArray.from_spmatrix(matrix))

        tm.assert_numpy_array_equal(dense_result, matrix.toarray().ravel())

    def test_from_spmatrix_raises(self):
        """A matrix with more than one column is rejected."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        four_columns = sp_sparse.eye(5, 4, format="csc")

        with pytest.raises(ValueError, match="not '4'"):
            SparseArray.from_spmatrix(four_columns)

    def test_constructor_from_too_large_array(self):
        """Two-dimensional input is rejected."""
        two_dimensional = np.arange(10).reshape((2, 5))
        with pytest.raises(TypeError, match="expected dimension <= 1 data"):
            SparseArray(two_dimensional)

    def test_constructor_from_sparse(self):
        """Constructing from a ``SparseArray`` keeps fill value and stored values."""
        original = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
        rebuilt = SparseArray(original)
        assert rebuilt.fill_value == 0
        tm.assert_almost_equal(rebuilt.sp_values, original.sp_values)

    def test_constructor_copy(self):
        """``copy=True`` detaches the stored values; the default shares them."""
        dense = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
        original = SparseArray(dense)

        # Writing into a copy must leave the original untouched.
        copied = SparseArray(original, copy=True)
        copied.sp_values[:3] = 0
        assert not (original.sp_values[:3] == 0).any()

        # Without copy, the write is visible through the original.
        shared = SparseArray(original)
        shared.sp_values[:3] = 0
        assert (original.sp_values[:3] == 0).all()

    def test_constructor_bool(self):
        """Boolean data stores only the ``True`` entries when the fill is ``False``."""
        # GH#10648
        data = np.array([False, False, True, True, False, False])
        sparse = SparseArray(data, fill_value=False, dtype=bool)

        assert sparse.dtype == SparseDtype(bool)
        tm.assert_numpy_array_equal(sparse.sp_values, np.array([True, True]))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(
            sparse.sp_index.indices, np.array([2, 3], np.int32)
        )

        densified = sparse.to_dense()
        assert densified.dtype == bool
        tm.assert_numpy_array_equal(densified, data)

    def test_constructor_bool_fill_value(self):
        """Boolean arrays default to a falsy fill value unless one is given."""
        values = [True, False, True]

        inferred = SparseArray(values, dtype=None)
        assert inferred.dtype == SparseDtype(np.bool_)
        assert not inferred.fill_value

        explicit_dtype = SparseArray(values, dtype=np.bool_)
        assert explicit_dtype.dtype == SparseDtype(np.bool_)
        assert not explicit_dtype.fill_value

        true_fill = SparseArray(values, dtype=np.bool_, fill_value=True)
        assert true_fill.dtype == SparseDtype(np.bool_, True)
        assert true_fill.fill_value

    def test_constructor_float32(self):
        """float32 data keeps its subtype for stored values and on densifying."""
        # GH#10648
        data = np.array([1.0, np.nan, 3], dtype=np.float32)
        sparse = SparseArray(data, dtype=np.float32)

        assert sparse.dtype == SparseDtype(np.float32)
        tm.assert_numpy_array_equal(
            sparse.sp_values, np.array([1, 3], dtype=np.float32)
        )
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        tm.assert_numpy_array_equal(
            sparse.sp_index.indices, np.array([0, 2], dtype=np.int32)
        )

        densified = sparse.to_dense()
        assert densified.dtype == np.float32
        tm.assert_numpy_array_equal(densified, data)
| @ -0,0 +1,224 @@ | ||||
| import re | ||||
| import warnings | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import SparseDtype | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", 0),
        ("float", np.nan),
        ("bool", False),
        ("object", np.nan),
        ("datetime64[ns]", np.datetime64("NaT", "ns")),
        ("timedelta64[ns]", np.timedelta64("NaT", "ns")),
    ],
)
def test_inferred_dtype(dtype, fill_value):
    """``SparseDtype`` picks the expected default fill value for each subtype."""
    inferred = SparseDtype(dtype).fill_value

    if not pd.isna(fill_value):
        assert inferred == fill_value
        return

    # Missing fill values must also agree in type with the expected one.
    assert pd.isna(inferred)
    assert type(inferred) is type(fill_value)
|  | ||||
|  | ||||
def test_from_sparse_dtype():
    """Wrapping an existing ``SparseDtype`` keeps its fill value."""
    original = SparseDtype("float", 0)
    rewrapped = SparseDtype(original)
    assert rewrapped.fill_value == 0
|  | ||||
|  | ||||
def test_from_sparse_dtype_fill_value():
    """An explicit ``fill_value`` overrides the one on the wrapped dtype."""
    original = SparseDtype("int", 1)
    overridden = SparseDtype(original, fill_value=2)
    assert overridden == SparseDtype("int", 2)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        # default fill value for each subtype
        ("int", None),
        ("float", None),
        ("bool", None),
        ("object", None),
        ("datetime64[ns]", None),
        ("timedelta64[ns]", None),
        # explicit fill values
        ("int", np.nan),
        ("float", 0),
    ],
)
def test_equal(dtype, fill_value):
    """Two dtypes built from the same arguments are equal in both directions."""
    first = SparseDtype(dtype, fill_value)
    second = SparseDtype(dtype, fill_value)
    assert first == second
    assert second == first
|  | ||||
|  | ||||
def test_nans_equal():
    """NaN fill values from different sources still give equal dtypes."""
    builtin_nan = SparseDtype(float, float("nan"))
    numpy_nan = SparseDtype(float, np.nan)
    assert builtin_nan == numpy_nan
    assert numpy_nan == builtin_nan
|  | ||||
|  | ||||
with warnings.catch_warnings():
    # Silence the fill_value deprecation warning only while the pairs below
    # are being constructed.
    msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated"
    warnings.filterwarnings("ignore", message=msg, category=FutureWarning)

    # Pairs that must compare unequal; consumed by test_not_equal.
    tups = [
        # different subtype
        (SparseDtype("float64"), SparseDtype("float32")),
        # different fill value
        (SparseDtype("float64"), SparseDtype("float64", 0)),
        # different subtype, NaN fill value on both sides
        (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
        # different subtype, NaT fill value on both sides
        (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
        # sparse dtype versus plain NumPy dtype
        (SparseDtype("float64"), np.dtype("float64")),
    ]
|  | ||||
|  | ||||
@pytest.mark.parametrize("a, b", tups)
def test_not_equal(a, b):
    """Each pair in the module-level ``tups`` list compares unequal."""
    assert a != b
|  | ||||
|  | ||||
def test_construct_from_string_raises():
    """A string that is not a dtype is rejected with ``TypeError``."""
    expected_message = "Cannot construct a 'SparseDtype' from 'not a dtype'"
    with pytest.raises(TypeError, match=expected_message):
        SparseDtype.construct_from_string("not a dtype")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        # int, float and bool subtypes are expected to be numeric
        (SparseDtype(int), True),
        (SparseDtype(float), True),
        (SparseDtype(bool), True),
        # object and str subtypes are not
        (SparseDtype(object), False),
        (SparseDtype(str), False),
    ],
)
def test_is_numeric(dtype, expected):
    """``_is_numeric`` is exactly ``True`` or ``False`` depending on the subtype."""
    is_numeric = dtype._is_numeric
    assert is_numeric is expected
|  | ||||
|  | ||||
def test_str_uses_object():
    """A ``str`` subtype is stored as NumPy ``object``."""
    subtype = SparseDtype(str).subtype
    assert subtype == np.dtype("object")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[float64]", SparseDtype(np.dtype("float64"))),
        ("Sparse[float32]", SparseDtype(np.dtype("float32"))),
        ("Sparse[int]", SparseDtype(np.dtype("int"))),
        ("Sparse[str]", SparseDtype(np.dtype("str"))),
        ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))),
        # bare "Sparse" means float with a NaN fill value
        ("Sparse", SparseDtype(np.dtype("float"), np.nan)),
    ],
)
def test_construct_from_string(string, expected):
    """``construct_from_string`` parses ``"Sparse[subtype]"`` spellings."""
    parsed = SparseDtype.construct_from_string(string)
    assert parsed == expected
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "a, b, expected",
    [
        # equal pairs
        (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True),
        (SparseDtype(int, 0), SparseDtype(int, 0), True),
        (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True),
        # unequal pairs
        (SparseDtype(float, 0), SparseDtype(float, np.nan), False),
        (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
    ],
)
def test_hash_equal(a, b, expected):
    """Equality and hash equality agree with the expected outcome."""
    dtypes_equal = a == b
    assert dtypes_equal is expected

    hashes_equal = hash(a) == hash(b)
    assert hashes_equal is expected
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "string, expected",
    [
        pytest.param("Sparse[int]", "int"),
        pytest.param("Sparse[int, 0]", "int"),
        pytest.param("Sparse[int64]", "int64"),
        pytest.param("Sparse[int64, 0]", "int64"),
        pytest.param("Sparse[datetime64[ns], 0]", "datetime64[ns]"),
    ],
)
def test_parse_subtype(string, expected):
    """The first item returned by ``_parse_subtype`` is the expected subtype string."""
    parsed_subtype, _second = SparseDtype._parse_subtype(string)
    assert parsed_subtype == expected
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "string",
    [
        "Sparse[int, 1]",
        "Sparse[float, 0.0]",
        "Sparse[bool, True]",
    ],
)
def test_construct_from_string_fill_value_raises(string):
    """Each of these strings carrying a fill value is rejected with TypeError."""
    expected_message = "fill_value in the string is not"
    with pytest.raises(TypeError, match=expected_message):
        SparseDtype.construct_from_string(string)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "original, dtype, expected",
    [
        pytest.param(SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
        pytest.param(SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
        pytest.param(SparseDtype(int, 1), np.str_, SparseDtype(object, "1")),
        pytest.param(SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
    ],
)
def test_update_dtype(original, dtype, expected):
    """``update_dtype`` returns the expected SparseDtype for each target dtype."""
    updated = original.update_dtype(dtype)
    assert updated == expected
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "original, dtype, expected_error_msg",
    [
        pytest.param(
            SparseDtype(float, np.nan),
            int,
            re.escape("Cannot convert non-finite values (NA or inf) to integer"),
        ),
        pytest.param(
            SparseDtype(str, "abc"),
            int,
            r"invalid literal for int\(\) with base 10: ('abc'|np\.str_\('abc'\))",
        ),
    ],
)
def test_update_dtype_raises(original, dtype, expected_error_msg):
    """``update_dtype`` raises ValueError with the expected message for these cases."""
    raises_value_error = pytest.raises(ValueError, match=expected_error_msg)
    with raises_value_error:
        original.update_dtype(dtype)
|  | ||||
|  | ||||
def test_repr():
    """``str()`` of a SparseDtype shows the subtype and the repr of the fill value."""
    # GH-34352
    int_dtype = SparseDtype("int64", fill_value=0)
    assert str(int_dtype) == "Sparse[int64, 0]"

    object_dtype = SparseDtype(object, fill_value="0")
    assert str(object_dtype) == "Sparse[object, '0']"
|  | ||||
|  | ||||
def test_sparse_dtype_subtype_must_be_numpy_dtype():
    """Constructing a SparseDtype with the "category" subtype raises TypeError."""
    # GH#53160
    raises_type_error = pytest.raises(
        TypeError, match="SparseDtype subtype must be a numpy dtype"
    )
    with raises_type_error:
        SparseDtype("category", fill_value="c")
| @ -0,0 +1,302 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import SparseDtype | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
@pytest.fixture
def arr_data():
    """Fixture returning a ten-element ndarray mixing NaN with the values 1 to 6."""
    nan = np.nan
    return np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
|  | ||||
|  | ||||
@pytest.fixture
def arr(arr_data):
    """Fixture returning a SparseArray built from the ``arr_data`` fixture."""
    sparse = SparseArray(arr_data)
    return sparse
|  | ||||
|  | ||||
class TestGetitem:
    """Tests for ``SparseArray.__getitem__`` with scalars, slices and masks."""

    def test_getitem(self, arr):
        """Scalar access agrees with the dense array for ``i`` and ``-i``."""
        dense_values = arr.to_dense()
        for position, element in enumerate(arr):
            tm.assert_almost_equal(element, dense_values[position])
            mirrored = -position
            tm.assert_almost_equal(arr[mirrored], dense_values[mirrored])

    def test_getitem_arraylike_mask(self, arr):
        """A list-of-bools mask keeps only the positions marked True."""
        # The ``arr`` fixture is requested but replaced by a local array.
        arr = SparseArray([0, 1, 2])
        mask = [True, False, True]
        tm.assert_sp_array_equal(arr[mask], SparseArray([0, 2]))

    @pytest.mark.parametrize(
        "slc",
        [
            slice(None),
            slice(1, 10),
            slice(1, 100),
            slice(10, 1),
            slice(None, -3),
            slice(-5, -4),
            slice(None, -12),
            slice(-12, None),
            slice(2, None),
            slice(2, None, 3),
            slice(None, None, 2),
            slice(None, None, -1),
            slice(None, None, -2),
            slice(1, 6, 2),
            slice(None, -6, -2),
        ],
    )
    @pytest.mark.parametrize(
        "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []]
    )
    def test_getslice(self, slc, as_dense):
        """Slicing a SparseArray equals a SparseArray of the sliced dense data."""
        dense = np.array(as_dense)
        sparse = SparseArray(dense)

        sliced = sparse[slc]
        tm.assert_sp_array_equal(sliced, SparseArray(dense[slc]))

    def test_getslice_tuple(self):
        """A one-element tuple of slices works; a two-axis index raises IndexError."""
        dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

        nan_filled = SparseArray(dense)
        from_four = nan_filled[(slice(4, None),)]
        tm.assert_sp_array_equal(from_four, SparseArray(dense[4:]))

        zero_filled = SparseArray(dense, fill_value=0)
        from_four = zero_filled[(slice(4, None),)]
        tm.assert_sp_array_equal(from_four, SparseArray(dense[4:], fill_value=0))

        too_many = "too many indices for array"
        with pytest.raises(IndexError, match=too_many):
            zero_filled[4:, :]

        with pytest.raises(IndexError, match=too_many):
            # check numpy compat
            dense[4:, :]

    def test_boolean_slice_empty(self):
        """An all-False mask yields a result that keeps the original dtype."""
        sparse = SparseArray([0, 1, 2])
        selected = sparse[[False] * 3]
        assert selected.dtype == sparse.dtype

    def test_getitem_bool_sparse_array(self, arr):
        """Indexing with boolean SparseArray masks."""
        # GH 23122
        odd_mask = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        expected = SparseArray([np.nan, 2, np.nan, 5, 6])
        tm.assert_sp_array_equal(arr[odd_mask], expected)

        even_mask = ~odd_mask
        picked = arr[even_mask]
        tm.assert_sp_array_equal(picked, SparseArray([np.nan, 1, 3, 4, np.nan]))

        nan_mask = SparseArray(
            [False, True, np.nan] * 3, dtype=np.bool_, fill_value=np.nan
        )
        picked = arr[nan_mask]
        tm.assert_sp_array_equal(picked, SparseArray([np.nan, 3, 5]))

    def test_getitem_bool_sparse_array_as_comparison(self):
        """A mask produced by comparing the array itself selects matching values."""
        # GH 45110
        sparse = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan)
        above_two = sparse > 2
        expected = SparseArray([3.0, 4.0], fill_value=np.nan)
        tm.assert_sp_array_equal(sparse[above_two], expected)

    def test_get_item(self, arr):
        """Scalar lookups return stored values; out-of-range positions raise."""
        zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)

        assert np.isnan(arr[1])
        for position, value in ((2, 1), (7, 5)):
            assert arr[position] == value

        for position, value in ((0, 0), (2, 1), (7, 5)):
            assert zarr[position] == value

        errmsg = "must be an integer between -10 and 10"
        for out_of_range in (11, -11):
            with pytest.raises(IndexError, match=errmsg):
                arr[out_of_range]

        last = len(arr) - 1
        assert arr[-1] == arr[last]
|  | ||||
|  | ||||
class TestSetitem:
    """Tests for item assignment on SparseArray."""

    def test_set_item(self, arr_data):
        """Scalar and slice assignment both raise TypeError."""
        arr = SparseArray(arr_data).copy()
        not_supported = "assignment via setitem"

        with pytest.raises(TypeError, match=not_supported):
            arr[5] = 3

        with pytest.raises(TypeError, match=not_supported):
            arr[1:5] = 2
|  | ||||
|  | ||||
class TestTake:
    """Tests for ``SparseArray.take``."""

    def test_take_scalar_raises(self, arr):
        """A scalar ``indices`` argument raises ValueError."""
        expected_message = "'indices' must be an array, not a scalar '2'."
        with pytest.raises(ValueError, match=expected_message):
            arr.take(2)

    def test_take(self, arr_data, arr):
        """``take`` agrees with ``np.take`` on the dense data."""
        for indices in ([2, 3], [0, 1, 2]):
            expected = SparseArray(np.take(arr_data, indices))
            tm.assert_sp_array_equal(arr.take(indices), expected)

    def test_take_all_empty(self):
        """Taking every position with ``allow_fill=True`` returns an equal array."""
        zeros = pd.array([0, 0], dtype=SparseDtype("int64"))
        taken = zeros.take([0, 1], allow_fill=True, fill_value=np.nan)
        tm.assert_sp_array_equal(zeros, taken)

    def test_take_different_fill_value(self):
        """The ``fill_value`` passed to ``take`` does not replace the dtype's own."""
        # Take with a different fill value shouldn't overwrite the original
        zero_fill_dtype = SparseDtype("float64", fill_value=0.0)
        sparse = pd.array([0.0], dtype=zero_fill_dtype)
        taken = sparse.take([0, -1], allow_fill=True, fill_value=np.nan)
        expected = pd.array([0, np.nan], dtype=sparse.dtype)
        tm.assert_sp_array_equal(expected, taken)

    def test_take_fill_value(self):
        """``take`` on a zero-filled array agrees with ``np.take``."""
        data = np.array([1, np.nan, 0, 3, 0])
        sparse = SparseArray(data, fill_value=0)

        for indices in ([0], [1, 3, 4]):
            expected = SparseArray(np.take(data, indices), fill_value=0)
            tm.assert_sp_array_equal(sparse.take(indices), expected)

    def test_take_negative(self, arr_data, arr):
        """Negative indices agree with ``np.take``."""
        for indices in ([-1], [-4, -3, -2]):
            expected = SparseArray(np.take(arr_data, indices))
            tm.assert_sp_array_equal(arr.take(indices), expected)

    def test_bad_take(self, arr):
        """An index past the end raises IndexError."""
        with pytest.raises(IndexError, match="bounds"):
            arr.take([11])

    def test_take_filling(self):
        """``take`` on a NaN-filled array, with and without ``allow_fill``."""
        # similar tests as GH 12631
        sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
        wraparound = [1, 0, -1]

        taken = sparse.take(np.array(wraparound))
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, 4]))

        # TODO: actionable?
        # XXX: test change: fill_value=True -> allow_fill=True
        taken = sparse.take(np.array(wraparound), allow_fill=True)
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, np.nan]))

        # allow_fill=False
        taken = sparse.take(np.array(wraparound), allow_fill=False, fill_value=True)
        tm.assert_sp_array_equal(taken, SparseArray([np.nan, np.nan, 4]))

        msg = "Invalid value in 'indices'"
        for invalid in ([1, 0, -2], [1, 0, -5]):
            with pytest.raises(ValueError, match=msg):
                sparse.take(np.array(invalid), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        out_of_bounds = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {"allow_fill": True}),
        )
        for invalid, options in out_of_bounds:
            with pytest.raises(IndexError, match=msg):
                sparse.take(np.array(invalid), **options)

    def test_take_filling_fill_value(self):
        """``take`` on a zero-filled array, with and without ``allow_fill``."""
        # same tests as GH#12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
        wraparound = [1, 0, -1]

        taken = sparse.take(np.array(wraparound))
        tm.assert_sp_array_equal(taken, SparseArray([0, np.nan, 4], fill_value=0))

        # fill_value
        taken = sparse.take(np.array(wraparound), allow_fill=True)
        # TODO: actionable?
        # XXX: behavior change.
        # the old way of filling self.fill_value doesn't follow EA rules.
        # It's supposed to be self.dtype.na_value (nan in this case)
        expected = SparseArray([0, np.nan, np.nan], fill_value=0)
        tm.assert_sp_array_equal(taken, expected)

        # allow_fill=False
        taken = sparse.take(np.array(wraparound), allow_fill=False, fill_value=True)
        tm.assert_sp_array_equal(taken, SparseArray([0, np.nan, 4], fill_value=0))

        msg = "Invalid value in 'indices'."
        for invalid in ([1, 0, -2], [1, 0, -5]):
            with pytest.raises(ValueError, match=msg):
                sparse.take(np.array(invalid), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        out_of_bounds = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {"fill_value": True}),
        )
        for invalid, options in out_of_bounds:
            with pytest.raises(IndexError, match=msg):
                sparse.take(np.array(invalid), **options)

    @pytest.mark.parametrize("kind", ["block", "integer"])
    def test_take_filling_all_nan(self, kind):
        """``take`` on an all-NaN array for both sparse index kinds."""
        sparse = SparseArray([np.nan] * 5, kind=kind)
        wraparound = [1, 0, -1]

        taken = sparse.take(np.array(wraparound))
        tm.assert_sp_array_equal(taken, SparseArray([np.nan] * 3, kind=kind))

        taken = sparse.take(np.array(wraparound), fill_value=True)
        tm.assert_sp_array_equal(taken, SparseArray([np.nan] * 3, kind=kind))

        msg = "out of bounds value in 'indices'"
        out_of_bounds = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {"fill_value": True}),
        )
        for invalid, options in out_of_bounds:
            with pytest.raises(IndexError, match=msg):
                sparse.take(np.array(invalid), **options)
|  | ||||
|  | ||||
class TestWhere:
    """Tests for ``SparseArray._where`` and ``Series.where`` on sparse data."""

    def test_where_retain_fill_value(self):
        """The result of ``_where`` / ``Series.where`` keeps ``fill_value=0``."""
        # GH#45691 don't lose fill_value on _where
        sparse = SparseArray([np.nan, 1.0], fill_value=0)
        mask = np.array([True, False])
        expected = SparseArray([1, 1.0], fill_value=0)

        from_array = sparse._where(~mask, 1)
        tm.assert_sp_array_equal(from_array, expected)

        series = pd.Series(sparse)
        from_series = series.where(~mask, 1)
        tm.assert_series_equal(from_series, pd.Series(expected))
| @ -0,0 +1,551 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas._libs.sparse as splib | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import ( | ||||
|     BlockIndex, | ||||
|     IntIndex, | ||||
|     make_sparse_index, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture
def test_length():
    """Fixture returning 20, the length passed to the BlockIndex constructors."""
    length = 20
    return length
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        pytest.param(
            [[0, 7, 15], [3, 5, 5], [2, 9, 14], [2, 3, 5], [2, 9, 15], [1, 3, 4]],
            id="plain_case",
        ),
        pytest.param(
            [[0, 5], [4, 4], [1], [4], [1], [3]],
            id="delete_blocks",
        ),
        pytest.param(
            [[0], [10], [0, 5], [3, 7], [0, 5], [3, 5]],
            id="split_blocks",
        ),
        pytest.param(
            [[10], [5], [0, 12], [5, 3], [12], [3]],
            id="skip_block",
        ),
        pytest.param(
            [[0, 10], [4, 6], [5, 17], [4, 2], [], []],
            id="no_intersect",
        ),
        pytest.param(
            [[0], [5], [], [], [], []],
            id="one_empty",
        ),
    ],
)
def cases(request):
    """Fixture yielding six lists per case: xloc, xlen, yloc, ylen, eloc, elen."""
    case = request.param
    return case
|  | ||||
|  | ||||
class TestSparseIndexUnion:
    """Tests for ``make_union`` on BlockIndex and IntIndex."""

    @pytest.mark.parametrize(
        "xloc, xlen, yloc, ylen, eloc, elen",
        [
            [[0], [5], [5], [4], [0], [9]],
            [[0, 10], [5, 5], [2, 17], [5, 2], [0, 10, 17], [7, 5, 2]],
            [[1], [5], [3], [5], [1], [7]],
            [[2, 10], [4, 4], [4], [8], [2], [12]],
            [[0, 5], [3, 5], [0], [7], [0], [10]],
            [[2, 10], [4, 4], [4, 13], [8, 4], [2], [15]],
            [[2], [15], [4, 9, 14], [3, 2, 2], [2], [15]],
            [[0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]],
        ],
    )
    def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen, test_length):
        """Block union has the expected blocks; the int-index union agrees with it."""
        # Case 1
        # x: ----
        # y:     ----
        # r: --------
        # Case 2
        # x: -----     -----
        # y:   -----          --
        # Case 3
        # x: ------
        # y:    -------
        # r: ----------
        # Case 4
        # x: ------  -----
        # y:    -------
        # r: -------------
        # Case 5
        # x: ---  -----
        # y: -------
        # r: -------------
        # Case 6
        # x: ------  -----
        # y:    -------  ---
        # r: -------------
        # Case 7
        # x: ----------------------
        # y:   ----  ----   ---
        # r: ----------------------
        # Case 8
        # x: ----       ---
        # y:       ---       ---
        x_block = BlockIndex(test_length, xloc, xlen)
        y_block = BlockIndex(test_length, yloc, ylen)
        block_union = x_block.make_union(y_block)
        assert isinstance(block_union, BlockIndex)
        expected_locs = np.array(eloc, dtype=np.int32)
        tm.assert_numpy_array_equal(block_union.blocs, expected_locs)
        expected_lengths = np.array(elen, dtype=np.int32)
        tm.assert_numpy_array_equal(block_union.blengths, expected_lengths)

        x_int = x_block.to_int_index()
        y_int = y_block.to_int_index()
        int_union = x_int.make_union(y_int)
        assert isinstance(int_union, IntIndex)
        tm.assert_numpy_array_equal(
            int_union.indices, block_union.to_int_index().indices
        )

    def test_int_index_make_union(self):
        """IntIndex unions for several inputs; mismatched lengths raise ValueError."""

        def int_index(length, points):
            return IntIndex(length, np.array(points, dtype=np.int32))

        scenarios = [
            ([0, 3, 4], [0, 2], [0, 2, 3, 4]),
            ([], [0, 2], [0, 2]),
            ([], [], []),
            ([0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]),
        ]
        for left_points, right_points, union_points in scenarios:
            left = int_index(5, left_points)
            right = int_index(5, right_points)
            merged = left.make_union(right)
            assert merged.equals(int_index(5, union_points))

        left = int_index(5, [0, 1])
        right = int_index(4, [0, 1])

        msg = "Indices must reference same underlying length"
        with pytest.raises(ValueError, match=msg):
            left.make_union(right)
|  | ||||
|  | ||||
class TestSparseIndexIntersect:
    """Tests for ``intersect`` on BlockIndex and IntIndex."""

    @td.skip_if_windows
    def test_intersect(self, cases, test_length):
        """Intersections match the expected index; mismatched lengths raise."""
        xloc, xlen, yloc, ylen, eloc, elen = cases
        x_block = BlockIndex(test_length, xloc, xlen)
        y_block = BlockIndex(test_length, yloc, ylen)
        expected_block = BlockIndex(test_length, eloc, elen)
        # Same blocks as y, but declared over a longer underlying length.
        mismatched = BlockIndex(test_length + 1, yloc, ylen)

        block_result = x_block.intersect(y_block)
        assert block_result.equals(expected_block)
        int_result = x_block.to_int_index().intersect(y_block.to_int_index())
        assert int_result.equals(expected_block.to_int_index())

        msg = "Indices must reference same underlying length"
        with pytest.raises(Exception, match=msg):
            x_block.intersect(mismatched)
        with pytest.raises(Exception, match=msg):
            x_block.to_int_index().intersect(mismatched.to_int_index())

    def test_intersect_empty(self):
        """Intersecting with an empty index gives the empty index, in either order."""
        empty = IntIndex(4, np.array([], dtype=np.int32))
        populated = IntIndex(4, np.array([2, 3], dtype=np.int32))
        assert empty.intersect(populated).equals(empty)
        assert populated.intersect(empty).equals(empty)

        empty = empty.to_block_index()
        populated = populated.to_block_index()
        assert empty.intersect(populated).equals(empty)
        assert populated.intersect(empty).equals(empty)

    @pytest.mark.parametrize(
        "case",
        [
            # Argument 2 to "IntIndex" has incompatible type "ndarray[Any,
            # dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]"
            IntIndex(5, np.array([1, 2], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(0, np.array([], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(5, np.array([], dtype=np.int32)),  # type: ignore[arg-type]
        ],
    )
    def test_intersect_identical(self, case):
        """An index intersected with itself equals itself, as int and as block index."""
        self_intersection = case.intersect(case)
        assert self_intersection.equals(case)

        case = case.to_block_index()
        self_intersection = case.intersect(case)
        assert self_intersection.equals(case)
|  | ||||
|  | ||||
class TestSparseIndexCommon:
    """Tests for ``make_sparse_index`` results and their ``lookup`` methods."""

    def test_int_internal(self):
        """``kind="integer"`` gives an IntIndex holding the supplied points."""
        for points in ([2, 3], [], [0, 1, 2, 3]):
            locations = np.array(points, dtype=np.int32)
            idx = make_sparse_index(4, locations, kind="integer")
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(points)
            tm.assert_numpy_array_equal(idx.indices, np.array(points, dtype=np.int32))

    def test_block_internal(self):
        """``kind="block"`` gives a BlockIndex with the expected blocs and lengths."""

        def int32(values):
            return np.array(values, dtype=np.int32)

        # (points, expected npoints, expected blocs, expected blengths)
        scenarios = [
            ([2, 3], 2, [2], [2]),
            ([], 0, [], []),
            ([0, 1, 2, 3], 4, [0], [4]),
            ([0, 2, 3], 3, [0, 2], [1, 2]),
        ]
        for points, npoints, blocs, blengths in scenarios:
            idx = make_sparse_index(4, int32(points), kind="block")
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == npoints
            tm.assert_numpy_array_equal(idx.blocs, int32(blocs))
            tm.assert_numpy_array_equal(idx.blengths, int32(blengths))

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_lookup(self, kind):
        """``lookup`` returns the sparse position of a location, or -1 if absent."""
        # Each entry: (points, expected lookup results for locations -1..4)
        scenarios = [
            ([2, 3], [-1, -1, -1, 0, 1, -1]),
            ([], [-1, -1, -1, -1, -1, -1]),
            ([0, 1, 2, 3], [-1, 0, 1, 2, 3, -1]),
            ([0, 2, 3], [-1, 0, -1, 1, 2, -1]),
        ]
        for points, expected_positions in scenarios:
            idx = make_sparse_index(4, np.array(points, dtype=np.int32), kind=kind)
            for location, position in zip(range(-1, 5), expected_positions):
                assert idx.lookup(location) == position

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_lookup_array(self, kind):
        """``lookup_array`` maps each queried location to its position or -1."""

        def int32(values):
            return np.array(values, dtype=np.int32)

        # Each entry: (points, [(queried locations, expected positions), ...])
        scenarios = [
            (
                [2, 3],
                [([-1, 0, 2], [-1, -1, 0]), ([4, 2, 1, 3], [-1, 0, -1, 1])],
            ),
            (
                [],
                [([-1, 0, 2, 4], [-1, -1, -1, -1])],
            ),
            (
                [0, 1, 2, 3],
                [([-1, 0, 2], [-1, 0, 2]), ([4, 2, 1, 3], [-1, 2, 1, 3])],
            ),
            (
                [0, 2, 3],
                [([2, 1, 3, 0], [1, -1, 2, 0]), ([1, 4, 2, 5], [-1, -1, 1, -1])],
            ),
        ]
        for points, queries in scenarios:
            idx = make_sparse_index(4, int32(points), kind=kind)
            for locations, positions in queries:
                found = idx.lookup_array(int32(locations))
                tm.assert_numpy_array_equal(found, int32(positions))

    @pytest.mark.parametrize(
        "idx, expected",
        [
            [0, -1],
            [5, 0],
            [7, 2],
            [8, -1],
            [9, -1],
            [10, -1],
            [11, -1],
            [12, 3],
            [17, 8],
            [18, -1],
        ],
    )
    def test_lookup_basics(self, idx, expected):
        """BlockIndex and its IntIndex conversion give the same ``lookup`` answer."""
        block_index = BlockIndex(20, [5, 12], [3, 6])
        assert block_index.lookup(idx) == expected

        int_index = block_index.to_int_index()
        assert int_index.lookup(idx) == expected
|  | ||||
|  | ||||
class TestBlockIndex:
    """Tests for ``BlockIndex``: construction, validation and conversions."""

    def test_block_internal(self):
        """``make_sparse_index(kind="block")`` produces the expected blocks."""
        # (sparse positions, expected block starts, expected block lengths)
        scenarios = [
            ([2, 3], [2], [2]),
            ([], [], []),
            ([0, 1, 2, 3], [0], [4]),
            ([0, 2, 3], [0, 2], [1, 2]),
        ]
        for positions, starts, sizes in scenarios:
            sp_index = make_sparse_index(
                4, np.array(positions, dtype=np.int32), kind="block"
            )
            assert isinstance(sp_index, BlockIndex)
            assert sp_index.npoints == len(positions)
            tm.assert_numpy_array_equal(
                sp_index.blocs, np.array(starts, dtype=np.int32)
            )
            tm.assert_numpy_array_equal(
                sp_index.blengths, np.array(sizes, dtype=np.int32)
            )

    @pytest.mark.parametrize("i", [5, 10, 100, 101])
    def test_make_block_boundary(self, i):
        """Every second position ends up as its own block of length one."""
        sp_index = make_sparse_index(
            i, np.arange(0, i, 2, dtype=np.int32), kind="block"
        )

        expected_starts = np.arange(0, i, 2, dtype=np.int32)
        expected_sizes = np.ones(expected_starts.shape[0], dtype=np.int32)
        tm.assert_numpy_array_equal(sp_index.blocs, expected_starts)
        tm.assert_numpy_array_equal(sp_index.blengths, expected_sizes)

    def test_equals(self):
        """An index equals itself but not one with a longer second block."""
        reference = BlockIndex(10, [0, 4], [2, 5])

        assert reference.equals(reference)
        longer_second_block = BlockIndex(10, [0, 4], [2, 6])
        assert not reference.equals(longer_second_block)

    def test_check_integrity(self):
        """Empty block lists are accepted; overlong or overlapping blocks raise."""
        no_locs, no_lengths = [], []

        # total length 0 is fine, and so is length 1 with no blocks at all
        for total in (0, 1):
            BlockIndex(total, no_locs, no_lengths)

        with pytest.raises(ValueError, match="Block 0 extends beyond end"):
            BlockIndex(10, [5], [10])

        with pytest.raises(ValueError, match="Block 0 overlaps"):
            BlockIndex(10, [2, 5], [5, 3])

    def test_to_int_index(self):
        """``to_int_index`` expands blocks into the individual positions."""
        as_blocks = BlockIndex(20, [0, 10], [4, 6])
        as_ints = as_blocks.to_int_index()

        expected = np.array([*range(0, 4), *range(10, 16)], dtype=np.int32)
        tm.assert_numpy_array_equal(as_ints.indices, expected)

    def test_to_block_index(self):
        """``to_block_index`` on a ``BlockIndex`` returns the same object."""
        original = BlockIndex(10, [0, 5], [4, 5])
        assert original.to_block_index() is original
|  | ||||
|  | ||||
class TestIntIndex:
    """Tests for ``IntIndex``: validation, construction and conversions."""

    def test_check_integrity(self):
        """Each kind of invalid ``indices`` input raises ``ValueError``."""
        # More indices than ``length`` allows.
        msg = "Too many indices"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=1, indices=[1, 2, 3])

        # No index can be negative.
        msg = "No index can be less than zero"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, -2, 3])

        # All indices must be less than the length.
        msg = "All indices must be less than the length"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 5])

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 6])

        # Indices must be strictly ascending.
        msg = "Indices must be strictly increasing"

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 2])

        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 3])

    def test_int_internal(self):
        """``make_sparse_index(kind="integer")`` stores the positions as given."""
        idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 2
        tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32))

        idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer")
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 0
        tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32))

        idx = make_sparse_index(
            4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer"
        )
        assert isinstance(idx, IntIndex)
        assert idx.npoints == 4
        tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32))

    def test_equals(self):
        """An index equals itself but not one with fewer positions."""
        index = IntIndex(10, [0, 1, 2, 3, 4])
        assert index.equals(index)
        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

    def test_to_block_index(self, cases, test_length):
        """Block -> int -> block conversion gives back an equal ``BlockIndex``."""
        xloc, xlen, yloc, ylen, _, _ = cases
        xindex = BlockIndex(test_length, xloc, xlen)
        yindex = BlockIndex(test_length, yloc, ylen)

        # see if survive the round trip
        xbindex = xindex.to_int_index().to_block_index()
        ybindex = yindex.to_int_index().to_block_index()
        assert isinstance(xbindex, BlockIndex)
        assert xbindex.equals(xindex)
        assert ybindex.equals(yindex)

    def test_to_int_index(self):
        """``to_int_index`` on an ``IntIndex`` returns the same object."""
        index = IntIndex(10, [2, 3, 4, 5, 6])
        assert index.to_int_index() is index
|  | ||||
|  | ||||
class TestSparseOperators:
    """Cross-checks for the ``sparse_<op>_float64`` kernels in ``splib``."""

    @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"])
    def test_op(self, opname, cases, test_length):
        """Compare a sparse kernel on block and integer indexes with Series math.

        The kernel is run once with ``BlockIndex`` operands and once with the
        equivalent ``IntIndex`` operands; both runs must agree with each other
        and with the same operator applied to filled, dense ``Series``.
        """
        x_starts, x_sizes, y_starts, y_sizes, _, _ = cases
        kernel = getattr(splib, f"sparse_{opname}_float64")
        reference_op = getattr(operator, opname)

        x_blocks = BlockIndex(test_length, x_starts, x_sizes)
        y_blocks = BlockIndex(test_length, y_starts, y_sizes)
        x_ints = x_blocks.to_int_index()
        y_ints = y_blocks.to_int_index()

        x_vals = np.arange(x_blocks.npoints) * 10.0 + 1
        y_vals = np.arange(y_blocks.npoints) * 100.0 + 1
        x_fill, y_fill = 0, 2

        block_vals, block_index, block_fill = kernel(
            x_vals, x_blocks, x_fill, y_vals, y_blocks, y_fill
        )
        int_vals, int_index, int_fill = kernel(
            x_vals, x_ints, x_fill, y_vals, y_ints, y_fill
        )

        # both index flavours must describe the same result
        assert block_index.to_int_index().equals(int_index)
        tm.assert_numpy_array_equal(block_vals, int_vals)
        assert block_fill == int_fill

        # dense reference: expand to full length, fill the gaps, apply the
        # plain operator, then pick out the positions the kernel reported
        x_dense = (
            Series(x_vals, x_ints.indices)
            .reindex(np.arange(test_length))
            .fillna(x_fill)
        )
        y_dense = (
            Series(y_vals, y_ints.indices)
            .reindex(np.arange(test_length))
            .fillna(y_fill)
        )
        dense_result = reference_op(x_dense, y_dense).reindex(int_index.indices)

        tm.assert_numpy_array_equal(block_vals, dense_result.values)
        tm.assert_numpy_array_equal(int_vals, dense_result.values)
| @ -0,0 +1,306 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NaT, | ||||
|     SparseDtype, | ||||
|     Timestamp, | ||||
|     isna, | ||||
| ) | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
class TestReductions:
    """Reduction methods of ``SparseArray`` and their NumPy-function forms."""

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_all(self, data, pos, neg):
        """``all`` is truthy for truthy data and falsy once one entry is ``neg``."""
        # GH#17570
        # pytest passes the very list written in the decorator; copy it so the
        # in-place edit below cannot leak into a later run of this test.
        data = list(data)

        out = SparseArray(data).all()
        assert out

        out = SparseArray(data, fill_value=pos).all()
        assert out

        data[1] = neg
        out = SparseArray(data).all()
        assert not out

        out = SparseArray(data, fill_value=pos).all()
        assert not out

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_numpy_all(self, data, pos, neg):
        """``np.all`` mirrors ``SparseArray.all`` and rejects ``out``."""
        # GH#17570
        # copy: the parametrized list must not be mutated in place
        data = list(data)

        out = np.all(SparseArray(data))
        assert out

        out = np.all(SparseArray(data, fill_value=pos))
        assert out

        data[1] = neg
        out = np.all(SparseArray(data))
        assert not out

        out = np.all(SparseArray(data, fill_value=pos))
        assert not out

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.all(SparseArray(data), out=np.array([]))

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_any(self, data, pos, neg):
        """``any`` is truthy with one truthy entry and falsy once it is ``neg``."""
        # GH#17570
        # copy: the parametrized list must not be mutated in place
        data = list(data)

        out = SparseArray(data).any()
        assert out

        out = SparseArray(data, fill_value=pos).any()
        assert out

        data[1] = neg
        out = SparseArray(data).any()
        assert not out

        out = SparseArray(data, fill_value=pos).any()
        assert not out

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_numpy_any(self, data, pos, neg):
        """``np.any`` mirrors ``SparseArray.any`` and rejects ``out``."""
        # GH#17570
        # copy: the parametrized list must not be mutated in place
        data = list(data)

        out = np.any(SparseArray(data))
        assert out

        out = np.any(SparseArray(data, fill_value=pos))
        assert out

        data[1] = neg
        out = np.any(SparseArray(data))
        assert not out

        out = np.any(SparseArray(data, fill_value=pos))
        assert not out

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.any(SparseArray(data), out=out)

    def test_sum(self):
        """``sum`` skips NaN entries for both a numeric and a NaN fill value."""
        data = np.arange(10).astype(float)
        out = SparseArray(data).sum()
        assert out == 45.0

        data[5] = np.nan
        out = SparseArray(data, fill_value=2).sum()
        assert out == 40.0

        out = SparseArray(data, fill_value=np.nan).sum()
        assert out == 40.0

    @pytest.mark.parametrize(
        "arr",
        [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])],
    )
    @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
    @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
    def test_sum_min_count(self, arr, fill_value, min_count, expected):
        """``sum(min_count=...)`` is NaN when ``expected`` is NaN, else ``expected``."""
        # GH#25777
        sparray = SparseArray(arr, fill_value=fill_value)
        result = sparray.sum(min_count=min_count)
        if np.isnan(expected):
            assert np.isnan(result)
        else:
            assert result == expected

    def test_bool_sum_min_count(self):
        """``min_count`` applies to boolean sparse arrays as well."""
        spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        res = spar_bool.sum(min_count=1)
        assert res == 5
        res = spar_bool.sum(min_count=11)
        assert isna(res)

    def test_numpy_sum(self):
        """``np.sum`` mirrors ``SparseArray.sum`` and rejects ``dtype``/``out``."""
        data = np.arange(10).astype(float)
        out = np.sum(SparseArray(data))
        assert out == 45.0

        data[5] = np.nan
        out = np.sum(SparseArray(data, fill_value=2))
        assert out == 40.0

        out = np.sum(SparseArray(data, fill_value=np.nan))
        assert out == 40.0

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), out=out)

    def test_mean(self):
        """``mean`` averages only over the non-NaN entries."""
        data = np.arange(10).astype(float)
        out = SparseArray(data).mean()
        assert out == 4.5

        data[5] = np.nan
        out = SparseArray(data).mean()
        assert out == 40.0 / 9

    def test_numpy_mean(self):
        """``np.mean`` mirrors ``SparseArray.mean`` and rejects ``dtype``/``out``."""
        data = np.arange(10).astype(float)
        out = np.mean(SparseArray(data))
        assert out == 4.5

        data[5] = np.nan
        out = np.mean(SparseArray(data))
        assert out == 40.0 / 9

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), out=out)
|  | ||||
|  | ||||
class TestMinMax:
    """``min``/``max`` of ``SparseArray`` for various fill values and inputs."""

    @pytest.mark.parametrize(
        "raw_data,max_expected,min_expected",
        [
            (np.arange(5.0), [4], [0]),
            (-np.arange(5.0), [0], [-4]),
            (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
            (np.array([np.nan] * 5), [np.nan], [np.nan]),
            (np.array([]), [np.nan], [np.nan]),
        ],
    )
    def test_nan_fill_value(self, raw_data, max_expected, min_expected):
        """Default fill value: extremes with and without ``skipna``."""
        sparse = SparseArray(raw_data)

        highest = sparse.max()
        lowest = sparse.min()
        assert highest in max_expected
        assert lowest in min_expected

        highest = sparse.max(skipna=False)
        lowest = sparse.min(skipna=False)
        if not np.isnan(raw_data).any():
            assert highest in max_expected
            assert lowest in min_expected
        else:
            # any NaN in the input makes both unskipped results NaN
            assert np.isnan(highest)
            assert np.isnan(lowest)

    @pytest.mark.parametrize(
        "fill_value,max_expected,min_expected",
        [
            (100, 100, 0),
            (-100, 1, -100),
        ],
    )
    def test_fill_value(self, fill_value, max_expected, min_expected):
        """The fill value itself takes part in ``max`` and ``min``."""
        sparse_dtype = SparseDtype("int", fill_value)
        sparse = SparseArray(np.array([fill_value, 0, 1]), dtype=sparse_dtype)

        assert sparse.max() == max_expected
        assert sparse.min() == min_expected

    def test_only_fill_value(self):
        """With no stored values, every extreme equals the fill value."""
        fill = 100
        sparse = SparseArray(np.array([fill] * 3), dtype=SparseDtype("int", fill))
        assert len(sparse._valid_sp_values) == 0

        for options in ({}, {"skipna": False}):
            for reducer in ("max", "min"):
                assert getattr(sparse, reducer)(**options) == fill

    @pytest.mark.parametrize("func", ["min", "max"])
    @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
    @pytest.mark.parametrize(
        "dtype,expected",
        [
            (SparseDtype(np.float64, np.nan), np.nan),
            (SparseDtype(np.float64, 5.0), np.nan),
            (SparseDtype("datetime64[ns]", NaT), NaT),
            (SparseDtype("datetime64[ns]", Timestamp("2018-05-05")), NaT),
        ],
    )
    def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
        """Empty or all-NaN input reduces to a missing value."""
        sparse = SparseArray(data, dtype=dtype)
        outcome = getattr(sparse, func)()
        if expected is not NaT:
            assert np.isnan(outcome)
            return
        # TODO: pin down whether we wrap datetime64("NaT")
        assert outcome is NaT or np.isnat(outcome)
|  | ||||
|  | ||||
class TestArgmaxArgmin:
    """``argmax``/``argmin`` of ``SparseArray``."""

    @pytest.mark.parametrize(
        "arr,argmax_expected,argmin_expected",
        [
            (SparseArray([1, 2, 0, 1, 2]), 1, 2),
            (SparseArray([-1, -2, 0, -1, -2]), 2, 1),
            (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2),
            (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10),
            (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=0), 0, 1),
            (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=1), 0, 1),
        ],
    )
    def test_argmax_argmin(self, arr, argmax_expected, argmin_expected):
        """Both positions match the expected ones for each array and fill value."""
        argmax_result = arr.argmax()
        argmin_result = arr.argmin()
        assert argmax_result == argmax_expected
        assert argmin_result == argmin_expected

    @pytest.mark.parametrize(
        "arr,method",
        [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")],
    )
    def test_empty_array(self, arr, method):
        """Calling ``method`` on an empty array raises ``ValueError``."""
        msg = f"attempt to get {method} of an empty sequence"
        with pytest.raises(ValueError, match=msg):
            # dispatch by name so an unexpected ``method`` fails loudly
            # instead of silently exercising ``argmin``
            getattr(arr, method)()
| @ -0,0 +1,79 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import SparseArray | ||||
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.parametrize("op", [operator.pos, operator.neg])
def test_unary_op(op, fill_value):
    """A unary operator acts on both the stored values and the fill value."""
    dense = np.array([0, 1, np.nan, 2])

    actual = op(SparseArray(dense, fill_value=fill_value))

    # reference: apply the operator to the dense data and to the fill value
    wanted = SparseArray(op(dense), fill_value=op(fill_value))
    tm.assert_sp_array_equal(actual, wanted)
|  | ||||
|  | ||||
@pytest.mark.parametrize("fill_value", [True, False])
def test_invert(fill_value):
    """``~`` flips values and fill value for array, Series and DataFrame."""
    mask = np.array([True, False, False, True])
    sparse_mask = SparseArray(mask, fill_value=fill_value)

    # bare sparse array
    expected_array = SparseArray(np.invert(mask), fill_value=not fill_value)
    tm.assert_sp_array_equal(operator.invert(sparse_mask), expected_array)

    # wrapped in a Series
    expected_series = pd.Series(expected_array)
    tm.assert_series_equal(operator.invert(pd.Series(sparse_mask)), expected_series)

    # wrapped in a single-column DataFrame
    expected_frame = pd.DataFrame({"A": expected_series})
    tm.assert_frame_equal(
        operator.invert(pd.DataFrame({"A": sparse_mask})), expected_frame
    )
|  | ||||
|  | ||||
class TestUnaryMethods:
    """Unary ``-``, ``abs`` and ``~`` on ``SparseArray``."""

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_neg_operator(self):
        """Negation flips the sign of the values and of the fill value."""
        # (values, fill value, negated values, negated fill value)
        scenarios = [
            ([-1, -2, np.nan, 3], np.nan, [1, 2, np.nan, -3], np.nan),
            ([-1, -2, 1, 3], -1, [1, 2, -1, -3], 1),
        ]
        for values, fill, negated, negated_fill in scenarios:
            actual = -SparseArray(values, fill_value=fill, dtype=np.int8)
            wanted = SparseArray(negated, fill_value=negated_fill, dtype=np.int8)
            tm.assert_sp_array_equal(wanted, actual)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_abs_operator(self):
        """``abs`` takes the magnitude of the values and of the fill value."""
        # (values, fill value, absolute values, absolute fill value)
        scenarios = [
            ([-1, -2, np.nan, 3], np.nan, [1, 2, np.nan, 3], np.nan),
            ([-1, -2, 1, 3], -1, [1, 2, 1, 3], 1),
        ]
        for values, fill, magnitudes, magnitude_fill in scenarios:
            actual = abs(SparseArray(values, fill_value=fill, dtype=np.int8))
            wanted = SparseArray(magnitudes, fill_value=magnitude_fill, dtype=np.int8)
            tm.assert_sp_array_equal(wanted, actual)

    def test_invert_operator(self):
        """``~`` is logical for bool data and bitwise for integer data."""
        flags = [False, True, False, True]
        bool_sparse = SparseArray(flags, fill_value=False, dtype=np.bool_)
        bool_wanted = SparseArray(np.invert(flags), fill_value=True, dtype=np.bool_)
        tm.assert_sp_array_equal(bool_wanted, ~bool_sparse)

        int_sparse = SparseArray([0, 1, 0, 2, 3, 0], fill_value=0, dtype=np.int32)
        int_actual = ~int_sparse
        int_wanted = SparseArray(
            [-1, -2, -1, -3, -4, -1], fill_value=-1, dtype=np.int32
        )
        tm.assert_sp_array_equal(int_wanted, int_actual)
| @ -0,0 +1,73 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import HAS_PYARROW | ||||
|  | ||||
| from pandas.core.dtypes.cast import find_common_type | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.util.version import Version | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "to_concat_dtypes, result_dtype", | ||||
|     [ | ||||
|         # same types | ||||
|         ([("pyarrow", pd.NA), ("pyarrow", pd.NA)], ("pyarrow", pd.NA)), | ||||
|         ([("pyarrow", np.nan), ("pyarrow", np.nan)], ("pyarrow", np.nan)), | ||||
|         ([("python", pd.NA), ("python", pd.NA)], ("python", pd.NA)), | ||||
|         ([("python", np.nan), ("python", np.nan)], ("python", np.nan)), | ||||
|         # pyarrow preference | ||||
|         ([("pyarrow", pd.NA), ("python", pd.NA)], ("pyarrow", pd.NA)), | ||||
|         # NA preference | ||||
|         ([("python", pd.NA), ("python", np.nan)], ("python", pd.NA)), | ||||
|     ], | ||||
| ) | ||||
| def test_concat_series(request, to_concat_dtypes, result_dtype): | ||||
|     if any(storage == "pyarrow" for storage, _ in to_concat_dtypes) and not HAS_PYARROW: | ||||
|         pytest.skip("Could not import 'pyarrow'") | ||||
|  | ||||
|     ser_list = [ | ||||
|         pd.Series(["a", "b", None], dtype=pd.StringDtype(storage, na_value)) | ||||
|         for storage, na_value in to_concat_dtypes | ||||
|     ] | ||||
|  | ||||
|     result = pd.concat(ser_list, ignore_index=True) | ||||
|     expected = pd.Series( | ||||
|         ["a", "b", None, "a", "b", None], dtype=pd.StringDtype(*result_dtype) | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # order doesn't matter for result | ||||
|     result = pd.concat(ser_list[::1], ignore_index=True) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_concat_with_object(string_dtype_arguments):
    """Concatenating a string-dtype Series with an object Series gives object."""
    # _get_common_dtype cannot inspect values, so object dtype with strings still
    # results in object dtype
    as_string = pd.Series(
        ["a", "b", None], dtype=pd.StringDtype(*string_dtype_arguments)
    )
    as_object = pd.Series(["a", "b", None], dtype=object)

    combined = pd.concat([as_string, as_object])
    assert combined.dtype == np.dtype("object")
|  | ||||
|  | ||||
def test_concat_with_numpy(string_dtype_arguments):
    """Common type of a pandas string dtype and assorted NumPy dtypes."""
    dtype = pd.StringDtype(*string_dtype_arguments)

    # common type with a numpy string dtype always preserves the pandas string dtype
    for numpy_str in (np.dtype("U"), np.dtype("U10")):
        assert find_common_type([dtype, numpy_str]) == dtype
        assert find_common_type([numpy_str, dtype]) == dtype

    # with any other numpy dtype -> object
    for other in (np.dtype("S"), np.dtype("int64")):
        assert find_common_type([dtype, other]) == np.dtype("object")

    # only exercised when the installed NumPy is version 2 or newer
    if Version(np.__version__) >= Version("2"):
        assert find_common_type([dtype, np.dtypes.StringDType()]) == dtype
        assert find_common_type([np.dtypes.StringDType(), dtype]) == dtype
| @ -0,0 +1,854 @@ | ||||
| """ | ||||
| This module tests the functionality of StringArray and ArrowStringArray. | ||||
| Tests for the str accessors are in pandas/tests/strings/test_string_array.py | ||||
| """ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| from pandas.compat import HAS_PYARROW | ||||
| from pandas.compat.pyarrow import ( | ||||
|     pa_version_under12p0, | ||||
|     pa_version_under19p0, | ||||
| ) | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas.core.dtypes.common import is_dtype_equal | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.string_ import StringArrayNumpySemantics | ||||
| from pandas.core.arrays.string_arrow import ( | ||||
|     ArrowStringArray, | ||||
|     ArrowStringArrayNumpySemantics, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture
def dtype(string_dtype_arguments):
    """Build a ``StringDtype`` from the parametrized ``(storage, na_value)`` pair."""
    backend, missing = string_dtype_arguments
    return pd.StringDtype(storage=backend, na_value=missing)
|  | ||||
|  | ||||
@pytest.fixture
def dtype2(string_dtype_arguments2):
    """Build a ``StringDtype`` from the ``string_dtype_arguments2`` pair."""
    backend, missing = string_dtype_arguments2
    return pd.StringDtype(storage=backend, na_value=missing)
|  | ||||
|  | ||||
@pytest.fixture
def cls(dtype):
    """Return the array class that the parametrized ``dtype`` constructs."""
    array_type = dtype.construct_array_type()
    return array_type
|  | ||||
|  | ||||
| def string_dtype_highest_priority(dtype1, dtype2): | ||||
|     if HAS_PYARROW: | ||||
|         DTYPE_HIERARCHY = [ | ||||
|             pd.StringDtype("python", na_value=np.nan), | ||||
|             pd.StringDtype("pyarrow", na_value=np.nan), | ||||
|             pd.StringDtype("python", na_value=pd.NA), | ||||
|             pd.StringDtype("pyarrow", na_value=pd.NA), | ||||
|         ] | ||||
|     else: | ||||
|         DTYPE_HIERARCHY = [ | ||||
|             pd.StringDtype("python", na_value=np.nan), | ||||
|             pd.StringDtype("python", na_value=pd.NA), | ||||
|         ] | ||||
|  | ||||
|     h1 = DTYPE_HIERARCHY.index(dtype1) | ||||
|     h2 = DTYPE_HIERARCHY.index(dtype2) | ||||
|     return DTYPE_HIERARCHY[max(h1, h2)] | ||||
|  | ||||
|  | ||||
def test_dtype_constructor():
    """The ``"pyarrow_numpy"`` storage alias warns and maps to pyarrow + NaN."""
    pytest.importorskip("pyarrow")

    with tm.assert_produces_warning(FutureWarning):
        from_alias = pd.StringDtype("pyarrow_numpy")

    spelled_out = pd.StringDtype("pyarrow", na_value=np.nan)
    assert from_alias == spelled_out
|  | ||||
|  | ||||
def test_dtype_equality():
    """String dtypes compare equal only when storage and NA flavour both match."""
    pytest.importorskip("pyarrow")

    python_na = pd.StringDtype("python")
    arrow_na = pd.StringDtype("pyarrow")
    arrow_nan = pd.StringDtype("pyarrow", na_value=np.nan)

    # each dtype equals its fully spelled-out counterpart
    assert python_na == pd.StringDtype("python", na_value=pd.NA)
    assert arrow_na == pd.StringDtype("pyarrow", na_value=pd.NA)
    assert arrow_nan == pd.StringDtype("pyarrow", na_value=np.nan)
    assert arrow_nan == pd.StringDtype("pyarrow", na_value=float("nan"))

    # the three are pairwise different, whichever side of ``!=`` they are on
    distinct = (python_na, arrow_na, arrow_nan)
    for left in distinct:
        for right in distinct:
            if left is not right:
                assert left != right
|  | ||||
|  | ||||
def test_repr(dtype):
    """Check the DataFrame, Series and array reprs of a three-element column."""
    df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype=dtype)})
    nan_semantics = dtype.na_value is np.nan

    # DataFrame repr
    frame_text = (
        "     A\n0    a\n1  NaN\n2    b"
        if nan_semantics
        else "      A\n0     a\n1  <NA>\n2     b"
    )
    assert repr(df) == frame_text

    # Series repr
    series_text = (
        "0      a\n1    NaN\n2      b\nName: A, dtype: str"
        if nan_semantics
        else "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
    )
    assert repr(df.A) == series_text

    # array repr: the class name depends on storage and missing-value flavour
    if dtype.na_value is pd.NA:
        flavour = "NA"
    elif nan_semantics:
        flavour = "nan"
    else:
        flavour = None
    nan_body = "['a', nan, 'b']\nLength: 3, dtype: str"
    na_body = "['a', <NA>, 'b']\nLength: 3, dtype: string"
    known = {
        ("pyarrow", "NA"): ("ArrowStringArray", na_body),
        ("pyarrow", "nan"): ("ArrowStringArrayNumpySemantics", nan_body),
        ("python", "nan"): ("StringArrayNumpySemantics", nan_body),
    }
    # anything not listed above falls back to the plain StringArray repr
    arr_name, body = known.get((dtype.storage, flavour), ("StringArray", na_body))
    assert repr(df.A.array) == f"<{arr_name}>\n{body}"
|  | ||||
|  | ||||
| def test_none_to_nan(cls, dtype): | ||||
|     a = cls._from_sequence(["a", None, "b"], dtype=dtype) | ||||
|     assert a[1] is not None | ||||
|     assert a[1] is a.dtype.na_value | ||||
|  | ||||
|  | ||||
def test_setitem_validates(cls, dtype):
    """Assigning a non-string scalar or a numeric ndarray raises ``TypeError``."""
    target = cls._from_sequence(["a", "b"], dtype=dtype)

    # scalar assignment: the offending value is quoted in the message
    with pytest.raises(TypeError, match="Invalid value '10' for dtype 'str"):
        target[0] = 10

    # slice assignment from an integer ndarray
    with pytest.raises(TypeError, match="Invalid value for dtype 'str"):
        target[:] = np.array([1, 2])
|  | ||||
|  | ||||
| def test_setitem_with_scalar_string(dtype): | ||||
|     # is_float_dtype considers some strings, like 'd', to be floats | ||||
|     # which can cause issues. | ||||
|     arr = pd.array(["a", "c"], dtype=dtype) | ||||
|     arr[0] = "d" | ||||
|     expected = pd.array(["d", "c"], dtype=dtype) | ||||
|     tm.assert_extension_array_equal(arr, expected) | ||||
|  | ||||
|  | ||||
| def test_setitem_with_array_with_missing(dtype): | ||||
|     # ensure that when setting with an array of values, we don't mutate the | ||||
|     # array `value` in __setitem__(self, key, value) | ||||
|     arr = pd.array(["a", "b", "c"], dtype=dtype) | ||||
|     value = np.array(["A", None]) | ||||
|     value_orig = value.copy() | ||||
|     arr[[0, 1]] = value | ||||
|  | ||||
|     expected = pd.array(["A", pd.NA, "c"], dtype=dtype) | ||||
|     tm.assert_extension_array_equal(arr, expected) | ||||
|     tm.assert_numpy_array_equal(value, value_orig) | ||||
|  | ||||
|  | ||||
def test_astype_roundtrip(dtype):
    """datetime64 and timedelta64 Series survive a cast to string dtype and back."""
    datetimes = pd.Series(pd.date_range("2000", periods=12))
    datetimes[0] = None

    as_strings = datetimes.astype(dtype)
    assert is_dtype_equal(as_strings.dtype, dtype)
    tm.assert_series_equal(as_strings.astype("datetime64[ns]"), datetimes)

    # GH#38509 same thing for timedelta64
    timedeltas = datetimes - datetimes.iloc[-1]
    deltas_as_strings = timedeltas.astype(dtype)
    assert is_dtype_equal(deltas_as_strings.dtype, dtype)
    tm.assert_series_equal(deltas_as_strings.astype(timedeltas.dtype), timedeltas)
|  | ||||
|  | ||||
| def test_add(dtype): | ||||
|     a = pd.Series(["a", "b", "c", None, None], dtype=dtype) | ||||
|     b = pd.Series(["x", "y", None, "z", None], dtype=dtype) | ||||
|  | ||||
|     result = a + b | ||||
|     expected = pd.Series(["ax", "by", None, None, None], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = a.add(b) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = a.radd(b) | ||||
|     expected = pd.Series(["xa", "yb", None, None, None], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = a.add(b, fill_value="-") | ||||
|     expected = pd.Series(["ax", "by", "c-", "-z", None], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_add_2d(dtype, request):
    """Adding a (1, 3) object ndarray to a length-3 string array raises ValueError."""
    if dtype.storage == "pyarrow":
        # pyarrow storage is marked as an expected failure for this check
        request.applymarker(
            pytest.mark.xfail(
                raises=None, reason="Failed: DID NOT RAISE <class 'ValueError'>"
            )
        )

    strings = pd.array(["a", "b", "c"], dtype=dtype)
    two_dim = np.array([["a", "b", "c"]], dtype=object)
    with pytest.raises(ValueError, match="3 != 1"):
        strings + two_dim

    # same expectation when the array is wrapped in a Series
    wrapped = pd.Series(strings)
    with pytest.raises(ValueError, match="3 != 1"):
        wrapped + two_dim
|  | ||||
|  | ||||
| def test_add_sequence(dtype): | ||||
|     a = pd.array(["a", "b", None, None], dtype=dtype) | ||||
|     other = ["x", None, "y", None] | ||||
|  | ||||
|     result = a + other | ||||
|     expected = pd.array(["ax", None, None, None], dtype=dtype) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     result = other + a | ||||
|     expected = pd.array(["xa", None, None, None], dtype=dtype) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_mul(dtype): | ||||
|     a = pd.array(["a", "b", None], dtype=dtype) | ||||
|     result = a * 2 | ||||
|     expected = pd.array(["aa", "bb", None], dtype=dtype) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     result = 2 * a | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.xfail(reason="GH-28527")
def test_add_strings(dtype):
    """Adding a string array and an object DataFrame defers to the DataFrame."""
    strings = pd.array(["a", "b", "c", "d"], dtype=dtype)
    frame = pd.DataFrame([["t", "y", "v", "w"]], dtype=object)

    # the array itself must decline the operation
    assert strings.__add__(frame) is NotImplemented

    tm.assert_frame_equal(
        strings + frame, pd.DataFrame([["at", "by", "cv", "dw"]]).astype(dtype)
    )
    tm.assert_frame_equal(
        frame + strings, pd.DataFrame([["ta", "yb", "vc", "wd"]]).astype(dtype)
    )
|  | ||||
|  | ||||
@pytest.mark.xfail(reason="GH-28527")
def test_add_frame(dtype):
    """Adding a string array with NaNs and a DataFrame with NaNs defers to the frame."""
    strings = pd.array(["a", "b", np.nan, np.nan], dtype=dtype)
    frame = pd.DataFrame([["x", np.nan, "y", np.nan]])

    # the array itself must decline the operation
    assert strings.__add__(frame) is NotImplemented

    tm.assert_frame_equal(
        strings + frame,
        pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype(dtype),
    )
    tm.assert_frame_equal(
        frame + strings,
        pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype(dtype),
    )
|  | ||||
|  | ||||
| def test_comparison_methods_scalar(comparison_op, dtype): | ||||
|     op_name = f"__{comparison_op.__name__}__" | ||||
|     a = pd.array(["a", None, "c"], dtype=dtype) | ||||
|     other = "a" | ||||
|     result = getattr(a, op_name)(other) | ||||
|     if dtype.na_value is np.nan: | ||||
|         expected = np.array([getattr(item, op_name)(other) for item in a]) | ||||
|         if comparison_op == operator.ne: | ||||
|             expected[1] = True | ||||
|         else: | ||||
|             expected[1] = False | ||||
|         tm.assert_numpy_array_equal(result, expected.astype(np.bool_)) | ||||
|     else: | ||||
|         expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean" | ||||
|         expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object) | ||||
|         expected = pd.array(expected, dtype=expected_dtype) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_comparison_methods_scalar_pd_na(comparison_op, dtype): | ||||
|     op_name = f"__{comparison_op.__name__}__" | ||||
|     a = pd.array(["a", None, "c"], dtype=dtype) | ||||
|     result = getattr(a, op_name)(pd.NA) | ||||
|  | ||||
|     if dtype.na_value is np.nan: | ||||
|         if operator.ne == comparison_op: | ||||
|             expected = np.array([True, True, True]) | ||||
|         else: | ||||
|             expected = np.array([False, False, False]) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|     else: | ||||
|         expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean" | ||||
|         expected = pd.array([None, None, None], dtype=expected_dtype) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_comparison_methods_scalar_not_string(comparison_op, dtype):
    """Compare a string array with the int 42: ordering raises, equality does not."""
    method_name = f"__{comparison_op.__name__}__"

    arr = pd.array(["a", None, "c"], dtype=dtype)
    number = 42

    is_equality = method_name in ["__eq__", "__ne__"]
    if not is_equality:
        # ordering comparisons against a non-string are rejected
        with pytest.raises(TypeError, match="Invalid comparison|not supported between"):
            getattr(arr, method_name)(number)

        return

    result = getattr(arr, method_name)(number)

    if dtype.na_value is not np.nan:
        # NA-backed dtypes keep the missing slot missing
        by_method = {"__eq__": [False, None, False], "__ne__": [True, None, True]}
        expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean"
        tm.assert_extension_array_equal(
            result, pd.array(by_method[method_name], dtype=expected_dtype)
        )
        return

    # NaN-backed dtypes return a plain bool ndarray
    by_method = {
        "__eq__": [False, False, False],
        "__ne__": [True, True, True],
    }
    tm.assert_numpy_array_equal(result, np.array(by_method[method_name]))
|  | ||||
|  | ||||
def test_comparison_methods_array(comparison_op, dtype, dtype2):
    """Compare two string arrays that may use different string dtypes."""
    method_name = f"__{comparison_op.__name__}__"

    left = pd.array(["a", None, "c"], dtype=dtype)
    right = pd.array([None, None, "c"], dtype=dtype2)
    result = comparison_op(left, right)

    # swapping the operands must give the same result for this data
    swapped = comparison_op(right, left)
    tm.assert_equal(result, swapped)

    both_nan_backed = dtype.na_value is np.nan and dtype2.na_value is np.nan
    if not both_nan_backed:
        # at least one NA-backed operand: nullable result whose flavour
        # follows the higher-priority dtype's storage
        winner = string_dtype_highest_priority(dtype, dtype2)
        expected_dtype = "boolean" if winner.storage == "python" else "bool[pyarrow]"

        expected = np.full(len(left), fill_value=None, dtype="object")
        expected[-1] = getattr(right[-1], method_name)(left[-1])
        tm.assert_extension_array_equal(
            result, pd.array(expected, dtype=expected_dtype)
        )
        return

    # both NaN-backed: plain bool ndarray
    if operator.ne == comparison_op:
        expected = np.array([True, True, False])
    else:
        expected = np.array([False, False, False])
        expected[-1] = getattr(right[-1], method_name)(left[-1])
    tm.assert_numpy_array_equal(result, expected)
|  | ||||
|  | ||||
@td.skip_if_no("pyarrow")
def test_comparison_methods_array_arrow_extension(comparison_op, dtype2):
    """Compare an ``ArrowDtype(pa.string())`` array with another string array."""
    # Test pd.ArrowDtype(pa.string()) against other string arrays
    import pyarrow as pa

    method_name = f"__{comparison_op.__name__}__"
    arrow_dtype = pd.ArrowDtype(pa.string())
    left = pd.array(["a", None, "c"], dtype=arrow_dtype)
    right = pd.array([None, None, "c"], dtype=dtype2)
    result = comparison_op(left, right)

    # swapping the operands must give the same result for this data
    swapped = comparison_op(right, left)
    tm.assert_equal(result, swapped)

    # only the last position holds a value on both sides
    expected = pd.array([None, None, True], dtype="bool[pyarrow]")
    expected[-1] = getattr(right[-1], method_name)(left[-1])
    tm.assert_extension_array_equal(result, expected)
|  | ||||
|  | ||||
| def test_comparison_methods_list(comparison_op, dtype): | ||||
|     op_name = f"__{comparison_op.__name__}__" | ||||
|  | ||||
|     a = pd.array(["a", None, "c"], dtype=dtype) | ||||
|     other = [None, None, "c"] | ||||
|     result = comparison_op(a, other) | ||||
|  | ||||
|     # ensure operation is commutative | ||||
|     result2 = comparison_op(other, a) | ||||
|     tm.assert_equal(result, result2) | ||||
|  | ||||
|     if dtype.na_value is np.nan: | ||||
|         if operator.ne == comparison_op: | ||||
|             expected = np.array([True, True, False]) | ||||
|         else: | ||||
|             expected = np.array([False, False, False]) | ||||
|             expected[-1] = getattr(other[-1], op_name)(a[-1]) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     else: | ||||
|         expected_dtype = "boolean[pyarrow]" if dtype.storage == "pyarrow" else "boolean" | ||||
|         expected = np.full(len(a), fill_value=None, dtype="object") | ||||
|         expected[-1] = getattr(other[-1], op_name)(a[-1]) | ||||
|         expected = pd.array(expected, dtype=expected_dtype) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_constructor_raises(cls):
    """Direct construction from unsupported ndarrays raises ``ValueError``."""
    numpy_backed = cls is pd.arrays.StringArray or cls is StringArrayNumpySemantics

    # each array class reports its own message
    if cls is pd.arrays.StringArray:
        msg = "StringArray requires a sequence of strings or pandas.NA"
    elif cls is StringArrayNumpySemantics:
        msg = "StringArrayNumpySemantics requires a sequence of strings or NaN"
    else:
        msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArray"

    # bytes and an empty default-dtype array are always rejected
    with pytest.raises(ValueError, match=msg):
        cls(np.array(["a", "b"], dtype="S1"))

    with pytest.raises(ValueError, match=msg):
        cls(np.array([]))

    for missing in (np.nan, None):
        values = np.array(["a", missing], dtype=object)
        if numpy_backed:
            # GH#45057 np.nan and None do NOT raise, as they are considered
            #  valid NAs for string dtype
            cls(values)
        else:
            with pytest.raises(ValueError, match=msg):
                cls(values)

    # NaT-like values are rejected by every class
    with pytest.raises(ValueError, match=msg):
        cls(np.array(["a", pd.NaT], dtype=object))

    with pytest.raises(ValueError, match=msg):
        cls(np.array(["a", np.datetime64("NaT", "ns")], dtype=object))

    with pytest.raises(ValueError, match=msg):
        cls(np.array(["a", np.timedelta64("NaT", "ns")], dtype=object))
|  | ||||
|  | ||||
@pytest.mark.parametrize("na", [np.nan, np.float64("nan"), float("nan"), None, pd.NA])
def test_constructor_nan_like(na):
    """Each NaN-like input to ``StringArray`` compares equal to one built with pd.NA."""
    built = pd.arrays.StringArray(np.array(["a", na], dtype="object"))
    reference = pd.arrays.StringArray(np.array(["a", pd.NA]))
    tm.assert_extension_array_equal(built, reference)
|  | ||||
|  | ||||
@pytest.mark.parametrize("copy", [True, False])
def test_from_sequence_no_mutate(copy, cls, dtype):
    """``_from_sequence`` must not modify its input, whatever ``copy`` is."""
    with_nan = np.array(["a", np.nan], dtype=object)
    untouched = with_nan.copy()
    with_na = np.array(["a", pd.NA], dtype=object)

    result = cls._from_sequence(with_nan, dtype=dtype, copy=copy)

    # build the reference array the way each class expects its raw data
    if cls in (ArrowStringArray, ArrowStringArrayNumpySemantics):
        import pyarrow as pa

        reference = cls(pa.array(with_na, type=pa.string(), from_pandas=True))
    elif cls is StringArrayNumpySemantics:
        reference = cls(with_nan)
    else:
        reference = cls(with_na)

    tm.assert_extension_array_equal(result, reference)
    tm.assert_numpy_array_equal(with_nan, untouched)
|  | ||||
|  | ||||
def test_astype_int(dtype):
    """Casting to int64 works without missing values and raises with them."""
    complete = pd.array(["1", "2", "3"], dtype=dtype)
    tm.assert_numpy_array_equal(
        complete.astype("int64"), np.array([1, 2, 3], dtype="int64")
    )

    with_missing = pd.array(["1", pd.NA, "3"], dtype=dtype)
    # the error type and message depend on which NA sentinel the dtype uses
    if dtype.na_value is not np.nan:
        err = TypeError
        msg = (
            r"int\(\) argument must be a string, a bytes-like "
            r"object or a( real)? number"
        )
    else:
        err = ValueError
        msg = "cannot convert float NaN to integer"
    with pytest.raises(err, match=msg):
        with_missing.astype("int64")
|  | ||||
|  | ||||
| def test_astype_nullable_int(dtype): | ||||
|     arr = pd.array(["1", pd.NA, "3"], dtype=dtype) | ||||
|  | ||||
|     result = arr.astype("Int64") | ||||
|     expected = pd.array([1, pd.NA, 3], dtype="Int64") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_astype_float(dtype, any_float_dtype): | ||||
|     # Don't compare arrays (37974) | ||||
|     ser = pd.Series(["1.1", pd.NA, "3.3"], dtype=dtype) | ||||
|     result = ser.astype(any_float_dtype) | ||||
|     expected = pd.Series([1.1, np.nan, 3.3], dtype=any_float_dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce(skipna, dtype):
    """``sum`` over strings without missing values concatenates them."""
    strings = pd.Series(["a", "b", "c"], dtype=dtype)
    total = strings.sum(skipna=skipna)
    assert total == "abc"
|  | ||||
|  | ||||
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_missing(skipna, dtype):
    """``sum`` skips missing values with ``skipna`` and is missing without it."""
    strings = pd.Series([None, "a", None, "b", "c", None], dtype=dtype)
    total = strings.sum(skipna=skipna)
    if not skipna:
        assert pd.isna(total)
    else:
        assert total == "abc"
|  | ||||
|  | ||||
@pytest.mark.parametrize("method", ["min", "max"])
@pytest.mark.parametrize("skipna", [True, False])
def test_min_max(method, skipna, dtype):
    """``min``/``max`` ignore a missing value with ``skipna`` and return NA without."""
    strings = pd.Series(["a", "b", "c", None], dtype=dtype)
    reducer = getattr(strings, method)
    result = reducer(skipna=skipna)

    if not skipna:
        # the missing entry dominates the reduction
        assert result is strings.dtype.na_value
        return

    expected = "a" if method == "min" else "c"
    assert result == expected
|  | ||||
|  | ||||
@pytest.mark.parametrize("method", ["min", "max"])
@pytest.mark.parametrize("box", [pd.Series, pd.array])
def test_min_max_numpy(method, box, dtype, request):
    """``np.min``/``np.max`` on a boxed string container skip the missing value.

    Parameters
    ----------
    method : str
        Name of the NumPy reduction to call ("min" or "max").
    box : callable
        Constructor used to build the container (``pd.Series`` or ``pd.array``).
    dtype : StringDtype
        String dtype under test.
    request : pytest.FixtureRequest
        Used to attach an xfail marker for the unsupported combination.
    """
    if dtype.storage == "pyarrow" and box is pd.array:
        # The original nested ``if box is pd.array / else`` here; the else
        # branch could never run because this condition already requires
        # ``box is pd.array``, so only the reachable reason is kept.
        reason = "'<=' not supported between instances of 'str' and 'NoneType'"
        mark = pytest.mark.xfail(raises=TypeError, reason=reason)
        request.applymarker(mark)

    arr = box(["a", "b", "c", None], dtype=dtype)
    result = getattr(np, method)(arr)
    expected = "a" if method == "min" else "c"
    assert result == expected
|  | ||||
|  | ||||
def test_fillna_args(dtype):
    """``fillna`` accepts ``str`` and ``np.str_`` fill values and rejects an int."""
    # GH 37987

    with_missing = pd.array(["a", pd.NA], dtype=dtype)
    filled = pd.array(["a", "b"], dtype=dtype)

    # plain Python string
    tm.assert_extension_array_equal(with_missing.fillna(value="b"), filled)

    # NumPy string scalar
    tm.assert_extension_array_equal(with_missing.fillna(value=np.str_("b")), filled)

    # a non-string fill value is rejected
    with pytest.raises(TypeError, match="Invalid value '1' for dtype 'str"):
        with_missing.fillna(value=1)
|  | ||||
|  | ||||
def test_arrow_array(dtype):
    """``pa.array`` on a pandas string array yields the expected Arrow type."""
    # protocol added in 0.15.0
    pa = pytest.importorskip("pyarrow")
    import pyarrow.compute as pc

    strings = pd.array(["a", "b", "c"], dtype=dtype)
    converted = pa.array(strings)

    # start from large_string and adjust per storage
    reference = pa.array(list(strings), type=pa.large_string(), from_pandas=True)
    storage = dtype.storage
    if storage == "pyarrow" and pa_version_under12p0:
        reference = pa.chunked_array(reference)
    if storage == "python":
        reference = pc.cast(reference, pa.string())
    assert converted.equals(reference)
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
    """A string column survives DataFrame -> pyarrow Table -> DataFrame."""
    # roundtrip possible from arrow 1.0.0
    pa = pytest.importorskip("pyarrow")

    frame = pd.DataFrame({"a": pd.array(["a", "b", None], dtype=dtype)})
    table = pa.table(frame)

    # python storage exports as "string", anything else as "large_string"
    arrow_type = "string" if dtype.storage == "python" else "large_string"
    assert table.field("a").type == arrow_type

    with pd.option_context("string_storage", string_storage):
        roundtripped = table.to_pandas()

    if dtype.na_value is np.nan and not using_infer_string:
        assert roundtripped["a"].dtype == "object"
        return

    assert isinstance(roundtripped["a"].dtype, pd.StringDtype)
    expected = frame.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
    if using_infer_string:
        nan_string_dtype = pd.StringDtype(string_storage, na_value=np.nan)
        expected.columns = expected.columns.astype(nan_string_dtype)
    tm.assert_frame_equal(roundtripped, expected)
    # ensure the missing value is represented by NA and not np.nan or None
    assert roundtripped.loc[2, "a"] is roundtripped["a"].dtype.na_value
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_from_string(using_infer_string):
    """Convert a pyarrow string table that carries no pandas metadata."""
    # not roundtrip,  but starting with pyarrow table without pandas metadata
    pa = pytest.importorskip("pyarrow")
    column = pa.array(["a", "b", None], type=pa.string())
    table = pa.table({"a": column})

    converted = table.to_pandas()

    # "str" is expected only with infer_string on and a new enough pyarrow
    expects_str = using_infer_string and not pa_version_under19p0
    if expects_str:
        expected = pd.DataFrame({"a": ["a", "b", None]}, dtype="str")
    else:
        expected = pd.DataFrame({"a": ["a", "b", None]}, dtype="object")
    tm.assert_frame_equal(converted, expected)
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
    """An Arrow table whose string column has zero chunks converts to pandas.

    Parameters
    ----------
    dtype : StringDtype
        String dtype used to build the empty source column.
    string_storage : str
        Value set for the "string_storage" option while converting back.
    using_infer_string : bool
        Fixture value; presumably reflects the ``future.infer_string``
        option (the fixture is defined outside this file).
    """
    # GH-41040
    pa = pytest.importorskip("pyarrow")

    data = pd.array([], dtype=dtype)
    df = pd.DataFrame({"a": data})
    table = pa.table(df)
    if dtype.storage == "python":
        assert table.field("a").type == "string"
    else:
        assert table.field("a").type == "large_string"
    # Instantiate the same table with no chunks at all
    table = pa.table([pa.chunked_array([], type=pa.string())], schema=table.schema)
    with pd.option_context("string_storage", string_storage):
        result = table.to_pandas()

    # Use the fixture for both checks; the original called
    # using_string_dtype() here but the fixture a few lines below.
    if dtype.na_value is np.nan and not using_infer_string:
        assert result["a"].dtype == "object"
    else:
        assert isinstance(result["a"].dtype, pd.StringDtype)
        expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
        if using_infer_string:
            expected.columns = expected.columns.astype(
                pd.StringDtype(string_storage, na_value=np.nan)
            )
        tm.assert_frame_equal(result, expected)
|  | ||||
|  | ||||
| def test_value_counts_na(dtype): | ||||
|     if dtype.na_value is np.nan: | ||||
|         exp_dtype = "int64" | ||||
|     elif dtype.storage == "pyarrow": | ||||
|         exp_dtype = "int64[pyarrow]" | ||||
|     else: | ||||
|         exp_dtype = "Int64" | ||||
|     arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype) | ||||
|     result = arr.value_counts(dropna=False) | ||||
|     expected = pd.Series([2, 1, 1], index=arr[[0, 1, 3]], dtype=exp_dtype, name="count") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = arr.value_counts(dropna=True) | ||||
|     expected = pd.Series([2, 1], index=arr[:2], dtype=exp_dtype, name="count") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_value_counts_with_normalize(dtype): | ||||
|     if dtype.na_value is np.nan: | ||||
|         exp_dtype = np.float64 | ||||
|     elif dtype.storage == "pyarrow": | ||||
|         exp_dtype = "double[pyarrow]" | ||||
|     else: | ||||
|         exp_dtype = "Float64" | ||||
|     ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype) | ||||
|     result = ser.value_counts(normalize=True) | ||||
|     expected = pd.Series([2, 1], index=ser[:2], dtype=exp_dtype, name="proportion") / 3 | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values, expected",
    [
        (["a", "b", "c"], np.array([False, False, False])),
        (["a", "b", None], np.array([False, False, True])),
    ],
)
def test_use_inf_as_na(values, expected, dtype):
    """``isna`` gives the expected mask while ``mode.use_inf_as_na`` is enabled."""
    # https://github.com/pandas-dev/pandas/issues/33655
    arr = pd.array(values, dtype=dtype)
    msg = "use_inf_as_na option is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        with pd.option_context("mode.use_inf_as_na", True):
            # bare array
            tm.assert_numpy_array_equal(arr.isna(), expected)

            # wrapped in a Series
            expected_series = pd.Series(expected)
            tm.assert_series_equal(pd.Series(arr).isna(), expected_series)

            # wrapped in a DataFrame
            expected_frame = pd.DataFrame(expected_series)
            tm.assert_frame_equal(pd.DataFrame(arr).isna(), expected_frame)
|  | ||||
|  | ||||
| def test_value_counts_sort_false(dtype): | ||||
|     if dtype.na_value is np.nan: | ||||
|         exp_dtype = "int64" | ||||
|     elif dtype.storage == "pyarrow": | ||||
|         exp_dtype = "int64[pyarrow]" | ||||
|     else: | ||||
|         exp_dtype = "Int64" | ||||
|     ser = pd.Series(["a", "b", "c", "b"], dtype=dtype) | ||||
|     result = ser.value_counts(sort=False) | ||||
|     expected = pd.Series([1, 2, 1], index=ser[:3], dtype=exp_dtype, name="count") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_memory_usage(dtype): | ||||
|     # GH 33963 | ||||
|  | ||||
|     if dtype.storage == "pyarrow": | ||||
|         pytest.skip(f"not applicable for {dtype.storage}") | ||||
|  | ||||
|     series = pd.Series(["a", "b", "c"], dtype=dtype) | ||||
|  | ||||
|     assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("float_dtype", [np.float16, np.float32, np.float64])
def test_astype_from_float_dtype(float_dtype, dtype):
    """Casting 0.1 from any float width to string dtype yields "0.1"."""
    # https://github.com/pandas-dev/pandas/issues/36451
    floats = pd.Series([0.1], dtype=float_dtype)
    as_strings = floats.astype(dtype)
    tm.assert_series_equal(as_strings, pd.Series(["0.1"], dtype=dtype))
|  | ||||
|  | ||||
| def test_to_numpy_returns_pdna_default(dtype): | ||||
|     arr = pd.array(["a", pd.NA, "b"], dtype=dtype) | ||||
|     result = np.array(arr) | ||||
|     expected = np.array(["a", dtype.na_value, "b"], dtype=object) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_numpy_na_value(dtype, nulls_fixture): | ||||
|     na_value = nulls_fixture | ||||
|     arr = pd.array(["a", pd.NA, "b"], dtype=dtype) | ||||
|     result = arr.to_numpy(na_value=na_value) | ||||
|     expected = np.array(["a", na_value, "b"], dtype=object) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_isin(dtype, fixed_now_ts): | ||||
|     s = pd.Series(["a", "b", None], dtype=dtype) | ||||
|  | ||||
|     result = s.isin(["a", "c"]) | ||||
|     expected = pd.Series([True, False, False]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = s.isin(["a", pd.NA]) | ||||
|     expected = pd.Series([True, False, True]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = s.isin([]) | ||||
|     expected = pd.Series([False, False, False]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = s.isin(["a", fixed_now_ts]) | ||||
|     expected = pd.Series([True, False, False]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = s.isin([fixed_now_ts]) | ||||
|     expected = pd.Series([False, False, False]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_isin_string_array(dtype, dtype2): | ||||
|     s = pd.Series(["a", "b", None], dtype=dtype) | ||||
|  | ||||
|     result = s.isin(pd.array(["a", "c"], dtype=dtype2)) | ||||
|     expected = pd.Series([True, False, False]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = s.isin(pd.array(["a", None], dtype=dtype2)) | ||||
|     expected = pd.Series([True, False, True]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_isin_arrow_string_array(dtype):
    """``Series.isin`` accepts an ``ArrowDtype(pa.string())`` array of candidates."""
    pa = pytest.importorskip("pyarrow")
    strings = pd.Series(["a", "b", None], dtype=dtype)

    # candidates without a missing value
    candidates = pd.array(["a", "c"], dtype=pd.ArrowDtype(pa.string()))
    tm.assert_series_equal(
        strings.isin(candidates), pd.Series([True, False, False])
    )

    # a missing candidate matches the missing entry
    candidates = pd.array(["a", None], dtype=pd.ArrowDtype(pa.string()))
    tm.assert_series_equal(
        strings.isin(candidates), pd.Series([True, False, True])
    )
|  | ||||
|  | ||||
def test_setitem_scalar_with_mask_validation(dtype):
    """Boolean-mask assignment converts None to NA and rejects a non-string scalar."""
    # https://github.com/pandas-dev/pandas/issues/47628
    # setting None with a boolean mask (through _putmask) should still result
    # in the dtype's NA value in the underlying array
    selector = np.array([False, True, False])

    strings = pd.Series(["a", "b", "c"], dtype=dtype)
    strings[selector] = None
    assert strings.array[1] is strings.dtype.na_value

    # for other non-string we should also raise an error
    strings = pd.Series(["a", "b", "c"], dtype=dtype)
    with pytest.raises(TypeError, match="Invalid value '1' for dtype 'str"):
        strings[selector] = 1
|  | ||||
|  | ||||
| def test_from_numpy_str(dtype): | ||||
|     # pd.array() built from a numpy ``np.str_`` array must equal the array | ||||
|     # built from the equivalent Python list for the same ``dtype``. | ||||
|     vals = ["a", "b", "c"] | ||||
|     arr = np.array(vals, dtype=np.str_) | ||||
|     result = pd.array(arr, dtype=dtype) | ||||
|     expected = pd.array(vals, dtype=dtype) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_tolist(dtype): | ||||
|     # tolist() on an array without missing values must give back the original | ||||
|     # list of Python strings. | ||||
|     vals = ["a", "b", "c"] | ||||
|     arr = pd.array(vals, dtype=dtype) | ||||
|     result = arr.tolist() | ||||
|     expected = vals | ||||
|     tm.assert_equal(result, expected) | ||||
| @ -0,0 +1,282 @@ | ||||
| import pickle | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.string_ import ( | ||||
|     StringArray, | ||||
|     StringDtype, | ||||
| ) | ||||
| from pandas.core.arrays.string_arrow import ( | ||||
|     ArrowStringArray, | ||||
|     ArrowStringArrayNumpySemantics, | ||||
| ) | ||||
|  | ||||
|  | ||||
| def test_eq_all_na(): | ||||
|     # Elementwise == on an all-NA pyarrow-backed string array is expected to | ||||
|     # give an all-NA "boolean[pyarrow]" array; skipped without pyarrow. | ||||
|     pytest.importorskip("pyarrow") | ||||
|     a = pd.array([pd.NA, pd.NA], dtype=StringDtype("pyarrow")) | ||||
|     result = a == a | ||||
|     expected = pd.array([pd.NA, pd.NA], dtype="boolean[pyarrow]") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_config(string_storage, using_infer_string): | ||||
|     # Checks that the "string_storage" option determines the storage of a | ||||
|     # default StringDtype() and of the array that pd.array() infers for strings. | ||||
|     # NOTE(review): ``using_infer_string`` is requested but not used in the body. | ||||
|  | ||||
|     # with the default string_storage setting | ||||
|     # always "python" at the moment | ||||
|     assert StringDtype().storage == "python" | ||||
|  | ||||
|     with pd.option_context("string_storage", string_storage): | ||||
|         assert StringDtype().storage == string_storage | ||||
|         result = pd.array(["a", "b"]) | ||||
|         assert result.dtype.storage == string_storage | ||||
|  | ||||
|     # pd.array(..) by default always returns the NA-variant | ||||
|     # (``result`` was created inside the option context above) | ||||
|     dtype = StringDtype(string_storage, na_value=pd.NA) | ||||
|     expected = dtype.construct_array_type()._from_sequence(["a", "b"], dtype=dtype) | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_config_bad_storage_raises(): | ||||
|     # Assigning an unsupported value to the string_storage option must raise | ||||
|     # ValueError. The message is escaped because it contains "|", which would | ||||
|     # otherwise act as regex alternation in ``match``. | ||||
|     msg = re.escape("Value must be one of python|pyarrow") | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         pd.options.mode.string_storage = "foo" | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("chunked", [True, False]) | ||||
| @pytest.mark.parametrize("array_lib", ["numpy", "pyarrow"]) | ||||
| def test_constructor_not_string_type_raises(array_lib, chunked): | ||||
|     # ArrowStringArray must reject integer data with ValueError, whether it | ||||
|     # arrives as a numpy array, a pyarrow array or a pyarrow chunked array. | ||||
|     pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
|     # rebind the parameter from the library name to the module itself | ||||
|     array_lib = pa if array_lib == "pyarrow" else np | ||||
|  | ||||
|     arr = array_lib.array([1, 2, 3]) | ||||
|     if chunked: | ||||
|         if array_lib is np: | ||||
|             pytest.skip("chunked not applicable to numpy array") | ||||
|         arr = pa.chunked_array(arr) | ||||
|     # the expected message depends on which library produced the input | ||||
|     if array_lib is np: | ||||
|         msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArray" | ||||
|     else: | ||||
|         msg = re.escape( | ||||
|             "ArrowStringArray requires a PyArrow (chunked) array of large_string type" | ||||
|         ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ArrowStringArray(arr) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("chunked", [True, False]) | ||||
| def test_constructor_not_string_type_value_dictionary_raises(chunked): | ||||
|     # A dictionary-typed pyarrow array whose values are int32 (not strings) | ||||
|     # must be rejected by ArrowStringArray with ValueError, chunked or not. | ||||
|     pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
|     arr = pa.array([1, 2, 3], pa.dictionary(pa.int32(), pa.int32())) | ||||
|     if chunked: | ||||
|         arr = pa.chunked_array(arr) | ||||
|  | ||||
|     msg = re.escape( | ||||
|         "ArrowStringArray requires a PyArrow (chunked) array of large_string type" | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ArrowStringArray(arr) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("string_type", ["string", "large_string"]) | ||||
| @pytest.mark.parametrize("chunked", [True, False]) | ||||
| def test_constructor_valid_string_type_value_dictionary(string_type, chunked): | ||||
|     # A dictionary-encoded pyarrow array of string or large_string values is | ||||
|     # accepted by ArrowStringArray, chunked or not. | ||||
|     pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
|     # getattr(pa, string_type)() builds pa.string() or pa.large_string() | ||||
|     arr = pa.array(["1", "2", "3"], getattr(pa, string_type)()).dictionary_encode() | ||||
|     if chunked: | ||||
|         arr = pa.chunked_array(arr) | ||||
|  | ||||
|     arr = ArrowStringArray(arr) | ||||
|     # dictionary type get converted to dense large string array | ||||
|     assert pa.types.is_large_string(arr._pa_array.type) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("chunked", [True, False]) | ||||
| def test_constructor_valid_string_view(chunked): | ||||
|     # A pyarrow string_view array is accepted by ArrowStringArray, chunked or | ||||
|     # not. | ||||
|     # requires pyarrow>=18 for casting string_view to string | ||||
|     pa = pytest.importorskip("pyarrow", minversion="18") | ||||
|  | ||||
|     arr = pa.array(["1", "2", "3"], pa.string_view()) | ||||
|     if chunked: | ||||
|         arr = pa.chunked_array(arr) | ||||
|  | ||||
|     arr = ArrowStringArray(arr) | ||||
|     # string_view type gets converted to large string array | ||||
|     assert pa.types.is_large_string(arr._pa_array.type) | ||||
|  | ||||
|  | ||||
| def test_constructor_from_list(): | ||||
|     # GH#27673 | ||||
|     # A Series built from a plain list with StringDtype(storage="pyarrow") must | ||||
|     # keep that dtype and storage. | ||||
|     pytest.importorskip("pyarrow") | ||||
|     result = pd.Series(["E"], dtype=StringDtype(storage="pyarrow")) | ||||
|     assert isinstance(result.dtype, StringDtype) | ||||
|     assert result.dtype.storage == "pyarrow" | ||||
|  | ||||
|  | ||||
| def test_from_sequence_wrong_dtype_raises(using_infer_string): | ||||
|     # Calls _from_sequence on ArrowStringArray and StringArray with every way of | ||||
|     # spelling the dtype (alias, alias with storage, StringDtype instance) under | ||||
|     # both "string_storage" options. Calls outside pytest.raises must succeed; | ||||
|     # calls inside it must raise AssertionError. Some checks only run when | ||||
|     # ``using_infer_string`` is false. | ||||
|     pytest.importorskip("pyarrow") | ||||
|  | ||||
|     # --- ArrowStringArray: the plain "string" alias works under either option | ||||
|     with pd.option_context("string_storage", "python"): | ||||
|         ArrowStringArray._from_sequence(["a", None, "c"], dtype="string") | ||||
|  | ||||
|     with pd.option_context("string_storage", "pyarrow"): | ||||
|         ArrowStringArray._from_sequence(["a", None, "c"], dtype="string") | ||||
|  | ||||
|     # explicit python storage does not belong to ArrowStringArray | ||||
|     with pytest.raises(AssertionError, match=None): | ||||
|         ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[python]") | ||||
|  | ||||
|     ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]") | ||||
|  | ||||
|     # StringDtype() takes its storage from the option in effect | ||||
|     if not using_infer_string: | ||||
|         with pytest.raises(AssertionError, match=None): | ||||
|             with pd.option_context("string_storage", "python"): | ||||
|                 ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) | ||||
|  | ||||
|     with pd.option_context("string_storage", "pyarrow"): | ||||
|         ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) | ||||
|  | ||||
|     if not using_infer_string: | ||||
|         with pytest.raises(AssertionError, match=None): | ||||
|             ArrowStringArray._from_sequence( | ||||
|                 ["a", None, "c"], dtype=StringDtype("python") | ||||
|             ) | ||||
|  | ||||
|     ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow")) | ||||
|  | ||||
|     # --- StringArray: the same combinations with the storages swapped | ||||
|     with pd.option_context("string_storage", "python"): | ||||
|         StringArray._from_sequence(["a", None, "c"], dtype="string") | ||||
|  | ||||
|     with pd.option_context("string_storage", "pyarrow"): | ||||
|         StringArray._from_sequence(["a", None, "c"], dtype="string") | ||||
|  | ||||
|     StringArray._from_sequence(["a", None, "c"], dtype="string[python]") | ||||
|  | ||||
|     # explicit pyarrow storage does not belong to StringArray | ||||
|     with pytest.raises(AssertionError, match=None): | ||||
|         StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]") | ||||
|  | ||||
|     if not using_infer_string: | ||||
|         with pd.option_context("string_storage", "python"): | ||||
|             StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) | ||||
|  | ||||
|     if not using_infer_string: | ||||
|         with pytest.raises(AssertionError, match=None): | ||||
|             with pd.option_context("string_storage", "pyarrow"): | ||||
|                 StringArray._from_sequence(["a", None, "c"], dtype=StringDtype()) | ||||
|  | ||||
|     StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python")) | ||||
|  | ||||
|     with pytest.raises(AssertionError, match=None): | ||||
|         StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow")) | ||||
|  | ||||
|  | ||||
| @td.skip_if_installed("pyarrow") | ||||
| def test_pyarrow_not_installed_raises(): | ||||
|     # Going by the decorator name, this only runs when pyarrow is NOT installed. | ||||
|     # Each pyarrow-backed entry point below must then raise ImportError with the | ||||
|     # same minimum-version message. | ||||
|     msg = re.escape("pyarrow>=10.0.1 is required for PyArrow backed") | ||||
|  | ||||
|     with pytest.raises(ImportError, match=msg): | ||||
|         StringDtype(storage="pyarrow") | ||||
|  | ||||
|     with pytest.raises(ImportError, match=msg): | ||||
|         ArrowStringArray([]) | ||||
|  | ||||
|     with pytest.raises(ImportError, match=msg): | ||||
|         ArrowStringArrayNumpySemantics([]) | ||||
|  | ||||
|     with pytest.raises(ImportError, match=msg): | ||||
|         ArrowStringArray._from_sequence(["a", None, "b"]) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("multiple_chunks", [False, True]) | ||||
| @pytest.mark.parametrize( | ||||
|     "key, value, expected", | ||||
|     [ | ||||
|         # scalar positions (negative and positive), string and missing values | ||||
|         (-1, "XX", ["a", "b", "c", "d", "XX"]), | ||||
|         (1, "XX", ["a", "XX", "c", "d", "e"]), | ||||
|         (1, None, ["a", None, "c", "d", "e"]), | ||||
|         (1, pd.NA, ["a", None, "c", "d", "e"]), | ||||
|         # integer-list indexers with scalar or list values | ||||
|         ([1, 3], "XX", ["a", "XX", "c", "XX", "e"]), | ||||
|         ([1, 3], ["XX", "YY"], ["a", "XX", "c", "YY", "e"]), | ||||
|         ([1, 3], ["XX", None], ["a", "XX", "c", None, "e"]), | ||||
|         ([1, 3], ["XX", pd.NA], ["a", "XX", "c", None, "e"]), | ||||
|         ([0, -1], ["XX", "YY"], ["XX", "b", "c", "d", "YY"]), | ||||
|         ([-1, 0], ["XX", "YY"], ["YY", "b", "c", "d", "XX"]), | ||||
|         # slices, including a reversed one | ||||
|         (slice(3, None), "XX", ["a", "b", "c", "XX", "XX"]), | ||||
|         (slice(2, 4), ["XX", "YY"], ["a", "b", "XX", "YY", "e"]), | ||||
|         (slice(3, 1, -1), ["XX", "YY"], ["a", "b", "YY", "XX", "e"]), | ||||
|         (slice(None), "XX", ["XX", "XX", "XX", "XX", "XX"]), | ||||
|         # boolean mask | ||||
|         ([False, True, False, True, False], ["XX", "YY"], ["a", "XX", "c", "YY", "e"]), | ||||
|     ], | ||||
| ) | ||||
| def test_setitem(multiple_chunks, key, value, expected): | ||||
|     # __setitem__ on an ArrowStringArray holding "a".."e": after assigning | ||||
|     # ``value`` at ``key`` the array must equal ``expected``, for single-chunk | ||||
|     # and two-chunk backing data alike. | ||||
|     pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
|     result = pa.array(list("abcde")) | ||||
|     # the ``expected`` parameter (a list) is rebound to a pyarrow array here | ||||
|     expected = pa.array(expected) | ||||
|  | ||||
|     if multiple_chunks: | ||||
|         # split both arrays into two chunks at position 3 | ||||
|         result = pa.chunked_array([result[:3], result[3:]]) | ||||
|         expected = pa.chunked_array([expected[:3], expected[3:]]) | ||||
|  | ||||
|     result = ArrowStringArray(result) | ||||
|     expected = ArrowStringArray(expected) | ||||
|  | ||||
|     result[key] = value | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_setitem_invalid_indexer_raises(): | ||||
|     # Invalid __setitem__ calls on a 5-element ArrowStringArray: bad indexers | ||||
|     # must raise IndexError, a value/indexer length mismatch ValueError. | ||||
|     # ``match=None`` means the message text is not checked. | ||||
|     pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
|     arr = ArrowStringArray(pa.array(list("abcde"))) | ||||
|  | ||||
|     # scalar position past the end | ||||
|     with pytest.raises(IndexError, match=None): | ||||
|         arr[5] = "foo" | ||||
|  | ||||
|     # negative scalar position before the start | ||||
|     with pytest.raises(IndexError, match=None): | ||||
|         arr[-6] = "foo" | ||||
|  | ||||
|     # list indexers containing one out-of-range position | ||||
|     with pytest.raises(IndexError, match=None): | ||||
|         arr[[0, 5]] = "foo" | ||||
|  | ||||
|     with pytest.raises(IndexError, match=None): | ||||
|         arr[[0, -6]] = "foo" | ||||
|  | ||||
|     # boolean mask with 3 entries for a 5-element array | ||||
|     with pytest.raises(IndexError, match=None): | ||||
|         arr[[True, True, False]] = "foo" | ||||
|  | ||||
|     # three values for two positions | ||||
|     with pytest.raises(ValueError, match=None): | ||||
|         arr[[0, 1]] = ["foo", "bar", "baz"] | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("na_value", [pd.NA, np.nan]) | ||||
| def test_pickle_roundtrip(na_value): | ||||
|     # GH 42600 | ||||
|     # A pyarrow-backed string Series and a 2-row slice of it must both survive | ||||
|     # a pickle round trip, for either ``na_value``. | ||||
|     pytest.importorskip("pyarrow") | ||||
|     dtype = StringDtype("pyarrow", na_value=na_value) | ||||
|     expected = pd.Series(range(10), dtype=dtype) | ||||
|     expected_sliced = expected.head(2) | ||||
|     full_pickled = pickle.dumps(expected) | ||||
|     sliced_pickled = pickle.dumps(expected_sliced) | ||||
|  | ||||
|     # the pickle of the slice must be smaller than the pickle of the full Series | ||||
|     assert len(full_pickled) > len(sliced_pickled) | ||||
|  | ||||
|     result = pickle.loads(full_pickled) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result_sliced = pickle.loads(sliced_pickled) | ||||
|     tm.assert_series_equal(result_sliced, expected_sliced) | ||||
|  | ||||
|  | ||||
| def test_string_dtype_error_message(): | ||||
|     # GH#55051 | ||||
|     # An unknown storage name passed to StringDtype must raise ValueError with | ||||
|     # the message below. | ||||
|     pytest.importorskip("pyarrow") | ||||
|     msg = "Storage must be 'python' or 'pyarrow'." | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         StringDtype("bla") | ||||
							
								
								
									
										519
									
								
								lib/python3.11/site-packages/pandas/tests/arrays/test_array.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										519
									
								
								lib/python3.11/site-packages/pandas/tests/arrays/test_array.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,519 @@ | ||||
| import datetime | ||||
| import decimal | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.api.extensions import register_extension_dtype | ||||
| from pandas.arrays import ( | ||||
|     BooleanArray, | ||||
|     DatetimeArray, | ||||
|     FloatingArray, | ||||
|     IntegerArray, | ||||
|     IntervalArray, | ||||
|     SparseArray, | ||||
|     TimedeltaArray, | ||||
| ) | ||||
| from pandas.core.arrays import ( | ||||
|     NumpyExtensionArray, | ||||
|     period_array, | ||||
| ) | ||||
| from pandas.tests.extension.decimal import ( | ||||
|     DecimalArray, | ||||
|     DecimalDtype, | ||||
|     to_decimal, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype_unit", ["M8[h]", "M8[m]", "m8[h]", "M8[m]"]) | ||||
| def test_dt64_array(dtype_unit): | ||||
|     # PR 53817 | ||||
|     dtype_var = np.dtype(dtype_unit) | ||||
|     msg = ( | ||||
|         r"datetime64 and timedelta64 dtype resolutions other than " | ||||
|         r"'s', 'ms', 'us', and 'ns' are deprecated. " | ||||
|         r"In future releases passing unsupported resolutions will " | ||||
|         r"raise an exception." | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=re.escape(msg)): | ||||
|         pd.array([], dtype=dtype_var) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, dtype, expected", | ||||
|     [ | ||||
|         # Basic NumPy defaults. | ||||
|         ([], None, FloatingArray._from_sequence([], dtype="Float64")), | ||||
|         ([1, 2], None, IntegerArray._from_sequence([1, 2], dtype="Int64")), | ||||
|         ([1, 2], object, NumpyExtensionArray(np.array([1, 2], dtype=object))), | ||||
|         ( | ||||
|             [1, 2], | ||||
|             np.dtype("float32"), | ||||
|             NumpyExtensionArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([], dtype=object), | ||||
|             None, | ||||
|             NumpyExtensionArray(np.array([], dtype=object)), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([1, 2], dtype="int64"), | ||||
|             None, | ||||
|             IntegerArray._from_sequence([1, 2], dtype="Int64"), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([1.0, 2.0], dtype="float64"), | ||||
|             None, | ||||
|             FloatingArray._from_sequence([1.0, 2.0], dtype="Float64"), | ||||
|         ), | ||||
|         # String alias passes through to NumPy | ||||
|         ([1, 2], "float32", NumpyExtensionArray(np.array([1, 2], dtype="float32"))), | ||||
|         ([1, 2], "int64", NumpyExtensionArray(np.array([1, 2], dtype=np.int64))), | ||||
|         # GH#44715 FloatingArray does not support float16, so fall | ||||
|         #  back to NumpyExtensionArray | ||||
|         ( | ||||
|             np.array([1, 2], dtype=np.float16), | ||||
|             None, | ||||
|             NumpyExtensionArray(np.array([1, 2], dtype=np.float16)), | ||||
|         ), | ||||
|         # idempotency with e.g. pd.array(pd.array([1, 2], dtype="int64")) | ||||
|         ( | ||||
|             NumpyExtensionArray(np.array([1, 2], dtype=np.int32)), | ||||
|             None, | ||||
|             NumpyExtensionArray(np.array([1, 2], dtype=np.int32)), | ||||
|         ), | ||||
|         # Period alias | ||||
|         ( | ||||
|             [pd.Period("2000", "D"), pd.Period("2001", "D")], | ||||
|             "Period[D]", | ||||
|             period_array(["2000", "2001"], freq="D"), | ||||
|         ), | ||||
|         # Period dtype | ||||
|         ( | ||||
|             [pd.Period("2000", "D")], | ||||
|             pd.PeriodDtype("D"), | ||||
|             period_array(["2000"], freq="D"), | ||||
|         ), | ||||
|         # Datetime (naive) | ||||
|         ( | ||||
|             [1, 2], | ||||
|             np.dtype("datetime64[ns]"), | ||||
|             DatetimeArray._from_sequence( | ||||
|                 np.array([1, 2], dtype="M8[ns]"), dtype="M8[ns]" | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2], | ||||
|             np.dtype("datetime64[s]"), | ||||
|             DatetimeArray._from_sequence( | ||||
|                 np.array([1, 2], dtype="M8[s]"), dtype="M8[s]" | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([1, 2], dtype="datetime64[ns]"), | ||||
|             None, | ||||
|             DatetimeArray._from_sequence( | ||||
|                 np.array([1, 2], dtype="M8[ns]"), dtype="M8[ns]" | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             pd.DatetimeIndex(["2000", "2001"]), | ||||
|             np.dtype("datetime64[ns]"), | ||||
|             DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), | ||||
|         ), | ||||
|         ( | ||||
|             pd.DatetimeIndex(["2000", "2001"]), | ||||
|             None, | ||||
|             DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), | ||||
|         ), | ||||
|         ( | ||||
|             ["2000", "2001"], | ||||
|             np.dtype("datetime64[ns]"), | ||||
|             DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), | ||||
|         ), | ||||
|         # Datetime (tz-aware) | ||||
|         ( | ||||
|             ["2000", "2001"], | ||||
|             pd.DatetimeTZDtype(tz="CET"), | ||||
|             DatetimeArray._from_sequence( | ||||
|                 ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") | ||||
|             ), | ||||
|         ), | ||||
|         # Timedelta | ||||
|         ( | ||||
|             ["1h", "2h"], | ||||
|             np.dtype("timedelta64[ns]"), | ||||
|             TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"), | ||||
|         ), | ||||
|         ( | ||||
|             pd.TimedeltaIndex(["1h", "2h"]), | ||||
|             np.dtype("timedelta64[ns]"), | ||||
|             TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([1, 2], dtype="m8[s]"), | ||||
|             np.dtype("timedelta64[s]"), | ||||
|             TimedeltaArray._from_sequence( | ||||
|                 np.array([1, 2], dtype="m8[s]"), dtype="m8[s]" | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             pd.TimedeltaIndex(["1h", "2h"]), | ||||
|             None, | ||||
|             TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"), | ||||
|         ), | ||||
|         ( | ||||
|             # preserve non-nano, i.e. don't cast to NumpyExtensionArray | ||||
|             TimedeltaArray._simple_new( | ||||
|                 np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]") | ||||
|             ), | ||||
|             None, | ||||
|             TimedeltaArray._simple_new( | ||||
|                 np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]") | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             # preserve non-nano, i.e. don't cast to NumpyExtensionArray | ||||
|             TimedeltaArray._simple_new( | ||||
|                 np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]") | ||||
|             ), | ||||
|             np.dtype("m8[s]"), | ||||
|             TimedeltaArray._simple_new( | ||||
|                 np.arange(5, dtype=np.int64).view("m8[s]"), dtype=np.dtype("m8[s]") | ||||
|             ), | ||||
|         ), | ||||
|         # Category | ||||
|         (["a", "b"], "category", pd.Categorical(["a", "b"])), | ||||
|         ( | ||||
|             ["a", "b"], | ||||
|             pd.CategoricalDtype(None, ordered=True), | ||||
|             pd.Categorical(["a", "b"], ordered=True), | ||||
|         ), | ||||
|         # Interval | ||||
|         ( | ||||
|             [pd.Interval(1, 2), pd.Interval(3, 4)], | ||||
|             "interval", | ||||
|             IntervalArray.from_tuples([(1, 2), (3, 4)]), | ||||
|         ), | ||||
|         # Sparse | ||||
|         ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), | ||||
|         # IntegerNA | ||||
|         ([1, None], "Int16", pd.array([1, None], dtype="Int16")), | ||||
|         ( | ||||
|             pd.Series([1, 2]), | ||||
|             None, | ||||
|             NumpyExtensionArray(np.array([1, 2], dtype=np.int64)), | ||||
|         ), | ||||
|         # String | ||||
|         ( | ||||
|             ["a", None], | ||||
|             "string", | ||||
|             pd.StringDtype() | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", None], dtype=pd.StringDtype()), | ||||
|         ), | ||||
|         ( | ||||
|             ["a", None], | ||||
|             "str", | ||||
|             pd.StringDtype(na_value=np.nan) | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", None], dtype=pd.StringDtype(na_value=np.nan)) | ||||
|             if using_string_dtype() | ||||
|             else NumpyExtensionArray(np.array(["a", "None"])), | ||||
|         ), | ||||
|         ( | ||||
|             ["a", None], | ||||
|             pd.StringDtype(), | ||||
|             pd.StringDtype() | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", None], dtype=pd.StringDtype()), | ||||
|         ), | ||||
|         ( | ||||
|             ["a", None], | ||||
|             pd.StringDtype(na_value=np.nan), | ||||
|             pd.StringDtype(na_value=np.nan) | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", None], dtype=pd.StringDtype(na_value=np.nan)), | ||||
|         ), | ||||
|         ( | ||||
|             # numpy array with string dtype | ||||
|             np.array(["a", "b"], dtype=str), | ||||
|             pd.StringDtype(), | ||||
|             pd.StringDtype() | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", "b"], dtype=pd.StringDtype()), | ||||
|         ), | ||||
|         ( | ||||
|             # numpy array with string dtype | ||||
|             np.array(["a", "b"], dtype=str), | ||||
|             pd.StringDtype(na_value=np.nan), | ||||
|             pd.StringDtype(na_value=np.nan) | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)), | ||||
|         ), | ||||
|         # Boolean | ||||
|         ( | ||||
|             [True, None], | ||||
|             "boolean", | ||||
|             BooleanArray._from_sequence([True, None], dtype="boolean"), | ||||
|         ), | ||||
|         ( | ||||
|             [True, None], | ||||
|             pd.BooleanDtype(), | ||||
|             BooleanArray._from_sequence([True, None], dtype="boolean"), | ||||
|         ), | ||||
|         # Index | ||||
|         (pd.Index([1, 2]), None, NumpyExtensionArray(np.array([1, 2], dtype=np.int64))), | ||||
|         # Series[EA] returns the EA | ||||
|         ( | ||||
|             pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), | ||||
|             None, | ||||
|             pd.Categorical(["a", "b"], categories=["a", "b", "c"]), | ||||
|         ), | ||||
|         # "3rd party" EAs work | ||||
|         ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])), | ||||
|         # pass an ExtensionArray, but a different dtype | ||||
|         ( | ||||
|             period_array(["2000", "2001"], freq="D"), | ||||
|             "category", | ||||
|             pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_array(data, dtype, expected): | ||||
|     # pd.array(data, dtype=dtype) must equal ``expected`` for every parametrized | ||||
|     # case (``dtype`` is None in some of them). | ||||
|     result = pd.array(data, dtype=dtype) | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_array_copy(): | ||||
|     # Checks the ``copy`` argument of pd.array() on a numpy integer array: the | ||||
|     # result shares memory with the input only when copy=False. | ||||
|     a = np.array([1, 2]) | ||||
|     # default is to copy | ||||
|     b = pd.array(a, dtype=a.dtype) | ||||
|     assert not tm.shares_memory(a, b) | ||||
|  | ||||
|     # copy=True | ||||
|     b = pd.array(a, dtype=a.dtype, copy=True) | ||||
|     assert not tm.shares_memory(a, b) | ||||
|  | ||||
|     # copy=False | ||||
|     b = pd.array(a, dtype=a.dtype, copy=False) | ||||
|     assert tm.shares_memory(a, b) | ||||
|  | ||||
|  | ||||
| # pytz "CET" timezone object, used as tzinfo and as the DatetimeTZDtype tz in | ||||
| # the test_array_inference parametrization below. | ||||
| cet = pytz.timezone("CET") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, expected", | ||||
|     [ | ||||
|         # period | ||||
|         ( | ||||
|             [pd.Period("2000", "D"), pd.Period("2001", "D")], | ||||
|             period_array(["2000", "2001"], freq="D"), | ||||
|         ), | ||||
|         # interval | ||||
|         ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])), | ||||
|         # datetime | ||||
|         ( | ||||
|             [pd.Timestamp("2000"), pd.Timestamp("2001")], | ||||
|             DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), | ||||
|         ), | ||||
|         ( | ||||
|             [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], | ||||
|             DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([1, 2], dtype="M8[ns]"), | ||||
|             DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([1, 2], dtype="M8[us]"), | ||||
|             DatetimeArray._simple_new( | ||||
|                 np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]") | ||||
|             ), | ||||
|         ), | ||||
|         # datetimetz | ||||
|         ( | ||||
|             [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], | ||||
|             DatetimeArray._from_sequence( | ||||
|                 ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="ns") | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             [ | ||||
|                 datetime.datetime(2000, 1, 1, tzinfo=cet), | ||||
|                 datetime.datetime(2001, 1, 1, tzinfo=cet), | ||||
|             ], | ||||
|             DatetimeArray._from_sequence( | ||||
|                 ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="ns") | ||||
|             ), | ||||
|         ), | ||||
|         # timedelta | ||||
|         ( | ||||
|             [pd.Timedelta("1h"), pd.Timedelta("2h")], | ||||
|             TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([1, 2], dtype="m8[ns]"), | ||||
|             TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")), | ||||
|         ), | ||||
|         ( | ||||
|             np.array([1, 2], dtype="m8[us]"), | ||||
|             TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")), | ||||
|         ), | ||||
|         # integer | ||||
|         ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")), | ||||
|         ([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")), | ||||
|         ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")), | ||||
|         ([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")), | ||||
|         # float | ||||
|         ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")), | ||||
|         ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")), | ||||
|         ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")), | ||||
|         ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")), | ||||
|         # integer-like float | ||||
|         ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")), | ||||
|         ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")), | ||||
|         ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")), | ||||
|         ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")), | ||||
|         # mixed-integer-float | ||||
|         ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")), | ||||
|         ( | ||||
|             [1, np.nan, 2.0], | ||||
|             FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"), | ||||
|         ), | ||||
|         # string | ||||
|         ( | ||||
|             ["a", "b"], | ||||
|             pd.StringDtype() | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", "b"], dtype=pd.StringDtype()), | ||||
|         ), | ||||
|         ( | ||||
|             ["a", None], | ||||
|             pd.StringDtype() | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", None], dtype=pd.StringDtype()), | ||||
|         ), | ||||
|         ( | ||||
|             # numpy array with string dtype | ||||
|             np.array(["a", "b"], dtype=str), | ||||
|             pd.StringDtype() | ||||
|             .construct_array_type() | ||||
|             ._from_sequence(["a", "b"], dtype=pd.StringDtype()), | ||||
|         ), | ||||
|         # Boolean | ||||
|         ([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")), | ||||
|         ([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")), | ||||
|     ], | ||||
| ) | ||||
| def test_array_inference(data, expected): | ||||
|     # pd.array(data) called without a dtype must infer the array given as | ||||
|     # ``expected`` for every parametrized case. | ||||
|     result = pd.array(data) | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data", | ||||
|     [ | ||||
|         # mix of frequencies | ||||
|         [pd.Period("2000", "D"), pd.Period("2001", "Y")], | ||||
|         # mix of closed | ||||
|         [pd.Interval(0, 1, closed="left"), pd.Interval(1, 2, closed="right")], | ||||
|         # Mix of timezones | ||||
|         [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")], | ||||
|         # Mix of tz-aware and tz-naive | ||||
|         [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")], | ||||
|         np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]), | ||||
|     ], | ||||
| ) | ||||
| def test_array_inference_fails(data): | ||||
|     # Despite the name, nothing is expected to raise: for each mixed input | ||||
|     # pd.array() must return an object-dtype NumpyExtensionArray holding the | ||||
|     # original elements. | ||||
|     result = pd.array(data) | ||||
|     expected = NumpyExtensionArray(np.array(data, dtype=object)) | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("data", [np.array(0)]) | ||||
| def test_nd_raises(data): | ||||
|     with pytest.raises(ValueError, match="NumpyExtensionArray must be 1-dimensional"): | ||||
|         pd.array(data, dtype="int64") | ||||
|  | ||||
|  | ||||
| def test_scalar_raises(): | ||||
|     with pytest.raises(ValueError, match="Cannot pass scalar '1'"): | ||||
|         pd.array(1) | ||||
|  | ||||
|  | ||||
| def test_dataframe_raises(): | ||||
|     # GH#51167 don't accidentally cast to StringArray by doing inference on columns | ||||
|     df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) | ||||
|     msg = "Cannot pass DataFrame to 'pandas.array'" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         pd.array(df) | ||||
|  | ||||
|  | ||||
| def test_bounds_check(): | ||||
|     # GH21796 | ||||
|     with pytest.raises( | ||||
|         TypeError, match=r"cannot safely cast non-equivalent int(32|64) to uint16" | ||||
|     ): | ||||
|         pd.array([-1, 2, 3], dtype="UInt16") | ||||
|  | ||||
|  | ||||
| # --------------------------------------------------------------------------- | ||||
| # A couple dummy classes to ensure that Series and Indexes are unboxed before | ||||
| # getting to the EA classes. | ||||
|  | ||||
|  | ||||
| @register_extension_dtype | ||||
| class DecimalDtype2(DecimalDtype): | ||||
|     """DecimalDtype subclass named "decimal2" whose array type is DecimalArray2.""" | ||||
|  | ||||
|     # Name under which the dtype is looked up, e.g. pd.array(..., dtype="decimal2") | ||||
|     name = "decimal2" | ||||
|  | ||||
|     @classmethod | ||||
|     def construct_array_type(cls): | ||||
|         """ | ||||
|         Return the array type associated with this dtype. | ||||
|  | ||||
|         Returns | ||||
|         ------- | ||||
|         type | ||||
|             The DecimalArray2 class. | ||||
|         """ | ||||
|         return DecimalArray2 | ||||
|  | ||||
|  | ||||
| class DecimalArray2(DecimalArray): | ||||
|     @classmethod | ||||
|     def _from_sequence(cls, scalars, *, dtype=None, copy=False): | ||||
|         if isinstance(scalars, (pd.Series, pd.Index)): | ||||
|             raise TypeError("scalars should not be of type pd.Series or pd.Index") | ||||
|  | ||||
|         return super()._from_sequence(scalars, dtype=dtype, copy=copy) | ||||
|  | ||||
|  | ||||
| def test_array_unboxes(index_or_series): | ||||
|     box = index_or_series | ||||
|  | ||||
|     data = box([decimal.Decimal("1"), decimal.Decimal("2")]) | ||||
|     dtype = DecimalDtype2() | ||||
|     # make sure it works | ||||
|     with pytest.raises( | ||||
|         TypeError, match="scalars should not be of type pd.Series or pd.Index" | ||||
|     ): | ||||
|         DecimalArray2._from_sequence(data, dtype=dtype) | ||||
|  | ||||
|     result = pd.array(data, dtype="decimal2") | ||||
|     expected = DecimalArray2._from_sequence(data.values, dtype=dtype) | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_array_to_numpy_na(): | ||||
|     # GH#40638 | ||||
|     arr = pd.array([pd.NA, 1], dtype="string[python]") | ||||
|     result = arr.to_numpy(na_value=True, dtype=bool) | ||||
|     expected = np.array([True, True]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,840 @@ | ||||
| """ | ||||
| Tests for DatetimeArray | ||||
| """ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from datetime import timedelta | ||||
| import operator | ||||
|  | ||||
| try: | ||||
|     from zoneinfo import ZoneInfo | ||||
| except ImportError: | ||||
|     # Cannot assign to a type | ||||
|     ZoneInfo = None  # type: ignore[misc, assignment] | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import tz_compare | ||||
|  | ||||
| from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import ( | ||||
|     DatetimeArray, | ||||
|     TimedeltaArray, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestNonNano: | ||||
|     """Tests for DatetimeArray built on the non-nanosecond units "s", "ms", "us".""" | ||||
|  | ||||
|     @pytest.fixture(params=["s", "ms", "us"]) | ||||
|     def unit(self, request): | ||||
|         """Fixture returning parametrized time units""" | ||||
|         return request.param | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def dtype(self, unit, tz_naive_fixture): | ||||
|         """Fixture returning np.dtype for tz=None, else a DatetimeTZDtype, in `unit`.""" | ||||
|         tz = tz_naive_fixture | ||||
|         if tz is None: | ||||
|             return np.dtype(f"datetime64[{unit}]") | ||||
|         else: | ||||
|             return DatetimeTZDtype(unit=unit, tz=tz) | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def dta_dti(self, unit, dtype): | ||||
|         """Fixture returning a (DatetimeArray, DatetimeIndex) pair over the same dates. | ||||
|  | ||||
|         The index comes from pd.date_range; the array wraps those values cast | ||||
|         to M8[unit] and is given the parametrized dtype. | ||||
|         """ | ||||
|         tz = getattr(dtype, "tz", None) | ||||
|  | ||||
|         dti = pd.date_range("2016-01-01", periods=55, freq="D", tz=tz) | ||||
|         if tz is None: | ||||
|             arr = np.asarray(dti).astype(f"M8[{unit}]") | ||||
|         else: | ||||
|             # convert to UTC and drop the tz before casting to the target unit | ||||
|             arr = np.asarray(dti.tz_convert("UTC").tz_localize(None)).astype( | ||||
|                 f"M8[{unit}]" | ||||
|             ) | ||||
|  | ||||
|         dta = DatetimeArray._simple_new(arr, dtype=dtype) | ||||
|         return dta, dti | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def dta(self, dta_dti): | ||||
|         """Fixture returning only the DatetimeArray from dta_dti.""" | ||||
|         dta, dti = dta_dti | ||||
|         return dta | ||||
|  | ||||
|     def test_non_nano(self, unit, dtype): | ||||
|         """_simple_new keeps the dtype; scalars carry the array's unit and tz.""" | ||||
|         arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") | ||||
|         dta = DatetimeArray._simple_new(arr, dtype=dtype) | ||||
|  | ||||
|         assert dta.dtype == dtype | ||||
|         assert dta[0].unit == unit | ||||
|         assert tz_compare(dta.tz, dta[0].tz) | ||||
|         assert (dta[0] == dta[:1]).all() | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "field", DatetimeArray._field_ops + DatetimeArray._bool_ops | ||||
|     ) | ||||
|     def test_fields(self, unit, field, dtype, dta_dti): | ||||
|         """Each field/bool accessor gives the same ndarray for dta and dti._data.""" | ||||
|         dta, dti = dta_dti | ||||
|  | ||||
|         assert (dti == dta).all() | ||||
|  | ||||
|         res = getattr(dta, field) | ||||
|         expected = getattr(dti._data, field) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|     def test_normalize(self, unit): | ||||
|         """normalize() matches the normalized index values cast to `unit`.""" | ||||
|         dti = pd.date_range("2016-01-01 06:00:00", periods=55, freq="D") | ||||
|         arr = np.asarray(dti).astype(f"M8[{unit}]") | ||||
|  | ||||
|         dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) | ||||
|  | ||||
|         assert not dta.is_normalized | ||||
|  | ||||
|         # TODO: simplify once we can just .astype to other unit | ||||
|         exp = np.asarray(dti.normalize()).astype(f"M8[{unit}]") | ||||
|         expected = DatetimeArray._simple_new(exp, dtype=exp.dtype) | ||||
|  | ||||
|         res = dta.normalize() | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|     def test_simple_new_requires_match(self, unit): | ||||
|         """_simple_new raises AssertionError when the dtype unit is "ns" but the values are not.""" | ||||
|         arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]") | ||||
|         dtype = DatetimeTZDtype(unit, "UTC") | ||||
|  | ||||
|         dta = DatetimeArray._simple_new(arr, dtype=dtype) | ||||
|         assert dta.dtype == dtype | ||||
|  | ||||
|         wrong = DatetimeTZDtype("ns", "UTC") | ||||
|         # match="" accepts any message; only the exception type is checked | ||||
|         with pytest.raises(AssertionError, match=""): | ||||
|             DatetimeArray._simple_new(arr, dtype=wrong) | ||||
|  | ||||
|     def test_std_non_nano(self, unit): | ||||
|         """std() keeps the array's resolution and equals the index std floored to `unit`.""" | ||||
|         dti = pd.date_range("2016-01-01", periods=55, freq="D") | ||||
|         arr = np.asarray(dti).astype(f"M8[{unit}]") | ||||
|  | ||||
|         dta = DatetimeArray._simple_new(arr, dtype=arr.dtype) | ||||
|  | ||||
|         # we should match the nano-reso std, but floored to our reso. | ||||
|         res = dta.std() | ||||
|         assert res._creso == dta._creso | ||||
|         assert res == dti.std().floor(unit) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore:Converting to PeriodArray.*:UserWarning") | ||||
|     def test_to_period(self, dta_dti): | ||||
|         """to_period("D") gives equal results for dta and dti._data.""" | ||||
|         dta, dti = dta_dti | ||||
|         result = dta.to_period("D") | ||||
|         expected = dti._data.to_period("D") | ||||
|  | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_iter(self, dta): | ||||
|         """Iteration yields a pd.Timestamp identical to the one from indexing.""" | ||||
|         res = next(iter(dta)) | ||||
|         expected = dta[0] | ||||
|  | ||||
|         assert type(res) is pd.Timestamp | ||||
|         assert res._value == expected._value | ||||
|         assert res._creso == expected._creso | ||||
|         assert res == expected | ||||
|  | ||||
|     def test_astype_object(self, dta): | ||||
|         """astype(object) elements keep the array's _creso and equal the originals.""" | ||||
|         result = dta.astype(object) | ||||
|         assert all(x._creso == dta._creso for x in result) | ||||
|         assert all(x == y for x, y in zip(result, dta)) | ||||
|  | ||||
|     def test_to_pydatetime(self, dta_dti): | ||||
|         """to_pydatetime() gives the same ndarray for dta and dti.""" | ||||
|         dta, dti = dta_dti | ||||
|  | ||||
|         result = dta.to_pydatetime() | ||||
|         expected = dti.to_pydatetime() | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("meth", ["time", "timetz", "date"]) | ||||
|     def test_time_date(self, dta_dti, meth): | ||||
|         """The time, timetz and date attributes agree between dta and dti.""" | ||||
|         dta, dti = dta_dti | ||||
|  | ||||
|         result = getattr(dta, meth) | ||||
|         expected = getattr(dti, meth) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_format_native_types(self, unit, dtype, dta_dti): | ||||
|         """_format_native_types output is the same for dta and dti._data.""" | ||||
|         # In this case we should get the same formatted values with our nano | ||||
|         #  version dti._data as we do with the non-nano dta | ||||
|         dta, dti = dta_dti | ||||
|  | ||||
|         res = dta._format_native_types() | ||||
|         exp = dti._data._format_native_types() | ||||
|         tm.assert_numpy_array_equal(res, exp) | ||||
|  | ||||
|     def test_repr(self, dta_dti, unit): | ||||
|         """repr(dta) equals repr(dti._data) with "[ns" swapped for the unit.""" | ||||
|         dta, dti = dta_dti | ||||
|  | ||||
|         assert repr(dta) == repr(dti._data).replace("[ns", f"[{unit}") | ||||
|  | ||||
|     # TODO: tests with td64 | ||||
|     def test_compare_mismatched_resolutions(self, comparison_op): | ||||
|         """Comparing "ns" and "s" arrays built from the same extreme int64 values.""" | ||||
|         # comparison that numpy gets wrong bc of silent overflows | ||||
|         op = comparison_op | ||||
|  | ||||
|         iinfo = np.iinfo(np.int64) | ||||
|         vals = np.array([iinfo.min, iinfo.min + 1, iinfo.max], dtype=np.int64) | ||||
|  | ||||
|         # Construct so that arr2[1] < arr[1] < arr[2] < arr2[2] | ||||
|         arr = np.array(vals).view("M8[ns]") | ||||
|         arr2 = arr.view("M8[s]") | ||||
|  | ||||
|         left = DatetimeArray._simple_new(arr, dtype=arr.dtype) | ||||
|         right = DatetimeArray._simple_new(arr2, dtype=arr2.dtype) | ||||
|  | ||||
|         if comparison_op is operator.eq: | ||||
|             expected = np.array([False, False, False]) | ||||
|         elif comparison_op is operator.ne: | ||||
|             expected = np.array([True, True, True]) | ||||
|         elif comparison_op in [operator.lt, operator.le]: | ||||
|             expected = np.array([False, False, True]) | ||||
|         else: | ||||
|             # remaining operators (presumably gt and ge — depends on the fixture) | ||||
|             expected = np.array([False, True, False]) | ||||
|  | ||||
|         result = op(left, right) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         # the scalar left[1] against the whole right array gives the same mask | ||||
|         result = op(left[1], right) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         if op not in [operator.eq, operator.ne]: | ||||
|             # check that numpy still gets this wrong; if it is fixed we may be | ||||
|             #  able to remove compare_mismatched_resolutions | ||||
|             np_res = op(left._ndarray, right._ndarray) | ||||
|             tm.assert_numpy_array_equal(np_res[1:], ~expected[1:]) | ||||
|  | ||||
|     def test_add_mismatched_reso_doesnt_downcast(self): | ||||
|         """Adding a "us" Timedelta to a "us" array keeps unit "us".""" | ||||
|         # https://github.com/pandas-dev/pandas/pull/48748#issuecomment-1260181008 | ||||
|         td = pd.Timedelta(microseconds=1) | ||||
|         dti = pd.date_range("2016-01-01", periods=3) - td | ||||
|         dta = dti._data.as_unit("us") | ||||
|  | ||||
|         res = dta + td.as_unit("us") | ||||
|         # even though the result is an even number of days | ||||
|         #  (so we _could_ downcast to unit="s"), we do not. | ||||
|         assert res.unit == "us" | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "scalar", | ||||
|         [ | ||||
|             timedelta(hours=2), | ||||
|             pd.Timedelta(hours=2), | ||||
|             np.timedelta64(2, "h"), | ||||
|             np.timedelta64(2 * 3600 * 1000, "ms"), | ||||
|             pd.offsets.Minute(120), | ||||
|             pd.offsets.Hour(2), | ||||
|         ], | ||||
|     ) | ||||
|     def test_add_timedeltalike_scalar_mismatched_reso(self, dta_dti, scalar): | ||||
|         """dta +/- a timedelta-like scalar matches dti +/- it, in the unit from tm.get_finest_unit.""" | ||||
|         dta, dti = dta_dti | ||||
|  | ||||
|         td = pd.Timedelta(scalar) | ||||
|         exp_unit = tm.get_finest_unit(dta.unit, td.unit) | ||||
|  | ||||
|         expected = (dti + td)._data.as_unit(exp_unit) | ||||
|         result = dta + scalar | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # reflected addition | ||||
|         result = scalar + dta | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         expected = (dti - td)._data.as_unit(exp_unit) | ||||
|         result = dta - scalar | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_sub_datetimelike_scalar_mismatch(self): | ||||
|         """Subtracting a "s"-unit Timestamp from a "us" array gives dtype m8[us].""" | ||||
|         dti = pd.date_range("2016-01-01", periods=3) | ||||
|         dta = dti._data.as_unit("us") | ||||
|  | ||||
|         ts = dta[0].as_unit("s") | ||||
|  | ||||
|         result = dta - ts | ||||
|         expected = (dti - dti[0])._data.as_unit("us") | ||||
|         assert result.dtype == "m8[us]" | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_sub_datetime64_reso_mismatch(self): | ||||
|         """Subtracting "s" and "ms" arrays, in either order, gives zeros in m8[ms].""" | ||||
|         dti = pd.date_range("2016-01-01", periods=3) | ||||
|         left = dti._data.as_unit("s") | ||||
|         right = left.as_unit("ms") | ||||
|  | ||||
|         result = left - right | ||||
|         exp_values = np.array([0, 0, 0], dtype="m8[ms]") | ||||
|         expected = TimedeltaArray._simple_new( | ||||
|             exp_values, | ||||
|             dtype=exp_values.dtype, | ||||
|         ) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|         result2 = right - left | ||||
|         tm.assert_extension_array_equal(result2, expected) | ||||
|  | ||||
|  | ||||
| class TestDatetimeArrayComparisons: | ||||
|     # TODO: merge this into tests/arithmetic/test_datetime64 once it is | ||||
|     #  sufficiently robust | ||||
|  | ||||
|     def test_cmp_dt64_arraylike_tznaive(self, comparison_op): | ||||
|         # arbitrary tz-naive DatetimeIndex | ||||
|         op = comparison_op | ||||
|  | ||||
|         dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None) | ||||
|         arr = dti._data | ||||
|         assert arr.freq == dti.freq | ||||
|         assert arr.tz == dti.tz | ||||
|  | ||||
|         right = dti | ||||
|  | ||||
|         expected = np.ones(len(arr), dtype=bool) | ||||
|         if comparison_op.__name__ in ["ne", "gt", "lt"]: | ||||
|             # for these the comparisons should be all-False | ||||
|             expected = ~expected | ||||
|  | ||||
|         result = op(arr, arr) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|         for other in [ | ||||
|             right, | ||||
|             np.array(right), | ||||
|             list(right), | ||||
|             tuple(right), | ||||
|             right.astype(object), | ||||
|         ]: | ||||
|             result = op(arr, other) | ||||
|             tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|             result = op(other, arr) | ||||
|             tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestDatetimeArray: | ||||
|     def test_astype_ns_to_ms_near_bounds(self): | ||||
|         # GH#55979 | ||||
|         ts = pd.Timestamp("1677-09-21 00:12:43.145225") | ||||
|         target = ts.as_unit("ms") | ||||
|  | ||||
|         dta = DatetimeArray._from_sequence([ts], dtype="M8[ns]") | ||||
|         assert (dta.view("i8") == ts.as_unit("ns").value).all() | ||||
|  | ||||
|         result = dta.astype("M8[ms]") | ||||
|         assert result[0] == target | ||||
|  | ||||
|         expected = DatetimeArray._from_sequence([ts], dtype="M8[ms]") | ||||
|         assert (expected.view("i8") == target._value).all() | ||||
|  | ||||
|         tm.assert_datetime_array_equal(result, expected) | ||||
|  | ||||
|     def test_astype_non_nano_tznaive(self): | ||||
|         dti = pd.date_range("2016-01-01", periods=3) | ||||
|  | ||||
|         res = dti.astype("M8[s]") | ||||
|         assert res.dtype == "M8[s]" | ||||
|  | ||||
|         dta = dti._data | ||||
|         res = dta.astype("M8[s]") | ||||
|         assert res.dtype == "M8[s]" | ||||
|         assert isinstance(res, pd.core.arrays.DatetimeArray)  # used to be ndarray | ||||
|  | ||||
|     def test_astype_non_nano_tzaware(self): | ||||
|         dti = pd.date_range("2016-01-01", periods=3, tz="UTC") | ||||
|  | ||||
|         res = dti.astype("M8[s, US/Pacific]") | ||||
|         assert res.dtype == "M8[s, US/Pacific]" | ||||
|  | ||||
|         dta = dti._data | ||||
|         res = dta.astype("M8[s, US/Pacific]") | ||||
|         assert res.dtype == "M8[s, US/Pacific]" | ||||
|  | ||||
|         # from non-nano to non-nano, preserving reso | ||||
|         res2 = res.astype("M8[s, UTC]") | ||||
|         assert res2.dtype == "M8[s, UTC]" | ||||
|         assert not tm.shares_memory(res2, res) | ||||
|  | ||||
|         res3 = res.astype("M8[s, UTC]", copy=False) | ||||
|         assert res2.dtype == "M8[s, UTC]" | ||||
|         assert tm.shares_memory(res3, res) | ||||
|  | ||||
|     def test_astype_to_same(self): | ||||
|         arr = DatetimeArray._from_sequence( | ||||
|             ["2000"], dtype=DatetimeTZDtype(tz="US/Central") | ||||
|         ) | ||||
|         result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) | ||||
|         assert result is arr | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"] | ||||
|     ) | ||||
|     def test_astype_copies(self, dtype, other): | ||||
|         # https://github.com/pandas-dev/pandas/pull/32490 | ||||
|         ser = pd.Series([1, 2], dtype=dtype) | ||||
|         orig = ser.copy() | ||||
|  | ||||
|         err = False | ||||
|         if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"): | ||||
|             # deprecated in favor of tz_localize | ||||
|             err = True | ||||
|  | ||||
|         if err: | ||||
|             if dtype == "datetime64[ns]": | ||||
|                 msg = "Use obj.tz_localize instead or series.dt.tz_localize instead" | ||||
|             else: | ||||
|                 msg = "from timezone-aware dtype to timezone-naive dtype" | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 ser.astype(other) | ||||
|         else: | ||||
|             t = ser.astype(other) | ||||
|             t[:] = pd.NaT | ||||
|             tm.assert_series_equal(ser, orig) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) | ||||
|     def test_astype_int(self, dtype): | ||||
|         arr = DatetimeArray._from_sequence( | ||||
|             [pd.Timestamp("2000"), pd.Timestamp("2001")], dtype="M8[ns]" | ||||
|         ) | ||||
|  | ||||
|         if np.dtype(dtype) != np.int64: | ||||
|             with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"): | ||||
|                 arr.astype(dtype) | ||||
|             return | ||||
|  | ||||
|         result = arr.astype(dtype) | ||||
|         expected = arr._ndarray.view("i8") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_astype_to_sparse_dt64(self): | ||||
|         # GH#50082 | ||||
|         dti = pd.date_range("2016-01-01", periods=4) | ||||
|         dta = dti._data | ||||
|         result = dta.astype("Sparse[datetime64[ns]]") | ||||
|  | ||||
|         assert result.dtype == "Sparse[datetime64[ns]]" | ||||
|         assert (result == dta).all() | ||||
|  | ||||
|     def test_tz_setter_raises(self): | ||||
|         arr = DatetimeArray._from_sequence( | ||||
|             ["2000"], dtype=DatetimeTZDtype(tz="US/Central") | ||||
|         ) | ||||
|         with pytest.raises(AttributeError, match="tz_localize"): | ||||
|             arr.tz = "UTC" | ||||
|  | ||||
|     def test_setitem_str_impute_tz(self, tz_naive_fixture): | ||||
|         # Like for getitem, if we are passed a naive-like string, we impute | ||||
|         #  our own timezone. | ||||
|         tz = tz_naive_fixture | ||||
|  | ||||
|         data = np.array([1, 2, 3], dtype="M8[ns]") | ||||
|         dtype = data.dtype if tz is None else DatetimeTZDtype(tz=tz) | ||||
|         arr = DatetimeArray._from_sequence(data, dtype=dtype) | ||||
|         expected = arr.copy() | ||||
|  | ||||
|         ts = pd.Timestamp("2020-09-08 16:50").tz_localize(tz) | ||||
|         setter = str(ts.tz_localize(None)) | ||||
|  | ||||
|         # Setting a scalar tznaive string | ||||
|         expected[0] = ts | ||||
|         arr[0] = setter | ||||
|         tm.assert_equal(arr, expected) | ||||
|  | ||||
|         # Setting a listlike of tznaive strings | ||||
|         expected[1] = ts | ||||
|         arr[:2] = [setter, setter] | ||||
|         tm.assert_equal(arr, expected) | ||||
|  | ||||
|     def test_setitem_different_tz_raises(self): | ||||
|         # pre-2.0 we required exact tz match, in 2.0 we require only | ||||
|         #  tzawareness-match | ||||
|         data = np.array([1, 2, 3], dtype="M8[ns]") | ||||
|         arr = DatetimeArray._from_sequence( | ||||
|             data, copy=False, dtype=DatetimeTZDtype(tz="US/Central") | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): | ||||
|             arr[0] = pd.Timestamp("2000") | ||||
|  | ||||
|         ts = pd.Timestamp("2000", tz="US/Eastern") | ||||
|         arr[0] = ts | ||||
|         assert arr[0] == ts.tz_convert("US/Central") | ||||
|  | ||||
|     def test_setitem_clears_freq(self): | ||||
|         a = pd.date_range("2000", periods=2, freq="D", tz="US/Central")._data | ||||
|         a[0] = pd.Timestamp("2000", tz="US/Central") | ||||
|         assert a.freq is None | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "obj", | ||||
|         [ | ||||
|             pd.Timestamp("2021-01-01"), | ||||
|             pd.Timestamp("2021-01-01").to_datetime64(), | ||||
|             pd.Timestamp("2021-01-01").to_pydatetime(), | ||||
|         ], | ||||
|     ) | ||||
|     def test_setitem_objects(self, obj): | ||||
|         # make sure we accept datetime64 and datetime in addition to Timestamp | ||||
|         dti = pd.date_range("2000", periods=2, freq="D") | ||||
|         arr = dti._data | ||||
|  | ||||
|         arr[0] = obj | ||||
|         assert arr[0] == obj | ||||
|  | ||||
|     def test_repeat_preserves_tz(self): | ||||
|         dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") | ||||
|         arr = dti._data | ||||
|  | ||||
|         repeated = arr.repeat([1, 1]) | ||||
|  | ||||
|         # preserves tz and values, but not freq | ||||
|         expected = DatetimeArray._from_sequence(arr.asi8, dtype=arr.dtype) | ||||
|         tm.assert_equal(repeated, expected) | ||||
|  | ||||
|     def test_value_counts_preserves_tz(self): | ||||
|         dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") | ||||
|         arr = dti._data.repeat([4, 3]) | ||||
|  | ||||
|         result = arr.value_counts() | ||||
|  | ||||
|         # Note: not tm.assert_index_equal, since `freq`s do not match | ||||
|         assert result.index.equals(dti) | ||||
|  | ||||
|         arr[-2] = pd.NaT | ||||
|         result = arr.value_counts(dropna=False) | ||||
|         expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT], name="count") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["pad", "backfill"]) | ||||
|     def test_fillna_preserves_tz(self, method): | ||||
|         dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central") | ||||
|         arr = DatetimeArray._from_sequence(dti, copy=True) | ||||
|         arr[2] = pd.NaT | ||||
|  | ||||
|         fill_val = dti[1] if method == "pad" else dti[3] | ||||
|         expected = DatetimeArray._from_sequence( | ||||
|             [dti[0], dti[1], fill_val, dti[3], dti[4]], | ||||
|             dtype=DatetimeTZDtype(tz="US/Central"), | ||||
|         ) | ||||
|  | ||||
|         result = arr._pad_or_backfill(method=method) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         # assert that arr and dti were not modified in-place | ||||
|         assert arr[2] is pd.NaT | ||||
|         assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central") | ||||
|  | ||||
|     def test_fillna_2d(self): | ||||
|         dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific") | ||||
|         dta = dti._data.reshape(3, 2).copy() | ||||
|         dta[0, 1] = pd.NaT | ||||
|         dta[1, 0] = pd.NaT | ||||
|  | ||||
|         res1 = dta._pad_or_backfill(method="pad") | ||||
|         expected1 = dta.copy() | ||||
|         expected1[1, 0] = dta[0, 0] | ||||
|         tm.assert_extension_array_equal(res1, expected1) | ||||
|  | ||||
|         res2 = dta._pad_or_backfill(method="backfill") | ||||
|         expected2 = dta.copy() | ||||
|         expected2 = dta.copy() | ||||
|         expected2[1, 0] = dta[2, 0] | ||||
|         expected2[0, 1] = dta[1, 1] | ||||
|         tm.assert_extension_array_equal(res2, expected2) | ||||
|  | ||||
|         # with different ordering for underlying ndarray; behavior should | ||||
|         #  be unchanged | ||||
|         dta2 = dta._from_backing_data(dta._ndarray.copy(order="F")) | ||||
|         assert dta2._ndarray.flags["F_CONTIGUOUS"] | ||||
|         assert not dta2._ndarray.flags["C_CONTIGUOUS"] | ||||
|         tm.assert_extension_array_equal(dta, dta2) | ||||
|  | ||||
|         res3 = dta2._pad_or_backfill(method="pad") | ||||
|         tm.assert_extension_array_equal(res3, expected1) | ||||
|  | ||||
|         res4 = dta2._pad_or_backfill(method="backfill") | ||||
|         tm.assert_extension_array_equal(res4, expected2) | ||||
|  | ||||
|         # test the DataFrame method while we're here | ||||
|         df = pd.DataFrame(dta) | ||||
|         res = df.ffill() | ||||
|         expected = pd.DataFrame(expected1) | ||||
|         tm.assert_frame_equal(res, expected) | ||||
|  | ||||
|         res = df.bfill() | ||||
|         expected = pd.DataFrame(expected2) | ||||
|         tm.assert_frame_equal(res, expected) | ||||
|  | ||||
|     def test_array_interface_tz(self): | ||||
|         tz = "US/Central" | ||||
|         data = pd.date_range("2017", periods=2, tz=tz)._data | ||||
|         result = np.asarray(data) | ||||
|  | ||||
|         expected = np.array( | ||||
|             [ | ||||
|                 pd.Timestamp("2017-01-01T00:00:00", tz=tz), | ||||
|                 pd.Timestamp("2017-01-02T00:00:00", tz=tz), | ||||
|             ], | ||||
|             dtype=object, | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = np.asarray(data, dtype=object) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = np.asarray(data, dtype="M8[ns]") | ||||
|  | ||||
|         expected = np.array( | ||||
|             ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]" | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_array_interface(self): | ||||
|         data = pd.date_range("2017", periods=2)._data | ||||
|         expected = np.array( | ||||
|             ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]" | ||||
|         ) | ||||
|  | ||||
|         result = np.asarray(data) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|         result = np.asarray(data, dtype=object) | ||||
|         expected = np.array( | ||||
|             [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")], | ||||
|             dtype=object, | ||||
|         ) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("index", [True, False]) | ||||
|     def test_searchsorted_different_tz(self, index): | ||||
|         data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 | ||||
|         arr = pd.DatetimeIndex(data, freq="D")._data.tz_localize("Asia/Tokyo") | ||||
|         if index: | ||||
|             arr = pd.Index(arr) | ||||
|  | ||||
|         expected = arr.searchsorted(arr[2]) | ||||
|         result = arr.searchsorted(arr[2].tz_convert("UTC")) | ||||
|         assert result == expected | ||||
|  | ||||
|         expected = arr.searchsorted(arr[2:6]) | ||||
|         result = arr.searchsorted(arr[2:6].tz_convert("UTC")) | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("index", [True, False]) | ||||
|     def test_searchsorted_tzawareness_compat(self, index): | ||||
|         data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 | ||||
|         arr = pd.DatetimeIndex(data, freq="D")._data | ||||
|         if index: | ||||
|             arr = pd.Index(arr) | ||||
|  | ||||
|         mismatch = arr.tz_localize("Asia/Tokyo") | ||||
|  | ||||
|         msg = "Cannot compare tz-naive and tz-aware datetime-like objects" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr.searchsorted(mismatch[0]) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr.searchsorted(mismatch) | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             mismatch.searchsorted(arr[0]) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             mismatch.searchsorted(arr) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [ | ||||
|             1, | ||||
|             np.int64(1), | ||||
|             1.0, | ||||
|             np.timedelta64("NaT"), | ||||
|             pd.Timedelta(days=2), | ||||
|             "invalid", | ||||
|             np.arange(10, dtype="i8") * 24 * 3600 * 10**9, | ||||
|             np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10**9, | ||||
|             pd.Timestamp("2021-01-01").to_period("D"), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("index", [True, False]) | ||||
|     def test_searchsorted_invalid_types(self, other, index): | ||||
|         data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 | ||||
|         arr = pd.DatetimeIndex(data, freq="D")._data | ||||
|         if index: | ||||
|             arr = pd.Index(arr) | ||||
|  | ||||
|         msg = "|".join( | ||||
|             [ | ||||
|                 "searchsorted requires compatible dtype or scalar", | ||||
|                 "value should be a 'Timestamp', 'NaT', or array of those. Got", | ||||
|             ] | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr.searchsorted(other) | ||||
|  | ||||
|     def test_shift_fill_value(self): | ||||
|         dti = pd.date_range("2016-01-01", periods=3) | ||||
|  | ||||
|         dta = dti._data | ||||
|         expected = DatetimeArray._from_sequence(np.roll(dta._ndarray, 1)) | ||||
|  | ||||
|         fv = dta[-1] | ||||
|         for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]: | ||||
|             result = dta.shift(1, fill_value=fill_value) | ||||
|             tm.assert_datetime_array_equal(result, expected) | ||||
|  | ||||
|         dta = dta.tz_localize("UTC") | ||||
|         expected = expected.tz_localize("UTC") | ||||
|         fv = dta[-1] | ||||
|         for fill_value in [fv, fv.to_pydatetime()]: | ||||
|             result = dta.shift(1, fill_value=fill_value) | ||||
|             tm.assert_datetime_array_equal(result, expected) | ||||
|  | ||||
|     def test_shift_value_tzawareness_mismatch(self): | ||||
|         dti = pd.date_range("2016-01-01", periods=3) | ||||
|  | ||||
|         dta = dti._data | ||||
|  | ||||
|         fv = dta[-1].tz_localize("UTC") | ||||
|         for invalid in [fv, fv.to_pydatetime()]: | ||||
|             with pytest.raises(TypeError, match="Cannot compare"): | ||||
|                 dta.shift(1, fill_value=invalid) | ||||
|  | ||||
|         dta = dta.tz_localize("UTC") | ||||
|         fv = dta[-1].tz_localize(None) | ||||
|         for invalid in [fv, fv.to_pydatetime(), fv.to_datetime64()]: | ||||
|             with pytest.raises(TypeError, match="Cannot compare"): | ||||
|                 dta.shift(1, fill_value=invalid) | ||||
|  | ||||
|     def test_shift_requires_tzmatch(self): | ||||
|         # pre-2.0 we required exact tz match, in 2.0 we require just | ||||
|         #  matching tzawareness | ||||
|         dti = pd.date_range("2016-01-01", periods=3, tz="UTC") | ||||
|         dta = dti._data | ||||
|  | ||||
|         fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific") | ||||
|  | ||||
|         result = dta.shift(1, fill_value=fill_value) | ||||
|         expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC")) | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     def test_tz_localize_t2d(self): | ||||
|         dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific") | ||||
|         dta = dti._data.reshape(3, 4) | ||||
|         result = dta.tz_localize(None) | ||||
|  | ||||
|         expected = dta.ravel().tz_localize(None).reshape(dta.shape) | ||||
|         tm.assert_datetime_array_equal(result, expected) | ||||
|  | ||||
|         roundtrip = expected.tz_localize("US/Pacific") | ||||
|         tm.assert_datetime_array_equal(roundtrip, dta) | ||||
|  | ||||
|     # Time zones used to parametrize test_iter_zoneinfo_fold below. | ||||
|     easts = ["US/Eastern", "dateutil/US/Eastern"] | ||||
|     # ZoneInfo is bound outside this block; when it is None no extra case is added. | ||||
|     if ZoneInfo is not None: | ||||
|         try: | ||||
|             tz = ZoneInfo("US/Eastern") | ||||
|         except KeyError: | ||||
|             # no tzdata | ||||
|             pass | ||||
|         else: | ||||
|             # Argument 1 to "append" of "list" has incompatible type "ZoneInfo"; | ||||
|             # expected "str" | ||||
|             easts.append(tz)  # type: ignore[arg-type] | ||||
|  | ||||
|     @pytest.mark.parametrize("tz", easts) | ||||
|     def test_iter_zoneinfo_fold(self, tz): | ||||
|         # GH#49684 | ||||
|         utc_vals = np.array( | ||||
|             [1320552000, 1320555600, 1320559200, 1320562800], dtype=np.int64 | ||||
|         ) | ||||
|         utc_vals *= 1_000_000_000 | ||||
|  | ||||
|         dta = DatetimeArray._from_sequence(utc_vals).tz_localize("UTC").tz_convert(tz) | ||||
|  | ||||
|         left = dta[2] | ||||
|         right = list(dta)[2] | ||||
|         assert str(left) == str(right) | ||||
|         # previously there was a bug where with non-pytz right would be | ||||
|         #  Timestamp('2011-11-06 01:00:00-0400', tz='US/Eastern') | ||||
|         # while left would be | ||||
|         #  Timestamp('2011-11-06 01:00:00-0500', tz='US/Eastern') | ||||
|         # The .value's would match (so they would compare as equal), | ||||
|         #  but the folds would not | ||||
|         assert left.utcoffset() == right.utcoffset() | ||||
|  | ||||
|         # The same bug in ints_to_pydatetime affected .astype, so we test | ||||
|         #  that here. | ||||
|         right2 = dta.astype(object)[2] | ||||
|         assert str(left) == str(right2) | ||||
|         assert left.utcoffset() == right2.utcoffset() | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "freq, freq_depr", | ||||
|         [ | ||||
|             ("2ME", "2M"), | ||||
|             ("2SME", "2SM"), | ||||
|             ("2SME", "2sm"), | ||||
|             ("2QE", "2Q"), | ||||
|             ("2QE-SEP", "2Q-SEP"), | ||||
|             ("1YE", "1Y"), | ||||
|             ("2YE-MAR", "2Y-MAR"), | ||||
|             ("1YE", "1A"), | ||||
|             ("2YE-MAR", "2A-MAR"), | ||||
|             ("2ME", "2m"), | ||||
|             ("2QE-SEP", "2q-sep"), | ||||
|             ("2YE-MAR", "2a-mar"), | ||||
|             ("2YE", "2y"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_date_range_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr): | ||||
|         # GH#9586, GH#54275 | ||||
|         depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed " | ||||
|         f"in a future version, please use '{freq[1:]}' instead." | ||||
|  | ||||
|         expected = pd.date_range("1/1/2000", periods=4, freq=freq) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             result = pd.date_range("1/1/2000", periods=4, freq=freq_depr) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("freq_depr", ["2H", "2CBH", "2MIN", "2S", "2mS", "2Us"]) | ||||
|     def test_date_range_uppercase_frequency_deprecated(self, freq_depr): | ||||
|         # GH#9586, GH#54939 | ||||
|         depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a " | ||||
|         f"future version. Please use '{freq_depr.lower()[1:]}' instead." | ||||
|  | ||||
|         expected = pd.date_range("1/1/2000", periods=4, freq=freq_depr.lower()) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             result = pd.date_range("1/1/2000", periods=4, freq=freq_depr) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "freq_depr", | ||||
|         [ | ||||
|             "2ye-mar", | ||||
|             "2ys", | ||||
|             "2qe", | ||||
|             "2qs-feb", | ||||
|             "2bqs", | ||||
|             "2sms", | ||||
|             "2bms", | ||||
|             "2cbme", | ||||
|             "2me", | ||||
|             "2w", | ||||
|         ], | ||||
|     ) | ||||
|     def test_date_range_lowercase_frequency_deprecated(self, freq_depr): | ||||
|         # GH#9586, GH#54939 | ||||
|         depr_msg = f"'{freq_depr[1:]}' is deprecated and will be removed in a " | ||||
|         f"future version, please use '{freq_depr.upper()[1:]}' instead." | ||||
|  | ||||
|         expected = pd.date_range("1/1/2000", periods=4, freq=freq_depr.upper()) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             result = pd.date_range("1/1/2000", periods=4, freq=freq_depr) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_factorize_sort_without_freq(): | ||||
|     dta = DatetimeArray._from_sequence([0, 2, 1], dtype="M8[ns]") | ||||
|  | ||||
|     msg = r"call pd.factorize\(obj, sort=True\) instead" | ||||
|     with pytest.raises(NotImplementedError, match=msg): | ||||
|         dta.factorize(sort=True) | ||||
|  | ||||
|     # Do TimedeltaArray while we're here | ||||
|     tda = dta - dta[0] | ||||
|     with pytest.raises(NotImplementedError, match=msg): | ||||
|         tda.factorize(sort=True) | ||||
| @ -0,0 +1,75 @@ | ||||
| """ | ||||
| Tests for subclasses of NDArrayBackedExtensionArray | ||||
| """ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     date_range, | ||||
| ) | ||||
| from pandas.core.arrays import ( | ||||
|     Categorical, | ||||
|     DatetimeArray, | ||||
|     NumpyExtensionArray, | ||||
|     TimedeltaArray, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestEmpty: | ||||
|     def test_empty_categorical(self): | ||||
|         ci = CategoricalIndex(["a", "b", "c"], ordered=True) | ||||
|         dtype = ci.dtype | ||||
|  | ||||
|         # case with int8 codes | ||||
|         shape = (4,) | ||||
|         result = Categorical._empty(shape, dtype=dtype) | ||||
|         assert isinstance(result, Categorical) | ||||
|         assert result.shape == shape | ||||
|         assert result._ndarray.dtype == np.int8 | ||||
|  | ||||
|         # case where repr would segfault if we didn't override base implementation | ||||
|         result = Categorical._empty((4096,), dtype=dtype) | ||||
|         assert isinstance(result, Categorical) | ||||
|         assert result.shape == (4096,) | ||||
|         assert result._ndarray.dtype == np.int8 | ||||
|         repr(result) | ||||
|  | ||||
|         # case with int16 codes | ||||
|         ci = CategoricalIndex(list(range(512)) * 4, ordered=False) | ||||
|         dtype = ci.dtype | ||||
|         result = Categorical._empty(shape, dtype=dtype) | ||||
|         assert isinstance(result, Categorical) | ||||
|         assert result.shape == shape | ||||
|         assert result._ndarray.dtype == np.int16 | ||||
|  | ||||
|     def test_empty_dt64tz(self): | ||||
|         dti = date_range("2016-01-01", periods=2, tz="Asia/Tokyo") | ||||
|         dtype = dti.dtype | ||||
|  | ||||
|         shape = (0,) | ||||
|         result = DatetimeArray._empty(shape, dtype=dtype) | ||||
|         assert result.dtype == dtype | ||||
|         assert isinstance(result, DatetimeArray) | ||||
|         assert result.shape == shape | ||||
|  | ||||
|     def test_empty_dt64(self): | ||||
|         shape = (3, 9) | ||||
|         result = DatetimeArray._empty(shape, dtype="datetime64[ns]") | ||||
|         assert isinstance(result, DatetimeArray) | ||||
|         assert result.shape == shape | ||||
|  | ||||
|     def test_empty_td64(self): | ||||
|         shape = (3, 9) | ||||
|         result = TimedeltaArray._empty(shape, dtype="m8[ns]") | ||||
|         assert isinstance(result, TimedeltaArray) | ||||
|         assert result.shape == shape | ||||
|  | ||||
|     def test_empty_pandas_array(self): | ||||
|         arr = NumpyExtensionArray(np.array([1, 2])) | ||||
|         dtype = arr.dtype | ||||
|  | ||||
|         shape = (3, 9) | ||||
|         result = NumpyExtensionArray._empty(shape, dtype=dtype) | ||||
|         assert isinstance(result, NumpyExtensionArray) | ||||
|         assert result.dtype == dtype | ||||
|         assert result.shape == shape | ||||
							
								
								
									
										184
									
								
								lib/python3.11/site-packages/pandas/tests/arrays/test_period.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										184
									
								
								lib/python3.11/site-packages/pandas/tests/arrays/test_period.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,184 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import iNaT | ||||
| from pandas._libs.tslibs.period import IncompatibleFrequency | ||||
|  | ||||
| from pandas.core.dtypes.base import _registry as registry | ||||
| from pandas.core.dtypes.dtypes import PeriodDtype | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import PeriodArray | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Dtype | ||||
|  | ||||
|  | ||||
| def test_registered(): | ||||
|     assert PeriodDtype in registry.dtypes | ||||
|     result = registry.find("Period[D]") | ||||
|     expected = PeriodDtype("D") | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # period_array | ||||
|  | ||||
|  | ||||
| def test_asi8(): | ||||
|     result = PeriodArray._from_sequence(["2000", "2001", None], dtype="period[D]").asi8 | ||||
|     expected = np.array([10957, 11323, iNaT]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_take_raises(): | ||||
|     arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]") | ||||
|     with pytest.raises(IncompatibleFrequency, match="freq"): | ||||
|         arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W")) | ||||
|  | ||||
|     msg = "value should be a 'Period' or 'NaT'. Got 'str' instead" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         arr.take([0, -1], allow_fill=True, fill_value="foo") | ||||
|  | ||||
|  | ||||
| def test_fillna_raises(): | ||||
|     arr = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]") | ||||
|     with pytest.raises(ValueError, match="Length"): | ||||
|         arr.fillna(arr[:2]) | ||||
|  | ||||
|  | ||||
| def test_fillna_copies(): | ||||
|     arr = PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]") | ||||
|     result = arr.fillna(pd.Period("2000", "D")) | ||||
|     assert result is not arr | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # setitem | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "key, value, expected", | ||||
|     [ | ||||
|         ([0], pd.Period("2000", "D"), [10957, 1, 2]), | ||||
|         ([0], None, [iNaT, 1, 2]), | ||||
|         ([0], np.nan, [iNaT, 1, 2]), | ||||
|         ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3), | ||||
|         ( | ||||
|             [0, 1, 2], | ||||
|             [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")], | ||||
|             [10957, 11323, 11688], | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_setitem(key, value, expected): | ||||
|     arr = PeriodArray(np.arange(3), dtype="period[D]") | ||||
|     expected = PeriodArray(expected, dtype="period[D]") | ||||
|     arr[key] = value | ||||
|     tm.assert_period_array_equal(arr, expected) | ||||
|  | ||||
|  | ||||
| def test_setitem_raises_incompatible_freq(): | ||||
|     arr = PeriodArray(np.arange(3), dtype="period[D]") | ||||
|     with pytest.raises(IncompatibleFrequency, match="freq"): | ||||
|         arr[0] = pd.Period("2000", freq="Y") | ||||
|  | ||||
|     other = PeriodArray._from_sequence(["2000", "2001"], dtype="period[Y]") | ||||
|     with pytest.raises(IncompatibleFrequency, match="freq"): | ||||
|         arr[[0, 1]] = other | ||||
|  | ||||
|  | ||||
| def test_setitem_raises_length(): | ||||
|     arr = PeriodArray(np.arange(3), dtype="period[D]") | ||||
|     with pytest.raises(ValueError, match="length"): | ||||
|         arr[[0, 1]] = [pd.Period("2000", freq="D")] | ||||
|  | ||||
|  | ||||
| def test_setitem_raises_type(): | ||||
|     arr = PeriodArray(np.arange(3), dtype="period[D]") | ||||
|     with pytest.raises(TypeError, match="int"): | ||||
|         arr[0] = 1 | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Ops | ||||
|  | ||||
|  | ||||
| def test_sub_period(): | ||||
|     arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]") | ||||
|     other = pd.Period("2000", freq="M") | ||||
|     with pytest.raises(IncompatibleFrequency, match="freq"): | ||||
|         arr - other | ||||
|  | ||||
|  | ||||
| def test_sub_period_overflow(): | ||||
|     # GH#47538 | ||||
|     dti = pd.date_range("1677-09-22", periods=2, freq="D") | ||||
|     pi = dti.to_period("ns") | ||||
|  | ||||
|     per = pd.Period._from_ordinal(10**14, pi.freq) | ||||
|  | ||||
|     with pytest.raises(OverflowError, match="Overflow in int64 addition"): | ||||
|         pi - per | ||||
|  | ||||
|     with pytest.raises(OverflowError, match="Overflow in int64 addition"): | ||||
|         per - pi | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Methods | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "other", | ||||
|     [ | ||||
|         pd.Period("2000", freq="h"), | ||||
|         PeriodArray._from_sequence(["2000", "2001", "2000"], dtype="period[h]"), | ||||
|     ], | ||||
| ) | ||||
| def test_where_different_freq_raises(other): | ||||
|     # GH#45768 The PeriodArray method raises, the Series method coerces | ||||
|     ser = pd.Series( | ||||
|         PeriodArray._from_sequence(["2000", "2001", "2002"], dtype="period[D]") | ||||
|     ) | ||||
|     cond = np.array([True, False, True]) | ||||
|  | ||||
|     with pytest.raises(IncompatibleFrequency, match="freq"): | ||||
|         ser.array._where(cond, other) | ||||
|  | ||||
|     res = ser.where(cond, other) | ||||
|     expected = ser.astype(object).where(cond, other) | ||||
|     tm.assert_series_equal(res, expected) | ||||
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Printing | ||||
|  | ||||
|  | ||||
| def test_repr_small(): | ||||
|     arr = PeriodArray._from_sequence(["2000", "2001"], dtype="period[D]") | ||||
|     result = str(arr) | ||||
|     expected = ( | ||||
|         "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]" | ||||
|     ) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_repr_large(): | ||||
|     arr = PeriodArray._from_sequence(["2000", "2001"] * 500, dtype="period[D]") | ||||
|     result = str(arr) | ||||
|     expected = ( | ||||
|         "<PeriodArray>\n" | ||||
|         "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " | ||||
|         "'2000-01-01',\n" | ||||
|         " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " | ||||
|         "'2001-01-01',\n" | ||||
|         " ...\n" | ||||
|         " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " | ||||
|         "'2000-01-01',\n" | ||||
|         " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " | ||||
|         "'2001-01-01']\n" | ||||
|         "Length: 1000, dtype: period[D]" | ||||
|     ) | ||||
|     assert result == expected | ||||
| @ -0,0 +1,313 @@ | ||||
| from datetime import timedelta | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import Timedelta | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import ( | ||||
|     DatetimeArray, | ||||
|     TimedeltaArray, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestNonNano: | ||||
|     """Tests of TimedeltaArray backed by "s", "ms" and "us" resolution data.""" | ||||
|  | ||||
|     @pytest.fixture(params=["s", "ms", "us"]) | ||||
|     def unit(self, request): | ||||
|         """Resolution string under test.""" | ||||
|         return request.param | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def tda(self, unit): | ||||
|         """TimedeltaArray wrapping the integers 0..4 viewed as ``m8[unit]``.""" | ||||
|         arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") | ||||
|         return TimedeltaArray._simple_new(arr, dtype=arr.dtype) | ||||
|  | ||||
|     def test_non_nano(self, unit): | ||||
|         """The array keeps the numpy dtype and its scalars report ``unit``.""" | ||||
|         arr = np.arange(5, dtype=np.int64).view(f"m8[{unit}]") | ||||
|         tda = TimedeltaArray._simple_new(arr, dtype=arr.dtype) | ||||
|  | ||||
|         assert tda.dtype == arr.dtype | ||||
|         assert tda[0].unit == unit | ||||
|  | ||||
|     def test_as_unit_raises(self, tda): | ||||
|         """as_unit("D") raises ValueError on the array and on its Index.""" | ||||
|         # GH#50616 | ||||
|         with pytest.raises(ValueError, match="Supported units"): | ||||
|             tda.as_unit("D") | ||||
|  | ||||
|         tdi = pd.Index(tda) | ||||
|         with pytest.raises(ValueError, match="Supported units"): | ||||
|             tdi.as_unit("D") | ||||
|  | ||||
|     @pytest.mark.parametrize("field", TimedeltaArray._field_ops) | ||||
|     def test_fields(self, tda, field): | ||||
|         """Each field attribute matches the value from the ``m8[ns]`` cast.""" | ||||
|         as_nano = tda._ndarray.astype("m8[ns]") | ||||
|         tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype) | ||||
|  | ||||
|         result = getattr(tda, field) | ||||
|         expected = getattr(tda_nano, field) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_to_pytimedelta(self, tda): | ||||
|         """to_pytimedelta matches the result from the ``m8[ns]`` cast.""" | ||||
|         as_nano = tda._ndarray.astype("m8[ns]") | ||||
|         tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype) | ||||
|  | ||||
|         result = tda.to_pytimedelta() | ||||
|         expected = tda_nano.to_pytimedelta() | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_total_seconds(self, unit, tda): | ||||
|         """total_seconds matches the result from the ``m8[ns]`` cast.""" | ||||
|         # NOTE(review): ``unit`` is requested but not used in the body. | ||||
|         as_nano = tda._ndarray.astype("m8[ns]") | ||||
|         tda_nano = TimedeltaArray._simple_new(as_nano, dtype=as_nano.dtype) | ||||
|  | ||||
|         result = tda.total_seconds() | ||||
|         expected = tda_nano.total_seconds() | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_timedelta_array_total_seconds(self): | ||||
|         """An array element's total_seconds equals the scalar's.""" | ||||
|         # GH34290 | ||||
|         expected = Timedelta("2 min").total_seconds() | ||||
|  | ||||
|         result = pd.array([Timedelta("2 min")]).total_seconds()[0] | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_total_seconds_nanoseconds(self): | ||||
|         """Series.dt.total_seconds equals dividing the values by one second.""" | ||||
|         # issue #48521 | ||||
|         start_time = pd.Series(["2145-11-02 06:00:00"]).astype("datetime64[ns]") | ||||
|         end_time = pd.Series(["2145-11-02 07:06:00"]).astype("datetime64[ns]") | ||||
|         expected = (end_time - start_time).values / np.timedelta64(1, "s") | ||||
|         result = (end_time - start_time).dt.total_seconds().values | ||||
|         # Both sides are length-1 arrays, so the comparison is a single bool. | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "nat", [np.datetime64("NaT", "ns"), np.datetime64("NaT", "us")] | ||||
|     ) | ||||
|     def test_add_nat_datetimelike_scalar(self, nat, tda): | ||||
|         """Adding a datetime64 NaT gives an all-NaT DatetimeArray, same _creso.""" | ||||
|         result = tda + nat | ||||
|         assert isinstance(result, DatetimeArray) | ||||
|         assert result._creso == tda._creso | ||||
|         assert result.isna().all() | ||||
|  | ||||
|         # reflected operand order | ||||
|         result = nat + tda | ||||
|         assert isinstance(result, DatetimeArray) | ||||
|         assert result._creso == tda._creso | ||||
|         assert result.isna().all() | ||||
|  | ||||
|     def test_add_pdnat(self, tda): | ||||
|         """Adding pd.NaT gives an all-NaT TimedeltaArray with the same _creso.""" | ||||
|         result = tda + pd.NaT | ||||
|         assert isinstance(result, TimedeltaArray) | ||||
|         assert result._creso == tda._creso | ||||
|         assert result.isna().all() | ||||
|  | ||||
|         # reflected operand order | ||||
|         result = pd.NaT + tda | ||||
|         assert isinstance(result, TimedeltaArray) | ||||
|         assert result._creso == tda._creso | ||||
|         assert result.isna().all() | ||||
|  | ||||
|     # TODO: 2022-07-11 this is the only test that gets to DTA.tz_convert | ||||
|     #  or tz_localize with non-nano; implement tests specific to that. | ||||
|     def test_add_datetimelike_scalar(self, tda, tz_naive_fixture): | ||||
|         """Adding a nanosecond Timestamp matches adding it to the "ns" array.""" | ||||
|         # ``tz_naive_fixture`` is a fixture defined outside this block. | ||||
|         ts = pd.Timestamp("2016-01-01", tz=tz_naive_fixture).as_unit("ns") | ||||
|  | ||||
|         expected = tda.as_unit("ns") + ts | ||||
|         res = tda + ts | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|         res = ts + tda | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|         ts += Timedelta(1)  # case where we can't cast losslessly | ||||
|  | ||||
|         # Expected values are built by adding in numpy, then attaching ts.tz. | ||||
|         exp_values = tda._ndarray + ts.asm8 | ||||
|         expected = ( | ||||
|             DatetimeArray._simple_new(exp_values, dtype=exp_values.dtype) | ||||
|             .tz_localize("UTC") | ||||
|             .tz_convert(ts.tz) | ||||
|         ) | ||||
|  | ||||
|         result = tda + ts | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|         result = ts + tda | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     def test_mul_scalar(self, tda): | ||||
|         """Multiplying by an int matches numpy and keeps _creso.""" | ||||
|         other = 2 | ||||
|         result = tda * other | ||||
|         expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|         assert result._creso == tda._creso | ||||
|  | ||||
|     def test_mul_listlike(self, tda): | ||||
|         """Multiplying by an integer ndarray matches numpy and keeps _creso.""" | ||||
|         other = np.arange(len(tda)) | ||||
|         result = tda * other | ||||
|         expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|         assert result._creso == tda._creso | ||||
|  | ||||
|     def test_mul_listlike_object(self, tda): | ||||
|         """Multiplying by an object-dtype array matches the integer result.""" | ||||
|         other = np.arange(len(tda)) | ||||
|         result = tda * other.astype(object) | ||||
|         expected = TimedeltaArray._simple_new(tda._ndarray * other, dtype=tda.dtype) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|         assert result._creso == tda._creso | ||||
|  | ||||
|     def test_div_numeric_scalar(self, tda): | ||||
|         """Dividing by an int matches numpy and keeps _creso.""" | ||||
|         other = 2 | ||||
|         result = tda / other | ||||
|         expected = TimedeltaArray._simple_new(tda._ndarray / other, dtype=tda.dtype) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|         assert result._creso == tda._creso | ||||
|  | ||||
|     def test_div_td_scalar(self, tda): | ||||
|         """Dividing by a one-second timedelta matches the numpy division.""" | ||||
|         other = timedelta(seconds=1) | ||||
|         result = tda / other | ||||
|         expected = tda._ndarray / np.timedelta64(1, "s") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_div_numeric_array(self, tda): | ||||
|         """Dividing by an integer ndarray matches numpy and keeps _creso.""" | ||||
|         other = np.arange(len(tda)) | ||||
|         result = tda / other | ||||
|         expected = TimedeltaArray._simple_new(tda._ndarray / other, dtype=tda.dtype) | ||||
|         tm.assert_extension_array_equal(result, expected) | ||||
|         assert result._creso == tda._creso | ||||
|  | ||||
|     def test_div_td_array(self, tda): | ||||
|         """Dividing by a timedelta64 ndarray matches the numpy division.""" | ||||
|         other = tda._ndarray + tda._ndarray[-1] | ||||
|         result = tda / other | ||||
|         expected = tda._ndarray / other | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_add_timedeltaarraylike(self, tda): | ||||
|         """Mixed-resolution add/sub matches arithmetic on the "m8[ns]" cast.""" | ||||
|         tda_nano = tda.astype("m8[ns]") | ||||
|  | ||||
|         expected = tda_nano * 2 | ||||
|         res = tda_nano + tda | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|         res = tda + tda_nano | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|         expected = tda_nano * 0 | ||||
|         res = tda - tda_nano | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|         res = tda_nano - tda | ||||
|         tm.assert_extension_array_equal(res, expected) | ||||
|  | ||||
|  | ||||
| class TestTimedeltaArray: | ||||
|     @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) | ||||
|     def test_astype_int(self, dtype): | ||||
|         arr = TimedeltaArray._from_sequence( | ||||
|             [Timedelta("1h"), Timedelta("2h")], dtype="m8[ns]" | ||||
|         ) | ||||
|  | ||||
|         if np.dtype(dtype) != np.int64: | ||||
|             with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"): | ||||
|                 arr.astype(dtype) | ||||
|             return | ||||
|  | ||||
|         result = arr.astype(dtype) | ||||
|         expected = arr._ndarray.view("i8") | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_setitem_clears_freq(self): | ||||
|         a = pd.timedelta_range("1h", periods=2, freq="h")._data | ||||
|         a[0] = Timedelta("1h") | ||||
|         assert a.freq is None | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "obj", | ||||
|         [ | ||||
|             Timedelta(seconds=1), | ||||
|             Timedelta(seconds=1).to_timedelta64(), | ||||
|             Timedelta(seconds=1).to_pytimedelta(), | ||||
|         ], | ||||
|     ) | ||||
|     def test_setitem_objects(self, obj): | ||||
|         # make sure we accept timedelta64 and timedelta in addition to Timedelta | ||||
|         tdi = pd.timedelta_range("2 Days", periods=4, freq="h") | ||||
|         arr = tdi._data | ||||
|  | ||||
|         arr[0] = obj | ||||
|         assert arr[0] == Timedelta(seconds=1) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other", | ||||
|         [ | ||||
|             1, | ||||
|             np.int64(1), | ||||
|             1.0, | ||||
|             np.datetime64("NaT"), | ||||
|             pd.Timestamp("2021-01-01"), | ||||
|             "invalid", | ||||
|             np.arange(10, dtype="i8") * 24 * 3600 * 10**9, | ||||
|             (np.arange(10) * 24 * 3600 * 10**9).view("datetime64[ns]"), | ||||
|             pd.Timestamp("2021-01-01").to_period("D"), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("index", [True, False]) | ||||
|     def test_searchsorted_invalid_types(self, other, index): | ||||
|         data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 | ||||
|         arr = pd.TimedeltaIndex(data, freq="D")._data | ||||
|         if index: | ||||
|             arr = pd.Index(arr) | ||||
|  | ||||
|         msg = "|".join( | ||||
|             [ | ||||
|                 "searchsorted requires compatible dtype or scalar", | ||||
|                 "value should be a 'Timedelta', 'NaT', or array of those. Got", | ||||
|             ] | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             arr.searchsorted(other) | ||||
|  | ||||
|  | ||||
class TestUnaryOps:
    """Exercise ``abs``, unary ``+`` and unary ``-`` on ``TimedeltaArray``.

    Each operator is checked twice: once through the Python operator and once
    through the matching NumPy ufunc (``np.abs``, ``np.positive``,
    ``np.negative``).
    """

    def test_abs(self):
        """``abs`` turns the negative entry positive and keeps ``NaT``."""
        vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
        arr = TimedeltaArray._from_sequence(vals)

        evals = np.array([3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
        expected = TimedeltaArray._from_sequence(evals)

        result = abs(arr)
        tm.assert_timedelta_array_equal(result, expected)

        result2 = np.abs(arr)
        tm.assert_timedelta_array_equal(result2, expected)

    def test_pos(self):
        """Unary plus returns an equal array that does not share memory."""
        vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
        arr = TimedeltaArray._from_sequence(vals)

        result = +arr
        tm.assert_timedelta_array_equal(result, arr)
        assert not tm.shares_memory(result, arr)

        result2 = np.positive(arr)
        tm.assert_timedelta_array_equal(result2, arr)
        assert not tm.shares_memory(result2, arr)

    def test_neg(self):
        """Unary minus flips the sign of every entry and keeps ``NaT``."""
        vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
        arr = TimedeltaArray._from_sequence(vals)

        evals = np.array([3600 * 10**9, "NaT", -7200 * 10**9], dtype="m8[ns]")
        expected = TimedeltaArray._from_sequence(evals)

        result = -arr
        tm.assert_timedelta_array_equal(result, expected)

        result2 = np.negative(arr)
        tm.assert_timedelta_array_equal(result2, expected)

    def test_neg_freq(self):
        """Negating an array built from an hourly range negates the range."""
        tdi = pd.timedelta_range("2 Days", periods=4, freq="h")
        arr = tdi._data

        # Build the expectation from an independent, negative-step range.
        # Deriving it as ``-tdi._data`` would apply the very operation under
        # test to the very same object, so the ``-arr`` check could never fail.
        expected = pd.timedelta_range("-2 Days", periods=4, freq="-1h")._data

        result = -arr
        tm.assert_timedelta_array_equal(result, expected)

        result2 = np.negative(arr)
        tm.assert_timedelta_array_equal(result2, expected)
| @ -0,0 +1,103 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import TimedeltaArray | ||||
|  | ||||
|  | ||||
| class TestTimedeltaArrayConstructor: | ||||
|     def test_only_1dim_accepted(self): | ||||
|         # GH#25282 | ||||
|         arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]") | ||||
|  | ||||
|         depr_msg = "TimedeltaArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="Only 1-dimensional"): | ||||
|                 # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 | ||||
|                 TimedeltaArray(arr.reshape(2, 2, 1)) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="Only 1-dimensional"): | ||||
|                 # 0-dim | ||||
|                 TimedeltaArray(arr[[0]].squeeze()) | ||||
|  | ||||
|     def test_freq_validation(self): | ||||
|         # ensure that the public constructor cannot create an invalid instance | ||||
|         arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10**9 | ||||
|  | ||||
|         msg = ( | ||||
|             "Inferred frequency None from passed values does not " | ||||
|             "conform to passed frequency D" | ||||
|         ) | ||||
|         depr_msg = "TimedeltaArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 TimedeltaArray(arr.view("timedelta64[ns]"), freq="D") | ||||
|  | ||||
|     def test_non_array_raises(self): | ||||
|         depr_msg = "TimedeltaArray.__init__ is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match="list"): | ||||
|                 TimedeltaArray([1, 2, 3]) | ||||
|  | ||||
|     def test_other_type_raises(self): | ||||
|         msg = r"dtype bool cannot be converted to timedelta64\[ns\]" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             TimedeltaArray._from_sequence(np.array([1, 2, 3], dtype="bool")) | ||||
|  | ||||
|     def test_incorrect_dtype_raises(self): | ||||
|         msg = "dtype 'category' is invalid, should be np.timedelta64 dtype" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             TimedeltaArray._from_sequence( | ||||
|                 np.array([1, 2, 3], dtype="i8"), dtype="category" | ||||
|             ) | ||||
|  | ||||
|         msg = "dtype 'int64' is invalid, should be np.timedelta64 dtype" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             TimedeltaArray._from_sequence( | ||||
|                 np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64") | ||||
|             ) | ||||
|  | ||||
|         msg = r"dtype 'datetime64\[ns\]' is invalid, should be np.timedelta64 dtype" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             TimedeltaArray._from_sequence( | ||||
|                 np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("M8[ns]") | ||||
|             ) | ||||
|  | ||||
|         msg = ( | ||||
|             r"dtype 'datetime64\[us, UTC\]' is invalid, should be np.timedelta64 dtype" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             TimedeltaArray._from_sequence( | ||||
|                 np.array([1, 2, 3], dtype="i8"), dtype="M8[us, UTC]" | ||||
|             ) | ||||
|  | ||||
|         msg = "Supported timedelta64 resolutions are 's', 'ms', 'us', 'ns'" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             TimedeltaArray._from_sequence( | ||||
|                 np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("m8[Y]") | ||||
|             ) | ||||
|  | ||||
|     def test_mismatched_values_dtype_units(self): | ||||
|         arr = np.array([1, 2, 3], dtype="m8[s]") | ||||
|         dtype = np.dtype("m8[ns]") | ||||
|         msg = r"Values resolution does not match dtype" | ||||
|         depr_msg = "TimedeltaArray.__init__ is deprecated" | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=depr_msg): | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 TimedeltaArray(arr, dtype=dtype) | ||||
|  | ||||
|     def test_copy(self): | ||||
|         data = np.array([1, 2, 3], dtype="m8[ns]") | ||||
|         arr = TimedeltaArray._from_sequence(data, copy=False) | ||||
|         assert arr._ndarray is data | ||||
|  | ||||
|         arr = TimedeltaArray._from_sequence(data, copy=True) | ||||
|         assert arr._ndarray is not data | ||||
|         assert arr._ndarray.base is not data | ||||
|  | ||||
|     def test_from_sequence_dtype(self): | ||||
|         msg = "dtype 'object' is invalid, should be np.timedelta64 dtype" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             TimedeltaArray._from_sequence([], dtype=object) | ||||
| @ -0,0 +1,20 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import TimedeltaArray | ||||
|  | ||||
|  | ||||
| class TestAccumulator: | ||||
|     def test_accumulators_disallowed(self): | ||||
|         # GH#50297 | ||||
|         arr = TimedeltaArray._from_sequence(["1D", "2D"], dtype="m8[ns]") | ||||
|         with pytest.raises(TypeError, match="cumprod not supported"): | ||||
|             arr._accumulate("cumprod") | ||||
|  | ||||
|     def test_cumsum(self, unit): | ||||
|         # GH#50297 | ||||
|         dtype = f"m8[{unit}]" | ||||
|         arr = TimedeltaArray._from_sequence(["1D", "2D"], dtype=dtype) | ||||
|         result = arr._accumulate("cumsum") | ||||
|         expected = TimedeltaArray._from_sequence(["1D", "3D"], dtype=dtype) | ||||
|         tm.assert_timedelta_array_equal(result, expected) | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user