done
This commit is contained in:
		| @ -0,0 +1,253 @@ | ||||
| import string | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import SparseDtype | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
class TestSeriesAccessor:
    """Tests for the ``Series.sparse`` accessor."""

    def test_to_dense(self):
        """``sparse.to_dense()`` gives a plain Series with the same values."""
        ser = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]")
        result = ser.sparse.to_dense()
        expected = pd.Series([0, 1, 0, 10])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("attr", ["npoints", "density", "fill_value", "sp_values"])
    def test_get_attributes(self, attr):
        """Each accessor attribute equals the same attribute on the array."""
        arr = SparseArray([0, 1])
        ser = pd.Series(arr)

        result = getattr(ser.sparse, attr)
        expected = getattr(arr, attr)
        # Some of these attributes may be array-valued (e.g. ``sp_values``).
        # A bare ``assert result == expected`` only works for an array while
        # it has exactly one element; np.array_equal accepts scalars and
        # arrays of any length alike.
        assert np.array_equal(result, expected)

    def test_from_coo(self):
        """``Series.sparse.from_coo`` builds a MultiIndexed sparse Series."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        row = [0, 3, 1, 0]
        col = [0, 3, 1, 2]
        data = [4, 5, 7, 9]

        sp_array = scipy_sparse.coo_matrix((data, (row, col)))
        result = pd.Series.sparse.from_coo(sp_array)

        # Expected entries are listed in (row, col) sorted order.
        index = pd.MultiIndex.from_arrays(
            [
                np.array([0, 0, 1, 3], dtype=np.int32),
                np.array([0, 2, 1, 3], dtype=np.int32),
            ],
        )
        expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "sort_labels, expected_rows, expected_cols, expected_values_pos",
        [
            (
                False,
                [("b", 2), ("a", 2), ("b", 1), ("a", 1)],
                [("z", 1), ("z", 2), ("x", 2), ("z", 0)],
                {1: (1, 0), 3: (3, 3)},
            ),
            (
                True,
                [("a", 1), ("a", 2), ("b", 1), ("b", 2)],
                [("x", 2), ("z", 0), ("z", 1), ("z", 2)],
                {1: (1, 2), 3: (0, 1)},
            ),
        ],
    )
    def test_to_coo(
        self, sort_labels, expected_rows, expected_cols, expected_values_pos
    ):
        """
        ``sparse.to_coo`` returns the matrix plus its row and column labels.

        ``expected_values_pos`` maps each expected non-zero value to the
        (row, col) position it should occupy in the 4x4 result.
        """
        sp_sparse = pytest.importorskip("scipy.sparse")

        values = SparseArray([0, np.nan, 1, 0, None, 3], fill_value=0)
        index = pd.MultiIndex.from_tuples(
            [
                ("b", 2, "z", 1),
                ("a", 2, "z", 2),
                ("a", 2, "z", 1),
                ("a", 2, "x", 2),
                ("b", 1, "z", 1),
                ("a", 1, "z", 0),
            ]
        )
        ss = pd.Series(values, index=index)

        expected_A = np.zeros((4, 4))
        for value, (row, col) in expected_values_pos.items():
            expected_A[row, col] = value

        A, rows, cols = ss.sparse.to_coo(
            row_levels=(0, 1), column_levels=(2, 3), sort_labels=sort_labels
        )
        assert isinstance(A, sp_sparse.coo_matrix)
        tm.assert_numpy_array_equal(A.toarray(), expected_A)
        assert rows == expected_rows
        assert cols == expected_cols

    def test_non_sparse_raises(self):
        """Using ``.sparse`` on a Series of plain ints raises AttributeError."""
        ser = pd.Series([1, 2, 3])
        with pytest.raises(AttributeError, match=".sparse"):
            ser.sparse.density
|  | ||||
|  | ||||
class TestFrameAccessor:
    """Checks for the ``DataFrame.sparse`` accessor and the COO helpers."""

    def test_accessor_raises(self):
        """``.sparse`` on a frame built from plain ints raises AttributeError."""
        frame = pd.DataFrame({"A": [0, 1]})
        with pytest.raises(AttributeError, match="sparse"):
            frame.sparse

    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
    @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
    @pytest.mark.parametrize("dtype", ["float64", "int64"])
    def test_from_spmatrix(self, format, labels, dtype):
        """``from_spmatrix`` on an identity matrix equals the sparse-cast dense frame."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        # Fill value is a Python scalar zero of the requested dtype.
        zero = np.array(0, dtype=dtype).item()
        sparse_dtype = SparseDtype(dtype, zero)

        identity = scipy_sparse.eye(10, format=format, dtype=dtype)
        result = pd.DataFrame.sparse.from_spmatrix(
            identity, index=labels, columns=labels
        )

        dense = pd.DataFrame(np.eye(10, dtype=dtype), index=labels, columns=labels)
        expected = dense.astype(sparse_dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
    def test_from_spmatrix_including_explicit_zero(self, format):
        """A stored entry overwritten with 0 still round-trips correctly."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        matrix = scipy_sparse.random(10, 2, density=0.5, format=format)
        # Turn the first stored entry into an explicit zero.
        matrix.data[0] = 0

        result = pd.DataFrame.sparse.from_spmatrix(matrix)

        sparse_dtype = SparseDtype("float64", 0.0)
        expected = pd.DataFrame(matrix.todense()).astype(sparse_dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "columns",
        [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]],
    )
    def test_from_spmatrix_columns(self, columns):
        """``from_spmatrix`` applies the given ``columns`` labels."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        sparse_dtype = SparseDtype("float64", 0.0)
        matrix = scipy_sparse.random(10, 2, density=0.5)

        result = pd.DataFrame.sparse.from_spmatrix(matrix, columns=columns)

        dense = pd.DataFrame(matrix.toarray(), columns=columns)
        expected = dense.astype(sparse_dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
    )
    def test_to_coo(self, colnames):
        """``sparse.to_coo`` matches a coo_matrix built from the dense values."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        first, second = colnames
        data = {first: [0, 1, 0], second: [1, 0, 0]}
        frame = pd.DataFrame(data, dtype="Sparse[int64, 0]")

        result = frame.sparse.to_coo()
        expected = scipy_sparse.coo_matrix(np.asarray(frame))

        # No stored element may differ between the two matrices.
        mismatches = result != expected
        assert mismatches.nnz == 0

    @pytest.mark.parametrize("fill_value", [1, np.nan])
    def test_to_coo_nonzero_fill_val_raises(self, fill_value):
        """``sparse.to_coo`` raises ValueError for these non-zero fill values."""
        pytest.importorskip("scipy")

        col_a = SparseArray(
            [fill_value, fill_value, fill_value, 2], fill_value=fill_value
        )
        col_b = SparseArray(
            [fill_value, 2, fill_value, fill_value], fill_value=fill_value
        )
        frame = pd.DataFrame({"A": col_a, "B": col_b})

        with pytest.raises(ValueError, match="fill value must be 0"):
            frame.sparse.to_coo()

    def test_to_coo_midx_categorical(self):
        """GH#50996: ``Series.sparse.to_coo`` with categorical MultiIndex levels."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        level_x = pd.CategoricalIndex(list("ab"), name="x")
        level_y = pd.CategoricalIndex([0, 1], name="y")
        midx = pd.MultiIndex.from_arrays([level_x, level_y])

        ser = pd.Series(1, index=midx, dtype="Sparse[int]")
        result, _rows, _cols = ser.sparse.to_coo(row_levels=["x"], column_levels=["y"])

        stored = np.array([1, 1])
        row_idx = np.array([0, 1])
        col_idx = np.array([0, 1])
        expected = scipy_sparse.coo_matrix((stored, (row_idx, col_idx)), shape=(2, 2))

        mismatches = result != expected
        assert mismatches.nnz == 0

    def test_to_dense(self):
        """``sparse.to_dense`` densifies columns that use different fill values."""
        index = ["b", "a"]
        sparse_columns = {
            "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)),
            "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)),
            "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)),
        }
        frame = pd.DataFrame(sparse_columns, index=index)

        result = frame.sparse.to_dense()

        dense_columns = {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}
        expected = pd.DataFrame(dense_columns, index=index)
        tm.assert_frame_equal(result, expected)

    def test_density(self):
        """``sparse.density`` is 0.75 for a frame with 6 of 8 non-fill values."""
        frame = pd.DataFrame(
            {
                "A": SparseArray([1, 0, 2, 1], fill_value=0),
                "B": SparseArray([0, 1, 1, 1], fill_value=0),
            }
        )
        assert frame.sparse.density == 0.75

    @pytest.mark.parametrize("dtype", ["int64", "float64"])
    @pytest.mark.parametrize("dense_index", [True, False])
    def test_series_from_coo(self, dtype, dense_index):
        """``Series.sparse.from_coo`` on an identity matrix, with and without ``dense_index``."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        identity = scipy_sparse.eye(3, format="coo", dtype=dtype)
        result = pd.Series.sparse.from_coo(identity, dense_index=dense_index)

        # One (i, i) entry per diagonal position.
        diagonal = [np.array([i, i], dtype=np.int32) for i in range(3)]
        index = pd.MultiIndex.from_tuples(diagonal)

        ones = SparseArray(np.array([1, 1, 1], dtype=dtype))
        expected = pd.Series(ones, index=index)
        if dense_index:
            full_index = pd.MultiIndex.from_product(index.levels)
            expected = expected.reindex(full_index)

        tm.assert_series_equal(result, expected)

    def test_series_from_coo_incorrect_format_raises(self):
        """gh-26554: passing a csr_matrix to ``from_coo`` raises TypeError."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        csr = scipy_sparse.csr_matrix(np.array([[0, 1], [0, 0]]))
        msg = "Expected coo_matrix. Got csr_matrix instead."
        with pytest.raises(TypeError, match=msg):
            pd.Series.sparse.from_coo(csr)

    def test_with_column_named_sparse(self):
        """A column named "sparse" does not shadow the accessor.

        https://github.com/pandas-dev/pandas/issues/30758
        """
        frame = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])})
        accessor_cls = pd.core.arrays.sparse.accessor.SparseFrameAccessor
        assert isinstance(frame.sparse, accessor_cls)
| @ -0,0 +1,514 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import SparseDtype | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=["integer", "block"])
def kind(request):
    """Parametrized fixture: the ``kind`` keyword handed to SparseArray."""
    sparse_kind = request.param
    return sparse_kind
|  | ||||
|  | ||||
@pytest.fixture(params=[True, False])
def mix(request):
    """
    Parametrized boolean fixture.

    True means a test should evaluate op(sparse, dense); False means
    op(sparse, sparse).
    """
    use_dense_operand = request.param
    return use_dense_operand
|  | ||||
|  | ||||
class TestSparseArrayArithmetics:
    """Arithmetic, comparison and logical operator tests for SparseArray."""

    # Operators exercised by the shared helpers, in the order they are checked.
    _COMPARISON_OPS = (
        operator.eq,
        operator.ne,
        operator.ge,
        operator.le,
        operator.gt,
        operator.lt,
    )
    _LOGICAL_OPS = (operator.and_, operator.or_)

    def _assert(self, a, b):
        """Compare a densified result with its expected ndarray."""
        # NOTE(review): the original comment here read "We have to use
        # tm.assert_sp_array_equal. See GH #45126", yet the check performed is
        # the ndarray comparison below -- presumably a pending TODO; confirm.
        tm.assert_numpy_array_equal(a, b)

    def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op):
        """
        Check that ``op`` on sparse operands matches non-sparse Series arithmetic.

        Parameters
        ----------
        a, b : sparse operands (``b`` may be a scalar)
        a_dense, b_dense : dense counterparts; at least one must be an ndarray
        mix : if True evaluate op(a, b_dense), otherwise op(a, b)
        op : binary arithmetic function

        Raises
        ------
        NotImplementedError
            If neither dense operand is an ndarray.
        """
        if isinstance(a_dense, np.ndarray):
            expected = op(pd.Series(a_dense), b_dense).values
        elif isinstance(b_dense, np.ndarray):
            expected = op(a_dense, pd.Series(b_dense)).values
        else:
            raise NotImplementedError

        with np.errstate(invalid="ignore", divide="ignore"):
            if mix:
                result = op(a, b_dense).to_dense()
            else:
                result = op(a, b).to_dense()

        self._assert(result, expected)

    def _check_bool_result(self, res):
        """Assert ``res`` is a SparseArray of bool subtype with a bool fill value."""
        assert isinstance(res, SparseArray)
        assert isinstance(res.dtype, SparseDtype)
        assert res.dtype.subtype == np.bool_
        assert isinstance(res.fill_value, bool)

    def _check_comparison_ops(self, a, b, a_dense, b_dense):
        """Check all six comparisons for sparse-vs-sparse, then sparse-vs-dense."""
        with np.errstate(invalid="ignore"):
            # Unfortunately, wrapping the computation of each expected
            # value with np.errstate() individually is too tedious.
            for other in (b, b_dense):
                for op in self._COMPARISON_OPS:
                    res = op(a, other)
                    self._check_bool_result(res)
                    self._assert(res.to_dense(), op(a_dense, b_dense))

    def _check_logical_ops(self, a, b, a_dense, b_dense):
        """Check ``&`` and ``|`` for sparse-vs-sparse, then sparse-vs-dense."""
        for other in (b, b_dense):
            for op in self._LOGICAL_OPS:
                res = op(a, other)
                self._check_bool_result(res)
                self._assert(res.to_dense(), op(a_dense, b_dense))

    @pytest.mark.parametrize("scalar", [0, 1, 3])
    @pytest.mark.parametrize("fill_value", [None, 0, 2])
    def test_float_scalar(self, kind, mix, all_arithmetic_functions, fill_value, scalar):
        """Arithmetic between a float SparseArray and a scalar."""
        op = all_arithmetic_functions
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        a = SparseArray(values, kind=kind, fill_value=fill_value)
        self._check_numeric_ops(a, scalar, values, scalar, mix, op)

    def test_float_scalar_comparison(self, kind):
        """Comparisons between a float SparseArray and a scalar."""
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

        # fill_value=None is the same as not passing fill_value at all.
        for fill_value in (None, 0, 2):
            a = SparseArray(values, kind=kind, fill_value=fill_value)
            for scalar in (1, 0, 3):
                self._check_comparison_ops(a, scalar, values, scalar)

    def test_float_same_index_without_nans(self, kind, mix, all_arithmetic_functions):
        """Arithmetic when both operands have the same sp_index (no NaNs)."""
        op = all_arithmetic_functions

        values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
        rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_same_index_with_nans(self, kind, mix, all_arithmetic_functions):
        """Arithmetic when both operands have the same sp_index (with NaNs)."""
        op = all_arithmetic_functions
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_same_index_comparison(self, kind):
        """Comparisons when both operands have the same sp_index."""
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
        rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_float_array(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between float SparseArrays with various fill values."""
        op = all_arithmetic_functions

        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind=kind, fill_value=1)
        b = SparseArray(rvalues, kind=kind, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_array_different_kind(self, mix, all_arithmetic_functions):
        """Arithmetic between an "integer"-kind and a "block"-kind SparseArray."""
        op = all_arithmetic_functions

        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = SparseArray(values, kind="integer")
        b = SparseArray(rvalues, kind="block")
        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = SparseArray(values, kind="integer", fill_value=0)
        b = SparseArray(rvalues, kind="block")
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind="integer", fill_value=0)
        b = SparseArray(rvalues, kind="block", fill_value=0)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind="integer", fill_value=1)
        b = SparseArray(rvalues, kind="block", fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_float_array_comparison(self, kind):
        """Comparisons between float SparseArrays with various fill values."""
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, kind=kind, fill_value=1)
        b = SparseArray(rvalues, kind=kind, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_int_array(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between int64 SparseArrays with various fill values."""
        op = all_arithmetic_functions

        # have to specify dtype explicitly until fixing GH 667
        dtype = np.int64

        values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        a = SparseArray(values, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = SparseArray(rvalues, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = SparseArray(rvalues, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype)
        b = SparseArray(rvalues, fill_value=0, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, fill_value=1, dtype=dtype, kind=kind)
        assert a.dtype == SparseDtype(dtype, fill_value=1)
        b = SparseArray(rvalues, fill_value=2, dtype=dtype, kind=kind)
        assert b.dtype == SparseDtype(dtype, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_int_array_comparison(self, kind):
        """Comparisons between int64 SparseArrays with various fill values."""
        dtype = "int64"
        # int32 NI ATM

        values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
        rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)

        a = SparseArray(values, dtype=dtype, kind=kind)
        b = SparseArray(rvalues, dtype=dtype, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0)
        b = SparseArray(rvalues, dtype=dtype, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0)
        b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=0)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, dtype=dtype, kind=kind, fill_value=1)
        b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    @pytest.mark.parametrize("fill_value", [True, False, np.nan])
    def test_bool_same_index(self, kind, fill_value):
        """GH 14000: logical ops when both operands hold identical values."""
        # when sp_index are the same
        values = np.array([True, False, True, True], dtype=np.bool_)
        rvalues = np.array([True, False, True, True], dtype=np.bool_)

        a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value)
        b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value)
        self._check_logical_ops(a, b, values, rvalues)

    @pytest.mark.parametrize("fill_value", [True, False, np.nan])
    def test_bool_array_logical(self, kind, fill_value):
        """GH 14000: logical ops when the operands differ element-wise."""
        values = np.array([True, False, True, False, True, True], dtype=np.bool_)
        rvalues = np.array([True, False, False, True, False, True], dtype=np.bool_)

        a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value)
        b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value)
        self._check_logical_ops(a, b, values, rvalues)

    def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions):
        """Arithmetic between a float SparseArray and an int64 SparseArray."""
        op = all_arithmetic_functions
        rdtype = "int64"
        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)

        self._check_numeric_ops(a, b, values, rvalues, mix, op)
        self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        assert b.dtype == SparseDtype(rdtype)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

        a = SparseArray(values, kind=kind, fill_value=1)
        b = SparseArray(rvalues, kind=kind, fill_value=2)
        assert b.dtype == SparseDtype(rdtype, fill_value=2)
        self._check_numeric_ops(a, b, values, rvalues, mix, op)

    def test_mixed_array_comparison(self, kind):
        """Comparisons between a float SparseArray and an int64 SparseArray."""
        rdtype = "int64"
        # int32 NI ATM

        values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)

        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)

        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind)
        assert b.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, kind=kind, fill_value=0)
        b = SparseArray(rvalues, kind=kind, fill_value=0)
        assert b.dtype == SparseDtype(rdtype)
        self._check_comparison_ops(a, b, values, rvalues)

        a = SparseArray(values, kind=kind, fill_value=1)
        b = SparseArray(rvalues, kind=kind, fill_value=2)
        assert b.dtype == SparseDtype(rdtype, fill_value=2)
        self._check_comparison_ops(a, b, values, rvalues)

    def test_xor(self):
        """``^`` between two boolean SparseArrays gives the expected sparse result."""
        s = SparseArray([True, True, False, False])
        t = SparseArray([True, False, True, False])
        result = s ^ t
        sp_index = pd.core.arrays.sparse.IntIndex(4, np.array([0, 1, 2], dtype="int32"))
        expected = SparseArray([False, True, True], sparse_index=sp_index)
        tm.assert_sp_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", [operator.eq, operator.add])
def test_with_list(op):
    """A plain list operand gives the same result as an equivalent SparseArray."""
    sparse = SparseArray([0, 1], fill_value=0)
    other = [0, 1]

    from_list = op(sparse, other)
    from_sparse = op(sparse, SparseArray(other))
    tm.assert_sp_array_equal(from_list, from_sparse)
|  | ||||
|  | ||||
def test_with_dataframe():
    """GH#27910: ``SparseArray.__add__`` returns NotImplemented for a DataFrame."""
    sparse = SparseArray([0, 1], fill_value=0)
    frame = pd.DataFrame([[1, 2], [3, 4]])
    # Call the dunder directly so the raw return value can be inspected.
    outcome = sparse.__add__(frame)
    assert outcome is NotImplemented
|  | ||||
|  | ||||
def test_with_zerodim_ndarray():
    """GH#27910: multiplying by a zero-dim ndarray matches multiplying by the scalar."""
    sparse = SparseArray([0, 1], fill_value=0)

    via_zerodim = sparse * np.array(2)
    via_scalar = sparse * 2
    tm.assert_sp_array_equal(via_zerodim, via_scalar)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ufunc", [np.abs, np.exp])
@pytest.mark.parametrize(
    "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])]
)
def test_ufuncs(ufunc, arr):
    """A unary ufunc is applied to the values and to the fill value alike."""
    actual = ufunc(arr)
    # The expected array is rebuilt from the dense result, with the ufunc
    # applied separately to the original fill value.
    expected = SparseArray(
        ufunc(np.asarray(arr)),
        fill_value=ufunc(arr.fill_value),
    )
    tm.assert_sp_array_equal(actual, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "a, b",
    [
        # One case per distinct fill value; the fill_value=1 case used to be
        # listed four times, which only re-ran the identical test.
        (SparseArray([0, 0, 0]), np.array([0, 1, 2])),
        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
    ],
)
@pytest.mark.parametrize("ufunc", [np.add, np.greater])
def test_binary_ufuncs(ufunc, a, b):
    """A binary ufunc on (SparseArray, ndarray) matches the dense computation.

    Parameters
    ----------
    ufunc : numpy ufunc
        Binary ufunc under test.
    a : SparseArray
        Left operand.
    b : numpy.ndarray
        Right operand.
    """
    # can't say anything about fill value here.
    result = ufunc(a, b)
    expected = ufunc(np.asarray(a), np.asarray(b))
    assert isinstance(result, SparseArray)
    tm.assert_numpy_array_equal(np.asarray(result), expected)
|  | ||||
|  | ||||
def test_ndarray_inplace():
    """``ndarray += SparseArray`` leaves an ndarray holding the element-wise sum."""
    sparse = SparseArray([0, 2, 0, 0])
    dense = np.array([0, 1, 2, 3])
    dense += sparse
    tm.assert_numpy_array_equal(dense, np.array([0, 3, 2, 3]))
|  | ||||
|  | ||||
def test_sparray_inplace():
    """``SparseArray += ndarray`` leaves a SparseArray holding the sum."""
    sparse = SparseArray([0, 2, 0, 0])
    dense = np.array([0, 1, 2, 3])
    sparse += dense
    tm.assert_sp_array_equal(sparse, SparseArray([0, 3, 2, 3], fill_value=0))
|  | ||||
|  | ||||
@pytest.mark.parametrize("cons", [list, np.array, SparseArray])
def test_mismatched_length_cmp_op(cons):
    """``&`` between operands of different lengths raises ValueError."""
    two_items = SparseArray([True, True])
    three_items = cons([True, True, True])
    expected_message = "operands have mismatched length"
    with pytest.raises(ValueError, match=expected_message):
        two_items & three_items
|  | ||||
|  | ||||
@pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "floordiv", "pow"])
@pytest.mark.parametrize("fill_value", [np.nan, 3])
def test_binary_operators(op, fill_value):
    """Sparse binary operators agree with the same operator applied densely.

    Checks sparse-sparse, sparse-dense, dense-sparse and sparse-scalar
    combinations.

    Parameters
    ----------
    op : str
        Name of a function in the ``operator`` module.
    fill_value : float or int
        Value written into part of each operand and used as the fill value.
    """
    op = getattr(operator, op)
    # Draw both operands from ONE generator. Seeding two separate generators
    # with the same seed returns the same 20 numbers twice, which makes the
    # operands identical wherever neither is masked.
    rng = np.random.default_rng(2)
    data1 = rng.standard_normal(20)
    data2 = rng.standard_normal(20)

    data1[::2] = fill_value
    data2[::3] = fill_value

    first = SparseArray(data1, fill_value=fill_value)
    second = SparseArray(data2, fill_value=fill_value)

    with np.errstate(all="ignore"):
        res = op(first, second)
        exp = SparseArray(
            op(first.to_dense(), second.to_dense()), fill_value=first.fill_value
        )
        assert isinstance(res, SparseArray)
        tm.assert_almost_equal(res.to_dense(), exp.to_dense())

        # A dense right-hand operand must give the same sparse result.
        res2 = op(first, second.to_dense())
        assert isinstance(res2, SparseArray)
        tm.assert_sp_array_equal(res, res2)

        # ... and so must a dense left-hand operand.
        res3 = op(first.to_dense(), second)
        assert isinstance(res3, SparseArray)
        tm.assert_sp_array_equal(res, res3)

        res4 = op(first, 4)
        assert isinstance(res4, SparseArray)

        # Ignore this if the actual op raises (e.g. pow).
        try:
            exp = op(first.to_dense(), 4)
            exp_fv = op(first.fill_value, 4)
        except ValueError:
            pass
        else:
            tm.assert_almost_equal(res4.fill_value, exp_fv)
            tm.assert_almost_equal(res4.to_dense(), exp)
| @ -0,0 +1,511 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.sparse import IntIndex | ||||
| from pandas.compat.numpy import np_version_gt2 | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     SparseDtype, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
@pytest.fixture
def arr_data():
    """Provide an ndarray that mixes NaN entries with the values 1 through 6."""
    nan = np.nan
    values = [nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]
    return np.array(values)
|  | ||||
|  | ||||
@pytest.fixture
def arr(arr_data):
    """Wrap the ndarray from the ``arr_data`` fixture in a SparseArray."""
    sparse = SparseArray(arr_data)
    return sparse
|  | ||||
|  | ||||
@pytest.fixture
def zarr():
    """Provide an integer SparseArray constructed with ``fill_value=0``."""
    values = [0, 0, 1, 2, 3, 0, 4, 5, 0, 6]
    return SparseArray(values, fill_value=0)
|  | ||||
|  | ||||
class TestSparseArray:
    """Tests for SparseArray fill values, copying, shape, pickling and fillna."""

    @pytest.mark.parametrize("fill_value", [0, None, np.nan])
    def test_shift_fill_value(self, fill_value):
        """shift() inserts ``fill_value`` and keeps the array's own fill value."""
        # GH #24128
        sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0)
        res = sparse.shift(1, fill_value=fill_value)
        if isna(fill_value):
            # For a missing fill, expect the result dtype's NA value instead.
            fill_value = res.dtype.na_value
        exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0)
        tm.assert_sp_array_equal(res, exp)

    def test_set_fill_value(self):
        """Assigning ``fill_value`` updates it; some assignments must warn.

        A FutureWarning is expected when 3.1 is assigned to the int64 array
        and when 0 is assigned to the bool array.
        """
        arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan)
        arr.fill_value = 2
        assert arr.fill_value == 2

        arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
        arr.fill_value = 2
        assert arr.fill_value == 2

        msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            arr.fill_value = 3.1
        assert arr.fill_value == 3.1

        arr.fill_value = np.nan
        assert np.isnan(arr.fill_value)

        arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_)
        arr.fill_value = True
        assert arr.fill_value is True

        with tm.assert_produces_warning(FutureWarning, match=msg):
            arr.fill_value = 0

        arr.fill_value = np.nan
        assert np.isnan(arr.fill_value)

    @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)])
    def test_set_fill_invalid_non_scalar(self, val):
        """Assigning a list, ndarray or tuple as ``fill_value`` raises ValueError."""
        arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_)
        msg = "fill_value must be a scalar"

        with pytest.raises(ValueError, match=msg):
            arr.fill_value = val

    def test_copy(self, arr):
        """copy() yields a distinct ``sp_values`` object but the same ``sp_index``."""
        arr2 = arr.copy()
        assert arr2.sp_values is not arr.sp_values
        assert arr2.sp_index is arr.sp_index

    def test_values_asarray(self, arr_data, arr):
        """to_dense() reproduces the data the array was built from."""
        tm.assert_almost_equal(arr.to_dense(), arr_data)

    @pytest.mark.parametrize(
        "data,shape,dtype",
        [
            ([0, 0, 0, 0, 0], (5,), None),
            ([], (0,), None),
            ([0], (1,), None),
            (["A", "A", np.nan, "B"], (4,), object),
        ],
    )
    def test_shape(self, data, shape, dtype):
        """``shape`` is the one-element tuple holding the input length."""
        # GH 21126
        out = SparseArray(data, dtype=dtype)
        assert out.shape == shape

    @pytest.mark.parametrize(
        "vals",
        [
            [np.nan, np.nan, np.nan, np.nan, np.nan],
            [1, np.nan, np.nan, 3, np.nan],
            [1, np.nan, 0, 3, 0],
        ],
    )
    @pytest.mark.parametrize("fill_value", [None, 0])
    def test_dense_repr(self, vals, fill_value):
        """to_dense() returns the original values for either fill value."""
        vals = np.array(vals)
        arr = SparseArray(vals, fill_value=fill_value)

        res = arr.to_dense()
        tm.assert_numpy_array_equal(res, vals)

    @pytest.mark.parametrize("fix", ["arr", "zarr"])
    def test_pickle(self, fix, request):
        """A SparseArray survives a pickle round trip unchanged."""
        # ``fix`` names a fixture; resolve it to the actual array at run time.
        obj = request.getfixturevalue(fix)
        unpickled = tm.round_trip_pickle(obj)
        tm.assert_sp_array_equal(unpickled, obj)

    def test_generator_warnings(self):
        """Iterating over a SparseArray emits no warning."""
        sp_arr = SparseArray([1, 2, 3])
        with tm.assert_produces_warning(None):
            for _ in sp_arr:
                pass

    def test_where_retain_fill_value(self):
        """_where() and Series.where() keep the original fill value."""
        # GH#45691 don't lose fill_value on _where
        arr = SparseArray([np.nan, 1.0], fill_value=0)

        mask = np.array([True, False])

        res = arr._where(~mask, 1)
        exp = SparseArray([1, 1.0], fill_value=0)
        tm.assert_sp_array_equal(res, exp)

        ser = pd.Series(arr)
        res = ser.where(~mask, 1)
        tm.assert_series_equal(res, pd.Series(exp))

    def test_fillna(self):
        """fillna() replaces NaN values and, when it is NaN, the fill value too."""
        s = SparseArray([1, np.nan, np.nan, 3, np.nan])
        res = s.fillna(-1)
        exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, 0, 3, 0])
        res = s.fillna(-1)
        exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([np.nan, np.nan, np.nan, np.nan])
        res = s.fillna(-1)
        exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0)
        res = s.fillna(-1)
        exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

        # float dtype's fill_value is np.nan, replaced by -1
        s = SparseArray([0.0, 0.0, 0.0, 0.0])
        res = s.fillna(-1)
        exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1)
        tm.assert_sp_array_equal(res, exp)

        # int dtype shouldn't have missing. No changes.
        s = SparseArray([0, 0, 0, 0])
        assert s.dtype == SparseDtype(np.int64)
        assert s.fill_value == 0
        res = s.fillna(-1)
        tm.assert_sp_array_equal(res, s)

        s = SparseArray([0, 0, 0, 0], fill_value=0)
        assert s.dtype == SparseDtype(np.int64)
        assert s.fill_value == 0
        res = s.fillna(-1)
        exp = SparseArray([0, 0, 0, 0], fill_value=0)
        tm.assert_sp_array_equal(res, exp)

        # fill_value can be nan if there is no missing hole.
        # only fill_value will be changed
        s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
        assert s.dtype == SparseDtype(np.int64, fill_value=np.nan)
        assert np.isnan(s.fill_value)
        res = s.fillna(-1)
        exp = SparseArray([0, 0, 0, 0], fill_value=-1)
        tm.assert_sp_array_equal(res, exp)

    def test_fillna_overlap(self):
        """fillna() with a value that is already stored gives the right result."""
        s = SparseArray([1, np.nan, np.nan, 3, np.nan])
        # filling with existing value doesn't replace existing value with
        # fill_value, i.e. existing 3 remains in sp_values
        res = s.fillna(3)
        exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
        # Only the dense values are compared in this case.
        tm.assert_numpy_array_equal(res.to_dense(), exp)

        s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
        res = s.fillna(3)
        exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(res, exp)

    def test_nonzero(self):
        """nonzero() returns the int32 positions of the non-zero entries."""
        # Tests regression #21172.
        sa = SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
        expected = np.array([2, 5, 9], dtype=np.int32)
        # nonzero() returns a one-element tuple here; unpack the single array.
        (result,) = sa.nonzero()
        tm.assert_numpy_array_equal(expected, result)

        sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
        (result,) = sa.nonzero()
        tm.assert_numpy_array_equal(expected, result)
|  | ||||
|  | ||||
class TestSparseArrayAnalytics:
    """Tests for cumsum, ufuncs and size-related properties of SparseArray."""

    @pytest.mark.parametrize(
        "data,expected",
        [
            (
                np.array([1, 2, 3, 4, 5], dtype=float),  # non-null data
                SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0])),
            ),
            (
                np.array([1, 2, np.nan, 4, 5], dtype=float),  # null data
                SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])),
            ),
        ],
    )
    @pytest.mark.parametrize("numpy", [True, False])
    def test_cumsum(self, data, expected, numpy):
        """cumsum matches ``expected`` via np.cumsum and via the method.

        The same expectation is used for the default, NaN and 2 fill values.
        Unsupported arguments are checked too: ``dtype`` and ``out`` through
        NumPy, and an out-of-bounds ``axis`` through the method.
        """
        # ``numpy`` selects the entry point: np.cumsum or SparseArray.cumsum.
        cumsum = np.cumsum if numpy else lambda s: s.cumsum()

        out = cumsum(SparseArray(data))
        tm.assert_sp_array_equal(out, expected)

        out = cumsum(SparseArray(data, fill_value=np.nan))
        tm.assert_sp_array_equal(out, expected)

        out = cumsum(SparseArray(data, fill_value=2))
        tm.assert_sp_array_equal(out, expected)

        if numpy:  # numpy compatibility checks.
            msg = "the 'dtype' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.cumsum(SparseArray(data), dtype=np.int64)

            msg = "the 'out' parameter is not supported"
            with pytest.raises(ValueError, match=msg):
                np.cumsum(SparseArray(data), out=out)
        else:
            axis = 1  # SparseArray currently 1-D, so only axis = 0 is valid.
            # re.escape is needed because the message contains parentheses.
            msg = re.escape(f"axis(={axis}) out of bounds")
            with pytest.raises(ValueError, match=msg):
                SparseArray(data).cumsum(axis=axis)

    def test_ufunc(self):
        """abs / np.abs / np.sin act on the stored values and the fill value."""
        # GH 13853 make sure ufunc is applied to fill_value
        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        result = SparseArray([1, np.nan, 2, np.nan, 2])
        tm.assert_sp_array_equal(abs(sparse), result)
        tm.assert_sp_array_equal(np.abs(sparse), result)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        # The expectation reuses the input's sparse index.
        result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1)
        tm.assert_sp_array_equal(abs(sparse), result)
        tm.assert_sp_array_equal(np.abs(sparse), result)

        sparse = SparseArray([1, -1, 2, -2], fill_value=-1)
        exp = SparseArray([1, 1, 2, 2], fill_value=1)
        tm.assert_sp_array_equal(abs(sparse), exp)
        tm.assert_sp_array_equal(np.abs(sparse), exp)

        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2]))
        tm.assert_sp_array_equal(np.sin(sparse), result)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1))
        tm.assert_sp_array_equal(np.sin(sparse), result)

        sparse = SparseArray([1, -1, 0, -2], fill_value=0)
        result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0))
        tm.assert_sp_array_equal(np.sin(sparse), result)

    def test_ufunc_args(self):
        """np.add with a scalar argument also shifts the fill value."""
        # GH 13853 make sure ufunc is applied to fill_value, including its arg
        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        result = SparseArray([2, np.nan, 3, np.nan, -1])
        tm.assert_sp_array_equal(np.add(sparse, 1), result)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        result = SparseArray([2, 0, 3, -1], fill_value=2)
        tm.assert_sp_array_equal(np.add(sparse, 1), result)

        sparse = SparseArray([1, -1, 0, -2], fill_value=0)
        result = SparseArray([2, 0, 1, -1], fill_value=1)
        tm.assert_sp_array_equal(np.add(sparse, 1), result)

    @pytest.mark.parametrize("fill_value", [0.0, np.nan])
    def test_modf(self, fill_value):
        """np.modf returns two SparseArrays matching the dense np.modf outputs."""
        # https://github.com/pandas-dev/pandas/issues/26946
        sparse = SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value)
        r1, r2 = np.modf(sparse)
        e1, e2 = np.modf(np.asarray(sparse))
        tm.assert_sp_array_equal(r1, SparseArray(e1, fill_value=fill_value))
        tm.assert_sp_array_equal(r2, SparseArray(e2, fill_value=fill_value))

    def test_nbytes_integer(self):
        """``nbytes`` of an integer-kind array with two stored values is 24."""
        arr = SparseArray([1, 0, 0, 0, 2], kind="integer")
        result = arr.nbytes
        # (2 * 8) + 2 * 4
        assert result == 24

    def test_nbytes_block(self):
        """``nbytes`` of a block-kind array with one two-value block is 24."""
        arr = SparseArray([1, 2, 0, 0, 0], kind="block")
        result = arr.nbytes
        # (2 * 8) + 4 + 4
        # sp_values, blocs, blengths
        assert result == 24

    def test_asarray_datetime64(self):
        """np.asarray on a datetime SparseArray with missing values does not raise."""
        s = SparseArray(pd.to_datetime(["2012", None, None, "2013"]))
        # Smoke test only: the converted array is not inspected.
        np.asarray(s)

    def test_density(self):
        """``density`` is 0.5 when one of the two values is stored."""
        arr = SparseArray([0, 1])
        assert arr.density == 0.5

    def test_npoints(self):
        """``npoints`` is 1 when one of the two values is stored."""
        arr = SparseArray([0, 1])
        assert arr.npoints == 1
|  | ||||
|  | ||||
def test_setting_fill_value_fillna_still_works():
    """isna() reports the right mask after fill_value is reassigned to NaN."""
    # Reassigning fill_value (like astype) is exactly the kind of mutation
    # that makes user-updatable fill_value / dtype hazardous.
    sparse = SparseArray([1.0, np.nan, 1.0], fill_value=0.0)
    sparse.fill_value = np.nan

    # The sp_index of the result differs from a freshly built array, so the
    # comparison is done on the dense ndarray rather than on SparseArrays.
    mask = np.asarray(sparse.isna())
    tm.assert_numpy_array_equal(mask, np.array([False, True, False]))
|  | ||||
|  | ||||
def test_setting_fill_value_updates():
    """Reassigning fill_value to NaN matches an array built with that dtype."""
    sparse = SparseArray([0.0, np.nan], fill_value=0)
    sparse.fill_value = np.nan

    # The private constructor is used so that the index is exactly as given;
    # going through the public one would leave the NaNs un-stored.
    stored_values = np.array([np.nan])
    stored_at = IntIndex(2, [1])
    expected = SparseArray._simple_new(
        sparse_array=stored_values,
        sparse_index=stored_at,
        dtype=SparseDtype(float, np.nan),
    )
    tm.assert_sp_array_equal(sparse, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "arr,fill_value,loc",
    [
        ([None, 1, 2], None, 0),
        ([0, None, 2], None, 1),
        ([0, 1, None], None, 2),
        ([0, 1, 1, None, None], None, 3),
        ([1, 1, 1, 2], None, -1),
        ([], None, -1),
        ([None, 1, 0, 0, None, 2], None, 0),
        ([None, 1, 0, 0, None, 2], 1, 1),
        ([None, 1, 0, 0, None, 2], 2, 5),
        ([None, 1, 0, 0, None, 2], 3, -1),
        ([None, 0, 0, 1, 2, 1], 0, 1),
        ([None, 0, 0, 1, 2, 1], 1, 3),
    ],
)
def test_first_fill_value_loc(arr, fill_value, loc):
    """_first_fill_value_loc() returns the parametrized position ``loc``."""
    sparse = SparseArray(arr, fill_value=fill_value)
    assert sparse._first_fill_value_loc() == loc
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "arr",
    [
        [1, 2, np.nan, np.nan],
        [1, np.nan, 2, np.nan],
        [1, 2, np.nan],
        [np.nan, 1, 0, 0, np.nan, 2],
        [np.nan, 0, 0, 1, 2, 1],
    ],
)
@pytest.mark.parametrize("fill_value", [np.nan, 0, 1])
def test_unique_na_fill(arr, fill_value):
    """unique() returns a SparseArray whose values match Series.unique()."""
    sparse_uniques = SparseArray(arr, fill_value=fill_value).unique()
    series_uniques = pd.Series(arr).unique()
    assert isinstance(sparse_uniques, SparseArray)
    tm.assert_numpy_array_equal(np.asarray(sparse_uniques), series_uniques)
|  | ||||
|  | ||||
def test_unique_all_sparse():
    """unique() on ``SparseArray([0, 0])`` returns ``SparseArray([0])``."""
    # https://github.com/pandas-dev/pandas/issues/23168
    zeros = SparseArray([0, 0])
    uniques = zeros.unique()
    tm.assert_sp_array_equal(uniques, SparseArray([0]))
|  | ||||
|  | ||||
def test_map():
    """SparseArray.map accepts a dict, a Series or a callable mapper.

    Every mapper sends 0, 1, 2 to 10, 11, 12, so all three forms are checked
    against the same expected array (fill value 10).
    """
    arr = SparseArray([0, 1, 2])
    expected = SparseArray([10, 11, 12], fill_value=10)

    # dict
    result = arr.map({0: 10, 1: 11, 2: 12})
    tm.assert_sp_array_equal(result, expected)

    # series
    result = arr.map(pd.Series({0: 10, 1: 11, 2: 12}))
    tm.assert_sp_array_equal(result, expected)

    # function -- this section used to repeat the Series mapper, so the
    # callable code path was never exercised.
    result = arr.map(lambda x: x + 10)
    tm.assert_sp_array_equal(result, expected)
|  | ||||
|  | ||||
def test_map_missing():
    """A key absent from a dict mapper gives a missing entry in the result."""
    sparse = SparseArray([0, 1, 2])
    expected = SparseArray([10, 11, None], fill_value=10)

    # 2 has no entry in the mapping.
    partial_mapping = {0: 10, 1: 11}
    tm.assert_sp_array_equal(sparse.map(partial_mapping), expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("fill_value", [np.nan, 1])
def test_dropna(fill_value):
    """dropna() removes the NaN entry from a SparseArray and a frame column."""
    # GH-28287
    sparse = SparseArray([np.nan, 1], fill_value=fill_value)
    kept = SparseArray([1.0], fill_value=fill_value)
    tm.assert_sp_array_equal(sparse.dropna(), kept)

    # Same check with the array as a DataFrame column: only row 1 survives.
    frame = pd.DataFrame({"a": [0, 1], "b": sparse})
    expected_frame = pd.DataFrame({"a": [1], "b": kept}, index=pd.Index([1]))
    tm.assert_equal(frame.dropna(), expected_frame)
|  | ||||
|  | ||||
def test_drop_duplicates_fill_value():
    """drop_duplicates() reduces an all-zero sparse frame to a single row."""
    # GH 11726
    dense_zeros = pd.DataFrame(np.zeros((5, 5)))
    sparse_frame = dense_zeros.apply(lambda x: SparseArray(x, fill_value=0))
    result = sparse_frame.drop_duplicates()

    expected_columns = {}
    for label in range(5):
        expected_columns[label] = SparseArray([0.0], fill_value=0)
    tm.assert_frame_equal(result, pd.DataFrame(expected_columns))
|  | ||||
|  | ||||
def test_zero_sparse_column():
    """Boolean row selection works on a frame with an all-zero sparse column."""
    # GH 27781
    all_zero = pd.DataFrame({"A": SparseArray([0, 0, 0]), "B": [1, 2, 3]})
    one_nonzero = pd.DataFrame({"A": SparseArray([0, 1, 0]), "B": [1, 2, 3]})

    # Dropping the row where B == 2 removes the only non-zero "A" value, so
    # both selections are expected to compare equal.
    result = all_zero.loc[all_zero["B"] != 2]
    tm.assert_frame_equal(result, one_nonzero.loc[one_nonzero["B"] != 2])

    # Also compare against an explicitly constructed frame.
    explicit = pd.DataFrame({"A": SparseArray([0, 0]), "B": [1, 3]}, index=[0, 2])
    tm.assert_frame_equal(result, explicit)
|  | ||||
|  | ||||
def test_array_interface(arr_data, arr):
    """np.asarray / np.array on a SparseArray: contents and copy behaviour."""
    # https://github.com/pandas-dev/pandas/pull/60046
    tm.assert_numpy_array_equal(np.asarray(arr), arr_data)

    # Two default conversions must not share memory.
    first = np.asarray(arr)
    second = np.asarray(arr)
    assert not np.may_share_memory(first, second)

    # The same holds when a copy is requested explicitly.
    first = np.array(arr, copy=True)
    second = np.array(arr, copy=True)
    assert not np.may_share_memory(first, second)

    if np_version_gt2:
        # copy=False semantics are only supported in NumPy>=2.
        msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            np.array(arr, copy=False)

        # With no sparse filled values, copy=False conversions may share memory.
        without_fill = SparseArray(np.array([1, 2, 3]))
        first = np.array(without_fill, copy=False)
        second = np.array(without_fill, copy=False)
        assert np.may_share_memory(first, second)
| @ -0,0 +1,133 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.sparse import IntIndex | ||||
|  | ||||
| from pandas import ( | ||||
|     SparseDtype, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
class TestAstype:
    """Tests for SparseArray.astype with sparse and NumPy target dtypes."""

    def test_astype(self):
        """astype to Sparse[float32], to SparseDtype targets, and the NA error."""
        # float -> float
        arr = SparseArray([None, None, 0, 2])
        result = arr.astype("Sparse[float32]")
        expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32"))
        tm.assert_sp_array_equal(result, expected)

        # Same subtype, new fill value 0: the expectation is built with the
        # private constructor so that its sparse index is exactly as given.
        dtype = SparseDtype("float64", fill_value=0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(
            np.array([0.0, 2.0], dtype=dtype.subtype), IntIndex(4, [2, 3]), dtype
        )
        tm.assert_sp_array_equal(result, expected)

        dtype = SparseDtype("int64", 0)
        result = arr.astype(dtype)
        expected = SparseArray._simple_new(
            np.array([0, 2], dtype=np.int64), IntIndex(4, [2, 3]), dtype
        )
        tm.assert_sp_array_equal(result, expected)

        # A NaN among the values cannot be cast to a sparse integer dtype.
        arr = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match="NA"):
            arr.astype("Sparse[i8]")

    def test_astype_bool(self):
        """astype(bool) gives an ndarray; astype(SparseDtype(bool)) stays sparse."""
        a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
        result = a.astype(bool)
        expected = np.array([1, 0, 0, 1], dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        # update fill value
        result = a.astype(SparseDtype(bool, False))
        expected = SparseArray(
            [True, False, False, True], dtype=SparseDtype(bool, False)
        )
        tm.assert_sp_array_equal(result, expected)

    def test_astype_all(self, any_real_numpy_dtype):
        """astype to a real NumPy dtype matches ndarray.astype on the same values."""
        vals = np.array([1, 2, 3])
        arr = SparseArray(vals, fill_value=1)
        typ = np.dtype(any_real_numpy_dtype)
        res = arr.astype(typ)
        tm.assert_numpy_array_equal(res, vals.astype(any_real_numpy_dtype))

    @pytest.mark.parametrize(
        "arr, dtype, expected",
        [
            (
                SparseArray([0, 1]),
                "float",
                SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)),
            ),
            (SparseArray([0, 1]), bool, SparseArray([False, True])),
            (
                SparseArray([0, 1], fill_value=1),
                bool,
                SparseArray([False, True], dtype=SparseDtype(bool, True)),
            ),
            pytest.param(
                SparseArray([0, 1]),
                "datetime64[ns]",
                SparseArray(
                    np.array([0, 1], dtype="datetime64[ns]"),
                    dtype=SparseDtype("datetime64[ns]", Timestamp("1970")),
                ),
            ),
            (
                SparseArray([0, 1, 10]),
                np.str_,
                SparseArray(["0", "1", "10"], dtype=SparseDtype(np.str_, "0")),
            ),
            (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
            (
                SparseArray([0, 1, 0]),
                object,
                SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)),
            ),
        ],
    )
    def test_astype_more(self, arr, dtype, expected):
        """astype through ``SparseDtype.update_dtype`` for several target dtypes."""
        # update_dtype derives the target SparseDtype from the array's own dtype.
        result = arr.astype(arr.dtype.update_dtype(dtype))
        tm.assert_sp_array_equal(result, expected)

    def test_astype_nan_raises(self):
        """Casting an array that contains NaN to int raises ValueError."""
        arr = SparseArray([1.0, np.nan])
        with pytest.raises(ValueError, match="Cannot convert non-finite"):
            arr.astype(int)

    def test_astype_copy_false(self):
        """astype(..., copy=False) still converts the values to the new dtype."""
        # GH#34456 bug caused by using .view instead of .astype in astype_nansafe
        arr = SparseArray([1, 2, 3])

        dtype = SparseDtype(float, 0)

        result = arr.astype(dtype, copy=False)
        expected = SparseArray([1.0, 2.0, 3.0], fill_value=0.0)
        tm.assert_sp_array_equal(result, expected)

    def test_astype_dt64_to_int64(self):
        """datetime64 -> int64 casts match ndarray.astype('int64'), NaT included."""
        # GH#49631 match non-sparse behavior
        values = np.array(["NaT", "2016-01-02", "2016-01-03"], dtype="M8[ns]")

        arr = SparseArray(values)
        result = arr.astype("int64")
        expected = values.astype("int64")
        tm.assert_numpy_array_equal(result, expected)

        # we should also be able to cast to equivalent Sparse[int64]
        dtype_int64 = SparseDtype("int64", np.iinfo(np.int64).min)
        result2 = arr.astype(dtype_int64)
        tm.assert_numpy_array_equal(result2.to_numpy(), expected)

        # GH#50087 we should match the non-sparse behavior regardless of
        #  if we have a fill_value other than NaT
        dtype = SparseDtype("datetime64[ns]", values[1])
        arr3 = SparseArray(values, dtype=dtype)
        result3 = arr3.astype("int64")
        tm.assert_numpy_array_equal(result3, expected)
| @ -0,0 +1,62 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
| class TestSparseArrayConcat: | ||||
|     @pytest.mark.parametrize("kind", ["integer", "block"]) | ||||
|     def test_basic(self, kind): | ||||
|         a = SparseArray([1, 0, 0, 2], kind=kind) | ||||
|         b = SparseArray([1, 0, 2, 2], kind=kind) | ||||
|  | ||||
|         result = SparseArray._concat_same_type([a, b]) | ||||
|         # Can't make any assertions about the sparse index itself | ||||
|         # since we aren't don't merge sparse blocs across arrays | ||||
|         # in to_concat | ||||
|         expected = np.array([1, 2, 1, 2, 2], dtype="int64") | ||||
|         tm.assert_numpy_array_equal(result.sp_values, expected) | ||||
|         assert result.kind == kind | ||||
|  | ||||
|     @pytest.mark.parametrize("kind", ["integer", "block"]) | ||||
|     def test_uses_first_kind(self, kind): | ||||
|         other = "integer" if kind == "block" else "block" | ||||
|         a = SparseArray([1, 0, 0, 2], kind=kind) | ||||
|         b = SparseArray([1, 0, 2, 2], kind=other) | ||||
|  | ||||
|         result = SparseArray._concat_same_type([a, b]) | ||||
|         expected = np.array([1, 2, 1, 2, 2], dtype="int64") | ||||
|         tm.assert_numpy_array_equal(result.sp_values, expected) | ||||
|         assert result.kind == kind | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "other, expected_dtype", | ||||
|     [ | ||||
|         # compatible dtype -> preserve sparse | ||||
|         (pd.Series([3, 4, 5], dtype="int64"), pd.SparseDtype("int64", 0)), | ||||
|         # (pd.Series([3, 4, 5], dtype="Int64"), pd.SparseDtype("int64", 0)), | ||||
|         # incompatible dtype -> Sparse[common dtype] | ||||
|         (pd.Series([1.5, 2.5, 3.5], dtype="float64"), pd.SparseDtype("float64", 0)), | ||||
|         # incompatible dtype -> Sparse[object] dtype | ||||
|         (pd.Series(["a", "b", "c"], dtype=object), pd.SparseDtype(object, 0)), | ||||
|         # categorical with compatible categories -> dtype of the categories | ||||
|         (pd.Series([3, 4, 5], dtype="category"), np.dtype("int64")), | ||||
|         (pd.Series([1.5, 2.5, 3.5], dtype="category"), np.dtype("float64")), | ||||
|         # categorical with incompatible categories -> object dtype | ||||
|         (pd.Series(["a", "b", "c"], dtype="category"), np.dtype(object)), | ||||
|     ], | ||||
| ) | ||||
| def test_concat_with_non_sparse(other, expected_dtype): | ||||
|     # https://github.com/pandas-dev/pandas/issues/34336 | ||||
|     s_sparse = pd.Series([1, 0, 2], dtype=pd.SparseDtype("int64", 0)) | ||||
|  | ||||
|     result = pd.concat([s_sparse, other], ignore_index=True) | ||||
|     expected = pd.Series(list(s_sparse) + list(other)).astype(expected_dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = pd.concat([other, s_sparse], ignore_index=True) | ||||
|     expected = pd.Series(list(other) + list(s_sparse)).astype(expected_dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,285 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.sparse import IntIndex | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     SparseDtype, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
| class TestConstructors: | ||||
|     def test_constructor_dtype(self): | ||||
|         arr = SparseArray([np.nan, 1, 2, np.nan]) | ||||
|         assert arr.dtype == SparseDtype(np.float64, np.nan) | ||||
|         assert arr.dtype.subtype == np.float64 | ||||
|         assert np.isnan(arr.fill_value) | ||||
|  | ||||
|         arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0) | ||||
|         assert arr.dtype == SparseDtype(np.float64, 0) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|         arr = SparseArray([0, 1, 2, 4], dtype=np.float64) | ||||
|         assert arr.dtype == SparseDtype(np.float64, np.nan) | ||||
|         assert np.isnan(arr.fill_value) | ||||
|  | ||||
|         arr = SparseArray([0, 1, 2, 4], dtype=np.int64) | ||||
|         assert arr.dtype == SparseDtype(np.int64, 0) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|         arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64) | ||||
|         assert arr.dtype == SparseDtype(np.int64, 0) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|         arr = SparseArray([0, 1, 2, 4], dtype=None) | ||||
|         assert arr.dtype == SparseDtype(np.int64, 0) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|         arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None) | ||||
|         assert arr.dtype == SparseDtype(np.int64, 0) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|     def test_constructor_dtype_str(self): | ||||
|         result = SparseArray([1, 2, 3], dtype="int") | ||||
|         expected = SparseArray([1, 2, 3], dtype=int) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_sparse_dtype(self): | ||||
|         result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1)) | ||||
|         expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|         assert result.sp_values.dtype == np.dtype("int64") | ||||
|  | ||||
|     def test_constructor_sparse_dtype_str(self): | ||||
|         result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]") | ||||
|         expected = SparseArray([1, 0, 0, 1], dtype=np.int32) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|         assert result.sp_values.dtype == np.dtype("int32") | ||||
|  | ||||
|     def test_constructor_object_dtype(self): | ||||
|         # GH#11856 | ||||
|         arr = SparseArray(["A", "A", np.nan, "B"], dtype=object) | ||||
|         assert arr.dtype == SparseDtype(object) | ||||
|         assert np.isnan(arr.fill_value) | ||||
|  | ||||
|         arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A") | ||||
|         assert arr.dtype == SparseDtype(object, "A") | ||||
|         assert arr.fill_value == "A" | ||||
|  | ||||
|     def test_constructor_object_dtype_bool_fill(self): | ||||
|         # GH#17574 | ||||
|         data = [False, 0, 100.0, 0.0] | ||||
|         arr = SparseArray(data, dtype=object, fill_value=False) | ||||
|         assert arr.dtype == SparseDtype(object, False) | ||||
|         assert arr.fill_value is False | ||||
|         arr_expected = np.array(data, dtype=object) | ||||
|         it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) | ||||
|         assert np.fromiter(it, dtype=np.bool_).all() | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int]) | ||||
|     def test_constructor_na_dtype(self, dtype): | ||||
|         with pytest.raises(ValueError, match="Cannot convert"): | ||||
|             SparseArray([0, 1, np.nan], dtype=dtype) | ||||
|  | ||||
|     def test_constructor_warns_when_losing_timezone(self): | ||||
|         # GH#32501 warn when losing timezone information | ||||
|         dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") | ||||
|  | ||||
|         expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]")) | ||||
|  | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             result = SparseArray(dti) | ||||
|  | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         with tm.assert_produces_warning(UserWarning): | ||||
|             result = SparseArray(pd.Series(dti)) | ||||
|  | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|     def test_constructor_spindex_dtype(self): | ||||
|         arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) | ||||
|         # TODO: actionable? | ||||
|         # XXX: Behavior change: specifying SparseIndex no longer changes the | ||||
|         # fill_value | ||||
|         expected = SparseArray([0, 1, 2, 0], kind="integer") | ||||
|         tm.assert_sp_array_equal(arr, expected) | ||||
|         assert arr.dtype == SparseDtype(np.int64) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|         arr = SparseArray( | ||||
|             data=[1, 2, 3], | ||||
|             sparse_index=IntIndex(4, [1, 2, 3]), | ||||
|             dtype=np.int64, | ||||
|             fill_value=0, | ||||
|         ) | ||||
|         exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0) | ||||
|         tm.assert_sp_array_equal(arr, exp) | ||||
|         assert arr.dtype == SparseDtype(np.int64) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|         arr = SparseArray( | ||||
|             data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64 | ||||
|         ) | ||||
|         exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64) | ||||
|         tm.assert_sp_array_equal(arr, exp) | ||||
|         assert arr.dtype == SparseDtype(np.int64) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|         arr = SparseArray( | ||||
|             data=[1, 2, 3], | ||||
|             sparse_index=IntIndex(4, [1, 2, 3]), | ||||
|             dtype=None, | ||||
|             fill_value=0, | ||||
|         ) | ||||
|         exp = SparseArray([0, 1, 2, 3], dtype=None) | ||||
|         tm.assert_sp_array_equal(arr, exp) | ||||
|         assert arr.dtype == SparseDtype(np.int64) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|     @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])]) | ||||
|     def test_constructor_spindex_dtype_scalar(self, sparse_index): | ||||
|         # scalar input | ||||
|         msg = "Constructing SparseArray with scalar data is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None) | ||||
|         exp = SparseArray([1], dtype=None) | ||||
|         tm.assert_sp_array_equal(arr, exp) | ||||
|         assert arr.dtype == SparseDtype(np.int64) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None) | ||||
|         exp = SparseArray([1], dtype=None) | ||||
|         tm.assert_sp_array_equal(arr, exp) | ||||
|         assert arr.dtype == SparseDtype(np.int64) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|     def test_constructor_spindex_dtype_scalar_broadcasts(self): | ||||
|         arr = SparseArray( | ||||
|             data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None | ||||
|         ) | ||||
|         exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None) | ||||
|         tm.assert_sp_array_equal(arr, exp) | ||||
|         assert arr.dtype == SparseDtype(np.int64) | ||||
|         assert arr.fill_value == 0 | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data, fill_value", | ||||
|         [ | ||||
|             (np.array([1, 2]), 0), | ||||
|             (np.array([1.0, 2.0]), np.nan), | ||||
|             ([True, False], False), | ||||
|             ([pd.Timestamp("2017-01-01")], pd.NaT), | ||||
|         ], | ||||
|     ) | ||||
|     def test_constructor_inferred_fill_value(self, data, fill_value): | ||||
|         result = SparseArray(data).fill_value | ||||
|  | ||||
|         if isna(fill_value): | ||||
|             assert isna(result) | ||||
|         else: | ||||
|             assert result == fill_value | ||||
|  | ||||
|     @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) | ||||
|     @pytest.mark.parametrize("size", [0, 10]) | ||||
|     def test_from_spmatrix(self, size, format): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         mat = sp_sparse.random(size, 1, density=0.5, format=format) | ||||
|         result = SparseArray.from_spmatrix(mat) | ||||
|  | ||||
|         result = np.asarray(result) | ||||
|         expected = mat.toarray().ravel() | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) | ||||
|     def test_from_spmatrix_including_explicit_zero(self, format): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         mat = sp_sparse.random(10, 1, density=0.5, format=format) | ||||
|         mat.data[0] = 0 | ||||
|         result = SparseArray.from_spmatrix(mat) | ||||
|  | ||||
|         result = np.asarray(result) | ||||
|         expected = mat.toarray().ravel() | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_from_spmatrix_raises(self): | ||||
|         sp_sparse = pytest.importorskip("scipy.sparse") | ||||
|  | ||||
|         mat = sp_sparse.eye(5, 4, format="csc") | ||||
|  | ||||
|         with pytest.raises(ValueError, match="not '4'"): | ||||
|             SparseArray.from_spmatrix(mat) | ||||
|  | ||||
|     def test_constructor_from_too_large_array(self): | ||||
|         with pytest.raises(TypeError, match="expected dimension <= 1 data"): | ||||
|             SparseArray(np.arange(10).reshape((2, 5))) | ||||
|  | ||||
|     def test_constructor_from_sparse(self): | ||||
|         zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) | ||||
|         res = SparseArray(zarr) | ||||
|         assert res.fill_value == 0 | ||||
|         tm.assert_almost_equal(res.sp_values, zarr.sp_values) | ||||
|  | ||||
|     def test_constructor_copy(self): | ||||
|         arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) | ||||
|         arr = SparseArray(arr_data) | ||||
|  | ||||
|         cp = SparseArray(arr, copy=True) | ||||
|         cp.sp_values[:3] = 0 | ||||
|         assert not (arr.sp_values[:3] == 0).any() | ||||
|  | ||||
|         not_copy = SparseArray(arr) | ||||
|         not_copy.sp_values[:3] = 0 | ||||
|         assert (arr.sp_values[:3] == 0).all() | ||||
|  | ||||
|     def test_constructor_bool(self): | ||||
|         # GH#10648 | ||||
|         data = np.array([False, False, True, True, False, False]) | ||||
|         arr = SparseArray(data, fill_value=False, dtype=bool) | ||||
|  | ||||
|         assert arr.dtype == SparseDtype(bool) | ||||
|         tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True])) | ||||
|         # Behavior change: np.asarray densifies. | ||||
|         # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) | ||||
|         tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32)) | ||||
|  | ||||
|         dense = arr.to_dense() | ||||
|         assert dense.dtype == bool | ||||
|         tm.assert_numpy_array_equal(dense, data) | ||||
|  | ||||
|     def test_constructor_bool_fill_value(self): | ||||
|         arr = SparseArray([True, False, True], dtype=None) | ||||
|         assert arr.dtype == SparseDtype(np.bool_) | ||||
|         assert not arr.fill_value | ||||
|  | ||||
|         arr = SparseArray([True, False, True], dtype=np.bool_) | ||||
|         assert arr.dtype == SparseDtype(np.bool_) | ||||
|         assert not arr.fill_value | ||||
|  | ||||
|         arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True) | ||||
|         assert arr.dtype == SparseDtype(np.bool_, True) | ||||
|         assert arr.fill_value | ||||
|  | ||||
|     def test_constructor_float32(self): | ||||
|         # GH#10648 | ||||
|         data = np.array([1.0, np.nan, 3], dtype=np.float32) | ||||
|         arr = SparseArray(data, dtype=np.float32) | ||||
|  | ||||
|         assert arr.dtype == SparseDtype(np.float32) | ||||
|         tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32)) | ||||
|         # Behavior change: np.asarray densifies. | ||||
|         # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) | ||||
|         tm.assert_numpy_array_equal( | ||||
|             arr.sp_index.indices, np.array([0, 2], dtype=np.int32) | ||||
|         ) | ||||
|  | ||||
|         dense = arr.to_dense() | ||||
|         assert dense.dtype == np.float32 | ||||
|         tm.assert_numpy_array_equal(dense, data) | ||||
| @ -0,0 +1,224 @@ | ||||
| import re | ||||
| import warnings | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import SparseDtype | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype, fill_value", | ||||
|     [ | ||||
|         ("int", 0), | ||||
|         ("float", np.nan), | ||||
|         ("bool", False), | ||||
|         ("object", np.nan), | ||||
|         ("datetime64[ns]", np.datetime64("NaT", "ns")), | ||||
|         ("timedelta64[ns]", np.timedelta64("NaT", "ns")), | ||||
|     ], | ||||
| ) | ||||
| def test_inferred_dtype(dtype, fill_value): | ||||
|     sparse_dtype = SparseDtype(dtype) | ||||
|     result = sparse_dtype.fill_value | ||||
|     if pd.isna(fill_value): | ||||
|         assert pd.isna(result) and type(result) == type(fill_value) | ||||
|     else: | ||||
|         assert result == fill_value | ||||
|  | ||||
|  | ||||
| def test_from_sparse_dtype(): | ||||
|     dtype = SparseDtype("float", 0) | ||||
|     result = SparseDtype(dtype) | ||||
|     assert result.fill_value == 0 | ||||
|  | ||||
|  | ||||
| def test_from_sparse_dtype_fill_value(): | ||||
|     dtype = SparseDtype("int", 1) | ||||
|     result = SparseDtype(dtype, fill_value=2) | ||||
|     expected = SparseDtype("int", 2) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype, fill_value", | ||||
|     [ | ||||
|         ("int", None), | ||||
|         ("float", None), | ||||
|         ("bool", None), | ||||
|         ("object", None), | ||||
|         ("datetime64[ns]", None), | ||||
|         ("timedelta64[ns]", None), | ||||
|         ("int", np.nan), | ||||
|         ("float", 0), | ||||
|     ], | ||||
| ) | ||||
| def test_equal(dtype, fill_value): | ||||
|     a = SparseDtype(dtype, fill_value) | ||||
|     b = SparseDtype(dtype, fill_value) | ||||
|     assert a == b | ||||
|     assert b == a | ||||
|  | ||||
|  | ||||
| def test_nans_equal(): | ||||
|     a = SparseDtype(float, float("nan")) | ||||
|     b = SparseDtype(float, np.nan) | ||||
|     assert a == b | ||||
|     assert b == a | ||||
|  | ||||
|  | ||||
| # Build the (a, b) pairs passed to the ``test_not_equal`` parametrization. | ||||
| # They are constructed inside ``catch_warnings`` with a filter that ignores | ||||
| # FutureWarnings matching ``msg``; the filter is installed before any | ||||
| # SparseDtype in the list is created. | ||||
| with warnings.catch_warnings(): | ||||
|     msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated" | ||||
|     warnings.filterwarnings("ignore", msg, category=FutureWarning) | ||||
|  | ||||
|     tups = [ | ||||
|         (SparseDtype("float64"), SparseDtype("float32")), | ||||
|         (SparseDtype("float64"), SparseDtype("float64", 0)), | ||||
|         (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)), | ||||
|         (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)), | ||||
|         (SparseDtype("float64"), np.dtype("float64")), | ||||
|     ] | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "a, b", | ||||
|     tups, | ||||
| ) | ||||
| def test_not_equal(a, b): | ||||
|     """Each ``(a, b)`` pair from the module-level ``tups`` compares unequal.""" | ||||
|     assert a != b | ||||
|  | ||||
|  | ||||
| def test_construct_from_string_raises(): | ||||
|     with pytest.raises( | ||||
|         TypeError, match="Cannot construct a 'SparseDtype' from 'not a dtype'" | ||||
|     ): | ||||
|         SparseDtype.construct_from_string("not a dtype") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype, expected", | ||||
|     [ | ||||
|         (SparseDtype(int), True), | ||||
|         (SparseDtype(float), True), | ||||
|         (SparseDtype(bool), True), | ||||
|         (SparseDtype(object), False), | ||||
|         (SparseDtype(str), False), | ||||
|     ], | ||||
| ) | ||||
| def test_is_numeric(dtype, expected): | ||||
|     assert dtype._is_numeric is expected | ||||
|  | ||||
|  | ||||
| def test_str_uses_object(): | ||||
|     result = SparseDtype(str).subtype | ||||
|     assert result == np.dtype("object") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "string, expected", | ||||
|     [ | ||||
|         ("Sparse[float64]", SparseDtype(np.dtype("float64"))), | ||||
|         ("Sparse[float32]", SparseDtype(np.dtype("float32"))), | ||||
|         ("Sparse[int]", SparseDtype(np.dtype("int"))), | ||||
|         ("Sparse[str]", SparseDtype(np.dtype("str"))), | ||||
|         ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))), | ||||
|         ("Sparse", SparseDtype(np.dtype("float"), np.nan)), | ||||
|     ], | ||||
| ) | ||||
| def test_construct_from_string(string, expected): | ||||
|     result = SparseDtype.construct_from_string(string) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "a, b, expected", | ||||
|     [ | ||||
|         (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True), | ||||
|         (SparseDtype(int, 0), SparseDtype(int, 0), True), | ||||
|         (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True), | ||||
|         (SparseDtype(float, 0), SparseDtype(float, np.nan), False), | ||||
|         (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False), | ||||
|     ], | ||||
| ) | ||||
| def test_hash_equal(a, b, expected): | ||||
|     result = a == b | ||||
|     assert result is expected | ||||
|  | ||||
|     result = hash(a) == hash(b) | ||||
|     assert result is expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "string, expected", | ||||
|     [ | ||||
|         ("Sparse[int]", "int"), | ||||
|         ("Sparse[int, 0]", "int"), | ||||
|         ("Sparse[int64]", "int64"), | ||||
|         ("Sparse[int64, 0]", "int64"), | ||||
|         ("Sparse[datetime64[ns], 0]", "datetime64[ns]"), | ||||
|     ], | ||||
| ) | ||||
| def test_parse_subtype(string, expected): | ||||
|     subtype, _ = SparseDtype._parse_subtype(string) | ||||
|     assert subtype == expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "string", ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"] | ||||
| ) | ||||
| def test_construct_from_string_fill_value_raises(string): | ||||
|     with pytest.raises(TypeError, match="fill_value in the string is not"): | ||||
|         SparseDtype.construct_from_string(string) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "original, dtype, expected", | ||||
|     [ | ||||
|         (SparseDtype(int, 0), float, SparseDtype(float, 0.0)), | ||||
|         (SparseDtype(int, 1), float, SparseDtype(float, 1.0)), | ||||
|         (SparseDtype(int, 1), np.str_, SparseDtype(object, "1")), | ||||
|         (SparseDtype(float, 1.5), int, SparseDtype(int, 1)), | ||||
|     ], | ||||
| ) | ||||
| def test_update_dtype(original, dtype, expected): | ||||
|     result = original.update_dtype(dtype) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "original, dtype, expected_error_msg", | ||||
|     [ | ||||
|         ( | ||||
|             SparseDtype(float, np.nan), | ||||
|             int, | ||||
|             re.escape("Cannot convert non-finite values (NA or inf) to integer"), | ||||
|         ), | ||||
|         ( | ||||
|             SparseDtype(str, "abc"), | ||||
|             int, | ||||
|             r"invalid literal for int\(\) with base 10: ('abc'|np\.str_\('abc'\))", | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_update_dtype_raises(original, dtype, expected_error_msg): | ||||
|     with pytest.raises(ValueError, match=expected_error_msg): | ||||
|         original.update_dtype(dtype) | ||||
|  | ||||
|  | ||||
| def test_repr(): | ||||
|     # GH-34352 | ||||
|     result = str(SparseDtype("int64", fill_value=0)) | ||||
|     expected = "Sparse[int64, 0]" | ||||
|     assert result == expected | ||||
|  | ||||
|     result = str(SparseDtype(object, fill_value="0")) | ||||
|     expected = "Sparse[object, '0']" | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_sparse_dtype_subtype_must_be_numpy_dtype(): | ||||
|     # GH#53160 | ||||
|     msg = "SparseDtype subtype must be a numpy dtype" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         SparseDtype("category", fill_value="c") | ||||
| @ -0,0 +1,302 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import SparseDtype | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def arr_data(): | ||||
|     return np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def arr(arr_data): | ||||
|     return SparseArray(arr_data) | ||||
|  | ||||
|  | ||||
| class TestGetitem: | ||||
|     def test_getitem(self, arr): | ||||
|         dense = arr.to_dense() | ||||
|         for i, value in enumerate(arr): | ||||
|             tm.assert_almost_equal(value, dense[i]) | ||||
|             tm.assert_almost_equal(arr[-i], dense[-i]) | ||||
|  | ||||
|     def test_getitem_arraylike_mask(self, arr): | ||||
|         arr = SparseArray([0, 1, 2]) | ||||
|         result = arr[[True, False, True]] | ||||
|         expected = SparseArray([0, 2]) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "slc", | ||||
|         [ | ||||
|             np.s_[:], | ||||
|             np.s_[1:10], | ||||
|             np.s_[1:100], | ||||
|             np.s_[10:1], | ||||
|             np.s_[:-3], | ||||
|             np.s_[-5:-4], | ||||
|             np.s_[:-12], | ||||
|             np.s_[-12:], | ||||
|             np.s_[2:], | ||||
|             np.s_[2::3], | ||||
|             np.s_[::2], | ||||
|             np.s_[::-1], | ||||
|             np.s_[::-2], | ||||
|             np.s_[1:6:2], | ||||
|             np.s_[:-6:-2], | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []] | ||||
|     ) | ||||
|     def test_getslice(self, slc, as_dense): | ||||
|         as_dense = np.array(as_dense) | ||||
|         arr = SparseArray(as_dense) | ||||
|  | ||||
|         result = arr[slc] | ||||
|         expected = SparseArray(as_dense[slc]) | ||||
|  | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|     def test_getslice_tuple(self): | ||||
|         dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) | ||||
|  | ||||
|         sparse = SparseArray(dense) | ||||
|         res = sparse[(slice(4, None),)] | ||||
|         exp = SparseArray(dense[4:]) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         sparse = SparseArray(dense, fill_value=0) | ||||
|         res = sparse[(slice(4, None),)] | ||||
|         exp = SparseArray(dense[4:], fill_value=0) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         msg = "too many indices for array" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse[4:, :] | ||||
|  | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             # check numpy compat | ||||
|             dense[4:, :] | ||||
|  | ||||
|     def test_boolean_slice_empty(self): | ||||
|         arr = SparseArray([0, 1, 2]) | ||||
|         res = arr[[False, False, False]] | ||||
|         assert res.dtype == arr.dtype | ||||
|  | ||||
|     def test_getitem_bool_sparse_array(self, arr): | ||||
|         # GH 23122 | ||||
|         spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True) | ||||
|         exp = SparseArray([np.nan, 2, np.nan, 5, 6]) | ||||
|         tm.assert_sp_array_equal(arr[spar_bool], exp) | ||||
|  | ||||
|         spar_bool = ~spar_bool | ||||
|         res = arr[spar_bool] | ||||
|         exp = SparseArray([np.nan, 1, 3, 4, np.nan]) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|         spar_bool = SparseArray( | ||||
|             [False, True, np.nan] * 3, dtype=np.bool_, fill_value=np.nan | ||||
|         ) | ||||
|         res = arr[spar_bool] | ||||
|         exp = SparseArray([np.nan, 3, 5]) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|     def test_getitem_bool_sparse_array_as_comparison(self): | ||||
|         # GH 45110 | ||||
|         arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan) | ||||
|         res = arr[arr > 2] | ||||
|         exp = SparseArray([3.0, 4.0], fill_value=np.nan) | ||||
|         tm.assert_sp_array_equal(res, exp) | ||||
|  | ||||
|     def test_get_item(self, arr): | ||||
|         zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) | ||||
|  | ||||
|         assert np.isnan(arr[1]) | ||||
|         assert arr[2] == 1 | ||||
|         assert arr[7] == 5 | ||||
|  | ||||
|         assert zarr[0] == 0 | ||||
|         assert zarr[2] == 1 | ||||
|         assert zarr[7] == 5 | ||||
|  | ||||
|         errmsg = "must be an integer between -10 and 10" | ||||
|  | ||||
|         with pytest.raises(IndexError, match=errmsg): | ||||
|             arr[11] | ||||
|  | ||||
|         with pytest.raises(IndexError, match=errmsg): | ||||
|             arr[-11] | ||||
|  | ||||
|         assert arr[-1] == arr[len(arr) - 1] | ||||
|  | ||||
|  | ||||
| class TestSetitem: | ||||
|     def test_set_item(self, arr_data): | ||||
|         arr = SparseArray(arr_data).copy() | ||||
|  | ||||
|         def setitem(): | ||||
|             arr[5] = 3 | ||||
|  | ||||
|         def setslice(): | ||||
|             arr[1:5] = 2 | ||||
|  | ||||
|         with pytest.raises(TypeError, match="assignment via setitem"): | ||||
|             setitem() | ||||
|  | ||||
|         with pytest.raises(TypeError, match="assignment via setitem"): | ||||
|             setslice() | ||||
|  | ||||
|  | ||||
| class TestTake: | ||||
|     def test_take_scalar_raises(self, arr): | ||||
|         msg = "'indices' must be an array, not a scalar '2'." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr.take(2) | ||||
|  | ||||
|     def test_take(self, arr_data, arr): | ||||
|         exp = SparseArray(np.take(arr_data, [2, 3])) | ||||
|         tm.assert_sp_array_equal(arr.take([2, 3]), exp) | ||||
|  | ||||
|         exp = SparseArray(np.take(arr_data, [0, 1, 2])) | ||||
|         tm.assert_sp_array_equal(arr.take([0, 1, 2]), exp) | ||||
|  | ||||
|     def test_take_all_empty(self): | ||||
|         sparse = pd.array([0, 0], dtype=SparseDtype("int64")) | ||||
|         result = sparse.take([0, 1], allow_fill=True, fill_value=np.nan) | ||||
|         tm.assert_sp_array_equal(sparse, result) | ||||
|  | ||||
|     def test_take_different_fill_value(self): | ||||
|         # Take with a different fill value shouldn't overwrite the original | ||||
|         sparse = pd.array([0.0], dtype=SparseDtype("float64", fill_value=0.0)) | ||||
|         result = sparse.take([0, -1], allow_fill=True, fill_value=np.nan) | ||||
|         expected = pd.array([0, np.nan], dtype=sparse.dtype) | ||||
|         tm.assert_sp_array_equal(expected, result) | ||||
|  | ||||
|     def test_take_fill_value(self): | ||||
|         data = np.array([1, np.nan, 0, 3, 0]) | ||||
|         sparse = SparseArray(data, fill_value=0) | ||||
|  | ||||
|         exp = SparseArray(np.take(data, [0]), fill_value=0) | ||||
|         tm.assert_sp_array_equal(sparse.take([0]), exp) | ||||
|  | ||||
|         exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0) | ||||
|         tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp) | ||||
|  | ||||
|     def test_take_negative(self, arr_data, arr): | ||||
|         exp = SparseArray(np.take(arr_data, [-1])) | ||||
|         tm.assert_sp_array_equal(arr.take([-1]), exp) | ||||
|  | ||||
|         exp = SparseArray(np.take(arr_data, [-4, -3, -2])) | ||||
|         tm.assert_sp_array_equal(arr.take([-4, -3, -2]), exp) | ||||
|  | ||||
|     def test_bad_take(self, arr): | ||||
|         with pytest.raises(IndexError, match="bounds"): | ||||
|             arr.take([11]) | ||||
|  | ||||
|     def test_take_filling(self): | ||||
|         # similar tests as GH 12631 | ||||
|         sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4]) | ||||
|         result = sparse.take(np.array([1, 0, -1])) | ||||
|         expected = SparseArray([np.nan, np.nan, 4]) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         # TODO: actionable? | ||||
|         # XXX: test change: fill_value=True -> allow_fill=True | ||||
|         result = sparse.take(np.array([1, 0, -1]), allow_fill=True) | ||||
|         expected = SparseArray([np.nan, np.nan, np.nan]) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         # allow_fill=False | ||||
|         result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) | ||||
|         expected = SparseArray([np.nan, np.nan, 4]) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         msg = "Invalid value in 'indices'" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             sparse.take(np.array([1, 0, -2]), allow_fill=True) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             sparse.take(np.array([1, 0, -5]), allow_fill=True) | ||||
|  | ||||
|         msg = "out of bounds value in 'indices'" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, -6])) | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, 5])) | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, 5]), allow_fill=True) | ||||
|  | ||||
|     def test_take_filling_fill_value(self): | ||||
|         # same tests as GH#12631 | ||||
|         sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0) | ||||
|         result = sparse.take(np.array([1, 0, -1])) | ||||
|         expected = SparseArray([0, np.nan, 4], fill_value=0) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         # fill_value | ||||
|         result = sparse.take(np.array([1, 0, -1]), allow_fill=True) | ||||
|         # TODO: actionable? | ||||
|         # XXX: behavior change. | ||||
|         # the old way of filling self.fill_value doesn't follow EA rules. | ||||
|         # It's supposed to be self.dtype.na_value (nan in this case) | ||||
|         expected = SparseArray([0, np.nan, np.nan], fill_value=0) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         # allow_fill=False | ||||
|         result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) | ||||
|         expected = SparseArray([0, np.nan, 4], fill_value=0) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         msg = "Invalid value in 'indices'." | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             sparse.take(np.array([1, 0, -2]), allow_fill=True) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             sparse.take(np.array([1, 0, -5]), allow_fill=True) | ||||
|  | ||||
|         msg = "out of bounds value in 'indices'" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, -6])) | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, 5])) | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, 5]), fill_value=True) | ||||
|  | ||||
|     @pytest.mark.parametrize("kind", ["block", "integer"]) | ||||
|     def test_take_filling_all_nan(self, kind): | ||||
|         sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind) | ||||
|         result = sparse.take(np.array([1, 0, -1])) | ||||
|         expected = SparseArray([np.nan, np.nan, np.nan], kind=kind) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         result = sparse.take(np.array([1, 0, -1]), fill_value=True) | ||||
|         expected = SparseArray([np.nan, np.nan, np.nan], kind=kind) | ||||
|         tm.assert_sp_array_equal(result, expected) | ||||
|  | ||||
|         msg = "out of bounds value in 'indices'" | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, -6])) | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, 5])) | ||||
|         with pytest.raises(IndexError, match=msg): | ||||
|             sparse.take(np.array([1, 5]), fill_value=True) | ||||
|  | ||||
|  | ||||
class TestWhere:
    """Tests for ``SparseArray._where`` and ``Series.where`` on sparse data."""

    def test_where_retain_fill_value(self):
        """The result of ``where`` keeps the array's ``fill_value``."""
        # GH#45691 don't lose fill_value on _where
        mask = np.array([True, False])
        sparse = SparseArray([np.nan, 1.0], fill_value=0)
        expected = SparseArray([1, 1.0], fill_value=0)

        # Array-level entry point.
        array_result = sparse._where(~mask, 1)
        tm.assert_sp_array_equal(array_result, expected)

        # Series-level entry point, compared against the same expectation.
        series_result = pd.Series(sparse).where(~mask, 1)
        tm.assert_series_equal(series_result, pd.Series(expected))
| @ -0,0 +1,551 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas._libs.sparse as splib | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.sparse import ( | ||||
|     BlockIndex, | ||||
|     IntIndex, | ||||
|     make_sparse_index, | ||||
| ) | ||||
|  | ||||
|  | ||||
@pytest.fixture
def test_length():
    """Return the length passed to the ``BlockIndex`` objects in these tests."""
    total_length = 20
    return total_length
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        # Each value is [xloc, xlen, yloc, ylen, eloc, elen].
        pytest.param(
            [[0, 7, 15], [3, 5, 5], [2, 9, 14], [2, 3, 5], [2, 9, 15], [1, 3, 4]],
            id="plain_case",
        ),
        pytest.param([[0, 5], [4, 4], [1], [4], [1], [3]], id="delete_blocks"),
        pytest.param([[0], [10], [0, 5], [3, 7], [0, 5], [3, 5]], id="split_blocks"),
        pytest.param([[10], [5], [0, 12], [5, 3], [12], [3]], id="skip_block"),
        pytest.param([[0, 10], [4, 6], [5, 17], [4, 2], [], []], id="no_intersect"),
        pytest.param([[0], [5], [], [], [], []], id="one_empty"),
    ],
)
def cases(request):
    """Return one ``[xloc, xlen, yloc, ylen, eloc, elen]`` scenario.

    Consumers unpack the six lists as block locations/lengths for two
    indexes ``x`` and ``y`` plus the expected result ``e``.
    """
    return request.param
|  | ||||
|  | ||||
| class TestSparseIndexUnion: | ||||
|     @pytest.mark.parametrize( | ||||
|         "xloc, xlen, yloc, ylen, eloc, elen", | ||||
|         [ | ||||
|             [[0], [5], [5], [4], [0], [9]], | ||||
|             [[0, 10], [5, 5], [2, 17], [5, 2], [0, 10, 17], [7, 5, 2]], | ||||
|             [[1], [5], [3], [5], [1], [7]], | ||||
|             [[2, 10], [4, 4], [4], [8], [2], [12]], | ||||
|             [[0, 5], [3, 5], [0], [7], [0], [10]], | ||||
|             [[2, 10], [4, 4], [4, 13], [8, 4], [2], [15]], | ||||
|             [[2], [15], [4, 9, 14], [3, 2, 2], [2], [15]], | ||||
|             [[0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]], | ||||
|         ], | ||||
|     ) | ||||
|     def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen, test_length): | ||||
|         # Case 1 | ||||
|         # x: ---- | ||||
|         # y:     ---- | ||||
|         # r: -------- | ||||
|         # Case 2 | ||||
|         # x: -----     ----- | ||||
|         # y:   -----          -- | ||||
|         # Case 3 | ||||
|         # x: ------ | ||||
|         # y:    ------- | ||||
|         # r: ---------- | ||||
|         # Case 4 | ||||
|         # x: ------  ----- | ||||
|         # y:    ------- | ||||
|         # r: ------------- | ||||
|         # Case 5 | ||||
|         # x: ---  ----- | ||||
|         # y: ------- | ||||
|         # r: ------------- | ||||
|         # Case 6 | ||||
|         # x: ------  ----- | ||||
|         # y:    -------  --- | ||||
|         # r: ------------- | ||||
|         # Case 7 | ||||
|         # x: ---------------------- | ||||
|         # y:   ----  ----   --- | ||||
|         # r: ---------------------- | ||||
|         # Case 8 | ||||
|         # x: ----       --- | ||||
|         # y:       ---       --- | ||||
|         xindex = BlockIndex(test_length, xloc, xlen) | ||||
|         yindex = BlockIndex(test_length, yloc, ylen) | ||||
|         bresult = xindex.make_union(yindex) | ||||
|         assert isinstance(bresult, BlockIndex) | ||||
|         tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32)) | ||||
|         tm.assert_numpy_array_equal(bresult.blengths, np.array(elen, dtype=np.int32)) | ||||
|  | ||||
|         ixindex = xindex.to_int_index() | ||||
|         iyindex = yindex.to_int_index() | ||||
|         iresult = ixindex.make_union(iyindex) | ||||
|         assert isinstance(iresult, IntIndex) | ||||
|         tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices) | ||||
|  | ||||
|     def test_int_index_make_union(self): | ||||
|         a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32)) | ||||
|         b = IntIndex(5, np.array([0, 2], dtype=np.int32)) | ||||
|         res = a.make_union(b) | ||||
|         exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32)) | ||||
|         assert res.equals(exp) | ||||
|  | ||||
|         a = IntIndex(5, np.array([], dtype=np.int32)) | ||||
|         b = IntIndex(5, np.array([0, 2], dtype=np.int32)) | ||||
|         res = a.make_union(b) | ||||
|         exp = IntIndex(5, np.array([0, 2], np.int32)) | ||||
|         assert res.equals(exp) | ||||
|  | ||||
|         a = IntIndex(5, np.array([], dtype=np.int32)) | ||||
|         b = IntIndex(5, np.array([], dtype=np.int32)) | ||||
|         res = a.make_union(b) | ||||
|         exp = IntIndex(5, np.array([], np.int32)) | ||||
|         assert res.equals(exp) | ||||
|  | ||||
|         a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) | ||||
|         b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) | ||||
|         res = a.make_union(b) | ||||
|         exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32)) | ||||
|         assert res.equals(exp) | ||||
|  | ||||
|         a = IntIndex(5, np.array([0, 1], dtype=np.int32)) | ||||
|         b = IntIndex(4, np.array([0, 1], dtype=np.int32)) | ||||
|  | ||||
|         msg = "Indices must reference same underlying length" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             a.make_union(b) | ||||
|  | ||||
|  | ||||
| class TestSparseIndexIntersect: | ||||
|     @td.skip_if_windows | ||||
|     def test_intersect(self, cases, test_length): | ||||
|         xloc, xlen, yloc, ylen, eloc, elen = cases | ||||
|         xindex = BlockIndex(test_length, xloc, xlen) | ||||
|         yindex = BlockIndex(test_length, yloc, ylen) | ||||
|         expected = BlockIndex(test_length, eloc, elen) | ||||
|         longer_index = BlockIndex(test_length + 1, yloc, ylen) | ||||
|  | ||||
|         result = xindex.intersect(yindex) | ||||
|         assert result.equals(expected) | ||||
|         result = xindex.to_int_index().intersect(yindex.to_int_index()) | ||||
|         assert result.equals(expected.to_int_index()) | ||||
|  | ||||
|         msg = "Indices must reference same underlying length" | ||||
|         with pytest.raises(Exception, match=msg): | ||||
|             xindex.intersect(longer_index) | ||||
|         with pytest.raises(Exception, match=msg): | ||||
|             xindex.to_int_index().intersect(longer_index.to_int_index()) | ||||
|  | ||||
|     def test_intersect_empty(self): | ||||
|         xindex = IntIndex(4, np.array([], dtype=np.int32)) | ||||
|         yindex = IntIndex(4, np.array([2, 3], dtype=np.int32)) | ||||
|         assert xindex.intersect(yindex).equals(xindex) | ||||
|         assert yindex.intersect(xindex).equals(xindex) | ||||
|  | ||||
|         xindex = xindex.to_block_index() | ||||
|         yindex = yindex.to_block_index() | ||||
|         assert xindex.intersect(yindex).equals(xindex) | ||||
|         assert yindex.intersect(xindex).equals(xindex) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "case", | ||||
|         [ | ||||
|             # Argument 2 to "IntIndex" has incompatible type "ndarray[Any, | ||||
|             # dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]" | ||||
|             IntIndex(5, np.array([1, 2], dtype=np.int32)),  # type: ignore[arg-type] | ||||
|             IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),  # type: ignore[arg-type] | ||||
|             IntIndex(0, np.array([], dtype=np.int32)),  # type: ignore[arg-type] | ||||
|             IntIndex(5, np.array([], dtype=np.int32)),  # type: ignore[arg-type] | ||||
|         ], | ||||
|     ) | ||||
|     def test_intersect_identical(self, case): | ||||
|         assert case.intersect(case).equals(case) | ||||
|         case = case.to_block_index() | ||||
|         assert case.intersect(case).equals(case) | ||||
|  | ||||
|  | ||||
class TestSparseIndexCommon:
    """Tests shared by both sparse index kinds (``integer`` and ``block``)."""

    def test_int_internal(self):
        """``kind="integer"`` yields an ``IntIndex`` holding the given positions."""
        for positions in ([2, 3], [], [0, 1, 2, 3]):
            idx = make_sparse_index(
                4, np.array(positions, dtype=np.int32), kind="integer"
            )
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(positions)
            tm.assert_numpy_array_equal(
                idx.indices, np.array(positions, dtype=np.int32)
            )

    def test_block_internal(self):
        """``kind="block"`` yields a ``BlockIndex`` with the expected blocks."""
        # (positions, expected npoints, expected blocs, expected blengths)
        scenarios = [
            ([2, 3], 2, [2], [2]),
            ([], 0, [], []),
            ([0, 1, 2, 3], 4, [0], [4]),
            ([0, 2, 3], 3, [0, 2], [1, 2]),
        ]
        for positions, npoints, blocs, blengths in scenarios:
            idx = make_sparse_index(
                4, np.array(positions, dtype=np.int32), kind="block"
            )
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == npoints
            tm.assert_numpy_array_equal(idx.blocs, np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(
                idx.blengths, np.array(blengths, dtype=np.int32)
            )

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_lookup(self, kind):
        """``lookup`` returns the expected value for each key, -1 when absent."""
        # positions -> expected lookup results for the keys -1, 0, 1, 2, 3, 4
        scenarios = [
            ([2, 3], [-1, -1, -1, 0, 1, -1]),
            ([], [-1, -1, -1, -1, -1, -1]),
            ([0, 1, 2, 3], [-1, 0, 1, 2, 3, -1]),
            ([0, 2, 3], [-1, 0, -1, 1, 2, -1]),
        ]
        for positions, expected_results in scenarios:
            idx = make_sparse_index(4, np.array(positions, dtype=np.int32), kind=kind)
            for key, expected in zip(range(-1, 5), expected_results):
                assert idx.lookup(key) == expected

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_lookup_array(self, kind):
        """``lookup_array`` returns the per-key results as an int32 array."""
        # positions -> list of (queried keys, expected results)
        scenarios = [
            (
                [2, 3],
                [([-1, 0, 2], [-1, -1, 0]), ([4, 2, 1, 3], [-1, 0, -1, 1])],
            ),
            (
                [],
                [([-1, 0, 2, 4], [-1, -1, -1, -1])],
            ),
            (
                [0, 1, 2, 3],
                [([-1, 0, 2], [-1, 0, 2]), ([4, 2, 1, 3], [-1, 2, 1, 3])],
            ),
            (
                [0, 2, 3],
                [([2, 1, 3, 0], [1, -1, 2, 0]), ([1, 4, 2, 5], [-1, -1, 1, -1])],
            ),
        ]
        for positions, queries in scenarios:
            idx = make_sparse_index(4, np.array(positions, dtype=np.int32), kind=kind)
            for keys, expected in queries:
                res = idx.lookup_array(np.array(keys, dtype=np.int32))
                exp = np.array(expected, dtype=np.int32)
                tm.assert_numpy_array_equal(res, exp)

    @pytest.mark.parametrize(
        "idx, expected",
        [
            [0, -1],
            [5, 0],
            [7, 2],
            [8, -1],
            [9, -1],
            [10, -1],
            [11, -1],
            [12, 3],
            [17, 8],
            [18, -1],
        ],
    )
    def test_lookup_basics(self, idx, expected):
        """Block and integer forms of one index agree on a single lookup."""
        block_index = BlockIndex(20, [5, 12], [3, 6])
        assert block_index.lookup(idx) == expected

        int_index = block_index.to_int_index()
        assert int_index.lookup(idx) == expected
|  | ||||
|  | ||||
class TestBlockIndex:
    """Tests specific to ``BlockIndex``."""

    def test_block_internal(self):
        """``kind="block"`` yields a ``BlockIndex`` with the expected blocks."""
        # (positions, expected npoints, expected blocs, expected blengths)
        scenarios = [
            ([2, 3], 2, [2], [2]),
            ([], 0, [], []),
            ([0, 1, 2, 3], 4, [0], [4]),
            ([0, 2, 3], 3, [0, 2], [1, 2]),
        ]
        for positions, npoints, blocs, blengths in scenarios:
            idx = make_sparse_index(
                4, np.array(positions, dtype=np.int32), kind="block"
            )
            assert isinstance(idx, BlockIndex)
            assert idx.npoints == npoints
            tm.assert_numpy_array_equal(idx.blocs, np.array(blocs, dtype=np.int32))
            tm.assert_numpy_array_equal(
                idx.blengths, np.array(blengths, dtype=np.int32)
            )

    @pytest.mark.parametrize("i", [5, 10, 100, 101])
    def test_make_block_boundary(self, i):
        """Every-other-position input becomes one length-1 block per position."""
        idx = make_sparse_index(i, np.arange(0, i, 2, dtype=np.int32), kind="block")

        expected_starts = np.arange(0, i, 2, dtype=np.int32)
        expected_lengths = np.ones(len(expected_starts), dtype=np.int32)
        tm.assert_numpy_array_equal(idx.blocs, expected_starts)
        tm.assert_numpy_array_equal(idx.blengths, expected_lengths)

    def test_equals(self):
        """An index equals itself but not one with a different block length."""
        index = BlockIndex(10, [0, 4], [2, 5])
        other = BlockIndex(10, [0, 4], [2, 6])

        assert index.equals(index)
        assert not index.equals(other)

    def test_check_integrity(self):
        """Empty block lists are accepted; overrunning or overlapping blocks raise."""
        no_locs = []
        no_lengths = []

        # 0-length OK
        BlockIndex(0, no_locs, no_lengths)

        # also OK even though empty
        BlockIndex(1, no_locs, no_lengths)

        # A block that runs past the declared length.
        with pytest.raises(ValueError, match="Block 0 extends beyond end"):
            BlockIndex(10, [5], [10])

        # A block that runs into the next one.
        with pytest.raises(ValueError, match="Block 0 overlaps"):
            BlockIndex(10, [2, 5], [5, 3])

    def test_to_int_index(self):
        """Converting to an int index expands each block to its positions."""
        block = BlockIndex(20, [0, 10], [4, 6])
        expected_indices = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15]

        dense = block.to_int_index()

        tm.assert_numpy_array_equal(
            dense.indices, np.array(expected_indices, dtype=np.int32)
        )

    def test_to_block_index(self):
        """``to_block_index`` on a ``BlockIndex`` returns the same object."""
        index = BlockIndex(10, [0, 5], [4, 5])
        assert index.to_block_index() is index
|  | ||||
|  | ||||
| class TestIntIndex: | ||||
|     def test_check_integrity(self): | ||||
|         # Too many indices than specified in self.length | ||||
|         msg = "Too many indices" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntIndex(length=1, indices=[1, 2, 3]) | ||||
|  | ||||
|         # No index can be negative. | ||||
|         msg = "No index can be less than zero" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntIndex(length=5, indices=[1, -2, 3]) | ||||
|  | ||||
|         # No index can be negative. | ||||
|         msg = "No index can be less than zero" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntIndex(length=5, indices=[1, -2, 3]) | ||||
|  | ||||
|         # All indices must be less than the length. | ||||
|         msg = "All indices must be less than the length" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntIndex(length=5, indices=[1, 2, 5]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntIndex(length=5, indices=[1, 2, 6]) | ||||
|  | ||||
|         # Indices must be strictly ascending. | ||||
|         msg = "Indices must be strictly increasing" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntIndex(length=5, indices=[1, 3, 2]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             IntIndex(length=5, indices=[1, 3, 3]) | ||||
|  | ||||
|     def test_int_internal(self): | ||||
|         idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer") | ||||
|         assert isinstance(idx, IntIndex) | ||||
|         assert idx.npoints == 2 | ||||
|         tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) | ||||
|  | ||||
|         idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer") | ||||
|         assert isinstance(idx, IntIndex) | ||||
|         assert idx.npoints == 0 | ||||
|         tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) | ||||
|  | ||||
|         idx = make_sparse_index( | ||||
|             4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer" | ||||
|         ) | ||||
|         assert isinstance(idx, IntIndex) | ||||
|         assert idx.npoints == 4 | ||||
|         tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) | ||||
|  | ||||
|     def test_equals(self): | ||||
|         index = IntIndex(10, [0, 1, 2, 3, 4]) | ||||
|         assert index.equals(index) | ||||
|         assert not index.equals(IntIndex(10, [0, 1, 2, 3])) | ||||
|  | ||||
|     def test_to_block_index(self, cases, test_length): | ||||
|         xloc, xlen, yloc, ylen, _, _ = cases | ||||
|         xindex = BlockIndex(test_length, xloc, xlen) | ||||
|         yindex = BlockIndex(test_length, yloc, ylen) | ||||
|  | ||||
|         # see if survive the round trip | ||||
|         xbindex = xindex.to_int_index().to_block_index() | ||||
|         ybindex = yindex.to_int_index().to_block_index() | ||||
|         assert isinstance(xbindex, BlockIndex) | ||||
|         assert xbindex.equals(xindex) | ||||
|         assert ybindex.equals(yindex) | ||||
|  | ||||
|     def test_to_int_index(self): | ||||
|         index = IntIndex(10, [2, 3, 4, 5, 6]) | ||||
|         assert index.to_int_index() is index | ||||
|  | ||||
|  | ||||
| class TestSparseOperators: | ||||
|     @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"]) | ||||
|     def test_op(self, opname, cases, test_length): | ||||
|         xloc, xlen, yloc, ylen, _, _ = cases | ||||
|         sparse_op = getattr(splib, f"sparse_{opname}_float64") | ||||
|         python_op = getattr(operator, opname) | ||||
|  | ||||
|         xindex = BlockIndex(test_length, xloc, xlen) | ||||
|         yindex = BlockIndex(test_length, yloc, ylen) | ||||
|  | ||||
|         xdindex = xindex.to_int_index() | ||||
|         ydindex = yindex.to_int_index() | ||||
|  | ||||
|         x = np.arange(xindex.npoints) * 10.0 + 1 | ||||
|         y = np.arange(yindex.npoints) * 100.0 + 1 | ||||
|  | ||||
|         xfill = 0 | ||||
|         yfill = 2 | ||||
|  | ||||
|         result_block_vals, rb_index, bfill = sparse_op( | ||||
|             x, xindex, xfill, y, yindex, yfill | ||||
|         ) | ||||
|         result_int_vals, ri_index, ifill = sparse_op( | ||||
|             x, xdindex, xfill, y, ydindex, yfill | ||||
|         ) | ||||
|  | ||||
|         assert rb_index.to_int_index().equals(ri_index) | ||||
|         tm.assert_numpy_array_equal(result_block_vals, result_int_vals) | ||||
|         assert bfill == ifill | ||||
|  | ||||
|         # check versus Series... | ||||
|         xseries = Series(x, xdindex.indices) | ||||
|         xseries = xseries.reindex(np.arange(test_length)).fillna(xfill) | ||||
|  | ||||
|         yseries = Series(y, ydindex.indices) | ||||
|         yseries = yseries.reindex(np.arange(test_length)).fillna(yfill) | ||||
|  | ||||
|         series_result = python_op(xseries, yseries) | ||||
|         series_result = series_result.reindex(ri_index.indices) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(result_block_vals, series_result.values) | ||||
|         tm.assert_numpy_array_equal(result_int_vals, series_result.values) | ||||
| @ -0,0 +1,306 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NaT, | ||||
|     SparseDtype, | ||||
|     Timestamp, | ||||
|     isna, | ||||
| ) | ||||
| from pandas.core.arrays.sparse import SparseArray | ||||
|  | ||||
|  | ||||
class TestReductions:
    """Tests for SparseArray reductions (all/any/sum/mean).

    Each reduction is exercised both through the array method and through
    the matching NumPy function (``np.all``, ``np.any``, ``np.sum``,
    ``np.mean``).
    """

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_all(self, data, pos, neg):
        """``all()`` is truthy until one element is replaced by ``neg``."""
        # GH#17570
        # Work on a copy: the parametrized list is created once at collection
        # time, so mutating it in place would leak into any re-run of this test.
        data = list(data)

        out = SparseArray(data).all()
        assert out

        out = SparseArray(data, fill_value=pos).all()
        assert out

        data[1] = neg
        out = SparseArray(data).all()
        assert not out

        out = SparseArray(data, fill_value=pos).all()
        assert not out

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_numpy_all(self, data, pos, neg):
        """``np.all`` agrees with ``test_all`` and rejects the ``out`` argument."""
        # GH#17570
        # Copy so the shared parametrized list is never mutated.
        data = list(data)

        out = np.all(SparseArray(data))
        assert out

        out = np.all(SparseArray(data, fill_value=pos))
        assert out

        data[1] = neg
        out = np.all(SparseArray(data))
        assert not out

        out = np.all(SparseArray(data, fill_value=pos))
        assert not out

        # Passing NumPy's ``out`` argument is expected to raise.
        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.all(SparseArray(data), out=np.array([]))

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_any(self, data, pos, neg):
        """``any()`` is truthy until the only truthy element becomes ``neg``."""
        # GH#17570
        # Copy so the shared parametrized list is never mutated.
        data = list(data)

        out = SparseArray(data).any()
        assert out

        out = SparseArray(data, fill_value=pos).any()
        assert out

        data[1] = neg
        out = SparseArray(data).any()
        assert not out

        out = SparseArray(data, fill_value=pos).any()
        assert not out

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_numpy_any(self, data, pos, neg):
        """``np.any`` agrees with ``test_any`` and rejects the ``out`` argument."""
        # GH#17570
        # Copy so the shared parametrized list is never mutated.
        data = list(data)

        out = np.any(SparseArray(data))
        assert out

        out = np.any(SparseArray(data, fill_value=pos))
        assert out

        data[1] = neg
        out = np.any(SparseArray(data))
        assert not out

        out = np.any(SparseArray(data, fill_value=pos))
        assert not out

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.any(SparseArray(data), out=out)

    def test_sum(self):
        """``sum()`` skips the NaN entry regardless of the fill value."""
        data = np.arange(10).astype(float)
        out = SparseArray(data).sum()
        assert out == 45.0

        data[5] = np.nan
        out = SparseArray(data, fill_value=2).sum()
        assert out == 40.0

        out = SparseArray(data, fill_value=np.nan).sum()
        assert out == 40.0

    @pytest.mark.parametrize(
        "arr",
        [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])],
    )
    @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
    @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
    def test_sum_min_count(self, arr, fill_value, min_count, expected):
        """``sum(min_count=...)`` yields NaN when ``expected`` is NaN."""
        # GH#25777
        sparray = SparseArray(arr, fill_value=fill_value)
        result = sparray.sum(min_count=min_count)
        if np.isnan(expected):
            assert np.isnan(result)
        else:
            assert result == expected

    def test_bool_sum_min_count(self):
        """Boolean sum honours ``min_count`` (10 elements, 5 of them True)."""
        spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        res = spar_bool.sum(min_count=1)
        assert res == 5
        res = spar_bool.sum(min_count=11)
        assert isna(res)

    def test_numpy_sum(self):
        """``np.sum`` matches ``test_sum`` and rejects ``dtype`` and ``out``."""
        data = np.arange(10).astype(float)
        out = np.sum(SparseArray(data))
        assert out == 45.0

        data[5] = np.nan
        out = np.sum(SparseArray(data, fill_value=2))
        assert out == 40.0

        out = np.sum(SparseArray(data, fill_value=np.nan))
        assert out == 40.0

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), out=out)

    def test_mean(self):
        """``mean()`` averages over the non-NaN entries only."""
        data = np.arange(10).astype(float)
        out = SparseArray(data).mean()
        assert out == 4.5

        data[5] = np.nan
        out = SparseArray(data).mean()
        assert out == 40.0 / 9

    def test_numpy_mean(self):
        """``np.mean`` matches ``test_mean`` and rejects ``dtype`` and ``out``."""
        data = np.arange(10).astype(float)
        out = np.mean(SparseArray(data))
        assert out == 4.5

        data[5] = np.nan
        out = np.mean(SparseArray(data))
        assert out == 40.0 / 9

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), out=out)
|  | ||||
|  | ||||
class TestMinMax:
    """Tests for ``SparseArray.max`` / ``SparseArray.min``."""

    @pytest.mark.parametrize(
        "raw_data,max_expected,min_expected",
        [
            (np.arange(5.0), [4], [0]),
            (-np.arange(5.0), [0], [-4]),
            (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
            (np.array([np.nan] * 5), [np.nan], [np.nan]),
            (np.array([]), [np.nan], [np.nan]),
        ],
    )
    def test_nan_fill_value(self, raw_data, max_expected, min_expected):
        """Check min/max with the default fill value, with and without skipna."""
        sparse = SparseArray(raw_data)

        # Expected values are wrapped in lists so a NaN expectation can be
        # checked with ``in``.
        largest = sparse.max()
        smallest = sparse.min()
        assert largest in max_expected
        assert smallest in min_expected

        largest = sparse.max(skipna=False)
        smallest = sparse.min(skipna=False)
        if not np.isnan(raw_data).any():
            assert largest in max_expected
            assert smallest in min_expected
        else:
            assert np.isnan(largest)
            assert np.isnan(smallest)

    @pytest.mark.parametrize(
        "fill_value,max_expected,min_expected",
        [
            (100, 100, 0),
            (-100, 1, -100),
        ],
    )
    def test_fill_value(self, fill_value, max_expected, min_expected):
        """The fill value itself takes part in the min/max computation."""
        values = np.array([fill_value, 0, 1])
        sparse = SparseArray(values, dtype=SparseDtype("int", fill_value))

        largest = sparse.max()
        assert largest == max_expected

        smallest = sparse.min()
        assert smallest == min_expected

    def test_only_fill_value(self):
        """An array holding nothing but the fill value reduces to that value."""
        fv = 100
        sparse = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv))
        assert len(sparse._valid_sp_values) == 0

        # Same four checks: default arguments first, then skipna=False.
        for kwargs in ({}, {"skipna": False}):
            assert sparse.max(**kwargs) == fv
            assert sparse.min(**kwargs) == fv

    @pytest.mark.parametrize("func", ["min", "max"])
    @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
    @pytest.mark.parametrize(
        "dtype,expected",
        [
            (SparseDtype(np.float64, np.nan), np.nan),
            (SparseDtype(np.float64, 5.0), np.nan),
            (SparseDtype("datetime64[ns]", NaT), NaT),
            (SparseDtype("datetime64[ns]", Timestamp("2018-05-05")), NaT),
        ],
    )
    def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
        """With no valid values, min/max return the NA value of the dtype."""
        sparse = SparseArray(data, dtype=dtype)
        reducer = getattr(sparse, func)
        result = reducer()
        if expected is not NaT:
            assert np.isnan(result)
        else:
            # TODO: pin down whether we wrap datetime64("NaT")
            assert result is NaT or np.isnat(result)
|  | ||||
|  | ||||
class TestArgmaxArgmin:
    """Tests for ``SparseArray.argmax`` / ``SparseArray.argmin``."""

    @pytest.mark.parametrize(
        "arr,argmax_expected,argmin_expected",
        [
            (SparseArray([1, 2, 0, 1, 2]), 1, 2),
            (SparseArray([-1, -2, 0, -1, -2]), 2, 1),
            (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2),
            (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10),
            (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=0), 0, 1),
            (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=1), 0, 1),
        ],
    )
    def test_argmax_argmin(self, arr, argmax_expected, argmin_expected):
        """Positions of the extrema match the expected indices."""
        max_pos = arr.argmax()
        min_pos = arr.argmin()
        assert max_pos == argmax_expected
        assert min_pos == argmin_expected

    @pytest.mark.parametrize(
        "arr,method",
        [(SparseArray([]), name) for name in ("argmax", "argmin")],
    )
    def test_empty_array(self, arr, method):
        """Both reductions raise ValueError on an empty array."""
        msg = f"attempt to get {method} of an empty sequence"
        with pytest.raises(ValueError, match=msg):
            # ``method`` is either "argmax" or "argmin"; call it by name.
            getattr(arr, method)()
| @ -0,0 +1,79 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import SparseArray | ||||
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.parametrize("op", [operator.pos, operator.neg])
def test_unary_op(op, fill_value):
    """Applying ``op`` to a SparseArray matches applying it to data and fill value."""
    dense = np.array([0, 1, np.nan, 2])

    # Reference: apply the operator to the dense values and to the fill value.
    expected = SparseArray(op(dense), fill_value=op(fill_value))

    result = op(SparseArray(dense, fill_value=fill_value))
    tm.assert_sp_array_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("fill_value", [True, False])
def test_invert(fill_value):
    """``~`` flips values and fill value for array, Series and DataFrame."""
    dense = np.array([True, False, False, True])
    sparse = SparseArray(dense, fill_value=fill_value)

    # Bare array
    inverted_arr = SparseArray(~dense, fill_value=not fill_value)
    tm.assert_sp_array_equal(~sparse, inverted_arr)

    # Series wrapping the array
    inverted_ser = pd.Series(inverted_arr)
    tm.assert_series_equal(~pd.Series(sparse), inverted_ser)

    # DataFrame with a single sparse column
    inverted_df = pd.DataFrame({"A": inverted_ser})
    tm.assert_frame_equal(~pd.DataFrame({"A": sparse}), inverted_df)
|  | ||||
|  | ||||
class TestUnaryMethods:
    """Tests for unary ``-``, ``abs`` and ``~`` on SparseArray."""

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_neg_operator(self):
        """Negation flips the sign of the values and of the fill value."""
        sparse = SparseArray([-1, -2, np.nan, 3], fill_value=np.nan, dtype=np.int8)
        negated = -sparse
        expected = SparseArray([1, 2, np.nan, -3], fill_value=np.nan, dtype=np.int8)
        tm.assert_sp_array_equal(expected, negated)

        sparse = SparseArray([-1, -2, 1, 3], fill_value=-1, dtype=np.int8)
        negated = -sparse
        expected = SparseArray([1, 2, -1, -3], fill_value=1, dtype=np.int8)
        tm.assert_sp_array_equal(expected, negated)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_abs_operator(self):
        """``abs`` is applied to the values and to the fill value."""
        sparse = SparseArray([-1, -2, np.nan, 3], fill_value=np.nan, dtype=np.int8)
        magnitude = abs(sparse)
        expected = SparseArray([1, 2, np.nan, 3], fill_value=np.nan, dtype=np.int8)
        tm.assert_sp_array_equal(expected, magnitude)

        sparse = SparseArray([-1, -2, 1, 3], fill_value=-1, dtype=np.int8)
        magnitude = abs(sparse)
        expected = SparseArray([1, 2, 1, 3], fill_value=1, dtype=np.int8)
        tm.assert_sp_array_equal(expected, magnitude)

    def test_invert_operator(self):
        """``~`` inverts boolean arrays and bitwise-inverts integer arrays."""
        sparse = SparseArray(
            [False, True, False, True], fill_value=False, dtype=np.bool_
        )
        expected = SparseArray(
            np.invert([False, True, False, True]), fill_value=True, dtype=np.bool_
        )
        flipped = ~sparse
        tm.assert_sp_array_equal(expected, flipped)

        sparse = SparseArray([0, 1, 0, 2, 3, 0], fill_value=0, dtype=np.int32)
        flipped = ~sparse
        expected = SparseArray(
            [-1, -2, -1, -3, -4, -1], fill_value=-1, dtype=np.int32
        )
        tm.assert_sp_array_equal(expected, flipped)
		Reference in New Issue
	
	Block a user