done
This commit is contained in:
@ -0,0 +1,253 @@
|
||||
import string
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestSeriesAccessor:
    """Tests for the ``Series.sparse`` accessor."""

    def test_to_dense(self):
        """``to_dense`` on a sparse Series equals the same values in a default Series."""
        sparse_ser = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]")
        dense = sparse_ser.sparse.to_dense()
        tm.assert_series_equal(dense, pd.Series([0, 1, 0, 10]))

    @pytest.mark.parametrize("attr", ["npoints", "density", "fill_value", "sp_values"])
    def test_get_attributes(self, attr):
        """Each accessor attribute matches the same attribute on the SparseArray."""
        arr = SparseArray([0, 1])
        ser = pd.Series(arr)

        result = getattr(ser.sparse, attr)
        expected = getattr(arr, attr)
        # ``sp_values`` is an ndarray, so a bare ``==`` yields an array whose
        # truth value is only defined for a single element.  ``array_equal``
        # reduces scalars and arrays alike to one bool.
        assert np.array_equal(result, expected)

    def test_from_coo(self):
        """``Series.sparse.from_coo`` builds a MultiIndexed sparse Series."""
        scipy_sparse = pytest.importorskip("scipy.sparse")

        row = [0, 3, 1, 0]
        col = [0, 3, 1, 2]
        data = [4, 5, 7, 9]

        sp_array = scipy_sparse.coo_matrix((data, (row, col)))
        result = pd.Series.sparse.from_coo(sp_array)

        row_codes = np.array([0, 0, 1, 3], dtype=np.int32)
        col_codes = np.array([0, 2, 1, 3], dtype=np.int32)
        index = pd.MultiIndex.from_arrays([row_codes, col_codes])
        expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "sort_labels, expected_rows, expected_cols, expected_values_pos",
        [
            (
                False,
                [("b", 2), ("a", 2), ("b", 1), ("a", 1)],
                [("z", 1), ("z", 2), ("x", 2), ("z", 0)],
                {1: (1, 0), 3: (3, 3)},
            ),
            (
                True,
                [("a", 1), ("a", 2), ("b", 1), ("b", 2)],
                [("x", 2), ("z", 0), ("z", 1), ("z", 2)],
                {1: (1, 2), 3: (0, 1)},
            ),
        ],
    )
    def test_to_coo(
        self, sort_labels, expected_rows, expected_cols, expected_values_pos
    ):
        """``to_coo`` returns the matrix plus row/column labels for both sort modes."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        values = SparseArray([0, np.nan, 1, 0, None, 3], fill_value=0)
        index = pd.MultiIndex.from_tuples(
            [
                ("b", 2, "z", 1),
                ("a", 2, "z", 2),
                ("a", 2, "z", 1),
                ("a", 2, "x", 2),
                ("b", 1, "z", 1),
                ("a", 1, "z", 0),
            ]
        )
        ss = pd.Series(values, index=index)

        # Build the dense 4x4 expectation from the {value: (row, col)} mapping.
        expected_A = np.zeros((4, 4))
        for value, (row, col) in expected_values_pos.items():
            expected_A[row, col] = value

        A, rows, cols = ss.sparse.to_coo(
            row_levels=(0, 1), column_levels=(2, 3), sort_labels=sort_labels
        )
        assert isinstance(A, sp_sparse.coo_matrix)
        tm.assert_numpy_array_equal(A.toarray(), expected_A)
        assert rows == expected_rows
        assert cols == expected_cols

    def test_non_sparse_raises(self):
        """Using ``.sparse`` on a non-sparse Series raises AttributeError."""
        ser = pd.Series([1, 2, 3])
        with pytest.raises(AttributeError, match=".sparse"):
            ser.sparse.density
|
||||
|
||||
|
||||
class TestFrameAccessor:
    """Tests for the ``DataFrame.sparse`` accessor and related constructors."""

    def test_accessor_raises(self):
        """A frame without sparse columns does not expose ``.sparse``."""
        frame = pd.DataFrame({"A": [0, 1]})
        with pytest.raises(AttributeError, match="sparse"):
            frame.sparse

    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
    @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
    @pytest.mark.parametrize("dtype", ["float64", "int64"])
    def test_from_spmatrix(self, format, labels, dtype):
        """``from_spmatrix`` of an identity matrix equals the dense frame cast to sparse."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        # Fill value is a Python scalar zero of the requested dtype.
        zero = np.array(0, dtype=dtype).item()
        sp_dtype = SparseDtype(dtype, zero)

        mat = sp_sparse.eye(10, format=format, dtype=dtype)
        result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels)

        dense = pd.DataFrame(np.eye(10, dtype=dtype), index=labels, columns=labels)
        expected = dense.astype(sp_dtype)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
    def test_from_spmatrix_including_explicit_zero(self, format):
        """A stored zero in the scipy matrix still round-trips through ``from_spmatrix``."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        mat = sp_sparse.random(10, 2, density=0.5, format=format)
        # Overwrite one stored entry with an explicit zero.
        mat.data[0] = 0
        result = pd.DataFrame.sparse.from_spmatrix(mat)

        sparse_float = SparseDtype("float64", 0.0)
        expected = pd.DataFrame(mat.todense()).astype(sparse_float)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "columns",
        [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]],
    )
    def test_from_spmatrix_columns(self, columns):
        """``from_spmatrix`` accepts flat, MultiIndex and duplicate column labels."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        sparse_float = SparseDtype("float64", 0.0)

        mat = sp_sparse.random(10, 2, density=0.5)
        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns)

        dense = pd.DataFrame(mat.toarray(), columns=columns)
        tm.assert_frame_equal(result, dense.astype(sparse_float))

    @pytest.mark.parametrize(
        "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
    )
    def test_to_coo(self, colnames):
        """``to_coo`` matches a coo_matrix built from the frame's dense values."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        first, second = colnames[0], colnames[1]
        df = pd.DataFrame(
            {first: [0, 1, 0], second: [1, 0, 0]}, dtype="Sparse[int64, 0]"
        )
        result = df.sparse.to_coo()
        expected = sp_sparse.coo_matrix(np.asarray(df))
        # No position may differ between the two sparse matrices.
        assert (result != expected).nnz == 0

    @pytest.mark.parametrize("fill_value", [1, np.nan])
    def test_to_coo_nonzero_fill_val_raises(self, fill_value):
        """``to_coo`` rejects columns whose fill value is not 0."""
        pytest.importorskip("scipy")
        col_a = SparseArray(
            [fill_value, fill_value, fill_value, 2], fill_value=fill_value
        )
        col_b = SparseArray(
            [fill_value, 2, fill_value, fill_value], fill_value=fill_value
        )
        df = pd.DataFrame({"A": col_a, "B": col_b})
        with pytest.raises(ValueError, match="fill value must be 0"):
            df.sparse.to_coo()

    def test_to_coo_midx_categorical(self):
        """``Series.sparse.to_coo`` works with categorical MultiIndex levels."""
        # GH#50996
        sp_sparse = pytest.importorskip("scipy.sparse")

        x_level = pd.CategoricalIndex(list("ab"), name="x")
        y_level = pd.CategoricalIndex([0, 1], name="y")
        midx = pd.MultiIndex.from_arrays([x_level, y_level])

        ser = pd.Series(1, index=midx, dtype="Sparse[int]")
        result = ser.sparse.to_coo(row_levels=["x"], column_levels=["y"])[0]

        coords = (np.array([0, 1]), np.array([0, 1]))
        expected = sp_sparse.coo_matrix((np.array([1, 1]), coords), shape=(2, 2))
        assert (result != expected).nnz == 0

    def test_to_dense(self):
        """``to_dense`` yields plain columns regardless of each column's fill value."""
        labels = ["b", "a"]
        sparse_columns = {
            "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)),
            "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)),
            "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)),
        }
        df = pd.DataFrame(sparse_columns, index=labels)
        result = df.sparse.to_dense()
        expected = pd.DataFrame(
            {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=labels
        )
        tm.assert_frame_equal(result, expected)

    def test_density(self):
        """Frame density is 0.75 when 6 of the 8 cells differ from the fill value."""
        df = pd.DataFrame(
            {
                "A": SparseArray([1, 0, 2, 1], fill_value=0),
                "B": SparseArray([0, 1, 1, 1], fill_value=0),
            }
        )
        assert df.sparse.density == 0.75

    @pytest.mark.parametrize("dtype", ["int64", "float64"])
    @pytest.mark.parametrize("dense_index", [True, False])
    def test_series_from_coo(self, dtype, dense_index):
        """``Series.sparse.from_coo`` of an identity matrix, with and without dense index."""
        sp_sparse = pytest.importorskip("scipy.sparse")

        A = sp_sparse.eye(3, format="coo", dtype=dtype)
        result = pd.Series.sparse.from_coo(A, dense_index=dense_index)

        diagonal = [np.array([i, i], dtype=np.int32) for i in (0, 1, 2)]
        index = pd.MultiIndex.from_tuples(diagonal)
        expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index)
        if dense_index:
            full_index = pd.MultiIndex.from_product(index.levels)
            expected = expected.reindex(full_index)

        tm.assert_series_equal(result, expected)

    def test_series_from_coo_incorrect_format_raises(self):
        """Passing a csr_matrix to ``from_coo`` raises TypeError."""
        # gh-26554
        sp_sparse = pytest.importorskip("scipy.sparse")

        csr = sp_sparse.csr_matrix(np.array([[0, 1], [0, 0]]))
        with pytest.raises(
            TypeError, match="Expected coo_matrix. Got csr_matrix instead."
        ):
            pd.Series.sparse.from_coo(csr)

    def test_with_column_named_sparse(self):
        """A column literally named "sparse" does not shadow the accessor."""
        # https://github.com/pandas-dev/pandas/issues/30758
        df = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])})
        accessor_cls = pd.core.arrays.sparse.accessor.SparseFrameAccessor
        assert isinstance(df.sparse, accessor_cls)
|
@ -0,0 +1,514 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
@pytest.fixture(params=["integer", "block"])
def kind(request):
    """Parametrized fixture yielding each ``kind`` value passed to SparseArray."""
    sparse_kind = request.param
    return sparse_kind
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def mix(request):
    """
    Parametrized fixture yielding True or False.

    True selects op(sparse, dense); False selects op(sparse, sparse).
    """
    use_dense_operand = request.param
    return use_dense_operand
|
||||
|
||||
|
||||
class TestSparseArrayArithmetics:
|
||||
def _assert(self, a, b):
|
||||
# We have to use tm.assert_sp_array_equal. See GH #45126
|
||||
tm.assert_numpy_array_equal(a, b)
|
||||
|
||||
def _check_numeric_ops(self, a, b, a_dense, b_dense, mix: bool, op):
|
||||
# Check that arithmetic behavior matches non-Sparse Series arithmetic
|
||||
|
||||
if isinstance(a_dense, np.ndarray):
|
||||
expected = op(pd.Series(a_dense), b_dense).values
|
||||
elif isinstance(b_dense, np.ndarray):
|
||||
expected = op(a_dense, pd.Series(b_dense)).values
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
with np.errstate(invalid="ignore", divide="ignore"):
|
||||
if mix:
|
||||
result = op(a, b_dense).to_dense()
|
||||
else:
|
||||
result = op(a, b).to_dense()
|
||||
|
||||
self._assert(result, expected)
|
||||
|
||||
def _check_bool_result(self, res):
|
||||
assert isinstance(res, SparseArray)
|
||||
assert isinstance(res.dtype, SparseDtype)
|
||||
assert res.dtype.subtype == np.bool_
|
||||
assert isinstance(res.fill_value, bool)
|
||||
|
||||
def _check_comparison_ops(self, a, b, a_dense, b_dense):
|
||||
with np.errstate(invalid="ignore"):
|
||||
# Unfortunately, trying to wrap the computation of each expected
|
||||
# value is with np.errstate() is too tedious.
|
||||
#
|
||||
# sparse & sparse
|
||||
self._check_bool_result(a == b)
|
||||
self._assert((a == b).to_dense(), a_dense == b_dense)
|
||||
|
||||
self._check_bool_result(a != b)
|
||||
self._assert((a != b).to_dense(), a_dense != b_dense)
|
||||
|
||||
self._check_bool_result(a >= b)
|
||||
self._assert((a >= b).to_dense(), a_dense >= b_dense)
|
||||
|
||||
self._check_bool_result(a <= b)
|
||||
self._assert((a <= b).to_dense(), a_dense <= b_dense)
|
||||
|
||||
self._check_bool_result(a > b)
|
||||
self._assert((a > b).to_dense(), a_dense > b_dense)
|
||||
|
||||
self._check_bool_result(a < b)
|
||||
self._assert((a < b).to_dense(), a_dense < b_dense)
|
||||
|
||||
# sparse & dense
|
||||
self._check_bool_result(a == b_dense)
|
||||
self._assert((a == b_dense).to_dense(), a_dense == b_dense)
|
||||
|
||||
self._check_bool_result(a != b_dense)
|
||||
self._assert((a != b_dense).to_dense(), a_dense != b_dense)
|
||||
|
||||
self._check_bool_result(a >= b_dense)
|
||||
self._assert((a >= b_dense).to_dense(), a_dense >= b_dense)
|
||||
|
||||
self._check_bool_result(a <= b_dense)
|
||||
self._assert((a <= b_dense).to_dense(), a_dense <= b_dense)
|
||||
|
||||
self._check_bool_result(a > b_dense)
|
||||
self._assert((a > b_dense).to_dense(), a_dense > b_dense)
|
||||
|
||||
self._check_bool_result(a < b_dense)
|
||||
self._assert((a < b_dense).to_dense(), a_dense < b_dense)
|
||||
|
||||
def _check_logical_ops(self, a, b, a_dense, b_dense):
|
||||
# sparse & sparse
|
||||
self._check_bool_result(a & b)
|
||||
self._assert((a & b).to_dense(), a_dense & b_dense)
|
||||
|
||||
self._check_bool_result(a | b)
|
||||
self._assert((a | b).to_dense(), a_dense | b_dense)
|
||||
# sparse & dense
|
||||
self._check_bool_result(a & b_dense)
|
||||
self._assert((a & b_dense).to_dense(), a_dense & b_dense)
|
||||
|
||||
self._check_bool_result(a | b_dense)
|
||||
self._assert((a | b_dense).to_dense(), a_dense | b_dense)
|
||||
|
||||
@pytest.mark.parametrize("scalar", [0, 1, 3])
|
||||
@pytest.mark.parametrize("fill_value", [None, 0, 2])
|
||||
def test_float_scalar(
|
||||
self, kind, mix, all_arithmetic_functions, fill_value, scalar, request
|
||||
):
|
||||
op = all_arithmetic_functions
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
a = SparseArray(values, kind=kind, fill_value=fill_value)
|
||||
self._check_numeric_ops(a, scalar, values, scalar, mix, op)
|
||||
|
||||
def test_float_scalar_comparison(self, kind):
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
|
||||
a = SparseArray(values, kind=kind)
|
||||
self._check_comparison_ops(a, 1, values, 1)
|
||||
self._check_comparison_ops(a, 0, values, 0)
|
||||
self._check_comparison_ops(a, 3, values, 3)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
self._check_comparison_ops(a, 1, values, 1)
|
||||
self._check_comparison_ops(a, 0, values, 0)
|
||||
self._check_comparison_ops(a, 3, values, 3)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=2)
|
||||
self._check_comparison_ops(a, 1, values, 1)
|
||||
self._check_comparison_ops(a, 0, values, 0)
|
||||
self._check_comparison_ops(a, 3, values, 3)
|
||||
|
||||
def test_float_same_index_without_nans(self, kind, mix, all_arithmetic_functions):
|
||||
# when sp_index are the same
|
||||
op = all_arithmetic_functions
|
||||
|
||||
values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
|
||||
rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=0)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
def test_float_same_index_with_nans(
|
||||
self, kind, mix, all_arithmetic_functions, request
|
||||
):
|
||||
# when sp_index are the same
|
||||
op = all_arithmetic_functions
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
|
||||
|
||||
a = SparseArray(values, kind=kind)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
def test_float_same_index_comparison(self, kind):
|
||||
# when sp_index are the same
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
|
||||
|
||||
a = SparseArray(values, kind=kind)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
values = np.array([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0])
|
||||
rvalues = np.array([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0])
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=0)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
def test_float_array(self, kind, mix, all_arithmetic_functions):
|
||||
op = all_arithmetic_functions
|
||||
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
|
||||
|
||||
a = SparseArray(values, kind=kind)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=0)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=1)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=2)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
def test_float_array_different_kind(self, mix, all_arithmetic_functions):
|
||||
op = all_arithmetic_functions
|
||||
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
|
||||
|
||||
a = SparseArray(values, kind="integer")
|
||||
b = SparseArray(rvalues, kind="block")
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)
|
||||
|
||||
a = SparseArray(values, kind="integer", fill_value=0)
|
||||
b = SparseArray(rvalues, kind="block")
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
a = SparseArray(values, kind="integer", fill_value=0)
|
||||
b = SparseArray(rvalues, kind="block", fill_value=0)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
a = SparseArray(values, kind="integer", fill_value=1)
|
||||
b = SparseArray(rvalues, kind="block", fill_value=2)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
def test_float_array_comparison(self, kind):
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
|
||||
|
||||
a = SparseArray(values, kind=kind)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=0)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=1)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=2)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
def test_int_array(self, kind, mix, all_arithmetic_functions):
|
||||
op = all_arithmetic_functions
|
||||
|
||||
# have to specify dtype explicitly until fixing GH 667
|
||||
dtype = np.int64
|
||||
|
||||
values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
|
||||
rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)
|
||||
|
||||
a = SparseArray(values, dtype=dtype, kind=kind)
|
||||
assert a.dtype == SparseDtype(dtype)
|
||||
b = SparseArray(rvalues, dtype=dtype, kind=kind)
|
||||
assert b.dtype == SparseDtype(dtype)
|
||||
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)
|
||||
|
||||
a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind)
|
||||
assert a.dtype == SparseDtype(dtype)
|
||||
b = SparseArray(rvalues, dtype=dtype, kind=kind)
|
||||
assert b.dtype == SparseDtype(dtype)
|
||||
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
a = SparseArray(values, fill_value=0, dtype=dtype, kind=kind)
|
||||
assert a.dtype == SparseDtype(dtype)
|
||||
b = SparseArray(rvalues, fill_value=0, dtype=dtype, kind=kind)
|
||||
assert b.dtype == SparseDtype(dtype)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
a = SparseArray(values, fill_value=1, dtype=dtype, kind=kind)
|
||||
assert a.dtype == SparseDtype(dtype, fill_value=1)
|
||||
b = SparseArray(rvalues, fill_value=2, dtype=dtype, kind=kind)
|
||||
assert b.dtype == SparseDtype(dtype, fill_value=2)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
def test_int_array_comparison(self, kind):
|
||||
dtype = "int64"
|
||||
# int32 NI ATM
|
||||
|
||||
values = np.array([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
|
||||
rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)
|
||||
|
||||
a = SparseArray(values, dtype=dtype, kind=kind)
|
||||
b = SparseArray(rvalues, dtype=dtype, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, dtype=dtype, kind=kind)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = SparseArray(values, dtype=dtype, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=0)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = SparseArray(values, dtype=dtype, kind=kind, fill_value=1)
|
||||
b = SparseArray(rvalues, dtype=dtype, kind=kind, fill_value=2)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False, np.nan])
|
||||
def test_bool_same_index(self, kind, fill_value):
|
||||
# GH 14000
|
||||
# when sp_index are the same
|
||||
values = np.array([True, False, True, True], dtype=np.bool_)
|
||||
rvalues = np.array([True, False, True, True], dtype=np.bool_)
|
||||
|
||||
a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value)
|
||||
b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value)
|
||||
self._check_logical_ops(a, b, values, rvalues)
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False, np.nan])
|
||||
def test_bool_array_logical(self, kind, fill_value):
|
||||
# GH 14000
|
||||
# when sp_index are the same
|
||||
values = np.array([True, False, True, False, True, True], dtype=np.bool_)
|
||||
rvalues = np.array([True, False, False, True, False, True], dtype=np.bool_)
|
||||
|
||||
a = SparseArray(values, kind=kind, dtype=np.bool_, fill_value=fill_value)
|
||||
b = SparseArray(rvalues, kind=kind, dtype=np.bool_, fill_value=fill_value)
|
||||
self._check_logical_ops(a, b, values, rvalues)
|
||||
|
||||
def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions, request):
|
||||
op = all_arithmetic_functions
|
||||
rdtype = "int64"
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)
|
||||
|
||||
a = SparseArray(values, kind=kind)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=0)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=1)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=2)
|
||||
assert b.dtype == SparseDtype(rdtype, fill_value=2)
|
||||
self._check_numeric_ops(a, b, values, rvalues, mix, op)
|
||||
|
||||
def test_mixed_array_comparison(self, kind):
|
||||
rdtype = "int64"
|
||||
# int32 NI ATM
|
||||
|
||||
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
|
||||
rvalues = np.array([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype)
|
||||
|
||||
a = SparseArray(values, kind=kind)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=0)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=0)
|
||||
assert b.dtype == SparseDtype(rdtype)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
a = SparseArray(values, kind=kind, fill_value=1)
|
||||
b = SparseArray(rvalues, kind=kind, fill_value=2)
|
||||
assert b.dtype == SparseDtype(rdtype, fill_value=2)
|
||||
self._check_comparison_ops(a, b, values, rvalues)
|
||||
|
||||
def test_xor(self):
|
||||
s = SparseArray([True, True, False, False])
|
||||
t = SparseArray([True, False, True, False])
|
||||
result = s ^ t
|
||||
sp_index = pd.core.arrays.sparse.IntIndex(4, np.array([0, 1, 2], dtype="int32"))
|
||||
expected = SparseArray([False, True, True], sparse_index=sp_index)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [operator.eq, operator.add])
def test_with_list(op):
    """A plain list operand gives the same result as an equivalent SparseArray."""
    arr = SparseArray([0, 1], fill_value=0)
    from_list = op(arr, [0, 1])
    from_sparse = op(arr, SparseArray([0, 1]))
    tm.assert_sp_array_equal(from_list, from_sparse)
|
||||
|
||||
|
||||
def test_with_dataframe():
    """``SparseArray.__add__`` returns NotImplemented for a DataFrame operand."""
    # GH#27910
    sparse = SparseArray([0, 1], fill_value=0)
    frame = pd.DataFrame([[1, 2], [3, 4]])
    assert sparse.__add__(frame) is NotImplemented
|
||||
|
||||
|
||||
def test_with_zerodim_ndarray():
    """Multiplying by a zero-dimensional ndarray matches multiplying by the scalar."""
    # GH#27910
    sparse = SparseArray([0, 1], fill_value=0)

    by_zerodim = sparse * np.array(2)
    by_scalar = sparse * 2
    tm.assert_sp_array_equal(by_zerodim, by_scalar)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.exp])
@pytest.mark.parametrize(
    "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])]
)
def test_ufuncs(ufunc, arr):
    """A unary ufunc transforms both the stored values and the fill value."""
    result = ufunc(arr)
    transformed_fill = ufunc(arr.fill_value)
    dense_result = ufunc(np.asarray(arr))
    expected = SparseArray(dense_result, fill_value=transformed_fill)
    tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
# NOTE(review): the last four parameter sets below are identical — possibly
# meant to vary the operands or fill values; confirm before deduplicating.
@pytest.mark.parametrize(
    "a, b",
    [
        (SparseArray([0, 0, 0]), np.array([0, 1, 2])),
        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
        (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])),
    ],
)
@pytest.mark.parametrize("ufunc", [np.add, np.greater])
def test_binary_ufuncs(ufunc, a, b):
    """A binary ufunc on (sparse, ndarray) returns a SparseArray with dense-equal values."""
    # can't say anything about fill value here.
    sparse_out = ufunc(a, b)
    dense_out = ufunc(np.asarray(a), np.asarray(b))
    assert isinstance(sparse_out, SparseArray)
    tm.assert_numpy_array_equal(np.asarray(sparse_out), dense_out)
|
||||
|
||||
|
||||
def test_ndarray_inplace():
    """In-place ``+=`` of a SparseArray into an ndarray keeps an ndarray with summed values."""
    addend = SparseArray([0, 2, 0, 0])
    target = np.array([0, 1, 2, 3])
    target += addend
    tm.assert_numpy_array_equal(target, np.array([0, 3, 2, 3]))
|
||||
|
||||
|
||||
def test_sparray_inplace():
    """In-place ``+=`` of an ndarray into a SparseArray yields the summed SparseArray."""
    target = SparseArray([0, 2, 0, 0])
    addend = np.array([0, 1, 2, 3])
    target += addend
    tm.assert_sp_array_equal(target, SparseArray([0, 3, 2, 3], fill_value=0))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cons", [list, np.array, SparseArray])
def test_mismatched_length_cmp_op(cons):
    """``&`` between operands of different lengths raises ValueError."""
    shorter = SparseArray([True, True])
    longer = cons([True, True, True])
    with pytest.raises(ValueError, match="operands have mismatched length"):
        shorter & longer
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "floordiv", "pow"])
@pytest.mark.parametrize("fill_value", [np.nan, 3])
def test_binary_operators(op, fill_value):
    """
    Binary operators on SparseArrays agree with their dense counterparts.

    Covers sparse-sparse, sparse-dense, dense-sparse and sparse-scalar operands.
    """
    op = getattr(operator, op)
    # NOTE(review): both generators use seed 2, so the two draws are identical
    # before masking — confirm this is intended.
    data1 = np.random.default_rng(2).standard_normal(20)
    data2 = np.random.default_rng(2).standard_normal(20)

    # Put the fill value at different strides in each operand.
    data1[::2] = fill_value
    data2[::3] = fill_value

    first = SparseArray(data1, fill_value=fill_value)
    second = SparseArray(data2, fill_value=fill_value)

    with np.errstate(all="ignore"):
        sparse_sparse = op(first, second)
        dense_both = op(first.to_dense(), second.to_dense())
        exp = SparseArray(dense_both, fill_value=first.fill_value)
        assert isinstance(sparse_sparse, SparseArray)
        tm.assert_almost_equal(sparse_sparse.to_dense(), exp.to_dense())

        sparse_dense = op(first, second.to_dense())
        assert isinstance(sparse_dense, SparseArray)
        tm.assert_sp_array_equal(sparse_sparse, sparse_dense)

        dense_sparse = op(first.to_dense(), second)
        assert isinstance(dense_sparse, SparseArray)
        tm.assert_sp_array_equal(sparse_sparse, dense_sparse)

        sparse_scalar = op(first, 4)
        assert isinstance(sparse_scalar, SparseArray)

        # Ignore this if the actual op raises (e.g. pow).
        try:
            exp = op(first.to_dense(), 4)
            exp_fv = op(first.fill_value, 4)
        except ValueError:
            pass
        else:
            tm.assert_almost_equal(sparse_scalar.fill_value, exp_fv)
            tm.assert_almost_equal(sparse_scalar.to_dense(), exp)
|
@ -0,0 +1,511 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.sparse import IntIndex
|
||||
from pandas.compat.numpy import np_version_gt2
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
SparseDtype,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
@pytest.fixture
def arr_data():
    """Fixture returning a float ndarray that mixes NaN with the values 1 through 6."""
    nan = np.nan
    return np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
|
||||
|
||||
|
||||
@pytest.fixture
def arr(arr_data):
    """Fixture wrapping the 'arr_data' values in a SparseArray."""
    sparse = SparseArray(arr_data)
    return sparse
|
||||
|
||||
|
||||
@pytest.fixture
def zarr():
    """Fixture returning an integer-valued SparseArray built with 'fill_value=0'."""
    entries = [0, 0, 1, 2, 3, 0, 4, 5, 0, 6]
    return SparseArray(entries, fill_value=0)
|
||||
|
||||
|
||||
class TestSparseArray:
    """General SparseArray checks: fill values, copying, shape, pickling, fillna, nonzero."""

    @pytest.mark.parametrize("fill_value", [0, None, np.nan])
    def test_shift_fill_value(self, fill_value):
        """``shift(1, fill_value=...)`` puts the requested value in the vacated slot."""
        # GH #24128
        source = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0)
        shifted = source.shift(1, fill_value=fill_value)

        # A missing fill value is compared against the result dtype's NA value.
        pad = shifted.dtype.na_value if isna(fill_value) else fill_value
        expected = SparseArray(np.array([pad, 1, 0, 0, 3]), fill_value=8.0)
        tm.assert_sp_array_equal(shifted, expected)

    def test_set_fill_value(self):
        """Assigning ``fill_value`` works; values not matching the subtype warn."""
        deprecation = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated"

        float_arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan)
        float_arr.fill_value = 2
        assert float_arr.fill_value == 2

        int_arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
        int_arr.fill_value = 2
        assert int_arr.fill_value == 2

        with tm.assert_produces_warning(FutureWarning, match=deprecation):
            int_arr.fill_value = 3.1
        assert int_arr.fill_value == 3.1

        int_arr.fill_value = np.nan
        assert np.isnan(int_arr.fill_value)

        bool_arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_)
        bool_arr.fill_value = True
        assert bool_arr.fill_value is True

        with tm.assert_produces_warning(FutureWarning, match=deprecation):
            bool_arr.fill_value = 0

        bool_arr.fill_value = np.nan
        assert np.isnan(bool_arr.fill_value)

    @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)])
    def test_set_fill_invalid_non_scalar(self, val):
        """A list, ndarray or tuple is rejected as a fill value."""
        bool_arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool_)

        with pytest.raises(ValueError, match="fill_value must be a scalar"):
            bool_arr.fill_value = val

    def test_copy(self, arr):
        """``copy`` yields new ``sp_values`` but reuses the same ``sp_index`` object."""
        duplicate = arr.copy()
        assert duplicate.sp_values is not arr.sp_values
        assert duplicate.sp_index is arr.sp_index

    def test_values_asarray(self, arr_data, arr):
        """``to_dense`` reproduces the data the array was built from."""
        dense = arr.to_dense()
        tm.assert_almost_equal(dense, arr_data)

    @pytest.mark.parametrize(
        "data,shape,dtype",
        [
            ([0, 0, 0, 0, 0], (5,), None),
            ([], (0,), None),
            ([0], (1,), None),
            (["A", "A", np.nan, "B"], (4,), object),
        ],
    )
    def test_shape(self, data, shape, dtype):
        """``shape`` reports the full logical length of the array."""
        # GH 21126
        built = SparseArray(data, dtype=dtype)
        assert built.shape == shape

    @pytest.mark.parametrize(
        "vals",
        [
            [np.nan, np.nan, np.nan, np.nan, np.nan],
            [1, np.nan, np.nan, 3, np.nan],
            [1, np.nan, 0, 3, 0],
        ],
    )
    @pytest.mark.parametrize("fill_value", [None, 0])
    def test_dense_repr(self, vals, fill_value):
        """``to_dense`` round-trips the input for either fill value."""
        vals = np.array(vals)
        dense = SparseArray(vals, fill_value=fill_value).to_dense()
        tm.assert_numpy_array_equal(dense, vals)

    @pytest.mark.parametrize("fix", ["arr", "zarr"])
    def test_pickle(self, fix, request):
        """A pickle round trip yields an equal SparseArray."""
        original = request.getfixturevalue(fix)
        restored = tm.round_trip_pickle(original)
        tm.assert_sp_array_equal(restored, original)

    def test_generator_warnings(self):
        """Iterating over a SparseArray emits no warning."""
        values = SparseArray([1, 2, 3])
        with tm.assert_produces_warning(None):
            for _ in values:
                pass

    def test_where_retain_fill_value(self):
        """``_where`` and ``Series.where`` keep the original fill value."""
        # GH#45691 don't lose fill_value on _where
        arr = SparseArray([np.nan, 1.0], fill_value=0)
        mask = np.array([True, False])
        expected = SparseArray([1, 1.0], fill_value=0)

        array_result = arr._where(~mask, 1)
        tm.assert_sp_array_equal(array_result, expected)

        series_result = pd.Series(arr).where(~mask, 1)
        tm.assert_series_equal(series_result, pd.Series(expected))

    def test_fillna(self):
        """``fillna(-1)`` across float and int data with default and explicit fill values."""
        float_cases = [
            ([1, np.nan, np.nan, 3, np.nan], [1, -1, -1, 3, -1]),
            ([1, np.nan, 0, 3, 0], [1, -1, 0, 3, 0]),
            ([np.nan, np.nan, np.nan, np.nan], [-1, -1, -1, -1]),
        ]
        for raw, filled in float_cases:
            # Default fill value: the expected result carries fill_value=-1.
            result = SparseArray(raw).fillna(-1)
            expected = SparseArray(filled, fill_value=-1, dtype=np.float64)
            tm.assert_sp_array_equal(result, expected)

            # Explicit fill_value=0: the expected result keeps fill_value=0.
            result = SparseArray(raw, fill_value=0).fillna(-1)
            expected = SparseArray(filled, fill_value=0, dtype=np.float64)
            tm.assert_sp_array_equal(result, expected)

        # float dtype's fill_value is np.nan, replaced by -1
        zeros_float = SparseArray([0.0, 0.0, 0.0, 0.0])
        expected = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1)
        tm.assert_sp_array_equal(zeros_float.fillna(-1), expected)

        # int dtype shouldn't have missing. No changes.
        zeros_int = SparseArray([0, 0, 0, 0])
        assert zeros_int.dtype == SparseDtype(np.int64)
        assert zeros_int.fill_value == 0
        tm.assert_sp_array_equal(zeros_int.fillna(-1), zeros_int)

        zeros_int = SparseArray([0, 0, 0, 0], fill_value=0)
        assert zeros_int.dtype == SparseDtype(np.int64)
        assert zeros_int.fill_value == 0
        expected = SparseArray([0, 0, 0, 0], fill_value=0)
        tm.assert_sp_array_equal(zeros_int.fillna(-1), expected)

        # fill_value can be nan if there is no missing hole.
        # only fill_value will be changed
        zeros_nan_fill = SparseArray([0, 0, 0, 0], fill_value=np.nan)
        assert zeros_nan_fill.dtype == SparseDtype(np.int64, fill_value=np.nan)
        assert np.isnan(zeros_nan_fill.fill_value)
        expected = SparseArray([0, 0, 0, 0], fill_value=-1)
        tm.assert_sp_array_equal(zeros_nan_fill.fillna(-1), expected)

    def test_fillna_overlap(self):
        """Filling with a value that is already stored in the array."""
        raw = [1, np.nan, np.nan, 3, np.nan]

        # filling with existing value doesn't replace existing value with
        # fill_value, i.e. existing 3 remains in sp_values
        filled = SparseArray(raw).fillna(3)
        dense_expected = np.array([1, 3, 3, 3, 3], dtype=np.float64)
        tm.assert_numpy_array_equal(filled.to_dense(), dense_expected)

        filled = SparseArray(raw, fill_value=0).fillna(3)
        sparse_expected = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64)
        tm.assert_sp_array_equal(filled, sparse_expected)

    def test_nonzero(self):
        """``nonzero`` returns the int32 positions 2, 5 and 9 for both inputs."""
        # Tests regression #21172.
        expected = np.array([2, 5, 9], dtype=np.int32)
        inputs = [
            [float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0],
            [0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0],
        ]
        for values in inputs:
            (positions,) = SparseArray(values).nonzero()
            tm.assert_numpy_array_equal(expected, positions)
|
||||
|
||||
|
||||
class TestSparseArrayAnalytics:
    """Numeric behaviour of SparseArray: cumsum, ufuncs, nbytes, density, npoints."""

    @pytest.mark.parametrize(
        "data,expected",
        [
            (
                np.array([1, 2, 3, 4, 5], dtype=float),  # non-null data
                SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0])),
            ),
            (
                np.array([1, 2, np.nan, 4, 5], dtype=float),  # null data
                SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])),
            ),
        ],
    )
    @pytest.mark.parametrize("numpy", [True, False])
    def test_cumsum(self, data, expected, numpy):
        """Cumulative sum via ``np.cumsum`` or the method, for several fill values."""
        if numpy:
            accumulate = np.cumsum
        else:

            def accumulate(sparse):
                return sparse.cumsum()

        for ctor_kwargs in ({}, {"fill_value": np.nan}, {"fill_value": 2}):
            out = accumulate(SparseArray(data, **ctor_kwargs))
            tm.assert_sp_array_equal(out, expected)

        if not numpy:
            axis = 1  # SparseArray currently 1-D, so only axis = 0 is valid.
            msg = re.escape(f"axis(={axis}) out of bounds")
            with pytest.raises(ValueError, match=msg):
                SparseArray(data).cumsum(axis=axis)
            return

        # numpy compatibility checks.
        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.cumsum(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.cumsum(SparseArray(data), out=out)

    def test_ufunc(self):
        """Unary ufuncs are applied to the fill value as well as the stored values."""
        # GH 13853 make sure ufunc is applied to fill_value
        absolute_fns = (abs, np.abs)

        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        expected = SparseArray([1, np.nan, 2, np.nan, 2])
        for fn in absolute_fns:
            tm.assert_sp_array_equal(fn(sparse), expected)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        expected = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1)
        for fn in absolute_fns:
            tm.assert_sp_array_equal(fn(sparse), expected)

        sparse = SparseArray([1, -1, 2, -2], fill_value=-1)
        expected = SparseArray([1, 1, 2, 2], fill_value=1)
        for fn in absolute_fns:
            tm.assert_sp_array_equal(fn(sparse), expected)

        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        expected = SparseArray(np.sin([1, np.nan, 2, np.nan, -2]))
        tm.assert_sp_array_equal(np.sin(sparse), expected)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        expected = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1))
        tm.assert_sp_array_equal(np.sin(sparse), expected)

        sparse = SparseArray([1, -1, 0, -2], fill_value=0)
        expected = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0))
        tm.assert_sp_array_equal(np.sin(sparse), expected)

    def test_ufunc_args(self):
        """``np.add(sparse, 1)`` shifts both the stored values and the fill value."""
        # GH 13853 make sure ufunc is applied to fill_value, including its arg
        sparse = SparseArray([1, np.nan, 2, np.nan, -2])
        expected = SparseArray([2, np.nan, 3, np.nan, -1])
        tm.assert_sp_array_equal(np.add(sparse, 1), expected)

        sparse = SparseArray([1, -1, 2, -2], fill_value=1)
        expected = SparseArray([2, 0, 3, -1], fill_value=2)
        tm.assert_sp_array_equal(np.add(sparse, 1), expected)

        sparse = SparseArray([1, -1, 0, -2], fill_value=0)
        expected = SparseArray([2, 0, 1, -1], fill_value=1)
        tm.assert_sp_array_equal(np.add(sparse, 1), expected)

    @pytest.mark.parametrize("fill_value", [0.0, np.nan])
    def test_modf(self, fill_value):
        """``np.modf`` on a SparseArray matches ``np.modf`` on its dense form."""
        # https://github.com/pandas-dev/pandas/issues/26946
        sparse = SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value)
        sparse_frac, sparse_whole = np.modf(sparse)
        dense_frac, dense_whole = np.modf(np.asarray(sparse))
        tm.assert_sp_array_equal(
            sparse_frac, SparseArray(dense_frac, fill_value=fill_value)
        )
        tm.assert_sp_array_equal(
            sparse_whole, SparseArray(dense_whole, fill_value=fill_value)
        )

    def test_nbytes_integer(self):
        """``nbytes`` for an integer-kind index with two stored values."""
        arr = SparseArray([1, 0, 0, 0, 2], kind="integer")
        # (2 * 8) + 2 * 4
        assert arr.nbytes == 24

    def test_nbytes_block(self):
        """``nbytes`` for a block-kind index with one block of two values."""
        arr = SparseArray([1, 2, 0, 0, 0], kind="block")
        # (2 * 8) + 4 + 4
        # sp_values, blocs, blengths
        assert arr.nbytes == 24

    def test_asarray_datetime64(self):
        """Smoke test: converting a datetime SparseArray with ``np.asarray`` runs."""
        sparse = SparseArray(pd.to_datetime(["2012", None, None, "2013"]))
        np.asarray(sparse)

    def test_density(self):
        """``density`` is 0.5 for ``SparseArray([0, 1])``."""
        assert SparseArray([0, 1]).density == 0.5

    def test_npoints(self):
        """``npoints`` is 1 for ``SparseArray([0, 1])``."""
        assert SparseArray([0, 1]).npoints == 1
|
||||
|
||||
|
||||
def test_setting_fill_value_fillna_still_works():
    """``isna`` stays correct after ``fill_value`` is reassigned to NaN."""
    # This is why letting users update fill_value / dtype is bad
    # astype has the same problem.
    sparse = SparseArray([1.0, np.nan, 1.0], fill_value=0.0)
    sparse.fill_value = np.nan

    # Can't do direct comparison, since the sp_index will be different
    # So let's convert to ndarray and check there.
    mask = np.asarray(sparse.isna())

    tm.assert_numpy_array_equal(mask, np.array([False, True, False]))
|
||||
|
||||
|
||||
def test_setting_fill_value_updates():
    """Reassigning ``fill_value`` to NaN leaves the stored NaN at position 1."""
    sparse = SparseArray([0.0, np.nan], fill_value=0)
    sparse.fill_value = np.nan

    # use private constructor to get the index right
    # otherwise both nans would be un-stored.
    stored = np.array([np.nan])
    index = IntIndex(2, [1])
    expected = SparseArray._simple_new(
        sparse_array=stored,
        sparse_index=index,
        dtype=SparseDtype(float, np.nan),
    )
    tm.assert_sp_array_equal(sparse, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr,fill_value,loc",
    [
        ([None, 1, 2], None, 0),
        ([0, None, 2], None, 1),
        ([0, 1, None], None, 2),
        ([0, 1, 1, None, None], None, 3),
        ([1, 1, 1, 2], None, -1),
        ([], None, -1),
        ([None, 1, 0, 0, None, 2], None, 0),
        ([None, 1, 0, 0, None, 2], 1, 1),
        ([None, 1, 0, 0, None, 2], 2, 5),
        ([None, 1, 0, 0, None, 2], 3, -1),
        ([None, 0, 0, 1, 2, 1], 0, 1),
        ([None, 0, 0, 1, 2, 1], 1, 3),
    ],
)
def test_first_fill_value_loc(arr, fill_value, loc):
    """``_first_fill_value_loc`` returns the expected position for each case (-1 in some)."""
    sparse = SparseArray(arr, fill_value=fill_value)
    assert sparse._first_fill_value_loc() == loc
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr",
    [
        [1, 2, np.nan, np.nan],
        [1, np.nan, 2, np.nan],
        [1, 2, np.nan],
        [np.nan, 1, 0, 0, np.nan, 2],
        [np.nan, 0, 0, 1, 2, 1],
    ],
)
@pytest.mark.parametrize("fill_value", [np.nan, 0, 1])
def test_unique_na_fill(arr, fill_value):
    """``SparseArray.unique`` agrees with ``Series.unique`` and stays sparse."""
    sparse_unique = SparseArray(arr, fill_value=fill_value).unique()
    dense_unique = pd.Series(arr).unique()
    assert isinstance(sparse_unique, SparseArray)
    tm.assert_numpy_array_equal(np.asarray(sparse_unique), dense_unique)
|
||||
|
||||
|
||||
def test_unique_all_sparse():
    """``unique`` on ``SparseArray([0, 0])`` returns ``SparseArray([0])``."""
    # https://github.com/pandas-dev/pandas/issues/23168
    uniques = SparseArray([0, 0]).unique()
    tm.assert_sp_array_equal(uniques, SparseArray([0]))
|
||||
|
||||
|
||||
def test_map():
    """Check ``SparseArray.map`` with a dict, a Series and a callable mapper.

    Each mapper expresses the same "+10" mapping, so in every case both the
    stored values and the fill value (0 -> 10) must be mapped.
    """
    arr = SparseArray([0, 1, 2])
    expected = SparseArray([10, 11, 12], fill_value=10)

    mappers = [
        # dict
        {0: 10, 1: 11, 2: 12},
        # series
        pd.Series({0: 10, 1: 11, 2: 12}),
        # function: a real callable, so this case is distinct from the Series one
        lambda x: x + 10,
    ]
    for mapper in mappers:
        result = arr.map(mapper)
        tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_map_missing():
    """Mapping with a dict that lacks key 2 gives ``None`` at that position."""
    source = SparseArray([0, 1, 2])
    mapped = source.map({0: 10, 1: 11})

    expected = SparseArray([10, 11, None], fill_value=10)
    tm.assert_sp_array_equal(mapped, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [np.nan, 1])
def test_dropna(fill_value):
    """``dropna`` removes the NaN entry from the array and from a frame holding it."""
    # GH-28287
    sparse = SparseArray([np.nan, 1], fill_value=fill_value)
    remaining = SparseArray([1.0], fill_value=fill_value)
    tm.assert_sp_array_equal(sparse.dropna(), remaining)

    frame = pd.DataFrame({"a": [0, 1], "b": sparse})
    expected_frame = pd.DataFrame({"a": [1], "b": remaining}, index=pd.Index([1]))
    tm.assert_equal(frame.dropna(), expected_frame)
|
||||
|
||||
|
||||
def test_drop_duplicates_fill_value():
    """``drop_duplicates`` on an all-zero sparse frame keeps a single row."""
    # GH 11726

    def to_sparse(column):
        return SparseArray(column, fill_value=0)

    frame = pd.DataFrame(np.zeros((5, 5))).apply(to_sparse)
    deduplicated = frame.drop_duplicates()

    columns = {i: SparseArray([0.0], fill_value=0) for i in range(5)}
    tm.assert_frame_equal(deduplicated, pd.DataFrame(columns))
|
||||
|
||||
|
||||
def test_zero_sparse_column():
    """Boolean ``.loc`` selection on a frame whose sparse column is all zeros."""
    # GH 27781
    dense_column = [1, 2, 3]
    all_zero = pd.DataFrame({"A": SparseArray([0, 0, 0]), "B": dense_column})
    one_nonzero = pd.DataFrame({"A": SparseArray([0, 1, 0]), "B": dense_column})

    result = all_zero.loc[all_zero["B"] != 2]

    # The dropped row is the only one where the two frames differ.
    tm.assert_frame_equal(result, one_nonzero.loc[one_nonzero["B"] != 2])

    explicit = pd.DataFrame({"A": SparseArray([0, 0]), "B": [1, 3]}, index=[0, 2])
    tm.assert_frame_equal(result, explicit)
|
||||
|
||||
|
||||
def test_array_interface(arr_data, arr):
|
||||
# https://github.com/pandas-dev/pandas/pull/60046
|
||||
result = np.asarray(arr)
|
||||
tm.assert_numpy_array_equal(result, arr_data)
|
||||
|
||||
# it always gives a copy by default
|
||||
result_copy1 = np.asarray(arr)
|
||||
result_copy2 = np.asarray(arr)
|
||||
assert not np.may_share_memory(result_copy1, result_copy2)
|
||||
|
||||
# or with explicit copy=True
|
||||
result_copy1 = np.array(arr, copy=True)
|
||||
result_copy2 = np.array(arr, copy=True)
|
||||
assert not np.may_share_memory(result_copy1, result_copy2)
|
||||
|
||||
if not np_version_gt2:
|
||||
# copy=False semantics are only supported in NumPy>=2.
|
||||
return
|
||||
|
||||
msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
np.array(arr, copy=False)
|
||||
|
||||
# except when there are actually no sparse filled values
|
||||
arr2 = SparseArray(np.array([1, 2, 3]))
|
||||
result_nocopy1 = np.array(arr2, copy=False)
|
||||
result_nocopy2 = np.array(arr2, copy=False)
|
||||
assert np.may_share_memory(result_nocopy1, result_nocopy2)
|
@ -0,0 +1,133 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.sparse import IntIndex
|
||||
|
||||
from pandas import (
|
||||
SparseDtype,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestAstype:
    """Tests for ``SparseArray.astype`` across sparse and plain target dtypes."""

    def test_astype(self):
        """Casting between sparse float/int dtypes, and the NA failure case."""
        # float -> float
        source = SparseArray([None, None, 0, 2])
        as_float32 = source.astype("Sparse[float32]")
        expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32"))
        tm.assert_sp_array_equal(as_float32, expected)

        target = SparseDtype("float64", fill_value=0)
        stored = np.array([0.0, 2.0], dtype=target.subtype)
        expected = SparseArray._simple_new(stored, IntIndex(4, [2, 3]), target)
        tm.assert_sp_array_equal(source.astype(target), expected)

        target = SparseDtype("int64", 0)
        stored = np.array([0, 2], dtype=np.int64)
        expected = SparseArray._simple_new(stored, IntIndex(4, [2, 3]), target)
        tm.assert_sp_array_equal(source.astype(target), expected)

        with_nan = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match="NA"):
            with_nan.astype("Sparse[i8]")

    def test_astype_bool(self):
        """``astype(bool)`` gives an ndarray; a sparse bool dtype stays sparse."""
        source = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
        dense = source.astype(bool)
        tm.assert_numpy_array_equal(dense, np.array([1, 0, 0, 1], dtype=bool))

        # update fill value
        sparse_bool = SparseDtype(bool, False)
        expected = SparseArray([True, False, False, True], dtype=sparse_bool)
        tm.assert_sp_array_equal(source.astype(SparseDtype(bool, False)), expected)

    def test_astype_all(self, any_real_numpy_dtype):
        """Casting to each dtype from the fixture matches the ndarray cast."""
        values = np.array([1, 2, 3])
        sparse = SparseArray(values, fill_value=1)
        target = np.dtype(any_real_numpy_dtype)
        converted = sparse.astype(target)
        tm.assert_numpy_array_equal(converted, values.astype(any_real_numpy_dtype))

    @pytest.mark.parametrize(
        "arr, dtype, expected",
        [
            (
                SparseArray([0, 1]),
                "float",
                SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)),
            ),
            (SparseArray([0, 1]), bool, SparseArray([False, True])),
            (
                SparseArray([0, 1], fill_value=1),
                bool,
                SparseArray([False, True], dtype=SparseDtype(bool, True)),
            ),
            pytest.param(
                SparseArray([0, 1]),
                "datetime64[ns]",
                SparseArray(
                    np.array([0, 1], dtype="datetime64[ns]"),
                    dtype=SparseDtype("datetime64[ns]", Timestamp("1970")),
                ),
            ),
            (
                SparseArray([0, 1, 10]),
                np.str_,
                SparseArray(["0", "1", "10"], dtype=SparseDtype(np.str_, "0")),
            ),
            (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
            (
                SparseArray([0, 1, 0]),
                object,
                SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)),
            ),
        ],
    )
    def test_astype_more(self, arr, dtype, expected):
        """Cast via ``update_dtype`` so the sparse dtype's subtype is replaced."""
        target = arr.dtype.update_dtype(dtype)
        tm.assert_sp_array_equal(arr.astype(target), expected)

    def test_astype_nan_raises(self):
        """Casting an array that contains NaN to ``int`` raises ``ValueError``."""
        with_nan = SparseArray([1.0, np.nan])
        with pytest.raises(ValueError, match="Cannot convert non-finite"):
            with_nan.astype(int)

    def test_astype_copy_false(self):
        """``copy=False`` still produces correctly converted values."""
        # GH#34456 bug caused by using .view instead of .astype in astype_nansafe
        source = SparseArray([1, 2, 3])
        target = SparseDtype(float, 0)

        converted = source.astype(target, copy=False)

        expected = SparseArray([1.0, 2.0, 3.0], fill_value=0.0)
        tm.assert_sp_array_equal(converted, expected)

    def test_astype_dt64_to_int64(self):
        """datetime64 -> int64 casts match the equivalent ndarray cast."""
        # GH#49631 match non-sparse behavior
        values = np.array(["NaT", "2016-01-02", "2016-01-03"], dtype="M8[ns]")
        as_int = values.astype("int64")

        sparse = SparseArray(values)
        tm.assert_numpy_array_equal(sparse.astype("int64"), as_int)

        # we should also be able to cast to equivalent Sparse[int64]
        sparse_int64 = SparseDtype("int64", np.iinfo(np.int64).min)
        via_sparse_dtype = sparse.astype(sparse_int64)
        tm.assert_numpy_array_equal(via_sparse_dtype.to_numpy(), as_int)

        # GH#50087 we should match the non-sparse behavior regardless of
        # if we have a fill_value other than NaT
        dt_dtype = SparseDtype("datetime64[ns]", values[1])
        sparse_custom_fill = SparseArray(values, dtype=dt_dtype)
        tm.assert_numpy_array_equal(sparse_custom_fill.astype("int64"), as_int)
|
@ -0,0 +1,62 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestSparseArrayConcat:
    """Tests for ``SparseArray._concat_same_type``."""

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_basic(self, kind):
        """Concatenating two arrays of the same kind keeps values and kind."""
        left = SparseArray([1, 0, 0, 2], kind=kind)
        right = SparseArray([1, 0, 2, 2], kind=kind)

        combined = SparseArray._concat_same_type([left, right])

        # Can't make any assertions about the sparse index itself,
        # since we don't merge sparse blocs across arrays in to_concat
        stored = np.array([1, 2, 1, 2, 2], dtype="int64")
        tm.assert_numpy_array_equal(combined.sp_values, stored)
        assert combined.kind == kind

    @pytest.mark.parametrize("kind", ["integer", "block"])
    def test_uses_first_kind(self, kind):
        """With mixed kinds, the result takes the kind of the first array."""
        if kind == "block":
            other = "integer"
        else:
            other = "block"
        left = SparseArray([1, 0, 0, 2], kind=kind)
        right = SparseArray([1, 0, 2, 2], kind=other)

        combined = SparseArray._concat_same_type([left, right])

        stored = np.array([1, 2, 1, 2, 2], dtype="int64")
        tm.assert_numpy_array_equal(combined.sp_values, stored)
        assert combined.kind == kind
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other, expected_dtype",
    [
        # compatible dtype -> preserve sparse
        (pd.Series([3, 4, 5], dtype="int64"), pd.SparseDtype("int64", 0)),
        # (pd.Series([3, 4, 5], dtype="Int64"), pd.SparseDtype("int64", 0)),
        # incompatible dtype -> Sparse[common dtype]
        (pd.Series([1.5, 2.5, 3.5], dtype="float64"), pd.SparseDtype("float64", 0)),
        # incompatible dtype -> Sparse[object] dtype
        (pd.Series(["a", "b", "c"], dtype=object), pd.SparseDtype(object, 0)),
        # categorical with compatible categories -> dtype of the categories
        (pd.Series([3, 4, 5], dtype="category"), np.dtype("int64")),
        (pd.Series([1.5, 2.5, 3.5], dtype="category"), np.dtype("float64")),
        # categorical with incompatible categories -> object dtype
        (pd.Series(["a", "b", "c"], dtype="category"), np.dtype(object)),
    ],
)
def test_concat_with_non_sparse(other, expected_dtype):
    """``pd.concat`` of a sparse and a non-sparse Series, in both orders."""
    # https://github.com/pandas-dev/pandas/issues/34336
    s_sparse = pd.Series([1, 0, 2], dtype=pd.SparseDtype("int64", 0))

    # sparse first
    combined = pd.concat([s_sparse, other], ignore_index=True)
    values = list(s_sparse) + list(other)
    tm.assert_series_equal(combined, pd.Series(values).astype(expected_dtype))

    # sparse second
    combined = pd.concat([other, s_sparse], ignore_index=True)
    values = list(other) + list(s_sparse)
    tm.assert_series_equal(combined, pd.Series(values).astype(expected_dtype))
|
@ -0,0 +1,285 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.sparse import IntIndex
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
SparseDtype,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestConstructors:
|
||||
def test_constructor_dtype(self):
|
||||
arr = SparseArray([np.nan, 1, 2, np.nan])
|
||||
assert arr.dtype == SparseDtype(np.float64, np.nan)
|
||||
assert arr.dtype.subtype == np.float64
|
||||
assert np.isnan(arr.fill_value)
|
||||
|
||||
arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
|
||||
assert arr.dtype == SparseDtype(np.float64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
|
||||
assert arr.dtype == SparseDtype(np.float64, np.nan)
|
||||
assert np.isnan(arr.fill_value)
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
|
||||
assert arr.dtype == SparseDtype(np.int64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
|
||||
assert arr.dtype == SparseDtype(np.int64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], dtype=None)
|
||||
assert arr.dtype == SparseDtype(np.int64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
|
||||
assert arr.dtype == SparseDtype(np.int64, 0)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
def test_constructor_dtype_str(self):
|
||||
result = SparseArray([1, 2, 3], dtype="int")
|
||||
expected = SparseArray([1, 2, 3], dtype=int)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
def test_constructor_sparse_dtype(self):
|
||||
result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1))
|
||||
expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
assert result.sp_values.dtype == np.dtype("int64")
|
||||
|
||||
def test_constructor_sparse_dtype_str(self):
|
||||
result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]")
|
||||
expected = SparseArray([1, 0, 0, 1], dtype=np.int32)
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
assert result.sp_values.dtype == np.dtype("int32")
|
||||
|
||||
def test_constructor_object_dtype(self):
|
||||
# GH#11856
|
||||
arr = SparseArray(["A", "A", np.nan, "B"], dtype=object)
|
||||
assert arr.dtype == SparseDtype(object)
|
||||
assert np.isnan(arr.fill_value)
|
||||
|
||||
arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A")
|
||||
assert arr.dtype == SparseDtype(object, "A")
|
||||
assert arr.fill_value == "A"
|
||||
|
||||
def test_constructor_object_dtype_bool_fill(self):
|
||||
# GH#17574
|
||||
data = [False, 0, 100.0, 0.0]
|
||||
arr = SparseArray(data, dtype=object, fill_value=False)
|
||||
assert arr.dtype == SparseDtype(object, False)
|
||||
assert arr.fill_value is False
|
||||
arr_expected = np.array(data, dtype=object)
|
||||
it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
|
||||
assert np.fromiter(it, dtype=np.bool_).all()
|
||||
|
||||
@pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
def test_constructor_na_dtype(self, dtype):
    """Data containing NaN cannot be built with an integer dtype."""
    data = [0, 1, np.nan]
    with pytest.raises(ValueError, match="Cannot convert"):
        SparseArray(data, dtype=dtype)
|
||||
|
||||
def test_constructor_warns_when_losing_timezone(self):
|
||||
# GH#32501 warn when losing timezone information
|
||||
dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
|
||||
|
||||
expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))
|
||||
|
||||
with tm.assert_produces_warning(UserWarning):
|
||||
result = SparseArray(dti)
|
||||
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(UserWarning):
|
||||
result = SparseArray(pd.Series(dti))
|
||||
|
||||
tm.assert_sp_array_equal(result, expected)
|
||||
|
||||
def test_constructor_spindex_dtype(self):
|
||||
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
|
||||
# TODO: actionable?
|
||||
# XXX: Behavior change: specifying SparseIndex no longer changes the
|
||||
# fill_value
|
||||
expected = SparseArray([0, 1, 2, 0], kind="integer")
|
||||
tm.assert_sp_array_equal(arr, expected)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray(
|
||||
data=[1, 2, 3],
|
||||
sparse_index=IntIndex(4, [1, 2, 3]),
|
||||
dtype=np.int64,
|
||||
fill_value=0,
|
||||
)
|
||||
exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
|
||||
tm.assert_sp_array_equal(arr, exp)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray(
|
||||
data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64
|
||||
)
|
||||
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
|
||||
tm.assert_sp_array_equal(arr, exp)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
arr = SparseArray(
|
||||
data=[1, 2, 3],
|
||||
sparse_index=IntIndex(4, [1, 2, 3]),
|
||||
dtype=None,
|
||||
fill_value=0,
|
||||
)
|
||||
exp = SparseArray([0, 1, 2, 3], dtype=None)
|
||||
tm.assert_sp_array_equal(arr, exp)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
@pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
def test_constructor_spindex_dtype_scalar(self, sparse_index):
    """Scalar data warns as deprecated but still builds a length-one array."""
    # scalar input
    msg = "Constructing SparseArray with scalar data is deprecated"

    # First the parametrized index, then always an explicit IntIndex.
    for index in (sparse_index, IntIndex(1, [0])):
        with tm.assert_produces_warning(FutureWarning, match=msg):
            built = SparseArray(data=1, sparse_index=index, dtype=None)
        reference = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(built, reference)
        assert built.dtype == SparseDtype(np.int64)
        assert built.fill_value == 0
|
||||
|
||||
def test_constructor_spindex_dtype_scalar_broadcasts(self):
|
||||
arr = SparseArray(
|
||||
data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None
|
||||
)
|
||||
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
|
||||
tm.assert_sp_array_equal(arr, exp)
|
||||
assert arr.dtype == SparseDtype(np.int64)
|
||||
assert arr.fill_value == 0
|
||||
|
||||
@pytest.mark.parametrize(
    "data, fill_value",
    [
        (np.array([1, 2]), 0),
        (np.array([1.0, 2.0]), np.nan),
        ([True, False], False),
        ([pd.Timestamp("2017-01-01")], pd.NaT),
    ],
)
def test_constructor_inferred_fill_value(self, data, fill_value):
    """The default fill value is inferred from the data."""
    inferred = SparseArray(data).fill_value

    # NA-like expectations cannot be compared with ``==``.
    if not isna(fill_value):
        assert inferred == fill_value
    else:
        assert isna(inferred)
|
||||
|
||||
@pytest.mark.parametrize("format", ["coo", "csc", "csr"])
@pytest.mark.parametrize("size", [0, 10])
def test_from_spmatrix(self, size, format):
    """``from_spmatrix`` converts a one-column scipy sparse matrix."""
    sp_sparse = pytest.importorskip("scipy.sparse")

    matrix = sp_sparse.random(size, 1, density=0.5, format=format)
    converted = np.asarray(SparseArray.from_spmatrix(matrix))

    tm.assert_numpy_array_equal(converted, matrix.toarray().ravel())
|
||||
|
||||
@pytest.mark.parametrize("format", ["coo", "csc", "csr"])
def test_from_spmatrix_including_explicit_zero(self, format):
    """A scipy matrix with an explicitly stored zero converts correctly."""
    sp_sparse = pytest.importorskip("scipy.sparse")

    matrix = sp_sparse.random(10, 1, density=0.5, format=format)
    # Overwrite one stored entry with zero so the matrix holds an explicit 0.
    matrix.data[0] = 0
    converted = np.asarray(SparseArray.from_spmatrix(matrix))

    tm.assert_numpy_array_equal(converted, matrix.toarray().ravel())
|
||||
|
||||
def test_from_spmatrix_raises(self):
    """A matrix with four columns is rejected by ``from_spmatrix``."""
    sp_sparse = pytest.importorskip("scipy.sparse")
    four_columns = sp_sparse.eye(5, 4, format="csc")

    expected_msg = "not '4'"
    with pytest.raises(ValueError, match=expected_msg):
        SparseArray.from_spmatrix(four_columns)
|
||||
|
||||
def test_constructor_from_too_large_array(self):
    """Two-dimensional input is refused by the SparseArray constructor."""
    two_dim = np.arange(10).reshape((2, 5))
    expected_msg = "expected dimension <= 1 data"
    with pytest.raises(TypeError, match=expected_msg):
        SparseArray(two_dim)
|
||||
|
||||
def test_constructor_from_sparse(self):
|
||||
zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
|
||||
res = SparseArray(zarr)
|
||||
assert res.fill_value == 0
|
||||
tm.assert_almost_equal(res.sp_values, zarr.sp_values)
|
||||
|
||||
def test_constructor_copy(self):
|
||||
arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
|
||||
arr = SparseArray(arr_data)
|
||||
|
||||
cp = SparseArray(arr, copy=True)
|
||||
cp.sp_values[:3] = 0
|
||||
assert not (arr.sp_values[:3] == 0).any()
|
||||
|
||||
not_copy = SparseArray(arr)
|
||||
not_copy.sp_values[:3] = 0
|
||||
assert (arr.sp_values[:3] == 0).all()
|
||||
|
||||
def test_constructor_bool(self):
|
||||
# GH#10648
|
||||
data = np.array([False, False, True, True, False, False])
|
||||
arr = SparseArray(data, fill_value=False, dtype=bool)
|
||||
|
||||
assert arr.dtype == SparseDtype(bool)
|
||||
tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
|
||||
# Behavior change: np.asarray densifies.
|
||||
# tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
|
||||
tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32))
|
||||
|
||||
dense = arr.to_dense()
|
||||
assert dense.dtype == bool
|
||||
tm.assert_numpy_array_equal(dense, data)
|
||||
|
||||
def test_constructor_bool_fill_value(self):
|
||||
arr = SparseArray([True, False, True], dtype=None)
|
||||
assert arr.dtype == SparseDtype(np.bool_)
|
||||
assert not arr.fill_value
|
||||
|
||||
arr = SparseArray([True, False, True], dtype=np.bool_)
|
||||
assert arr.dtype == SparseDtype(np.bool_)
|
||||
assert not arr.fill_value
|
||||
|
||||
arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True)
|
||||
assert arr.dtype == SparseDtype(np.bool_, True)
|
||||
assert arr.fill_value
|
||||
|
||||
def test_constructor_float32(self):
|
||||
# GH#10648
|
||||
data = np.array([1.0, np.nan, 3], dtype=np.float32)
|
||||
arr = SparseArray(data, dtype=np.float32)
|
||||
|
||||
assert arr.dtype == SparseDtype(np.float32)
|
||||
tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32))
|
||||
# Behavior change: np.asarray densifies.
|
||||
# tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
|
||||
tm.assert_numpy_array_equal(
|
||||
arr.sp_index.indices, np.array([0, 2], dtype=np.int32)
|
||||
)
|
||||
|
||||
dense = arr.to_dense()
|
||||
assert dense.dtype == np.float32
|
||||
tm.assert_numpy_array_equal(dense, data)
|
@ -0,0 +1,224 @@
|
||||
import re
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", 0),
        ("float", np.nan),
        ("bool", False),
        ("object", np.nan),
        ("datetime64[ns]", np.datetime64("NaT", "ns")),
        ("timedelta64[ns]", np.timedelta64("NaT", "ns")),
    ],
)
def test_inferred_dtype(dtype, fill_value):
    """Check the default fill_value SparseDtype picks for each subtype.

    Parameters
    ----------
    dtype : str
        Subtype handed to ``SparseDtype``.
    fill_value : scalar
        The fill_value the resulting dtype is expected to report.
    """
    sparse_dtype = SparseDtype(dtype)
    result = sparse_dtype.fill_value
    if pd.isna(fill_value):
        # Missing values cannot be compared with ==; require both "is missing"
        # and the exact same scalar type (e.g. float nan vs. datetime64 NaT).
        # The checks are separate so a failure shows which one broke.
        assert pd.isna(result)
        assert type(result) is type(fill_value)
    else:
        assert result == fill_value
|
||||
|
||||
|
||||
def test_from_sparse_dtype():
    """Building a SparseDtype from another SparseDtype keeps its fill_value."""
    original = SparseDtype("float", 0)
    assert SparseDtype(original).fill_value == 0
|
||||
|
||||
|
||||
def test_from_sparse_dtype_fill_value():
    """An explicit fill_value overrides the one on the wrapped SparseDtype."""
    original = SparseDtype("int", 1)
    overridden = SparseDtype(original, fill_value=2)
    assert overridden == SparseDtype("int", 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", None),
        ("float", None),
        ("bool", None),
        ("object", None),
        ("datetime64[ns]", None),
        ("timedelta64[ns]", None),
        ("int", np.nan),
        ("float", 0),
    ],
)
def test_equal(dtype, fill_value):
    """Two dtypes built from the same arguments compare equal in both directions."""
    first = SparseDtype(dtype, fill_value)
    second = SparseDtype(dtype, fill_value)
    assert first == second
    assert second == first
|
||||
|
||||
|
||||
def test_nans_equal():
    """NaN fill values compare equal even when they are different objects."""
    from_builtin = SparseDtype(float, float("nan"))
    from_numpy = SparseDtype(float, np.nan)
    assert from_builtin == from_numpy
    assert from_numpy == from_builtin
|
||||
|
||||
|
||||
# Pairs of dtypes that must compare unequal; consumed by test_not_equal.
# They are built at import time inside catch_warnings so the FutureWarning
# matching ``msg`` is silenced while the pairs are constructed.
with warnings.catch_warnings():
    msg = "Allowing arbitrary scalar fill_value in SparseDtype is deprecated"
    warnings.filterwarnings("ignore", message=msg, category=FutureWarning)

    tups = [
        # different subtype
        (SparseDtype("float64"), SparseDtype("float32")),
        # same subtype, different fill_value
        (SparseDtype("float64"), SparseDtype("float64", 0)),
        (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
        # same fill_value, different subtype
        (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
        # sparse dtype against a plain numpy dtype
        (SparseDtype("float64"), np.dtype("float64")),
    ]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("a, b", tups)
def test_not_equal(a, b):
    """Each pair listed in ``tups`` must compare unequal."""
    assert a != b
|
||||
|
||||
|
||||
def test_construct_from_string_raises():
    """An unparseable string is rejected with a TypeError."""
    expected_msg = "Cannot construct a 'SparseDtype' from 'not a dtype'"
    with pytest.raises(TypeError, match=expected_msg):
        SparseDtype.construct_from_string("not a dtype")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (SparseDtype(int), True),
        (SparseDtype(float), True),
        (SparseDtype(bool), True),
        (SparseDtype(object), False),
        (SparseDtype(str), False),
    ],
)
def test_is_numeric(dtype, expected):
    """``_is_numeric`` is exactly True or False, as listed per subtype."""
    flag = dtype._is_numeric
    assert flag is expected
|
||||
|
||||
|
||||
def test_str_uses_object():
    """A ``str`` subtype is stored as the numpy object dtype."""
    assert SparseDtype(str).subtype == np.dtype("object")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[float64]", SparseDtype(np.dtype("float64"))),
        ("Sparse[float32]", SparseDtype(np.dtype("float32"))),
        ("Sparse[int]", SparseDtype(np.dtype("int"))),
        ("Sparse[str]", SparseDtype(np.dtype("str"))),
        ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))),
        ("Sparse", SparseDtype(np.dtype("float"), np.nan)),
    ],
)
def test_construct_from_string(string, expected):
    """Each string form parses to the dtype listed next to it."""
    parsed = SparseDtype.construct_from_string(string)
    assert parsed == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b, expected",
    [
        (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True),
        (SparseDtype(int, 0), SparseDtype(int, 0), True),
        (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True),
        (SparseDtype(float, 0), SparseDtype(float, np.nan), False),
        (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
    ],
)
def test_hash_equal(a, b, expected):
    """Equality and hash equality agree with ``expected`` for each pair."""
    same_value = a == b
    assert same_value is expected

    same_hash = hash(a) == hash(b)
    assert same_hash is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[int]", "int"),
        ("Sparse[int, 0]", "int"),
        ("Sparse[int64]", "int64"),
        ("Sparse[int64, 0]", "int64"),
        ("Sparse[datetime64[ns], 0]", "datetime64[ns]"),
    ],
)
def test_parse_subtype(string, expected):
    """``_parse_subtype`` returns the subtype name as its first element."""
    parsed = SparseDtype._parse_subtype(string)
    # Only the first of the two returned items is checked here.
    subtype, _ = parsed
    assert subtype == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "string",
    ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"],
)
def test_construct_from_string_fill_value_raises(string):
    """Strings that embed a fill_value are rejected by ``construct_from_string``."""
    expected_msg = "fill_value in the string is not"
    with pytest.raises(TypeError, match=expected_msg):
        SparseDtype.construct_from_string(string)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original, dtype, expected",
    [
        (SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
        (SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
        (SparseDtype(int, 1), np.str_, SparseDtype(object, "1")),
        (SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
    ],
)
def test_update_dtype(original, dtype, expected):
    """``update_dtype`` yields the listed dtype, including its fill_value."""
    updated = original.update_dtype(dtype)
    assert updated == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "original, dtype, expected_error_msg",
    [
        (
            SparseDtype(float, np.nan),
            int,
            re.escape("Cannot convert non-finite values (NA or inf) to integer"),
        ),
        (
            SparseDtype(str, "abc"),
            int,
            r"invalid literal for int\(\) with base 10: ('abc'|np\.str_\('abc'\))",
        ),
    ],
)
def test_update_dtype_raises(original, dtype, expected_error_msg):
    """``update_dtype`` raises ValueError when the fill_value cannot be converted."""
    with pytest.raises(ValueError, match=expected_error_msg):
        original.update_dtype(dtype)
|
||||
|
||||
|
||||
def test_repr():
    """``str()`` of a SparseDtype shows the subtype and the repr of fill_value."""
    # GH-34352
    int_dtype = SparseDtype("int64", fill_value=0)
    assert str(int_dtype) == "Sparse[int64, 0]"

    # A string fill_value is rendered with quotes.
    object_dtype = SparseDtype(object, fill_value="0")
    assert str(object_dtype) == "Sparse[object, '0']"
|
||||
|
||||
|
||||
def test_sparse_dtype_subtype_must_be_numpy_dtype():
    """A "category" subtype is rejected with a TypeError."""
    # GH#53160
    expected_msg = "SparseDtype subtype must be a numpy dtype"
    with pytest.raises(TypeError, match=expected_msg):
        SparseDtype("category", fill_value="c")
|
@ -0,0 +1,302 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import SparseDtype
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
@pytest.fixture
def arr_data():
    """Ten-element float array mixing NaN gaps with the values 1 through 6."""
    nan = np.nan
    return np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
|
||||
|
||||
|
||||
@pytest.fixture
def arr(arr_data):
    """SparseArray built from the ``arr_data`` fixture with default arguments."""
    sparse = SparseArray(arr_data)
    return sparse
|
||||
|
||||
|
||||
class TestGetitem:
    """``SparseArray.__getitem__`` with scalars, slices, tuples and masks."""

    def test_getitem(self, arr):
        """Scalar lookups agree with the dense array for i and -i."""
        dense = arr.to_dense()
        for pos, item in enumerate(arr):
            tm.assert_almost_equal(item, dense[pos])
            tm.assert_almost_equal(arr[-pos], dense[-pos])

    def test_getitem_arraylike_mask(self, arr):
        """A plain list of booleans works as a mask."""
        arr = SparseArray([0, 1, 2])
        picked = arr[[True, False, True]]
        tm.assert_sp_array_equal(picked, SparseArray([0, 2]))

    @pytest.mark.parametrize(
        "slc",
        [
            np.s_[:],
            np.s_[1:10],
            np.s_[1:100],
            np.s_[10:1],
            np.s_[:-3],
            np.s_[-5:-4],
            np.s_[:-12],
            np.s_[-12:],
            np.s_[2:],
            np.s_[2::3],
            np.s_[::2],
            np.s_[::-1],
            np.s_[::-2],
            np.s_[1:6:2],
            np.s_[:-6:-2],
        ],
    )
    @pytest.mark.parametrize(
        "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []]
    )
    def test_getslice(self, slc, as_dense):
        """Slicing the sparse array equals sparsifying the sliced dense array."""
        as_dense = np.array(as_dense)
        sliced = SparseArray(as_dense)[slc]
        expected = SparseArray(as_dense[slc])
        tm.assert_sp_array_equal(sliced, expected)

    def test_getslice_tuple(self):
        """A one-element tuple of slices works; a second axis raises IndexError."""
        dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])
        key = (slice(4, None),)

        # Once with the default fill_value, once with fill_value=0.
        for kwargs in ({}, {"fill_value": 0}):
            sparse = SparseArray(dense, **kwargs)
            res = sparse[key]
            exp = SparseArray(dense[4:], **kwargs)
            tm.assert_sp_array_equal(res, exp)

        msg = "too many indices for array"
        with pytest.raises(IndexError, match=msg):
            sparse[4:, :]

        with pytest.raises(IndexError, match=msg):
            # check numpy compat
            dense[4:, :]

    def test_boolean_slice_empty(self):
        """An all-False mask keeps the dtype of the source array."""
        arr = SparseArray([0, 1, 2])
        nothing = arr[[False, False, False]]
        assert nothing.dtype == arr.dtype

    def test_getitem_bool_sparse_array(self, arr):
        """Boolean SparseArray masks select the expected elements."""
        # GH 23122
        spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        exp = SparseArray([np.nan, 2, np.nan, 5, 6])
        tm.assert_sp_array_equal(arr[spar_bool], exp)

        # Inverted mask.
        spar_bool = ~spar_bool
        res = arr[spar_bool]
        exp = SparseArray([np.nan, 1, 3, 4, np.nan])
        tm.assert_sp_array_equal(res, exp)

        # Mask built from data containing NaN, with a NaN fill_value.
        spar_bool = SparseArray(
            [False, True, np.nan] * 3, dtype=np.bool_, fill_value=np.nan
        )
        res = arr[spar_bool]
        exp = SparseArray([np.nan, 3, 5])
        tm.assert_sp_array_equal(res, exp)

    def test_getitem_bool_sparse_array_as_comparison(self):
        """The result of a comparison can be used directly as a mask."""
        # GH 45110
        arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan)
        above_two = arr[arr > 2]
        exp = SparseArray([3.0, 4.0], fill_value=np.nan)
        tm.assert_sp_array_equal(above_two, exp)

    def test_get_item(self, arr):
        """Scalar indexing returns stored and fill values; out of range raises."""
        zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)

        assert np.isnan(arr[1])
        assert arr[2] == 1
        assert arr[7] == 5

        assert zarr[0] == 0
        assert zarr[2] == 1
        assert zarr[7] == 5

        errmsg = "must be an integer between -10 and 10"
        for out_of_range in (11, -11):
            with pytest.raises(IndexError, match=errmsg):
                arr[out_of_range]

        last = len(arr) - 1
        assert arr[-1] == arr[last]
|
||||
|
||||
|
||||
class TestSetitem:
    """Item and slice assignment on a SparseArray."""

    def test_set_item(self, arr_data):
        """Both scalar and slice assignment raise TypeError."""
        arr = SparseArray(arr_data).copy()
        expected_msg = "assignment via setitem"

        with pytest.raises(TypeError, match=expected_msg):
            arr[5] = 3

        with pytest.raises(TypeError, match=expected_msg):
            arr[1:5] = 2
|
||||
|
||||
|
||||
class TestTake:
    """``SparseArray.take`` with and without ``allow_fill``."""

    def test_take_scalar_raises(self, arr):
        """A scalar ``indices`` argument is rejected."""
        msg = "'indices' must be an array, not a scalar '2'."
        with pytest.raises(ValueError, match=msg):
            arr.take(2)

    def test_take(self, arr_data, arr):
        """Taking positive positions matches ``np.take`` on the dense data."""
        for picks in ([2, 3], [0, 1, 2]):
            exp = SparseArray(np.take(arr_data, picks))
            tm.assert_sp_array_equal(arr.take(picks), exp)

    def test_take_all_empty(self):
        """Taking every position of an all-fill array returns an equal array."""
        sparse = pd.array([0, 0], dtype=SparseDtype("int64"))
        taken = sparse.take([0, 1], allow_fill=True, fill_value=np.nan)
        tm.assert_sp_array_equal(sparse, taken)

    def test_take_different_fill_value(self):
        """A take-time fill_value must not overwrite the array's own one."""
        sparse = pd.array([0.0], dtype=SparseDtype("float64", fill_value=0.0))
        taken = sparse.take([0, -1], allow_fill=True, fill_value=np.nan)
        expected = pd.array([0, np.nan], dtype=sparse.dtype)
        tm.assert_sp_array_equal(expected, taken)

    def test_take_fill_value(self):
        """take on an array with fill_value=0 matches ``np.take``."""
        data = np.array([1, np.nan, 0, 3, 0])
        sparse = SparseArray(data, fill_value=0)

        for picks in ([0], [1, 3, 4]):
            exp = SparseArray(np.take(data, picks), fill_value=0)
            tm.assert_sp_array_equal(sparse.take(picks), exp)

    def test_take_negative(self, arr_data, arr):
        """Negative positions count from the end, as in ``np.take``."""
        for picks in ([-1], [-4, -3, -2]):
            exp = SparseArray(np.take(arr_data, picks))
            tm.assert_sp_array_equal(arr.take(picks), exp)

    def test_bad_take(self, arr):
        """A position past the end raises IndexError."""
        with pytest.raises(IndexError, match="bounds"):
            arr.take([11])

    def test_take_filling(self):
        """``allow_fill`` turns -1 into a missing value for a NaN-filled array."""
        # similar tests as GH 12631
        sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
        wanted = np.array([1, 0, -1])

        # Default: -1 means "last element".
        result = sparse.take(wanted)
        expected = SparseArray([np.nan, np.nan, 4])
        tm.assert_sp_array_equal(result, expected)

        # TODO: actionable?
        # XXX: test change: fill_value=True -> allow_fill=True
        result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        expected = SparseArray([np.nan, np.nan, np.nan])
        tm.assert_sp_array_equal(result, expected)

        # allow_fill=False
        result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = SparseArray([np.nan, np.nan, 4])
        tm.assert_sp_array_equal(result, expected)

        # With allow_fill=True, negatives other than -1 are invalid.
        msg = "Invalid value in 'indices'"
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)

        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), allow_fill=True)

    def test_take_filling_fill_value(self):
        """Same checks as ``test_take_filling`` for an array with fill_value=0."""
        # same tests as GH#12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        # fill_value
        result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        # TODO: actionable?
        # XXX: behavior change.
        # Filling with self.fill_value (the old way) doesn't follow EA rules;
        # the filled slot is supposed to be self.dtype.na_value (nan here).
        expected = SparseArray([0, np.nan, np.nan], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        # allow_fill=False
        result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = SparseArray([0, np.nan, 4], fill_value=0)
        tm.assert_sp_array_equal(result, expected)

        msg = "Invalid value in 'indices'."
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -2]), allow_fill=True)
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array([1, 0, -5]), allow_fill=True)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), fill_value=True)

    @pytest.mark.parametrize("kind", ["block", "integer"])
    def test_take_filling_all_nan(self, kind):
        """take on an all-NaN array gives all-NaN for both index kinds."""
        sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind)

        result = sparse.take(np.array([1, 0, -1]))
        expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
        tm.assert_sp_array_equal(result, expected)

        result = sparse.take(np.array([1, 0, -1]), fill_value=True)
        expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
        tm.assert_sp_array_equal(result, expected)

        msg = "out of bounds value in 'indices'"
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, -6]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]))
        with pytest.raises(IndexError, match=msg):
            sparse.take(np.array([1, 5]), fill_value=True)
|
||||
|
||||
|
||||
class TestWhere:
    """``_where`` on SparseArray and ``where`` on a Series that wraps one."""

    def test_where_retain_fill_value(self):
        """The original fill_value survives ``_where`` and ``Series.where``."""
        # GH#45691 don't lose fill_value on _where
        arr = SparseArray([np.nan, 1.0], fill_value=0)
        mask = np.array([True, False])
        exp = SparseArray([1, 1.0], fill_value=0)

        replaced = arr._where(~mask, 1)
        tm.assert_sp_array_equal(replaced, exp)

        # The same replacement through the Series API.
        ser = pd.Series(arr)
        replaced_ser = ser.where(~mask, 1)
        tm.assert_series_equal(replaced_ser, pd.Series(exp))
|
@ -0,0 +1,551 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.sparse as splib
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.sparse import (
|
||||
BlockIndex,
|
||||
IntIndex,
|
||||
make_sparse_index,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def test_length():
    """Length passed to the BlockIndex constructors in the tests that use it."""
    length = 20
    return length
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        # Each entry is [xloc, xlen, yloc, ylen, eloc, elen]: block starts and
        # lengths of two indexes, then of their expected intersection.
        [[0, 7, 15], [3, 5, 5], [2, 9, 14], [2, 3, 5], [2, 9, 15], [1, 3, 4]],
        [[0, 5], [4, 4], [1], [4], [1], [3]],
        [[0], [10], [0, 5], [3, 7], [0, 5], [3, 5]],
        [[10], [5], [0, 12], [5, 3], [12], [3]],
        [[0, 10], [4, 6], [5, 17], [4, 2], [], []],
        [[0], [5], [], [], [], []],
    ],
    ids=[
        "plain_case",
        "delete_blocks",
        "split_blocks",
        "skip_block",
        "no_intersect",
        "one_empty",
    ],
)
def cases(request):
    """Yield one intersection scenario per parametrized run."""
    scenario = request.param
    return scenario
|
||||
|
||||
|
||||
class TestSparseIndexUnion:
    """``make_union`` on BlockIndex and IntIndex."""

    @pytest.mark.parametrize(
        "xloc, xlen, yloc, ylen, eloc, elen",
        [
            [[0], [5], [5], [4], [0], [9]],
            [[0, 10], [5, 5], [2, 17], [5, 2], [0, 10, 17], [7, 5, 2]],
            [[1], [5], [3], [5], [1], [7]],
            [[2, 10], [4, 4], [4], [8], [2], [12]],
            [[0, 5], [3, 5], [0], [7], [0], [10]],
            [[2, 10], [4, 4], [4, 13], [8, 4], [2], [15]],
            [[2], [15], [4, 9, 14], [3, 2, 2], [2], [15]],
            [[0, 10], [3, 3], [5, 15], [2, 2], [0, 5, 10, 15], [3, 2, 3, 2]],
        ],
    )
    def test_index_make_union(self, xloc, xlen, yloc, ylen, eloc, elen, test_length):
        """The block union matches ``eloc``/``elen`` and the integer union."""
        # The eight parameter sets, in order (block start, block length):
        # Case 1: y starts where x ends; the result is one merged block.
        # Case 2: two x blocks; y overlaps the first and adds a separate
        #         trailing block, giving three result blocks.
        # Case 3: x and y overlap partially; one result block.
        # Case 4: a single y block bridges two x blocks; one result block.
        # Case 5: y covers the first x block and reaches into the second;
        #         one result block.
        # Case 6: two y blocks chain both x blocks into one result block.
        # Case 7: x already covers every y block; the result equals x.
        # Case 8: x and y blocks interleave without touching; four blocks.
        xindex = BlockIndex(test_length, xloc, xlen)
        yindex = BlockIndex(test_length, yloc, ylen)

        bresult = xindex.make_union(yindex)
        assert isinstance(bresult, BlockIndex)
        tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32))
        tm.assert_numpy_array_equal(bresult.blengths, np.array(elen, dtype=np.int32))

        # The integer-index union must agree with the block union.
        ixindex = xindex.to_int_index()
        iyindex = yindex.to_int_index()
        iresult = ixindex.make_union(iyindex)
        assert isinstance(iresult, IntIndex)
        tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices)

    def test_int_index_make_union(self):
        """IntIndex unions for overlapping, empty and identical operands."""
        scenarios = [
            ([0, 3, 4], [0, 2], [0, 2, 3, 4]),
            ([], [0, 2], [0, 2]),
            ([], [], []),
            ([0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]),
        ]
        for left, right, merged in scenarios:
            a = IntIndex(5, np.array(left, dtype=np.int32))
            b = IntIndex(5, np.array(right, dtype=np.int32))
            res = a.make_union(b)
            exp = IntIndex(5, np.array(merged, np.int32))
            assert res.equals(exp)

        # Operands with different lengths cannot be combined.
        a = IntIndex(5, np.array([0, 1], dtype=np.int32))
        b = IntIndex(4, np.array([0, 1], dtype=np.int32))

        msg = "Indices must reference same underlying length"
        with pytest.raises(ValueError, match=msg):
            a.make_union(b)
|
||||
|
||||
|
||||
class TestSparseIndexIntersect:
    """``intersect`` on BlockIndex and IntIndex."""

    @td.skip_if_windows
    def test_intersect(self, cases, test_length):
        """Each ``cases`` scenario intersects to the expected blocks."""
        xloc, xlen, yloc, ylen, eloc, elen = cases
        xindex = BlockIndex(test_length, xloc, xlen)
        yindex = BlockIndex(test_length, yloc, ylen)
        expected = BlockIndex(test_length, eloc, elen)
        # Same blocks as yindex but one element longer overall.
        longer_index = BlockIndex(test_length + 1, yloc, ylen)

        block_result = xindex.intersect(yindex)
        assert block_result.equals(expected)
        int_result = xindex.to_int_index().intersect(yindex.to_int_index())
        assert int_result.equals(expected.to_int_index())

        msg = "Indices must reference same underlying length"
        with pytest.raises(Exception, match=msg):
            xindex.intersect(longer_index)
        with pytest.raises(Exception, match=msg):
            xindex.to_int_index().intersect(longer_index.to_int_index())

    def test_intersect_empty(self):
        """Intersecting with an empty index gives an empty index, either way."""
        xindex = IntIndex(4, np.array([], dtype=np.int32))
        yindex = IntIndex(4, np.array([2, 3], dtype=np.int32))
        assert xindex.intersect(yindex).equals(xindex)
        assert yindex.intersect(xindex).equals(xindex)

        # Repeat with the block representation of the same indexes.
        xindex = xindex.to_block_index()
        yindex = yindex.to_block_index()
        assert xindex.intersect(yindex).equals(xindex)
        assert yindex.intersect(xindex).equals(xindex)

    @pytest.mark.parametrize(
        "case",
        [
            # Argument 2 to "IntIndex" has incompatible type "ndarray[Any,
            # dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]"
            IntIndex(5, np.array([1, 2], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(5, np.array([0, 2, 4], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(0, np.array([], dtype=np.int32)),  # type: ignore[arg-type]
            IntIndex(5, np.array([], dtype=np.int32)),  # type: ignore[arg-type]
        ],
    )
    def test_intersect_identical(self, case):
        """An index intersected with itself equals itself, in both forms."""
        assert case.intersect(case).equals(case)
        case = case.to_block_index()
        assert case.intersect(case).equals(case)
|
||||
|
||||
|
||||
class TestSparseIndexCommon:
|
||||
def test_int_internal(self):
|
||||
idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="integer")
|
||||
assert isinstance(idx, IntIndex)
|
||||
assert idx.npoints == 2
|
||||
tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32))
|
||||
|
||||
idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="integer")
|
||||
assert isinstance(idx, IntIndex)
|
||||
assert idx.npoints == 0
|
||||
tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32))
|
||||
|
||||
idx = make_sparse_index(
|
||||
4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer"
|
||||
)
|
||||
assert isinstance(idx, IntIndex)
|
||||
assert idx.npoints == 4
|
||||
tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32))
|
||||
|
||||
def test_block_internal(self):
|
||||
idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind="block")
|
||||
assert isinstance(idx, BlockIndex)
|
||||
assert idx.npoints == 2
|
||||
tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32))
|
||||
tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32))
|
||||
|
||||
idx = make_sparse_index(4, np.array([], dtype=np.int32), kind="block")
|
||||
assert isinstance(idx, BlockIndex)
|
||||
assert idx.npoints == 0
|
||||
tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32))
|
||||
tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32))
|
||||
|
||||
idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block")
|
||||
assert isinstance(idx, BlockIndex)
|
||||
assert idx.npoints == 4
|
||||
tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32))
|
||||
tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32))
|
||||
|
||||
idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block")
|
||||
assert isinstance(idx, BlockIndex)
|
||||
assert idx.npoints == 3
|
||||
tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32))
|
||||
tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32))
|
||||
|
||||
@pytest.mark.parametrize("kind", ["integer", "block"])
|
||||
def test_lookup(self, kind):
|
||||
idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind)
|
||||
assert idx.lookup(-1) == -1
|
||||
assert idx.lookup(0) == -1
|
||||
assert idx.lookup(1) == -1
|
||||
assert idx.lookup(2) == 0
|
||||
assert idx.lookup(3) == 1
|
||||
assert idx.lookup(4) == -1
|
||||
|
||||
idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind)
|
||||
|
||||
for i in range(-1, 5):
|
||||
assert idx.lookup(i) == -1
|
||||
|
||||
idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind)
|
||||
assert idx.lookup(-1) == -1
|
||||
assert idx.lookup(0) == 0
|
||||
assert idx.lookup(1) == 1
|
||||
assert idx.lookup(2) == 2
|
||||
assert idx.lookup(3) == 3
|
||||
assert idx.lookup(4) == -1
|
||||
|
||||
idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
|
||||
assert idx.lookup(-1) == -1
|
||||
assert idx.lookup(0) == 0
|
||||
assert idx.lookup(1) == -1
|
||||
assert idx.lookup(2) == 1
|
||||
assert idx.lookup(3) == 2
|
||||
assert idx.lookup(4) == -1
|
||||
|
||||
@pytest.mark.parametrize("kind", ["integer", "block"])
|
||||
def test_lookup_array(self, kind):
|
||||
idx = make_sparse_index(4, np.array([2, 3], dtype=np.int32), kind=kind)
|
||||
|
||||
res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
|
||||
exp = np.array([-1, -1, 0], dtype=np.int32)
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
|
||||
exp = np.array([-1, 0, -1, 1], dtype=np.int32)
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
idx = make_sparse_index(4, np.array([], dtype=np.int32), kind=kind)
|
||||
res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
|
||||
exp = np.array([-1, -1, -1, -1], dtype=np.int32)
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
idx = make_sparse_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind)
|
||||
res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
|
||||
exp = np.array([-1, 0, 2], dtype=np.int32)
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
|
||||
exp = np.array([-1, 2, 1, 3], dtype=np.int32)
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
idx = make_sparse_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind)
|
||||
res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
|
||||
exp = np.array([1, -1, 2, 0], dtype=np.int32)
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
|
||||
exp = np.array([-1, -1, 1, -1], dtype=np.int32)
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
@pytest.mark.parametrize(
    "idx, expected",
    [
        (0, -1),
        (5, 0),
        (7, 2),
        (8, -1),
        (9, -1),
        (10, -1),
        (11, -1),
        (12, 3),
        (17, 8),
        (18, -1),
    ],
)
def test_lookup_basics(self, idx, expected):
    """Scalar ``lookup`` agrees between a block index and its int form.

    The index has length 20 with blocks starting at 5 (length 3) and at 12
    (length 6); ``expected`` is the offset of ``idx`` among the stored
    points, or ``-1`` for a position outside both blocks.
    """
    block_form = BlockIndex(20, [5, 12], [3, 6])
    assert block_form.lookup(idx) == expected

    # The same answer must come back after converting to an integer index.
    int_form = block_form.to_int_index()
    assert int_form.lookup(idx) == expected
|
||||
|
||||
|
||||
class TestBlockIndex:
    """Unit tests for the block-encoded sparse index (``BlockIndex``)."""

    def test_block_internal(self):
        """``make_sparse_index(kind="block")`` produces the expected blocks."""
        # (stored positions, expected block starts, expected block lengths)
        layouts = [
            ([2, 3], [2], [2]),
            ([], [], []),
            ([0, 1, 2, 3], [0], [4]),
            ([0, 2, 3], [0, 2], [1, 2]),
        ]
        for positions, starts, sizes in layouts:
            sp_index = make_sparse_index(
                4, np.array(positions, dtype=np.int32), kind="block"
            )
            assert isinstance(sp_index, BlockIndex)
            assert sp_index.npoints == len(positions)
            tm.assert_numpy_array_equal(
                sp_index.blocs, np.array(starts, dtype=np.int32)
            )
            tm.assert_numpy_array_equal(
                sp_index.blengths, np.array(sizes, dtype=np.int32)
            )

    @pytest.mark.parametrize("i", [5, 10, 100, 101])
    def test_make_block_boundary(self, i):
        """Storing every second position gives one length-1 block per point."""
        stored = np.arange(0, i, 2, dtype=np.int32)
        sp_index = make_sparse_index(i, stored, kind="block")

        # Built independently of ``stored`` so the comparison target is fresh.
        wanted_starts = np.arange(0, i, 2, dtype=np.int32)
        wanted_sizes = np.ones(len(wanted_starts), dtype=np.int32)
        tm.assert_numpy_array_equal(sp_index.blocs, wanted_starts)
        tm.assert_numpy_array_equal(sp_index.blengths, wanted_sizes)

    def test_equals(self):
        """An index equals itself but not one with a different block length."""
        reference = BlockIndex(10, [0, 4], [2, 5])
        longer_last_block = BlockIndex(10, [0, 4], [2, 6])

        assert reference.equals(reference)
        assert not reference.equals(longer_last_block)

    def test_check_integrity(self):
        """Constructor accepts empty block lists and rejects bad layouts."""
        no_locs = []
        no_lengths = []

        # No blocks at all is fine for a zero-length index ...
        BlockIndex(0, no_locs, no_lengths)
        # ... and also for a non-zero length.
        BlockIndex(1, no_locs, no_lengths)

        # A block running past the declared length is rejected.
        with pytest.raises(ValueError, match="Block 0 extends beyond end"):
            BlockIndex(10, [5], [10])

        # A block reaching into the next block's start is rejected.
        with pytest.raises(ValueError, match="Block 0 overlaps"):
            BlockIndex(10, [2, 5], [5, 3])

    def test_to_int_index(self):
        """Converting to an int index expands each block into its positions."""
        as_int = BlockIndex(20, [0, 10], [4, 6]).to_int_index()

        wanted = np.array([0, 1, 2, 3, 10, 11, 12, 13, 14, 15], dtype=np.int32)
        tm.assert_numpy_array_equal(as_int.indices, wanted)

    def test_to_block_index(self):
        """``to_block_index`` on a block index returns the same object."""
        block_form = BlockIndex(10, [0, 5], [4, 5])
        assert block_form.to_block_index() is block_form
|
||||
|
||||
|
||||
class TestIntIndex:
    """Unit tests for the integer-position sparse index (``IntIndex``)."""

    def test_check_integrity(self):
        """Invalid ``indices`` are rejected with a specific ``ValueError``."""
        # More indices than the declared length allows.
        msg = "Too many indices"
        with pytest.raises(ValueError, match=msg):
            IntIndex(length=1, indices=[1, 2, 3])

        # No index can be negative.
        msg = "No index can be less than zero"
        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, -2, 3])

        # All indices must be less than the length (equal to it, or beyond).
        msg = "All indices must be less than the length"
        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 5])
        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 2, 6])

        # Indices must be strictly ascending (out of order, or repeated).
        msg = "Indices must be strictly increasing"
        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 2])
        with pytest.raises(ValueError, match=msg):
            IntIndex(length=5, indices=[1, 3, 3])

    def test_int_internal(self):
        """``make_sparse_index(kind="integer")`` stores the positions as given."""
        for positions in ([2, 3], [], [0, 1, 2, 3]):
            idx = make_sparse_index(
                4, np.array(positions, dtype=np.int32), kind="integer"
            )
            assert isinstance(idx, IntIndex)
            assert idx.npoints == len(positions)
            tm.assert_numpy_array_equal(
                idx.indices, np.array(positions, dtype=np.int32)
            )

    def test_equals(self):
        """An index equals itself but not one with fewer stored positions."""
        index = IntIndex(10, [0, 1, 2, 3, 4])
        assert index.equals(index)
        assert not index.equals(IntIndex(10, [0, 1, 2, 3]))

    def test_to_block_index(self, cases, test_length):
        """Block -> int -> block conversion reproduces the original index.

        ``cases`` and ``test_length`` are fixtures defined outside this
        class; only the first four items of ``cases`` (block starts and
        lengths for the x and y operands) are used here.
        """
        xloc, xlen, yloc, ylen, _, _ = cases
        xindex = BlockIndex(test_length, xloc, xlen)
        yindex = BlockIndex(test_length, yloc, ylen)

        # see if survive the round trip
        xbindex = xindex.to_int_index().to_block_index()
        ybindex = yindex.to_int_index().to_block_index()
        assert isinstance(xbindex, BlockIndex)
        assert isinstance(ybindex, BlockIndex)
        assert xbindex.equals(xindex)
        assert ybindex.equals(yindex)

    def test_to_int_index(self):
        """``to_int_index`` on an int index returns the same object."""
        index = IntIndex(10, [2, 3, 4, 5, 6])
        assert index.to_int_index() is index
|
||||
|
||||
|
||||
class TestSparseOperators:
    """Cross-check the low-level float64 sparse arithmetic kernels."""

    @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"])
    def test_op(self, opname, cases, test_length):
        """Block and int indexes give the same result, matching dense Series.

        ``cases`` and ``test_length`` are fixtures defined outside this
        class; the first four items of ``cases`` are the block starts and
        lengths of the two operands.
        """
        xloc, xlen, yloc, ylen, _, _ = cases
        kernel = getattr(splib, f"sparse_{opname}_float64")
        dense_op = getattr(operator, opname)

        # Each operand's index in both encodings.
        x_block = BlockIndex(test_length, xloc, xlen)
        y_block = BlockIndex(test_length, yloc, ylen)
        x_int = x_block.to_int_index()
        y_int = y_block.to_int_index()

        # Stored values and the fill used for the non-stored positions.
        x_vals = np.arange(x_block.npoints) * 10.0 + 1
        y_vals = np.arange(y_block.npoints) * 100.0 + 1
        x_fill, y_fill = 0, 2

        block_vals, block_index, block_fill = kernel(
            x_vals, x_block, x_fill, y_vals, y_block, y_fill
        )
        int_vals, int_index, int_fill = kernel(
            x_vals, x_int, x_fill, y_vals, y_int, y_fill
        )

        # The two encodings must agree on index, values and fill.
        assert block_index.to_int_index().equals(int_index)
        tm.assert_numpy_array_equal(block_vals, int_vals)
        assert block_fill == int_fill

        # Reference: densify both operands as Series, apply the plain
        # operator, and keep only the positions the sparse result stores.
        x_dense = Series(x_vals, x_int.indices)
        x_dense = x_dense.reindex(np.arange(test_length)).fillna(x_fill)
        y_dense = Series(y_vals, y_int.indices)
        y_dense = y_dense.reindex(np.arange(test_length)).fillna(y_fill)
        reference = dense_op(x_dense, y_dense).reindex(int_index.indices)

        tm.assert_numpy_array_equal(block_vals, reference.values)
        tm.assert_numpy_array_equal(int_vals, reference.values)
|
@ -0,0 +1,306 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NaT,
|
||||
SparseDtype,
|
||||
Timestamp,
|
||||
isna,
|
||||
)
|
||||
from pandas.core.arrays.sparse import SparseArray
|
||||
|
||||
|
||||
class TestReductions:
    """Tests for ``SparseArray`` reductions and their NumPy entry points.

    The ``all``/``any`` tests take a parametrized ``data`` list and need a
    variant with one element replaced.  They modify a copy rather than the
    list itself, so the object held by the parametrization stays intact if
    the same parameter set is executed again.
    """

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_all(self, data, pos, neg):
        # GH#17570
        out = SparseArray(data).all()
        assert out

        out = SparseArray(data, fill_value=pos).all()
        assert out

        # Replace the middle element with a falsy value (on a copy).
        data = list(data)
        data[1] = neg
        out = SparseArray(data).all()
        assert not out

        out = SparseArray(data, fill_value=pos).all()
        assert not out

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_numpy_all(self, data, pos, neg):
        # GH#17570
        out = np.all(SparseArray(data))
        assert out

        out = np.all(SparseArray(data, fill_value=pos))
        assert out

        # Replace the middle element with a falsy value (on a copy).
        data = list(data)
        data[1] = neg
        out = np.all(SparseArray(data))
        assert not out

        out = np.all(SparseArray(data, fill_value=pos))
        assert not out

        # NumPy's ``out`` argument is rejected.
        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.all(SparseArray(data), out=np.array([]))

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_any(self, data, pos, neg):
        # GH#17570
        out = SparseArray(data).any()
        assert out

        out = SparseArray(data, fill_value=pos).any()
        assert out

        # Replace the only truthy element with a falsy value (on a copy).
        data = list(data)
        data[1] = neg
        out = SparseArray(data).any()
        assert not out

        out = SparseArray(data, fill_value=pos).any()
        assert not out

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_numpy_any(self, data, pos, neg):
        # GH#17570
        out = np.any(SparseArray(data))
        assert out

        out = np.any(SparseArray(data, fill_value=pos))
        assert out

        # Replace the only truthy element with a falsy value (on a copy).
        data = list(data)
        data[1] = neg
        out = np.any(SparseArray(data))
        assert not out

        out = np.any(SparseArray(data, fill_value=pos))
        assert not out

        # NumPy's ``out`` argument is rejected.
        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.any(SparseArray(data), out=out)

    def test_sum(self):
        """``sum`` skips NaN, whatever the fill value is."""
        data = np.arange(10).astype(float)
        out = SparseArray(data).sum()
        assert out == 45.0

        # With the 5.0 entry replaced by NaN, 45 - 5 = 40 is expected.
        data[5] = np.nan
        out = SparseArray(data, fill_value=2).sum()
        assert out == 40.0

        out = SparseArray(data, fill_value=np.nan).sum()
        assert out == 40.0

    @pytest.mark.parametrize(
        "arr",
        [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])],
    )
    @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
    @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
    def test_sum_min_count(self, arr, fill_value, min_count, expected):
        # GH#25777
        # Both arrays hold three non-NaN values: min_count=3 yields the sum,
        # min_count=4 yields NaN.
        sparray = SparseArray(arr, fill_value=fill_value)
        result = sparray.sum(min_count=min_count)
        if np.isnan(expected):
            assert np.isnan(result)
        else:
            assert result == expected

    def test_bool_sum_min_count(self):
        """Boolean ``sum`` honours ``min_count`` (ten values, five True)."""
        spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        res = spar_bool.sum(min_count=1)
        assert res == 5
        res = spar_bool.sum(min_count=11)
        assert isna(res)

    def test_numpy_sum(self):
        """``np.sum`` matches ``SparseArray.sum``; ``dtype``/``out`` raise."""
        data = np.arange(10).astype(float)
        out = np.sum(SparseArray(data))
        assert out == 45.0

        data[5] = np.nan
        out = np.sum(SparseArray(data, fill_value=2))
        assert out == 40.0

        out = np.sum(SparseArray(data, fill_value=np.nan))
        assert out == 40.0

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), out=out)

    def test_mean(self):
        """``mean`` skips NaN: nine remaining values summing to 40."""
        data = np.arange(10).astype(float)
        out = SparseArray(data).mean()
        assert out == 4.5

        data[5] = np.nan
        out = SparseArray(data).mean()
        assert out == 40.0 / 9

    def test_numpy_mean(self):
        """``np.mean`` matches ``SparseArray.mean``; ``dtype``/``out`` raise."""
        data = np.arange(10).astype(float)
        out = np.mean(SparseArray(data))
        assert out == 4.5

        data[5] = np.nan
        out = np.mean(SparseArray(data))
        assert out == 40.0 / 9

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), out=out)
|
||||
|
||||
|
||||
class TestMinMax:
    """Tests for ``SparseArray.min`` and ``SparseArray.max``."""

    @pytest.mark.parametrize(
        "raw_data,max_expected,min_expected",
        [
            (np.arange(5.0), [4], [0]),
            (-np.arange(5.0), [0], [-4]),
            (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
            (np.array([np.nan] * 5), [np.nan], [np.nan]),
            (np.array([]), [np.nan], [np.nan]),
        ],
    )
    def test_nan_fill_value(self, raw_data, max_expected, min_expected):
        """min/max with the default fill value, with and without ``skipna``."""
        sparse = SparseArray(raw_data)

        # Default call: results are checked by membership in the expected list.
        highest, lowest = sparse.max(), sparse.min()
        assert highest in max_expected
        assert lowest in min_expected

        # skipna=False: any NaN in the input must surface as a NaN result.
        highest, lowest = sparse.max(skipna=False), sparse.min(skipna=False)
        if np.isnan(raw_data).any():
            assert np.isnan(highest)
            assert np.isnan(lowest)
        else:
            assert highest in max_expected
            assert lowest in min_expected

    @pytest.mark.parametrize(
        "fill_value,max_expected,min_expected",
        [
            (100, 100, 0),
            (-100, 1, -100),
        ],
    )
    def test_fill_value(self, fill_value, max_expected, min_expected):
        """An element equal to the fill value still counts towards min/max."""
        sparse = SparseArray(
            np.array([fill_value, 0, 1]), dtype=SparseDtype("int", fill_value)
        )
        highest = sparse.max()
        assert highest == max_expected

        lowest = sparse.min()
        assert lowest == min_expected

    def test_only_fill_value(self):
        """With no valid sparse values, min/max return the fill value."""
        fill = 100
        sparse = SparseArray(
            np.array([fill, fill, fill]), dtype=SparseDtype("int", fill)
        )
        assert len(sparse._valid_sp_values) == 0

        assert sparse.max() == fill
        assert sparse.min() == fill
        assert sparse.max(skipna=False) == fill
        assert sparse.min(skipna=False) == fill

    @pytest.mark.parametrize("func", ["min", "max"])
    @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
    @pytest.mark.parametrize(
        "dtype,expected",
        [
            (SparseDtype(np.float64, np.nan), np.nan),
            (SparseDtype(np.float64, 5.0), np.nan),
            (SparseDtype("datetime64[ns]", NaT), NaT),
            (SparseDtype("datetime64[ns]", Timestamp("2018-05-05")), NaT),
        ],
    )
    def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
        """Empty or all-NaN input reduces to a missing value for the dtype."""
        sparse = SparseArray(data, dtype=dtype)
        result = getattr(sparse, func)()

        if expected is not NaT:
            assert np.isnan(result)
            return

        # TODO: pin down whether we wrap datetime64("NaT")
        assert result is NaT or np.isnat(result)
|
||||
|
||||
|
||||
class TestArgmaxArgmin:
    """Tests for ``SparseArray.argmax`` and ``SparseArray.argmin``."""

    @pytest.mark.parametrize(
        "arr,argmax_expected,argmin_expected",
        [
            (SparseArray([1, 2, 0, 1, 2]), 1, 2),
            (SparseArray([-1, -2, 0, -1, -2]), 2, 1),
            (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2),
            (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10),
            (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=0), 0, 1),
            (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=1), 0, 1),
        ],
    )
    def test_argmax_argmin(self, arr, argmax_expected, argmin_expected):
        """Positions of the extremes match the hand-computed expectations.

        The cases vary the fill value over identical data and include ties,
        for which the first matching position is the expected answer.
        """
        where_max, where_min = arr.argmax(), arr.argmin()
        assert where_max == argmax_expected
        assert where_min == argmin_expected

    @pytest.mark.parametrize(
        "arr,method",
        [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")],
    )
    def test_empty_array(self, arr, method):
        """Both methods raise ``ValueError`` naming themselves on empty input."""
        msg = f"attempt to get {method} of an empty sequence"
        with pytest.raises(ValueError, match=msg):
            # ``method`` is "argmax" or "argmin"; call it by name.
            getattr(arr, method)()
|
@ -0,0 +1,79 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import SparseArray
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
@pytest.mark.parametrize("fill_value", [0, np.nan])
@pytest.mark.parametrize("op", [operator.pos, operator.neg])
def test_unary_op(op, fill_value):
    """Unary ``+``/``-`` on a SparseArray matches the dense computation.

    The expected array is built by applying ``op`` to the dense values and
    to the fill value separately.
    """
    dense = np.array([0, 1, np.nan, 2])
    actual = op(SparseArray(dense, fill_value=fill_value))
    wanted = SparseArray(op(dense), fill_value=op(fill_value))
    tm.assert_sp_array_equal(actual, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_value", [True, False])
def test_invert(fill_value):
    """``~`` flips both values and fill value, for array, Series and frame."""
    dense = np.array([True, False, False, True])
    sparse = SparseArray(dense, fill_value=fill_value)

    # Bare SparseArray.
    flipped_array = ~sparse
    wanted_array = SparseArray(~dense, fill_value=not fill_value)
    tm.assert_sp_array_equal(flipped_array, wanted_array)

    # Same data wrapped in a Series.
    flipped_series = ~pd.Series(sparse)
    wanted_series = pd.Series(wanted_array)
    tm.assert_series_equal(flipped_series, wanted_series)

    # Same data as the single column of a DataFrame.
    flipped_frame = ~pd.DataFrame({"A": sparse})
    wanted_frame = pd.DataFrame({"A": wanted_series})
    tm.assert_frame_equal(flipped_frame, wanted_frame)
|
||||
|
||||
|
||||
class TestUnaryMethods:
    """Tests for unary ``-``, ``abs`` and ``~`` on ``SparseArray``."""

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_neg_operator(self):
        """Negation flips the sign of the values and of the fill value."""
        # (input values, input fill, expected values, expected fill)
        cases = [
            ([-1, -2, np.nan, 3], np.nan, [1, 2, np.nan, -3], np.nan),
            ([-1, -2, 1, 3], -1, [1, 2, -1, -3], 1),
        ]
        for values, fill, wanted_values, wanted_fill in cases:
            negated = -SparseArray(values, fill_value=fill, dtype=np.int8)
            wanted = SparseArray(wanted_values, fill_value=wanted_fill, dtype=np.int8)
            tm.assert_sp_array_equal(wanted, negated)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_abs_operator(self):
        """``abs`` applies to the values and to the fill value."""
        # (input values, input fill, expected values, expected fill)
        cases = [
            ([-1, -2, np.nan, 3], np.nan, [1, 2, np.nan, 3], np.nan),
            ([-1, -2, 1, 3], -1, [1, 2, 1, 3], 1),
        ]
        for values, fill, wanted_values, wanted_fill in cases:
            magnitude = abs(SparseArray(values, fill_value=fill, dtype=np.int8))
            wanted = SparseArray(wanted_values, fill_value=wanted_fill, dtype=np.int8)
            tm.assert_sp_array_equal(wanted, magnitude)

    def test_invert_operator(self):
        """``~`` is a logical NOT for bool data and bitwise NOT for integers."""
        flags = SparseArray(
            [False, True, False, True], fill_value=False, dtype=np.bool_
        )
        wanted_flags = SparseArray(
            np.invert([False, True, False, True]), fill_value=True, dtype=np.bool_
        )
        flipped_flags = ~flags
        tm.assert_sp_array_equal(wanted_flags, flipped_flags)

        # Integers: each expected value is -(x + 1), and so is the fill.
        numbers = SparseArray([0, 1, 0, 2, 3, 0], fill_value=0, dtype=np.int32)
        flipped_numbers = ~numbers
        wanted_numbers = SparseArray(
            [-1, -2, -1, -3, -4, -1], fill_value=-1, dtype=np.int32
        )
        tm.assert_sp_array_equal(wanted_numbers, flipped_numbers)
|
Reference in New Issue
Block a user