done
This commit is contained in:
@ -0,0 +1,519 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.indexers import (
|
||||
BaseIndexer,
|
||||
FixedForwardWindowIndexer,
|
||||
)
|
||||
from pandas.core.indexers.objects import (
|
||||
ExpandingIndexer,
|
||||
FixedWindowIndexer,
|
||||
VariableOffsetWindowIndexer,
|
||||
)
|
||||
|
||||
from pandas.tseries.offsets import BusinessDay
|
||||
|
||||
|
||||
def test_bad_get_window_bounds_signature():
|
||||
class BadIndexer(BaseIndexer):
|
||||
def get_window_bounds(self):
|
||||
return None
|
||||
|
||||
indexer = BadIndexer()
|
||||
with pytest.raises(ValueError, match="BadIndexer does not implement"):
|
||||
Series(range(5)).rolling(indexer)
|
||||
|
||||
|
||||
def test_expanding_indexer():
|
||||
s = Series(range(10))
|
||||
indexer = ExpandingIndexer()
|
||||
result = s.rolling(indexer).mean()
|
||||
expected = s.expanding().mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_indexer_constructor_arg():
|
||||
# Example found in computation.rst
|
||||
use_expanding = [True, False, True, False, True]
|
||||
df = DataFrame({"values": range(5)})
|
||||
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
start = np.empty(num_values, dtype=np.int64)
|
||||
end = np.empty(num_values, dtype=np.int64)
|
||||
for i in range(num_values):
|
||||
if self.use_expanding[i]:
|
||||
start[i] = 0
|
||||
end[i] = i + 1
|
||||
else:
|
||||
start[i] = i
|
||||
end[i] = i + self.window_size
|
||||
return start, end
|
||||
|
||||
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
|
||||
result = df.rolling(indexer).sum()
|
||||
expected = DataFrame({"values": [0.0, 1.0, 3.0, 3.0, 10.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_indexer_accepts_rolling_args():
|
||||
df = DataFrame({"values": range(5)})
|
||||
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
start = np.empty(num_values, dtype=np.int64)
|
||||
end = np.empty(num_values, dtype=np.int64)
|
||||
for i in range(num_values):
|
||||
if (
|
||||
center
|
||||
and min_periods == 1
|
||||
and closed == "both"
|
||||
and step == 1
|
||||
and i == 2
|
||||
):
|
||||
start[i] = 0
|
||||
end[i] = num_values
|
||||
else:
|
||||
start[i] = i
|
||||
end[i] = i + self.window_size
|
||||
return start, end
|
||||
|
||||
indexer = CustomIndexer(window_size=1)
|
||||
result = df.rolling(
|
||||
indexer, center=True, min_periods=1, closed="both", step=1
|
||||
).sum()
|
||||
expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,np_func,expected,np_kwargs",
|
||||
[
|
||||
("count", len, [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, np.nan], {}),
|
||||
("min", np.min, [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan], {}),
|
||||
(
|
||||
"max",
|
||||
np.max,
|
||||
[2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan],
|
||||
{},
|
||||
),
|
||||
(
|
||||
"std",
|
||||
np.std,
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
1.0,
|
||||
55.71654452,
|
||||
54.85739087,
|
||||
53.9845657,
|
||||
1.0,
|
||||
1.0,
|
||||
0.70710678,
|
||||
np.nan,
|
||||
],
|
||||
{"ddof": 1},
|
||||
),
|
||||
(
|
||||
"var",
|
||||
np.var,
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
1.0,
|
||||
3104.333333,
|
||||
3009.333333,
|
||||
2914.333333,
|
||||
1.0,
|
||||
1.0,
|
||||
0.500000,
|
||||
np.nan,
|
||||
],
|
||||
{"ddof": 1},
|
||||
),
|
||||
(
|
||||
"median",
|
||||
np.median,
|
||||
[1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 7.0, 8.0, 8.5, np.nan],
|
||||
{},
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rolling_forward_window(
|
||||
frame_or_series, func, np_func, expected, np_kwargs, step
|
||||
):
|
||||
# GH 32865
|
||||
values = np.arange(10.0)
|
||||
values[5] = 100.0
|
||||
|
||||
indexer = FixedForwardWindowIndexer(window_size=3)
|
||||
|
||||
match = "Forward-looking windows can't have center=True"
|
||||
with pytest.raises(ValueError, match=match):
|
||||
rolling = frame_or_series(values).rolling(window=indexer, center=True)
|
||||
getattr(rolling, func)()
|
||||
|
||||
match = "Forward-looking windows don't support setting the closed argument"
|
||||
with pytest.raises(ValueError, match=match):
|
||||
rolling = frame_or_series(values).rolling(window=indexer, closed="right")
|
||||
getattr(rolling, func)()
|
||||
|
||||
rolling = frame_or_series(values).rolling(window=indexer, min_periods=2, step=step)
|
||||
result = getattr(rolling, func)()
|
||||
|
||||
# Check that the function output matches the explicitly provided array
|
||||
expected = frame_or_series(expected)[::step]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# Check that the rolling function output matches applying an alternative
|
||||
# function to the rolling window object
|
||||
expected2 = frame_or_series(rolling.apply(lambda x: np_func(x, **np_kwargs)))
|
||||
tm.assert_equal(result, expected2)
|
||||
|
||||
# Check that the function output matches applying an alternative function
|
||||
# if min_periods isn't specified
|
||||
# GH 39604: After count-min_periods deprecation, apply(lambda x: len(x))
|
||||
# is equivalent to count after setting min_periods=0
|
||||
min_periods = 0 if func == "count" else None
|
||||
rolling3 = frame_or_series(values).rolling(window=indexer, min_periods=min_periods)
|
||||
result3 = getattr(rolling3, func)()
|
||||
expected3 = frame_or_series(rolling3.apply(lambda x: np_func(x, **np_kwargs)))
|
||||
tm.assert_equal(result3, expected3)
|
||||
|
||||
|
||||
def test_rolling_forward_skewness(frame_or_series, step):
|
||||
values = np.arange(10.0)
|
||||
values[5] = 100.0
|
||||
|
||||
indexer = FixedForwardWindowIndexer(window_size=5)
|
||||
rolling = frame_or_series(values).rolling(window=indexer, min_periods=3, step=step)
|
||||
result = rolling.skew()
|
||||
|
||||
expected = frame_or_series(
|
||||
[
|
||||
0.0,
|
||||
2.232396,
|
||||
2.229508,
|
||||
2.228340,
|
||||
2.229091,
|
||||
2.231989,
|
||||
0.0,
|
||||
0.0,
|
||||
np.nan,
|
||||
np.nan,
|
||||
]
|
||||
)[::step]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,expected",
|
||||
[
|
||||
("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan]),
|
||||
(
|
||||
"corr",
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
1.0,
|
||||
0.8704775290207161,
|
||||
0.018229084250926637,
|
||||
-0.861357304646493,
|
||||
1.0,
|
||||
1.0,
|
||||
np.nan,
|
||||
np.nan,
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rolling_forward_cov_corr(func, expected):
|
||||
values1 = np.arange(10).reshape(-1, 1)
|
||||
values2 = values1 * 2
|
||||
values1[5, 0] = 100
|
||||
values = np.concatenate([values1, values2], axis=1)
|
||||
|
||||
indexer = FixedForwardWindowIndexer(window_size=3)
|
||||
rolling = DataFrame(values).rolling(window=indexer, min_periods=3)
|
||||
# We are interested in checking only pairwise covariance / correlation
|
||||
result = getattr(rolling, func)().loc[(slice(None), 1), 0]
|
||||
result = result.reset_index(drop=True)
|
||||
expected = Series(expected).reset_index(drop=True)
|
||||
expected.name = result.name
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"closed,expected_data",
|
||||
[
|
||||
["right", [0.0, 1.0, 2.0, 3.0, 7.0, 12.0, 6.0, 7.0, 8.0, 9.0]],
|
||||
["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]],
|
||||
],
|
||||
)
|
||||
def test_non_fixed_variable_window_indexer(closed, expected_data):
|
||||
index = date_range("2020", periods=10)
|
||||
df = DataFrame(range(10), index=index)
|
||||
offset = BusinessDay(1)
|
||||
indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
|
||||
result = df.rolling(indexer, closed=closed).sum()
|
||||
expected = DataFrame(expected_data, index=index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_variableoffsetwindowindexer_not_dti():
|
||||
# GH 54379
|
||||
with pytest.raises(ValueError, match="index must be a DatetimeIndex."):
|
||||
VariableOffsetWindowIndexer(index="foo", offset=BusinessDay(1))
|
||||
|
||||
|
||||
def test_variableoffsetwindowindexer_not_offset():
|
||||
# GH 54379
|
||||
idx = date_range("2020", periods=10)
|
||||
with pytest.raises(ValueError, match="offset must be a DateOffset-like object."):
|
||||
VariableOffsetWindowIndexer(index=idx, offset="foo")
|
||||
|
||||
|
||||
def test_fixed_forward_indexer_count(step):
|
||||
# GH: 35579
|
||||
df = DataFrame({"b": [None, None, None, 7]})
|
||||
indexer = FixedForwardWindowIndexer(window_size=2)
|
||||
result = df.rolling(window=indexer, min_periods=0, step=step).count()
|
||||
expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})[::step]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("end_value", "values"), [(1, [0.0, 1, 1, 3, 2]), (-1, [0.0, 1, 0, 3, 1])]
|
||||
)
|
||||
@pytest.mark.parametrize(("func", "args"), [("median", []), ("quantile", [0.5])])
|
||||
def test_indexer_quantile_sum(end_value, values, func, args):
|
||||
# GH 37153
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
start = np.empty(num_values, dtype=np.int64)
|
||||
end = np.empty(num_values, dtype=np.int64)
|
||||
for i in range(num_values):
|
||||
if self.use_expanding[i]:
|
||||
start[i] = 0
|
||||
end[i] = max(i + end_value, 1)
|
||||
else:
|
||||
start[i] = i
|
||||
end[i] = i + self.window_size
|
||||
return start, end
|
||||
|
||||
use_expanding = [True, False, True, False, True]
|
||||
df = DataFrame({"values": range(5)})
|
||||
|
||||
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
|
||||
result = getattr(df.rolling(indexer), func)(*args)
|
||||
expected = DataFrame({"values": values})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer_class", [FixedWindowIndexer, FixedForwardWindowIndexer, ExpandingIndexer]
|
||||
)
|
||||
@pytest.mark.parametrize("window_size", [1, 2, 12])
|
||||
@pytest.mark.parametrize(
|
||||
"df_data",
|
||||
[
|
||||
{"a": [1, 1], "b": [0, 1]},
|
||||
{"a": [1, 2], "b": [0, 1]},
|
||||
{"a": [1] * 16, "b": [np.nan, 1, 2, np.nan] + list(range(4, 16))},
|
||||
],
|
||||
)
|
||||
def test_indexers_are_reusable_after_groupby_rolling(
|
||||
indexer_class, window_size, df_data
|
||||
):
|
||||
# GH 43267
|
||||
df = DataFrame(df_data)
|
||||
num_trials = 3
|
||||
indexer = indexer_class(window_size=window_size)
|
||||
original_window_size = indexer.window_size
|
||||
for i in range(num_trials):
|
||||
df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean()
|
||||
assert indexer.window_size == original_window_size
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"window_size, num_values, expected_start, expected_end",
|
||||
[
|
||||
(1, 1, [0], [1]),
|
||||
(1, 2, [0, 1], [1, 2]),
|
||||
(2, 1, [0], [1]),
|
||||
(2, 2, [0, 1], [2, 2]),
|
||||
(5, 12, range(12), list(range(5, 12)) + [12] * 5),
|
||||
(12, 5, range(5), [5] * 5),
|
||||
(0, 0, np.array([]), np.array([])),
|
||||
(1, 0, np.array([]), np.array([])),
|
||||
(0, 1, [0], [0]),
|
||||
],
|
||||
)
|
||||
def test_fixed_forward_indexer_bounds(
|
||||
window_size, num_values, expected_start, expected_end, step
|
||||
):
|
||||
# GH 43267
|
||||
indexer = FixedForwardWindowIndexer(window_size=window_size)
|
||||
start, end = indexer.get_window_bounds(num_values=num_values, step=step)
|
||||
|
||||
tm.assert_numpy_array_equal(
|
||||
start, np.array(expected_start[::step]), check_dtype=False
|
||||
)
|
||||
tm.assert_numpy_array_equal(end, np.array(expected_end[::step]), check_dtype=False)
|
||||
assert len(start) == len(end)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"df, window_size, expected",
|
||||
[
|
||||
(
|
||||
DataFrame({"b": [0, 1, 2], "a": [1, 2, 2]}),
|
||||
2,
|
||||
Series(
|
||||
[0, 1.5, 2.0],
|
||||
index=MultiIndex.from_arrays([[1, 2, 2], range(3)], names=["a", None]),
|
||||
name="b",
|
||||
dtype=np.float64,
|
||||
),
|
||||
),
|
||||
(
|
||||
DataFrame(
|
||||
{
|
||||
"b": [np.nan, 1, 2, np.nan] + list(range(4, 18)),
|
||||
"a": [1] * 7 + [2] * 11,
|
||||
"c": range(18),
|
||||
}
|
||||
),
|
||||
12,
|
||||
Series(
|
||||
[
|
||||
3.6,
|
||||
3.6,
|
||||
4.25,
|
||||
5.0,
|
||||
5.0,
|
||||
5.5,
|
||||
6.0,
|
||||
12.0,
|
||||
12.5,
|
||||
13.0,
|
||||
13.5,
|
||||
14.0,
|
||||
14.5,
|
||||
15.0,
|
||||
15.5,
|
||||
16.0,
|
||||
16.5,
|
||||
17.0,
|
||||
],
|
||||
index=MultiIndex.from_arrays(
|
||||
[[1] * 7 + [2] * 11, range(18)], names=["a", None]
|
||||
),
|
||||
name="b",
|
||||
dtype=np.float64,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rolling_groupby_with_fixed_forward_specific(df, window_size, expected):
|
||||
# GH 43267
|
||||
indexer = FixedForwardWindowIndexer(window_size=window_size)
|
||||
result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"group_keys",
|
||||
[
|
||||
(1,),
|
||||
(1, 2),
|
||||
(2, 1),
|
||||
(1, 1, 2),
|
||||
(1, 2, 1),
|
||||
(1, 1, 2, 2),
|
||||
(1, 2, 3, 2, 3),
|
||||
(1, 1, 2) * 4,
|
||||
(1, 2, 3) * 5,
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("window_size", [1, 2, 3, 4, 5, 8, 20])
|
||||
def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size):
|
||||
# GH 43267
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": np.array(list(group_keys)),
|
||||
"b": np.arange(len(group_keys), dtype=np.float64) + 17,
|
||||
"c": np.arange(len(group_keys), dtype=np.int64),
|
||||
}
|
||||
)
|
||||
|
||||
indexer = FixedForwardWindowIndexer(window_size=window_size)
|
||||
result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).sum()
|
||||
result.index.names = ["a", "c"]
|
||||
|
||||
groups = df.groupby("a")[["a", "b", "c"]]
|
||||
manual = concat(
|
||||
[
|
||||
g.assign(
|
||||
b=[
|
||||
g["b"].iloc[i : i + window_size].sum(min_count=1)
|
||||
for i in range(len(g))
|
||||
]
|
||||
)
|
||||
for _, g in groups
|
||||
]
|
||||
)
|
||||
manual = manual.set_index(["a", "c"])["b"]
|
||||
|
||||
tm.assert_series_equal(result, manual)
|
||||
|
||||
|
||||
def test_unequal_start_end_bounds():
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
return np.array([1]), np.array([1, 2])
|
||||
|
||||
indexer = CustomIndexer()
|
||||
roll = Series(1).rolling(indexer)
|
||||
match = "start"
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.mean()
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
next(iter(roll))
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.corr(pairwise=True)
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.cov(pairwise=True)
|
||||
|
||||
|
||||
def test_unequal_bounds_to_object():
|
||||
# GH 44470
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
return np.array([1]), np.array([2])
|
||||
|
||||
indexer = CustomIndexer()
|
||||
roll = Series([1, 1]).rolling(indexer)
|
||||
match = "start and end"
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.mean()
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
next(iter(roll))
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.corr(pairwise=True)
|
||||
|
||||
with pytest.raises(ValueError, match=match):
|
||||
roll.cov(pairwise=True)
|
Reference in New Issue
Block a user