done
This commit is contained in:
		| @ -0,0 +1,72 @@ | ||||
| import itertools | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     notna, | ||||
| ) | ||||
|  | ||||
|  | ||||
def create_series():
    """
    Build the list of Series used to parametrize the consistency tests.

    The list holds, in order: an empty float64 Series named "a", an all-NaN
    Series, a constant Series, a decreasing and an increasing integer range,
    and three length-5 Series that mix NaNs with constant, increasing and
    non-monotonic values.
    """
    nan = np.nan
    with_gaps = (
        [nan, 1.0, nan, 1.0, 1.0],
        [nan, 1.0, nan, 2.0, 3.0],
        [nan, 1.0, nan, 3.0, 2.0],
    )

    cases = [
        Series(dtype=np.float64, name="a"),
        Series([nan] * 5),
        Series([1.0] * 5),
        Series(range(5, 0, -1)),
        Series(range(5)),
    ]
    cases.extend(Series(values) for values in with_gaps)
    return cases
|  | ||||
|  | ||||
def create_dataframes():
    """
    Build the list of DataFrames used to parametrize the consistency tests.

    Two hand-written frames with duplicate column labels come first (one
    without rows, one 5x3 integer frame), followed by a single-column frame
    for every Series returned by ``create_series``.
    """
    frames = [
        DataFrame(columns=["a", "a"]),
        DataFrame(np.arange(15).reshape((5, 3)), columns=["a", "a", 99]),
    ]
    frames.extend(DataFrame(series) for series in create_series())
    return frames
|  | ||||
|  | ||||
def is_constant(x):
    """
    Return True when the non-NaN values of ``x`` contain exactly one
    distinct value.

    An object with no non-NaN values at all (empty or all-NaN) is not
    considered constant.
    """
    flat = x.values.ravel("K")
    observed = flat[notna(flat)]
    distinct = set(observed)
    return len(distinct) == 1
|  | ||||
|  | ||||
@pytest.fixture(
    params=filter(
        is_constant,
        itertools.chain(create_series(), create_dataframes()),
    ),
)
def consistent_data(request):
    """
    Return each Series / DataFrame from ``create_series`` and
    ``create_dataframes`` for which ``is_constant`` is true.
    """
    return request.param
|  | ||||
|  | ||||
@pytest.fixture(params=create_series())
def series_data(request):
    """Return, one per test run, each Series built by ``create_series``."""
    current = request.param
    return current
|  | ||||
|  | ||||
@pytest.fixture(
    params=itertools.chain.from_iterable((create_series(), create_dataframes()))
)
def all_data(request):
    """
    Return every Series from ``create_series`` followed by every DataFrame
    from ``create_dataframes``.

    Cases covered:
        - Empty Series / DataFrame
        - All NaN
        - All constant value
        - Monotonically decreasing
        - Monotonically increasing
        - Constant with NaNs
        - Monotonically increasing with NaNs
        - Non-monotonic with NaNs
    """
    return request.param
|  | ||||
|  | ||||
@pytest.fixture(params=[0, 2])
def min_periods(request):
    """Return the ``min_periods`` value under test: 0, then 2."""
    value = request.param
    return value
| @ -0,0 +1,243 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     concat, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def create_mock_weights(obj, com, adjust, ignore_na):
    """
    Build reference EWM weights with the same shape as ``obj``.

    A non-DataFrame input is handed straight to
    ``create_mock_series_weights``. A DataFrame is processed positionally,
    one column at a time (so duplicate column labels are fine), and the
    per-column weights are glued back together under the original index
    and columns. A DataFrame without columns yields an empty frame with the
    same index and columns.
    """
    if not isinstance(obj, DataFrame):
        return create_mock_series_weights(obj, com, adjust, ignore_na)

    n_columns = len(obj.columns)
    if not n_columns:
        return DataFrame(index=obj.index, columns=obj.columns)

    per_column = [
        create_mock_series_weights(
            obj.iloc[:, position], com=com, adjust=adjust, ignore_na=ignore_na
        )
        for position in range(n_columns)
    ]
    weights = concat(per_column, axis=1)
    # Restore the original labels; concat would otherwise use the Series names.
    weights.index = obj.index
    weights.columns = obj.columns
    return weights
|  | ||||
|  | ||||
def create_mock_series_weights(s, com, adjust, ignore_na):
    """
    Compute reference EWM weights for a single Series.

    The result has the index and name of ``s``. Positions where ``s`` is NaN
    keep a NaN weight. ``alpha`` is derived from the centre of mass as
    ``1 / (1 + com)``.

    With ``adjust=True`` each observed value gets weight
    ``(1 / (1 - alpha)) ** k``, where ``k`` counts the earlier positions that
    advanced the exponent. With ``adjust=False`` the first observed value
    gets weight 1 and each later one gets
    ``alpha * (sum of earlier weights) / (1 - alpha) ** gap``.

    A NaN position advances the exponent only when ``ignore_na`` is False.
    """
    weights = Series(np.nan, index=s.index, name=s.name)
    alpha = 1.0 / (1.0 + com)
    n_obs = len(s)
    step = 0  # exponent counter; NaNs bump it only when not ignore_na

    if adjust:
        for pos in range(n_obs):
            value = s.iat[pos]
            if value != value:  # NaN is the only value unequal to itself
                if not ignore_na:
                    step += 1
                continue
            weights.iat[pos] = pow(1.0 / (1.0 - alpha), step)
            step += 1
        return weights

    running_total = 0.0
    last_step = -1  # step of the previous observed value; -1 means none yet
    for pos in range(n_obs):
        value = s.iat[pos]
        if value != value:
            if not ignore_na:
                step += 1
            continue
        if last_step == -1:
            weights.iat[pos] = 1.0
        else:
            weights.iat[pos] = (
                alpha * running_total / pow(1.0 - alpha, step - last_step)
            )
        running_total += weights.iat[pos]
        last_step = step
        step += 1
    return weights
|  | ||||
|  | ||||
def test_ewm_consistency_mean(all_data, adjust, ignore_na, min_periods):
    """
    EWM mean must equal the weighted cumulative mean built from the
    reference weights, forward-filled, and NaN wherever too few
    observations have been seen.
    """
    com = 3.0
    ewm_kwargs = {
        "com": com,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    result = all_data.ewm(**ewm_kwargs).mean()

    weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
    weighted_total = all_data.multiply(weights).cumsum()
    expected = weighted_total.divide(weights.cumsum()).ffill()

    # At least one observation is always required, even for min_periods=0.
    required = max(min_periods, 1) if min_periods else 1
    too_few = all_data.expanding().count() < required
    expected[too_few] = np.nan

    tm.assert_equal(result, expected.astype("float64"))
|  | ||||
|  | ||||
def test_ewm_consistency_consistent(consistent_data, adjust, ignore_na, min_periods):
    """
    For constant data the EWM mean equals the constant once enough
    observations are available, and the EWM correlation of the data with
    itself is NaN everywhere.
    """
    ewm_kwargs = {
        "com": 3.0,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    count_x = consistent_data.expanding().count()
    mean_x = consistent_data.ewm(**ewm_kwargs).mean()
    corr_x_x = consistent_data.ewm(**ewm_kwargs).corr(consistent_data)

    # The single constant value: reduce once for a Series, twice for a frame.
    if isinstance(consistent_data, Series):
        exp = consistent_data.max()
    else:
        exp = consistent_data.max().max()

    # mean of constant data
    enough = count_x >= max(min_periods, 1)
    expected = consistent_data * np.nan
    expected[enough] = exp
    tm.assert_equal(mean_x, expected)

    # correlation of constant data with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)
|  | ||||
|  | ||||
def test_ewm_consistency_var_debiasing_factors(
    all_data, adjust, ignore_na, min_periods
):
    """
    Unbiased EWM variance must equal the biased variance multiplied by the
    debiasing factor ``(sum w)^2 / ((sum w)^2 - sum w^2)`` computed from the
    reference weights.
    """
    com = 3.0
    ewm_kwargs = {
        "com": com,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    var_unbiased_x = all_data.ewm(**ewm_kwargs).var(bias=False)
    var_biased_x = all_data.ewm(**ewm_kwargs).var(bias=True)

    weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
    weight_total = weights.cumsum().ffill()
    squared_weight_total = (weights * weights).cumsum().ffill()

    numerator = weight_total * weight_total
    denominator = numerator - squared_weight_total
    # A non-positive denominator has no valid factor; blank it out.
    denominator[denominator <= 0.0] = np.nan
    var_debiasing_factors_x = numerator / denominator

    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
|  | ||||
|  | ||||
@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias):
    """
    EWM variance is never negative, and the biased variance equals
    ``mean(x^2) - mean(x)^2``.
    """
    ewm_kwargs = {
        "com": 3.0,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    mean_x = all_data.ewm(**ewm_kwargs).mean()
    var_x = all_data.ewm(**ewm_kwargs).var(bias=bias)
    assert not (var_x < 0).any().any()

    if not bias:
        # The moment identity below only holds for the biased estimator.
        return

    squared = all_data * all_data
    mean_x2 = squared.ewm(**ewm_kwargs).mean()
    tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
|  | ||||
|  | ||||
@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var_constant(
    consistent_data, adjust, ignore_na, min_periods, bias
):
    """
    EWM variance of constant data is exactly 0 wherever enough observations
    exist, and NaN elsewhere. The unbiased estimator additionally needs at
    least two observations.
    """
    ewm_kwargs = {
        "com": 3.0,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }
    count_x = consistent_data.expanding(min_periods=min_periods).count()
    var_x = consistent_data.ewm(**ewm_kwargs).var(bias=bias)

    # variance of constant data must never be positive
    assert not (var_x > 0).any().any()

    enough = count_x >= max(min_periods, 1)
    expected = consistent_data * np.nan
    expected[enough] = 0.0
    if not bias:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias):
    """
    EWM var, std and cov(x, x) are all non-negative and satisfy
    ``var(x) == std(x)^2 == cov(x, x)``.
    """
    ewm_kwargs = {
        "com": 3.0,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    var_x = all_data.ewm(**ewm_kwargs).var(bias=bias)
    assert not (var_x < 0).any().any()

    std_x = all_data.ewm(**ewm_kwargs).std(bias=bias)
    assert not (std_x < 0).any().any()

    # var(x) == std(x)^2
    std_squared = std_x * std_x
    tm.assert_equal(var_x, std_squared)

    cov_x_x = all_data.ewm(**ewm_kwargs).cov(all_data, bias=bias)
    assert not (cov_x_x < 0).any().any()

    # var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)
|  | ||||
|  | ||||
@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_series_cov_corr(
    series_data, adjust, ignore_na, min_periods, bias
):
    """
    Check the EWM covariance / correlation identities, with ``series_data``
    playing the role of both x and y:

    - ``cov(x, y) == (var(x + y) - var(x) - var(y)) / 2``
    - ``corr(x, y) == cov(x, y) / (std(x) * std(y))``
    - biased only: ``cov(x, y) == mean(x * y) - mean(x) * mean(y)``
    """
    ewm_kwargs = {
        "com": 3.0,
        "min_periods": min_periods,
        "adjust": adjust,
        "ignore_na": ignore_na,
    }

    x_plus_y = series_data + series_data
    var_x_plus_y = x_plus_y.ewm(**ewm_kwargs).var(bias=bias)
    var_x = series_data.ewm(**ewm_kwargs).var(bias=bias)
    var_y = series_data.ewm(**ewm_kwargs).var(bias=bias)
    cov_x_y = series_data.ewm(**ewm_kwargs).cov(series_data, bias=bias)
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    corr_x_y = series_data.ewm(**ewm_kwargs).corr(series_data)
    std_x = series_data.ewm(**ewm_kwargs).std(bias=bias)
    std_y = series_data.ewm(**ewm_kwargs).std(bias=bias)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if not bias:
        # The moment identity below only holds for the biased estimator.
        return

    mean_x = series_data.ewm(**ewm_kwargs).mean()
    mean_y = series_data.ewm(**ewm_kwargs).mean()
    x_times_y = series_data * series_data
    mean_x_times_y = x_times_y.ewm(**ewm_kwargs).mean()
    tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
| @ -0,0 +1,144 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def no_nans(x):
    """Return a truthy value when no entry of ``x`` is NaN."""
    present = x.notna()
    return present.all().all()
|  | ||||
|  | ||||
def all_na(x):
    """Return a truthy value when every entry of ``x`` is NaN."""
    missing = x.isna()
    return missing.all().all()
|  | ||||
|  | ||||
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f):
    """
    ``expanding().sum()`` must match ``expanding().apply(f, raw=True)`` for
    each summing function ``f``.

    ``np.sum`` is marked xfail when the data contains NaNs, except when the
    data is non-empty, entirely NaN and ``min_periods > 0``.
    """
    expect_failure = (
        f is np.sum
        and not no_nans(all_data)
        and not (all_na(all_data) and not all_data.empty and min_periods > 0)
    )
    if expect_failure:
        request.applymarker(
            pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
        )

    expanding_kwargs = {"min_periods": min_periods}
    expanding_f_result = all_data.expanding(**expanding_kwargs).sum()
    expanding_apply_f_result = all_data.expanding(**expanding_kwargs).apply(
        func=f, raw=True
    )
    tm.assert_equal(expanding_f_result, expanding_apply_f_result)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, min_periods, ddof):
    """
    Expanding variance is never negative, and with ``ddof=0`` it equals
    ``mean(x^2) - mean(x)^2``.
    """
    expanding_kwargs = {"min_periods": min_periods}

    var_x = all_data.expanding(**expanding_kwargs).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    if ddof != 0:
        # The moment identity below only holds for the biased estimator.
        return

    squared = all_data * all_data
    mean_x2 = squared.expanding(**expanding_kwargs).mean()
    mean_x = all_data.expanding(**expanding_kwargs).mean()
    tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
|  | ||||
|  | ||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(consistent_data, min_periods, ddof):
    """
    Expanding variance of constant data is exactly 0 wherever enough
    observations exist and NaN elsewhere. With ``ddof=1`` at least two
    observations are needed.
    """
    expanding_kwargs = {"min_periods": min_periods}
    count_x = consistent_data.expanding(**expanding_kwargs).count()
    var_x = consistent_data.expanding(**expanding_kwargs).var(ddof=ddof)

    # variance of constant data must never be positive
    assert not (var_x > 0).any().any()

    enough = count_x >= max(min_periods, 1)
    expected = consistent_data * np.nan
    expected[enough] = 0.0
    if ddof == 1:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof):
    """
    Expanding var, std and cov(x, x) are all non-negative and satisfy
    ``var(x) == std(x)^2 == cov(x, x)``.
    """
    expanding_kwargs = {"min_periods": min_periods}

    var_x = all_data.expanding(**expanding_kwargs).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    std_x = all_data.expanding(**expanding_kwargs).std(ddof=ddof)
    assert not (std_x < 0).any().any()

    # var(x) == std(x)^2
    std_squared = std_x * std_x
    tm.assert_equal(var_x, std_squared)

    cov_x_x = all_data.expanding(**expanding_kwargs).cov(all_data, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_series_cov_corr(series_data, min_periods, ddof):
    """
    Check the expanding covariance / correlation identities, with
    ``series_data`` playing the role of both x and y:

    - ``cov(x, y) == (var(x + y) - var(x) - var(y)) / 2``
    - ``corr(x, y) == cov(x, y) / (std(x) * std(y))``
    - ``ddof=0`` only: ``cov(x, y) == mean(x * y) - mean(x) * mean(y)``
    """
    expanding_kwargs = {"min_periods": min_periods}

    x_plus_y = series_data + series_data
    var_x_plus_y = x_plus_y.expanding(**expanding_kwargs).var(ddof=ddof)
    var_x = series_data.expanding(**expanding_kwargs).var(ddof=ddof)
    var_y = series_data.expanding(**expanding_kwargs).var(ddof=ddof)
    cov_x_y = series_data.expanding(**expanding_kwargs).cov(series_data, ddof=ddof)
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    corr_x_y = series_data.expanding(**expanding_kwargs).corr(series_data)
    std_x = series_data.expanding(**expanding_kwargs).std(ddof=ddof)
    std_y = series_data.expanding(**expanding_kwargs).std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof != 0:
        # The moment identity below only holds for the biased estimator.
        return

    mean_x = series_data.expanding(**expanding_kwargs).mean()
    mean_y = series_data.expanding(**expanding_kwargs).mean()
    x_times_y = series_data * series_data
    mean_x_times_y = x_times_y.expanding(**expanding_kwargs).mean()
    tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
|  | ||||
|  | ||||
def test_expanding_consistency_mean(all_data, min_periods):
    """Expanding mean must equal expanding sum divided by expanding count."""
    expanding_kwargs = {"min_periods": min_periods}

    result = all_data.expanding(**expanding_kwargs).mean()

    total = all_data.expanding(**expanding_kwargs).sum()
    count = all_data.expanding(**expanding_kwargs).count()
    expected = total / count

    tm.assert_equal(result, expected.astype("float64"))
|  | ||||
|  | ||||
def test_expanding_consistency_constant(consistent_data, min_periods):
    """
    For constant data the expanding mean equals the constant once enough
    observations are available, and the expanding correlation of the data
    with itself is NaN everywhere.
    """
    expanding_kwargs = {"min_periods": min_periods}

    count_x = consistent_data.expanding().count()
    mean_x = consistent_data.expanding(**expanding_kwargs).mean()
    corr_x_x = consistent_data.expanding(**expanding_kwargs).corr(consistent_data)

    # The single constant value: reduce once for a Series, twice for a frame.
    if isinstance(consistent_data, Series):
        exp = consistent_data.max()
    else:
        exp = consistent_data.max().max()

    # mean of constant data
    enough = count_x >= max(min_periods, 1)
    expected = consistent_data * np.nan
    expected[enough] = exp
    tm.assert_equal(mean_x, expected)

    # correlation of constant data with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)
|  | ||||
|  | ||||
def test_expanding_consistency_var_debiasing_factors(all_data, min_periods):
    """
    Unbiased expanding variance must equal the biased (``ddof=0``) variance
    multiplied by ``n / (n - 1)``, where ``n`` is the expanding count.
    """
    expanding_kwargs = {"min_periods": min_periods}
    var_unbiased_x = all_data.expanding(**expanding_kwargs).var()
    var_biased_x = all_data.expanding(**expanding_kwargs).var(ddof=0)

    count = all_data.expanding().count()
    # n - 1 == 0 has no valid factor; turn it into NaN instead of dividing by 0.
    count_minus_one = (all_data.expanding().count() - 1.0).replace(0.0, np.nan)
    var_debiasing_factors_x = count / count_minus_one

    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
| @ -0,0 +1,244 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def no_nans(x):
    """Return a truthy value when no entry of ``x`` is NaN."""
    present = x.notna()
    return present.all().all()
|  | ||||
|  | ||||
def all_na(x):
    """Return a truthy value when every entry of ``x`` is NaN."""
    missing = x.isna()
    return missing.all().all()
|  | ||||
|  | ||||
@pytest.fixture(params=[(1, 0), (5, 1)])
def rolling_consistency_cases(request):
    """
    Return a ``(window, min_periods)`` pair for the rolling consistency
    tests: ``(1, 0)``, then ``(5, 1)``.
    """
    window_and_min_periods = request.param
    return window_and_min_periods
|  | ||||
|  | ||||
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_rolling_apply_consistency_sum(
    request, all_data, rolling_consistency_cases, center, f
):
    """
    ``rolling().sum()`` must match ``rolling().apply(f, raw=True)`` for each
    summing function ``f``.

    ``np.sum`` is marked xfail when the data contains NaNs, except when the
    data is non-empty, entirely NaN and ``min_periods > 0``.
    """
    window, min_periods = rolling_consistency_cases
    rolling_kwargs = {"window": window, "min_periods": min_periods, "center": center}

    expect_failure = (
        f is np.sum
        and not no_nans(all_data)
        and not (all_na(all_data) and not all_data.empty and min_periods > 0)
    )
    if expect_failure:
        request.applymarker(
            pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
        )

    rolling_f_result = all_data.rolling(**rolling_kwargs).sum()
    rolling_apply_f_result = all_data.rolling(**rolling_kwargs).apply(
        func=f, raw=True
    )
    tm.assert_equal(rolling_f_result, rolling_apply_f_result)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, rolling_consistency_cases, center, ddof):
    """
    Rolling variance is never negative, and with ``ddof=0`` it equals
    ``mean(x^2) - mean(x)^2``.
    """
    window, min_periods = rolling_consistency_cases
    rolling_kwargs = {"window": window, "min_periods": min_periods, "center": center}

    var_x = all_data.rolling(**rolling_kwargs).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    if ddof != 0:
        # The moment identity below only holds for the biased estimator.
        return

    mean_x = all_data.rolling(**rolling_kwargs).mean()
    squared = all_data * all_data
    mean_x2 = squared.rolling(**rolling_kwargs).mean()
    tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
|  | ||||
|  | ||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(
    consistent_data, rolling_consistency_cases, center, ddof
):
    """
    Rolling variance of constant data is exactly 0 wherever the window holds
    enough observations and NaN elsewhere. With ``ddof=1`` at least two
    observations are needed.
    """
    window, min_periods = rolling_consistency_cases
    rolling_kwargs = {"window": window, "min_periods": min_periods, "center": center}

    count_x = consistent_data.rolling(**rolling_kwargs).count()
    var_x = consistent_data.rolling(**rolling_kwargs).var(ddof=ddof)

    # variance of constant data must never be positive
    assert not (var_x > 0).any().any()

    enough = count_x >= max(min_periods, 1)
    expected = consistent_data * np.nan
    expected[enough] = 0.0
    if ddof == 1:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_var_std_cov(
    all_data, rolling_consistency_cases, center, ddof
):
    """
    Rolling var, std and cov(x, x) are all non-negative and satisfy
    ``var(x) == std(x)^2 == cov(x, x)``.
    """
    window, min_periods = rolling_consistency_cases
    rolling_kwargs = {"window": window, "min_periods": min_periods, "center": center}

    var_x = all_data.rolling(**rolling_kwargs).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    std_x = all_data.rolling(**rolling_kwargs).std(ddof=ddof)
    assert not (std_x < 0).any().any()

    # var(x) == std(x)^2
    std_squared = std_x * std_x
    tm.assert_equal(var_x, std_squared)

    cov_x_x = all_data.rolling(**rolling_kwargs).cov(all_data, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)
|  | ||||
|  | ||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_series_cov_corr(
    series_data, rolling_consistency_cases, center, ddof
):
    """
    Check the rolling covariance / correlation identities, with
    ``series_data`` playing the role of both x and y:

    - ``cov(x, y) == (var(x + y) - var(x) - var(y)) / 2``
    - ``corr(x, y) == cov(x, y) / (std(x) * std(y))``
    - ``ddof=0`` only: ``cov(x, y) == mean(x * y) - mean(x) * mean(y)``
    """
    window, min_periods = rolling_consistency_cases
    rolling_kwargs = {"window": window, "min_periods": min_periods, "center": center}

    x_plus_y = series_data + series_data
    var_x_plus_y = x_plus_y.rolling(**rolling_kwargs).var(ddof=ddof)
    var_x = series_data.rolling(**rolling_kwargs).var(ddof=ddof)
    var_y = series_data.rolling(**rolling_kwargs).var(ddof=ddof)
    cov_x_y = series_data.rolling(**rolling_kwargs).cov(series_data, ddof=ddof)
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    corr_x_y = series_data.rolling(**rolling_kwargs).corr(series_data)
    std_x = series_data.rolling(**rolling_kwargs).std(ddof=ddof)
    std_y = series_data.rolling(**rolling_kwargs).std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof != 0:
        # The moment identity below only holds for the biased estimator.
        return

    mean_x = series_data.rolling(**rolling_kwargs).mean()
    mean_y = series_data.rolling(**rolling_kwargs).mean()
    x_times_y = series_data * series_data
    mean_x_times_y = x_times_y.rolling(**rolling_kwargs).mean()
    tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
|  | ||||
|  | ||||
def test_rolling_consistency_mean(all_data, rolling_consistency_cases, center):
    """Rolling mean must equal rolling sum divided by rolling count."""
    window, min_periods = rolling_consistency_cases
    rolling_kwargs = {"window": window, "min_periods": min_periods, "center": center}

    result = all_data.rolling(**rolling_kwargs).mean()

    total = all_data.rolling(**rolling_kwargs).sum()
    count = all_data.rolling(**rolling_kwargs).count()
    expected = total.divide(count)

    tm.assert_equal(result, expected.astype("float64"))
|  | ||||
|  | ||||
def test_rolling_consistency_constant(
    consistent_data, rolling_consistency_cases, center
):
    """
    For constant data the rolling mean equals the constant wherever the
    window holds enough observations, and the rolling correlation of the
    data with itself is NaN everywhere.
    """
    window, min_periods = rolling_consistency_cases
    rolling_kwargs = {"window": window, "min_periods": min_periods, "center": center}

    count_x = consistent_data.rolling(**rolling_kwargs).count()
    mean_x = consistent_data.rolling(**rolling_kwargs).mean()
    corr_x_x = consistent_data.rolling(**rolling_kwargs).corr(consistent_data)

    # The single constant value: reduce once for a Series, twice for a frame.
    if isinstance(consistent_data, Series):
        exp = consistent_data.max()
    else:
        exp = consistent_data.max().max()

    # mean of constant data
    enough = count_x >= max(min_periods, 1)
    expected = consistent_data * np.nan
    expected[enough] = exp
    tm.assert_equal(mean_x, expected)

    # correlation of constant data with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)
|  | ||||
|  | ||||
def test_rolling_consistency_var_debiasing_factors(
    all_data, rolling_consistency_cases, center
):
    """
    Unbiased rolling variance must equal the biased (``ddof=0``) variance
    multiplied by ``n / (n - 1)``, where ``n`` is the rolling count.
    """
    window, min_periods = rolling_consistency_cases
    rolling_kwargs = {"window": window, "min_periods": min_periods, "center": center}

    var_unbiased_x = all_data.rolling(**rolling_kwargs).var()
    var_biased_x = all_data.rolling(**rolling_kwargs).var(ddof=0)

    count = all_data.rolling(**rolling_kwargs).count()
    # n - 1 == 0 has no valid factor; turn it into NaN instead of dividing by 0.
    count_minus_one = (all_data.rolling(**rolling_kwargs).count() - 1.0).replace(
        0.0, np.nan
    )
    var_debiasing_factors_x = count.divide(count_minus_one)

    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
		Reference in New Issue
	
	Block a user