done
This commit is contained in:
		| @ -0,0 +1,7 @@ | ||||
| """ | ||||
| Test files dedicated to individual (stand-alone) Series methods | ||||
|  | ||||
| Ideally these files/tests should correspond 1-to-1 with tests.frame.methods | ||||
|  | ||||
| These may also present opportunities for sharing/de-duplicating test code. | ||||
| """ | ||||
| @ -0,0 +1,41 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import Index | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_add_prefix_suffix(string_series): | ||||
|     with_prefix = string_series.add_prefix("foo#") | ||||
|     expected = Index([f"foo#{c}" for c in string_series.index]) | ||||
|     tm.assert_index_equal(with_prefix.index, expected) | ||||
|  | ||||
|     with_suffix = string_series.add_suffix("#foo") | ||||
|     expected = Index([f"{c}#foo" for c in string_series.index]) | ||||
|     tm.assert_index_equal(with_suffix.index, expected) | ||||
|  | ||||
|     with_pct_prefix = string_series.add_prefix("%") | ||||
|     expected = Index([f"%{c}" for c in string_series.index]) | ||||
|     tm.assert_index_equal(with_pct_prefix.index, expected) | ||||
|  | ||||
|     with_pct_suffix = string_series.add_suffix("%") | ||||
|     expected = Index([f"{c}%" for c in string_series.index]) | ||||
|     tm.assert_index_equal(with_pct_suffix.index, expected) | ||||
|  | ||||
|  | ||||
| def test_add_prefix_suffix_axis(string_series): | ||||
|     # GH 47819 | ||||
|     with_prefix = string_series.add_prefix("foo#", axis=0) | ||||
|     expected = Index([f"foo#{c}" for c in string_series.index]) | ||||
|     tm.assert_index_equal(with_prefix.index, expected) | ||||
|  | ||||
|     with_pct_suffix = string_series.add_suffix("#foo", axis=0) | ||||
|     expected = Index([f"{c}#foo" for c in string_series.index]) | ||||
|     tm.assert_index_equal(with_pct_suffix.index, expected) | ||||
|  | ||||
|  | ||||
| def test_add_prefix_suffix_invalid_axis(string_series): | ||||
|     with pytest.raises(ValueError, match="No axis named 1 for object type Series"): | ||||
|         string_series.add_prefix("foo#", axis=1) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="No axis named 1 for object type Series"): | ||||
|         string_series.add_suffix("foo#", axis=1) | ||||
| @ -0,0 +1,262 @@ | ||||
| from datetime import timezone | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     date_range, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "first_slice,second_slice", | ||||
|     [ | ||||
|         [[2, None], [None, -5]], | ||||
|         [[None, 0], [None, -5]], | ||||
|         [[None, -5], [None, 0]], | ||||
|         [[None, 0], [None, 0]], | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize("fill", [None, -1]) | ||||
| def test_align(datetime_series, first_slice, second_slice, join_type, fill): | ||||
|     a = datetime_series[slice(*first_slice)] | ||||
|     b = datetime_series[slice(*second_slice)] | ||||
|  | ||||
|     aa, ab = a.align(b, join=join_type, fill_value=fill) | ||||
|  | ||||
|     join_index = a.index.join(b.index, how=join_type) | ||||
|     if fill is not None: | ||||
|         diff_a = aa.index.difference(join_index) | ||||
|         diff_b = ab.index.difference(join_index) | ||||
|         if len(diff_a) > 0: | ||||
|             assert (aa.reindex(diff_a) == fill).all() | ||||
|         if len(diff_b) > 0: | ||||
|             assert (ab.reindex(diff_b) == fill).all() | ||||
|  | ||||
|     ea = a.reindex(join_index) | ||||
|     eb = b.reindex(join_index) | ||||
|  | ||||
|     if fill is not None: | ||||
|         ea = ea.fillna(fill) | ||||
|         eb = eb.fillna(fill) | ||||
|  | ||||
|     tm.assert_series_equal(aa, ea) | ||||
|     tm.assert_series_equal(ab, eb) | ||||
|     assert aa.name == "ts" | ||||
|     assert ea.name == "ts" | ||||
|     assert ab.name == "ts" | ||||
|     assert eb.name == "ts" | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "first_slice,second_slice", | ||||
|     [ | ||||
|         [[2, None], [None, -5]], | ||||
|         [[None, 0], [None, -5]], | ||||
|         [[None, -5], [None, 0]], | ||||
|         [[None, 0], [None, 0]], | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize("method", ["pad", "bfill"]) | ||||
| @pytest.mark.parametrize("limit", [None, 1]) | ||||
| def test_align_fill_method( | ||||
|     datetime_series, first_slice, second_slice, join_type, method, limit | ||||
| ): | ||||
|     a = datetime_series[slice(*first_slice)] | ||||
|     b = datetime_series[slice(*second_slice)] | ||||
|  | ||||
|     msg = ( | ||||
|         "The 'method', 'limit', and 'fill_axis' keywords in Series.align " | ||||
|         "are deprecated" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         aa, ab = a.align(b, join=join_type, method=method, limit=limit) | ||||
|  | ||||
|     join_index = a.index.join(b.index, how=join_type) | ||||
|     ea = a.reindex(join_index) | ||||
|     eb = b.reindex(join_index) | ||||
|  | ||||
|     msg2 = "Series.fillna with 'method' is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|         ea = ea.fillna(method=method, limit=limit) | ||||
|         eb = eb.fillna(method=method, limit=limit) | ||||
|  | ||||
|     tm.assert_series_equal(aa, ea) | ||||
|     tm.assert_series_equal(ab, eb) | ||||
|  | ||||
|  | ||||
| def test_align_nocopy(datetime_series, using_copy_on_write): | ||||
|     b = datetime_series[:5].copy() | ||||
|  | ||||
|     # do copy | ||||
|     a = datetime_series.copy() | ||||
|     ra, _ = a.align(b, join="left") | ||||
|     ra[:5] = 5 | ||||
|     assert not (a[:5] == 5).any() | ||||
|  | ||||
|     # do not copy | ||||
|     a = datetime_series.copy() | ||||
|     ra, _ = a.align(b, join="left", copy=False) | ||||
|     ra[:5] = 5 | ||||
|     if using_copy_on_write: | ||||
|         assert not (a[:5] == 5).any() | ||||
|     else: | ||||
|         assert (a[:5] == 5).all() | ||||
|  | ||||
|     # do copy | ||||
|     a = datetime_series.copy() | ||||
|     b = datetime_series[:5].copy() | ||||
|     _, rb = a.align(b, join="right") | ||||
|     rb[:3] = 5 | ||||
|     assert not (b[:3] == 5).any() | ||||
|  | ||||
|     # do not copy | ||||
|     a = datetime_series.copy() | ||||
|     b = datetime_series[:5].copy() | ||||
|     _, rb = a.align(b, join="right", copy=False) | ||||
|     rb[:2] = 5 | ||||
|     if using_copy_on_write: | ||||
|         assert not (b[:2] == 5).any() | ||||
|     else: | ||||
|         assert (b[:2] == 5).all() | ||||
|  | ||||
|  | ||||
| def test_align_same_index(datetime_series, using_copy_on_write): | ||||
|     a, b = datetime_series.align(datetime_series, copy=False) | ||||
|     if not using_copy_on_write: | ||||
|         assert a.index is datetime_series.index | ||||
|         assert b.index is datetime_series.index | ||||
|     else: | ||||
|         assert a.index.is_(datetime_series.index) | ||||
|         assert b.index.is_(datetime_series.index) | ||||
|  | ||||
|     a, b = datetime_series.align(datetime_series, copy=True) | ||||
|     assert a.index is not datetime_series.index | ||||
|     assert b.index is not datetime_series.index | ||||
|     assert a.index.is_(datetime_series.index) | ||||
|     assert b.index.is_(datetime_series.index) | ||||
|  | ||||
|  | ||||
def test_align_multiindex():
    """
    Align a three-level MultiIndex Series with a Series indexed by level "b".

    Aligning ``wide`` with ``narrow`` on one side must mirror aligning
    ``narrow`` with ``wide`` on the opposite side.
    """
    # GH 10665
    full_midx = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=("a", "b", "c")
    )
    level_b = pd.Index(range(2), name="b")
    wide = Series(np.arange(12, dtype="int64"), index=full_midx)
    narrow = Series(np.arange(2, dtype="int64"), index=level_b)

    # Keep every row of ``wide``: results must match, only flipped.
    wide_a, narrow_a = wide.align(narrow, join="left")
    narrow_b, wide_b = narrow.align(wide, join="right")

    for got in (wide_a, wide_b):
        tm.assert_series_equal(wide, got)
    want_narrow = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=full_midx)
    for got in (narrow_a, narrow_b):
        tm.assert_series_equal(want_narrow, got)

    # Keep only the rows whose "b" label exists in ``narrow``.
    wide_a, narrow_a = wide.align(narrow, join="right")
    narrow_b, wide_b = narrow.align(wide, join="left")

    trimmed_midx = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=("a", "b", "c")
    )
    want_wide = Series([0, 1, 2, 3, 6, 7, 8, 9], index=trimmed_midx)
    for got in (wide_a, wide_b):
        tm.assert_series_equal(want_wide, got)
    want_narrow = Series([0, 0, 1, 1] * 2, index=trimmed_midx)
    for got in (narrow_a, narrow_b):
        tm.assert_series_equal(want_narrow, got)
|  | ||||
|  | ||||
| @pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None]) | ||||
| def test_align_with_dataframe_method(method): | ||||
|     # GH31788 | ||||
|     ser = Series(range(3), index=range(3)) | ||||
|     df = pd.DataFrame(0.0, index=range(3), columns=range(3)) | ||||
|  | ||||
|     msg = ( | ||||
|         "The 'method', 'limit', and 'fill_axis' keywords in Series.align " | ||||
|         "are deprecated" | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result_ser, result_df = ser.align(df, method=method) | ||||
|     tm.assert_series_equal(result_ser, ser) | ||||
|     tm.assert_frame_equal(result_df, df) | ||||
|  | ||||
|  | ||||
def test_align_dt64tzindex_mismatched_tzs():
    """Aligning Series whose indexes carry different timezones yields UTC."""
    eastern_idx = date_range("2001", periods=5, freq="h", tz="US/Eastern")
    values = np.random.default_rng(2).standard_normal(len(eastern_idx))
    eastern = Series(values, index=eastern_idx)
    central = eastern.tz_convert("US/Central")

    # different timezones convert to UTC
    for aligned in eastern.align(central):
        assert aligned.index.tz is timezone.utc
|  | ||||
|  | ||||
| def test_align_periodindex(join_type): | ||||
|     rng = period_range("1/1/2000", "1/1/2010", freq="Y") | ||||
|     ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) | ||||
|  | ||||
|     # TODO: assert something? | ||||
|     ts.align(ts[::2], join=join_type) | ||||
|  | ||||
|  | ||||
| def test_align_stringindex(any_string_dtype): | ||||
|     left = Series(range(3), index=pd.Index(["a", "b", "d"], dtype=any_string_dtype)) | ||||
|     right = Series(range(3), index=pd.Index(["a", "b", "c"], dtype=any_string_dtype)) | ||||
|     result_left, result_right = left.align(right) | ||||
|  | ||||
|     expected_idx = pd.Index(["a", "b", "c", "d"], dtype=any_string_dtype) | ||||
|     expected_left = Series([0, 1, np.nan, 2], index=expected_idx) | ||||
|     expected_right = Series([0, 1, 2, np.nan], index=expected_idx) | ||||
|  | ||||
|     tm.assert_series_equal(result_left, expected_left) | ||||
|     tm.assert_series_equal(result_right, expected_right) | ||||
|  | ||||
|  | ||||
def test_align_left_fewer_levels():
    """Align when the left MultiIndex has a subset of the right one's levels."""
    # GH#45224
    left_idx = pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"])
    right_idx = pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
    left = Series([2], index=left_idx)
    right = Series([1], index=right_idx)

    got_left, got_right = left.align(right)

    # Both results share one index: left's levels first, then the extra "b".
    shared_idx = pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
    tm.assert_series_equal(got_left, Series([2], index=shared_idx))
    tm.assert_series_equal(got_right, Series([1], index=shared_idx))
|  | ||||
|  | ||||
| def test_align_left_different_named_levels(): | ||||
|     # GH#45224 | ||||
|     left = Series( | ||||
|         [2], index=pd.MultiIndex.from_tuples([(1, 4, 3)], names=["a", "d", "c"]) | ||||
|     ) | ||||
|     right = Series( | ||||
|         [1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"]) | ||||
|     ) | ||||
|     result_left, result_right = left.align(right) | ||||
|  | ||||
|     expected_left = Series( | ||||
|         [2], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"]) | ||||
|     ) | ||||
|     expected_right = Series( | ||||
|         [1], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"]) | ||||
|     ) | ||||
|     tm.assert_series_equal(result_left, expected_left) | ||||
|     tm.assert_series_equal(result_right, expected_right) | ||||
| @ -0,0 +1,84 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesArgsort: | ||||
|     def test_argsort_axis(self): | ||||
|         # GH#54257 | ||||
|         ser = Series(range(3)) | ||||
|  | ||||
|         msg = "No axis named 2 for object type Series" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.argsort(axis=2) | ||||
|  | ||||
|     def test_argsort_numpy(self, datetime_series): | ||||
|         ser = datetime_series | ||||
|  | ||||
|         res = np.argsort(ser).values | ||||
|         expected = np.argsort(np.array(ser)) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|         # with missing values | ||||
|         ts = ser.copy() | ||||
|         ts[::2] = np.nan | ||||
|  | ||||
|         msg = "The behavior of Series.argsort in the presence of NA values" | ||||
|         with tm.assert_produces_warning( | ||||
|             FutureWarning, match=msg, check_stacklevel=False | ||||
|         ): | ||||
|             result = np.argsort(ts)[1::2] | ||||
|         expected = np.argsort(np.array(ts.dropna())) | ||||
|  | ||||
|         tm.assert_numpy_array_equal(result.values, expected) | ||||
|  | ||||
|     def test_argsort(self, datetime_series): | ||||
|         argsorted = datetime_series.argsort() | ||||
|         assert issubclass(argsorted.dtype.type, np.integer) | ||||
|  | ||||
|     def test_argsort_dt64(self, unit): | ||||
|         # GH#2967 (introduced bug in 0.11-dev I think) | ||||
|         ser = Series( | ||||
|             [Timestamp(f"201301{i:02d}") for i in range(1, 6)], dtype=f"M8[{unit}]" | ||||
|         ) | ||||
|         assert ser.dtype == f"datetime64[{unit}]" | ||||
|         shifted = ser.shift(-1) | ||||
|         assert shifted.dtype == f"datetime64[{unit}]" | ||||
|         assert isna(shifted[4]) | ||||
|  | ||||
|         result = ser.argsort() | ||||
|         expected = Series(range(5), dtype=np.intp) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         msg = "The behavior of Series.argsort in the presence of NA values" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = shifted.argsort() | ||||
|         expected = Series(list(range(4)) + [-1], dtype=np.intp) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_argsort_stable(self): | ||||
|         ser = Series(np.random.default_rng(2).integers(0, 100, size=10000)) | ||||
|         mindexer = ser.argsort(kind="mergesort") | ||||
|         qindexer = ser.argsort() | ||||
|  | ||||
|         mexpected = np.argsort(ser.values, kind="mergesort") | ||||
|         qexpected = np.argsort(ser.values, kind="quicksort") | ||||
|  | ||||
|         tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected)) | ||||
|         tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected)) | ||||
|         msg = ( | ||||
|             r"ndarray Expected type <class 'numpy\.ndarray'>, " | ||||
|             r"found <class 'pandas\.core\.series\.Series'> instead" | ||||
|         ) | ||||
|         with pytest.raises(AssertionError, match=msg): | ||||
|             tm.assert_numpy_array_equal(qindexer, mindexer) | ||||
|  | ||||
|     def test_argsort_preserve_name(self, datetime_series): | ||||
|         result = datetime_series.argsort() | ||||
|         assert result.name == datetime_series.name | ||||
| @ -0,0 +1,205 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import IncompatibleFrequency | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     PeriodIndex, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     isna, | ||||
|     notna, | ||||
|     offsets, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestSeriesAsof:
    """Tests for ``Series.asof``."""

    def test_asof_nanosecond_index_access(self):
        """``asof`` and string lookup agree on a nanosecond-spaced index."""
        ts = Timestamp("20130101").as_unit("ns")._value
        dti = DatetimeIndex([ts + 50 + i for i in range(100)])
        ser = Series(np.random.default_rng(2).standard_normal(100), index=dti)

        first_value = ser.asof(ser.index[0])

        # GH#46903 previously incorrectly was "day"
        assert dti.resolution == "nanosecond"

        # this used to not work bc parsing was done by dateutil that didn't
        #  handle nanoseconds
        assert first_value == ser["2013-01-01 00:00:00.000000050"]

        expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
        assert first_value == ser[Timestamp(expected_ts)]

    def test_basic(self):
        """``asof`` with an index or a list of dates skips over a NaN block."""
        # array or list or dates
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")
        ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        ts.iloc[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")

        result = ts.asof(dates)
        assert notna(result).all()

        result = ts.asof(list(dates))
        assert notna(result).all()

        # Last valid label before the NaN block and first valid label after it.
        lb = ts.index[14]
        ub = ts.index[30]

        mask = (result.index >= lb) & (result.index < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()

        val = result[result.index[result.index >= ub][0]]
        assert ts[ub] == val

    def test_scalar(self):
        """``asof`` with scalar keys: inside NaN runs, as strings, out of range."""
        N = 30
        rng = date_range("1/1/1990", periods=N, freq="53s")
        # Explicit cast to float avoid implicit cast when setting nan
        ts = Series(np.arange(N), index=rng, dtype="float")
        ts.iloc[5:10] = np.nan
        ts.iloc[15:20] = np.nan

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts.iloc[4]
        assert val2 == ts.iloc[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts.iloc[4]

        # in there
        result = ts.asof(ts.index[3])
        assert result == ts.iloc[3]

        # no as of value
        d = ts.index[0] - offsets.BDay()
        assert np.isnan(ts.asof(d))

    def test_with_nan(self):
        """``asof`` on the Series' own index replaces NaN with the last valid value."""
        # basic asof test
        rng = date_range("1/1/2000", "1/2/2000", freq="4h")
        s = Series(np.arange(len(rng)), index=rng)
        r = s.resample("2h").mean()

        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

        r.iloc[3:5] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

        r.iloc[-3:] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

    def test_periodindex(self):
        """``asof`` on a PeriodIndex-ed Series, including a mismatched freq."""
        # array or list or dates
        N = 50
        rng = period_range("1/1/1990", periods=N, freq="h")
        ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        ts.iloc[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="37min")

        result = ts.asof(dates)
        assert notna(result).all()

        result = ts.asof(list(dates))
        assert notna(result).all()

        # Last valid label before the NaN block and first valid label after it.
        lb = ts.index[14]
        ub = ts.index[30]

        pix = PeriodIndex(result.index.values, freq="h")
        mask = (pix >= lb) & (pix < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()

        ts.iloc[5:10] = np.nan
        ts.iloc[15:20] = np.nan

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts.iloc[4]
        assert val2 == ts.iloc[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts.iloc[4]

        # in there
        assert ts.asof(ts.index[3]) == ts.iloc[3]

        # no as of value
        d = ts.index[0].to_timestamp() - offsets.BDay()
        assert isna(ts.asof(d))

        # Mismatched freq
        msg = "Input has different freq"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ts.asof(rng.asfreq("D"))

    def test_errors(self):
        """``asof`` rejects an unsorted index and the ``subset`` argument."""
        s = Series(
            [1, 2, 3],
            index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
        )

        # non-monotonic
        assert not s.index.is_monotonic_increasing
        with pytest.raises(ValueError, match="requires a sorted index"):
            s.asof(s.index[0])

        # subset with Series
        N = 10
        rng = date_range("1/1/1990", periods=N, freq="53s")
        s = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        with pytest.raises(ValueError, match="not valid for Series"):
            s.asof(s.index[0], subset="foo")

    def test_all_nans(self):
        """``asof`` on an all-NaN Series returns NaN and keeps the name."""
        # GH 15713
        # series is all nans

        # testing non-default indexes
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")

        dates = date_range("1/1/1990", periods=N * 3, freq="25s")
        result = Series(np.nan, index=rng).asof(dates)
        expected = Series(np.nan, index=dates)
        tm.assert_series_equal(result, expected)

        # testing scalar input
        date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
        result = Series(np.nan, index=rng).asof(date)
        assert isna(result)

        # test name is propagated
        result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
        expected = Series(np.nan, index=[4, 5], name="test")
        tm.assert_series_equal(result, expected)
| @ -0,0 +1,689 @@ | ||||
| from datetime import ( | ||||
|     datetime, | ||||
|     timedelta, | ||||
| ) | ||||
| from importlib import reload | ||||
| import string | ||||
| import sys | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.tslibs import iNaT | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     Categorical, | ||||
|     CategoricalDtype, | ||||
|     DatetimeTZDtype, | ||||
|     Index, | ||||
|     Interval, | ||||
|     NaT, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     cut, | ||||
|     date_range, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def rand_str(nchars: int) -> str:
    """
    Build a string of ``nchars`` ASCII letters and digits.

    The generator is re-created with the fixed seed ``2`` on every call, so
    the same ``nchars`` always produces the same string.
    """
    alphabet = string.ascii_letters + string.digits
    candidates = np.array(list(alphabet), dtype=(np.str_, 1))
    rng = np.random.default_rng(2)
    picked = rng.choice(candidates, nchars)
    return "".join(picked)
|  | ||||
|  | ||||
class TestAstypeAPI:
    """Argument-handling tests for ``Series.astype``."""

    def test_astype_unitless_dt64_raises(self):
        """Casting datetime64 data to a unit-less datetime64 dtype raises."""
        # GH#47844
        ser = Series(["1970-01-01"] * 3, dtype="datetime64[ns]")
        frame = ser.to_frame()

        expected_msg = "Casting to unit-less dtype 'datetime64' is not supported"
        for unitless in (np.datetime64, "datetime64"):
            for obj in (ser, frame):
                with pytest.raises(TypeError, match=expected_msg):
                    obj.astype(unitless)

    def test_arg_for_errors_in_astype(self):
        """An invalid ``errors`` value raises; ``errors="raise"`` is accepted."""
        # see GH#14878
        ser = Series([1, 2, 3])

        expected_msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=expected_msg):
            ser.astype(np.float64, errors=False)

        ser.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """A mapping passed as dtype must be keyed by exactly the Series name."""
        # see GH#7271
        ser = Series(range(0, 10, 2), name="abc")

        as_str = ser.astype(dtype_class({"abc": str}))
        tm.assert_series_equal(
            as_str, Series(["0", "2", "4", "6", "8"], name="abc", dtype="str")
        )

        as_float = ser.astype(dtype_class({"abc": "float64"}))
        tm.assert_series_equal(
            as_float, Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
        )

        # GH#16717
        # if dtypes provided is empty, it should error
        if dtype_class is Series:
            empty_mapping = dtype_class({}, dtype=object)
        else:
            empty_mapping = dtype_class({})

        # An extra key, a wrong key and no key at all are rejected alike.
        rejected = [
            dtype_class({"abc": str, "def": str}),
            dtype_class({0: str}),
            empty_mapping,
        ]
        expected_msg = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )
        for mapping in rejected:
            with pytest.raises(KeyError, match=expected_msg):
                ser.astype(mapping)
|  | ||||
|  | ||||
| class TestAstype: | ||||
|     @pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"]) | ||||
|     def test_astype_object_to_dt64_non_nano(self, tz): | ||||
|         # GH#55756, GH#54620 | ||||
|         ts = Timestamp("2999-01-01") | ||||
|         dtype = "M8[us]" | ||||
|         if tz is not None: | ||||
|             dtype = f"M8[us, {tz}]" | ||||
|         vals = [ts, "2999-01-02 03:04:05.678910", 2500] | ||||
|         ser = Series(vals, dtype=object) | ||||
|         result = ser.astype(dtype) | ||||
|  | ||||
|         # The 2500 is interpreted as microseconds, consistent with what | ||||
|         #  we would get if we created DatetimeIndexes from vals[:2] and vals[2:] | ||||
|         #  and concated the results. | ||||
|         pointwise = [ | ||||
|             vals[0].tz_localize(tz), | ||||
|             Timestamp(vals[1], tz=tz), | ||||
|             to_datetime(vals[2], unit="us", utc=True).tz_convert(tz), | ||||
|         ] | ||||
|         exp_vals = [x.as_unit("us").asm8 for x in pointwise] | ||||
|         exp_arr = np.array(exp_vals, dtype="M8[us]") | ||||
|         expected = Series(exp_arr, dtype="M8[us]") | ||||
|         if tz is not None: | ||||
|             expected = expected.dt.tz_localize("UTC").dt.tz_convert(tz) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_mixed_object_to_dt64tz(self): | ||||
|         # pre-2.0 this raised ValueError bc of tz mismatch | ||||
|         # xref GH#32581 | ||||
|         ts = Timestamp("2016-01-04 05:06:07", tz="US/Pacific") | ||||
|         ts2 = ts.tz_convert("Asia/Tokyo") | ||||
|  | ||||
|         ser = Series([ts, ts2], dtype=object) | ||||
|         res = ser.astype("datetime64[ns, Europe/Brussels]") | ||||
|         expected = Series( | ||||
|             [ts.tz_convert("Europe/Brussels"), ts2.tz_convert("Europe/Brussels")], | ||||
|             dtype="datetime64[ns, Europe/Brussels]", | ||||
|         ) | ||||
|         tm.assert_series_equal(res, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", np.typecodes["All"]) | ||||
|     def test_astype_empty_constructor_equality(self, dtype): | ||||
|         # see GH#15524 | ||||
|  | ||||
|         if dtype not in ( | ||||
|             "S", | ||||
|             "V",  # poor support (if any) currently | ||||
|             "M", | ||||
|             "m",  # Generic timestamps raise a ValueError. Already tested. | ||||
|         ): | ||||
|             init_empty = Series([], dtype=dtype) | ||||
|             as_type_empty = Series([]).astype(dtype) | ||||
|             tm.assert_series_equal(init_empty, as_type_empty) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [str, np.str_]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "series", | ||||
|         [ | ||||
|             Series([string.digits * 10, rand_str(63), rand_str(64), rand_str(1000)]), | ||||
|             Series([string.digits * 10, rand_str(63), rand_str(64), np.nan, 1.0]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_astype_str_map(self, dtype, series, using_infer_string): | ||||
|         # see GH#4405 | ||||
|         using_string_dtype = using_infer_string and dtype is str | ||||
|         result = series.astype(dtype) | ||||
|         if using_string_dtype: | ||||
|             expected = series.map(lambda val: str(val) if val is not np.nan else np.nan) | ||||
|         else: | ||||
|             expected = series.map(str) | ||||
|             if using_infer_string: | ||||
|                 expected = expected.astype(object) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_float_to_period(self): | ||||
|         result = Series([np.nan]).astype("period[D]") | ||||
|         expected = Series([NaT], dtype="period[D]") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_no_pandas_dtype(self): | ||||
|         # https://github.com/pandas-dev/pandas/pull/24866 | ||||
|         ser = Series([1, 2], dtype="int64") | ||||
|         # Don't have NumpyEADtype in the public API, so we use `.array.dtype`, | ||||
|         # which is a NumpyEADtype. | ||||
|         result = ser.astype(ser.array.dtype) | ||||
|         tm.assert_series_equal(result, ser) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64]) | ||||
|     def test_astype_generic_timestamp_no_frequency(self, dtype, request): | ||||
|         # see GH#15524, GH#15987 | ||||
|         data = [1] | ||||
|         ser = Series(data) | ||||
|  | ||||
|         if np.dtype(dtype).name not in ["timedelta64", "datetime64"]: | ||||
|             mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit") | ||||
|             request.applymarker(mark) | ||||
|  | ||||
|         msg = ( | ||||
|             rf"The '{dtype.__name__}' dtype has no unit\. " | ||||
|             rf"Please pass in '{dtype.__name__}\[ns\]' instead." | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.astype(dtype) | ||||
|  | ||||
|     def test_astype_dt64_to_str(self): | ||||
|         # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex | ||||
|         dti = date_range("2012-01-01", periods=3) | ||||
|         result = Series(dti).astype(str) | ||||
|         expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype="str") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_dt64tz_to_str(self): | ||||
|         # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex | ||||
|         dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern") | ||||
|         result = Series(dti_tz).astype(str) | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 "2012-01-01 00:00:00-05:00", | ||||
|                 "2012-01-02 00:00:00-05:00", | ||||
|                 "2012-01-03 00:00:00-05:00", | ||||
|             ], | ||||
|             dtype="str", | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_datetime(self, unit): | ||||
|         ser = Series(iNaT, dtype=f"M8[{unit}]", index=range(5)) | ||||
|  | ||||
|         ser = ser.astype("O") | ||||
|         assert ser.dtype == np.object_ | ||||
|  | ||||
|         ser = Series([datetime(2001, 1, 2, 0, 0)]) | ||||
|  | ||||
|         ser = ser.astype("O") | ||||
|         assert ser.dtype == np.object_ | ||||
|  | ||||
|         ser = Series( | ||||
|             [datetime(2001, 1, 2, 0, 0) for i in range(3)], dtype=f"M8[{unit}]" | ||||
|         ) | ||||
|  | ||||
|         ser[1] = np.nan | ||||
|         assert ser.dtype == f"M8[{unit}]" | ||||
|  | ||||
|         ser = ser.astype("O") | ||||
|         assert ser.dtype == np.object_ | ||||
|  | ||||
|     def test_astype_datetime64tz(self): | ||||
|         ser = Series(date_range("20130101", periods=3, tz="US/Eastern")) | ||||
|  | ||||
|         # astype | ||||
|         result = ser.astype(object) | ||||
|         expected = Series(ser.astype(object), dtype=object) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = Series(ser.values).dt.tz_localize("UTC").dt.tz_convert(ser.dt.tz) | ||||
|         tm.assert_series_equal(result, ser) | ||||
|  | ||||
|         # astype - object, preserves on construction | ||||
|         result = Series(ser.astype(object)) | ||||
|         expected = ser.astype(object) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # astype - datetime64[ns, tz] | ||||
|         msg = "Cannot use .astype to convert from timezone-naive" | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             # dt64->dt64tz astype deprecated | ||||
|             Series(ser.values).astype("datetime64[ns, US/Eastern]") | ||||
|  | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             # dt64->dt64tz astype deprecated | ||||
|             Series(ser.values).astype(ser.dtype) | ||||
|  | ||||
|         result = ser.astype("datetime64[ns, CET]") | ||||
|         expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET")) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_str_cast_dt64(self): | ||||
|         # see GH#9757 | ||||
|         ts = Series([Timestamp("2010-01-04 00:00:00")]) | ||||
|         res = ts.astype(str) | ||||
|  | ||||
|         expected = Series(["2010-01-04"], dtype="str") | ||||
|         tm.assert_series_equal(res, expected) | ||||
|  | ||||
|         ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")]) | ||||
|         res = ts.astype(str) | ||||
|  | ||||
|         expected = Series(["2010-01-04 00:00:00-05:00"], dtype="str") | ||||
|         tm.assert_series_equal(res, expected) | ||||
|  | ||||
|     def test_astype_str_cast_td64(self): | ||||
|         # see GH#9757 | ||||
|  | ||||
|         td = Series([Timedelta(1, unit="d")]) | ||||
|         ser = td.astype(str) | ||||
|  | ||||
|         expected = Series(["1 days"], dtype="str") | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     def test_dt64_series_astype_object(self): | ||||
|         dt64ser = Series(date_range("20130101", periods=3)) | ||||
|         result = dt64ser.astype(object) | ||||
|         assert isinstance(result.iloc[0], datetime) | ||||
|         assert result.dtype == np.object_ | ||||
|  | ||||
|     def test_td64_series_astype_object(self): | ||||
|         tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]") | ||||
|         result = tdser.astype(object) | ||||
|         assert isinstance(result.iloc[0], timedelta) | ||||
|         assert result.dtype == np.object_ | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data, dtype", | ||||
|         [ | ||||
|             (["x", "y", "z"], "string[python]"), | ||||
|             pytest.param( | ||||
|                 ["x", "y", "z"], | ||||
|                 "string[pyarrow]", | ||||
|                 marks=td.skip_if_no("pyarrow"), | ||||
|             ), | ||||
|             (["x", "y", "z"], "category"), | ||||
|             (3 * [Timestamp("2020-01-01", tz="UTC")], None), | ||||
|             (3 * [Interval(0, 1)], None), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("errors", ["raise", "ignore"]) | ||||
|     def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors): | ||||
|         # https://github.com/pandas-dev/pandas/issues/35471 | ||||
|         ser = Series(data, dtype=dtype) | ||||
|         if errors == "ignore": | ||||
|             expected = ser | ||||
|             result = ser.astype(float, errors="ignore") | ||||
|             tm.assert_series_equal(result, expected) | ||||
|         else: | ||||
|             msg = "(Cannot cast)|(could not convert)" | ||||
|             with pytest.raises((ValueError, TypeError), match=msg): | ||||
|                 ser.astype(float, errors=errors) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) | ||||
|     def test_astype_from_float_to_str(self, dtype): | ||||
|         # https://github.com/pandas-dev/pandas/issues/36451 | ||||
|         ser = Series([0.1], dtype=dtype) | ||||
|         result = ser.astype(str) | ||||
|         expected = Series(["0.1"], dtype="str") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "value, string_value", | ||||
|         [ | ||||
|             (None, "None"), | ||||
|             (np.nan, "nan"), | ||||
|             (NA, "<NA>"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_astype_to_str_preserves_na(self, value, string_value, using_infer_string): | ||||
|         # https://github.com/pandas-dev/pandas/issues/36904 | ||||
|         ser = Series(["a", "b", value], dtype=object) | ||||
|         result = ser.astype(str) | ||||
|         expected = Series( | ||||
|             ["a", "b", None if using_infer_string else string_value], dtype="str" | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) | ||||
|     def test_astype(self, dtype): | ||||
|         ser = Series(np.random.default_rng(2).standard_normal(5), name="foo") | ||||
|         as_typed = ser.astype(dtype) | ||||
|  | ||||
|         assert as_typed.dtype == dtype | ||||
|         assert as_typed.name == ser.name | ||||
|  | ||||
|     @pytest.mark.parametrize("value", [np.nan, np.inf]) | ||||
|     @pytest.mark.parametrize("dtype", [np.int32, np.int64]) | ||||
|     def test_astype_cast_nan_inf_int(self, dtype, value): | ||||
|         # gh-14265: check NaN and inf raise error when converting to int | ||||
|         msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" | ||||
|         ser = Series([value]) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.astype(dtype) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) | ||||
|     def test_astype_cast_object_int_fail(self, dtype): | ||||
|         arr = Series(["car", "house", "tree", "1"]) | ||||
|         msg = r"invalid literal for int\(\) with base 10: 'car'" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             arr.astype(dtype) | ||||
|  | ||||
|     def test_astype_float_to_uint_negatives_raise( | ||||
|         self, float_numpy_dtype, any_unsigned_int_numpy_dtype | ||||
|     ): | ||||
|         # GH#45151 We don't cast negative numbers to nonsense values | ||||
|         # TODO: same for EA float/uint dtypes, signed integers? | ||||
|         arr = np.arange(5).astype(float_numpy_dtype) - 3  # includes negatives | ||||
|         ser = Series(arr) | ||||
|  | ||||
|         msg = "Cannot losslessly cast from .* to .*" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.astype(any_unsigned_int_numpy_dtype) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.to_frame().astype(any_unsigned_int_numpy_dtype) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             # We currently catch and re-raise in Index.astype | ||||
|             Index(ser).astype(any_unsigned_int_numpy_dtype) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.array.astype(any_unsigned_int_numpy_dtype) | ||||
|  | ||||
|     def test_astype_cast_object_int(self): | ||||
|         arr = Series(["1", "2", "3", "4"], dtype=object) | ||||
|         result = arr.astype(int) | ||||
|  | ||||
|         tm.assert_series_equal(result, Series(np.arange(1, 5))) | ||||
|  | ||||
|     def test_astype_unicode(self, using_infer_string): | ||||
|         # see GH#7758: A bit of magic is required to set | ||||
|         # default encoding to utf-8 | ||||
|         digits = string.digits | ||||
|         test_series = [ | ||||
|             Series([digits * 10, rand_str(63), rand_str(64), rand_str(1000)]), | ||||
|             Series(["データーサイエンス、お前はもう死んでいる"]), | ||||
|         ] | ||||
|  | ||||
|         former_encoding = None | ||||
|  | ||||
|         if sys.getdefaultencoding() == "utf-8": | ||||
|             # GH#45326 as of 2.0 Series.astype matches Index.astype by handling | ||||
|             #  bytes with obj.decode() instead of str(obj) | ||||
|             item = "野菜食べないとやばい" | ||||
|             ser = Series([item.encode()]) | ||||
|             result = ser.astype(np.str_) | ||||
|             expected = Series([item], dtype=object) | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         for ser in test_series: | ||||
|             res = ser.astype(np.str_) | ||||
|             expec = ser.map(str) | ||||
|             if using_infer_string: | ||||
|                 expec = expec.astype(object) | ||||
|             tm.assert_series_equal(res, expec) | ||||
|  | ||||
|         # Restore the former encoding | ||||
|         if former_encoding is not None and former_encoding != "utf-8": | ||||
|             reload(sys) | ||||
|             sys.setdefaultencoding(former_encoding) | ||||
|  | ||||
|     def test_astype_bytes(self): | ||||
|         # GH#39474 | ||||
|         result = Series(["foo", "bar", "baz"]).astype(bytes) | ||||
|         assert result.dtypes == np.dtype("S3") | ||||
|  | ||||
|     def test_astype_nan_to_bool(self): | ||||
|         # GH#43018 | ||||
|         ser = Series(np.nan, dtype="object") | ||||
|         result = ser.astype("bool") | ||||
|         expected = Series(True, dtype="bool") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype", | ||||
|         tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES, | ||||
|     ) | ||||
|     def test_astype_ea_to_datetimetzdtype(self, dtype): | ||||
|         # GH37553 | ||||
|         ser = Series([4, 0, 9], dtype=dtype) | ||||
|         result = ser.astype(DatetimeTZDtype(tz="US/Pacific")) | ||||
|  | ||||
|         expected = Series( | ||||
|             { | ||||
|                 0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"), | ||||
|                 1: Timestamp("1969-12-31 16:00:00.000000000-08:00", tz="US/Pacific"), | ||||
|                 2: Timestamp("1969-12-31 16:00:00.000000009-08:00", tz="US/Pacific"), | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_retain_attrs(self, any_numpy_dtype): | ||||
|         # GH#44414 | ||||
|         ser = Series([0, 1, 2, 3]) | ||||
|         ser.attrs["Location"] = "Michigan" | ||||
|  | ||||
|         result = ser.astype(any_numpy_dtype).attrs | ||||
|         expected = ser.attrs | ||||
|  | ||||
|         tm.assert_dict_equal(expected, result) | ||||
|  | ||||
|  | ||||
| class TestAstypeString: | ||||
|     @pytest.mark.parametrize( | ||||
|         "data, dtype", | ||||
|         [ | ||||
|             ([True, NA], "boolean"), | ||||
|             (["A", NA], "category"), | ||||
|             (["2020-10-10", "2020-10-10"], "datetime64[ns]"), | ||||
|             (["2020-10-10", "2020-10-10", NaT], "datetime64[ns]"), | ||||
|             ( | ||||
|                 ["2012-01-01 00:00:00-05:00", NaT], | ||||
|                 "datetime64[ns, US/Eastern]", | ||||
|             ), | ||||
|             ([1, None], "UInt16"), | ||||
|             (["1/1/2021", "2/1/2021"], "period[M]"), | ||||
|             (["1/1/2021", "2/1/2021", NaT], "period[M]"), | ||||
|             (["1 Day", "59 Days", NaT], "timedelta64[ns]"), | ||||
|             # currently no way to parse IntervalArray from a list of strings | ||||
|         ], | ||||
|     ) | ||||
|     def test_astype_string_to_extension_dtype_roundtrip( | ||||
|         self, data, dtype, request, nullable_string_dtype | ||||
|     ): | ||||
|         if dtype == "boolean": | ||||
|             mark = pytest.mark.xfail( | ||||
|                 reason="TODO StringArray.astype() with missing values #GH40566" | ||||
|             ) | ||||
|             request.applymarker(mark) | ||||
|         # GH-40351 | ||||
|         ser = Series(data, dtype=dtype) | ||||
|  | ||||
|         # Note: just passing .astype(dtype) fails for dtype="category" | ||||
|         #  with bc ser.dtype.categories will be object dtype whereas | ||||
|         #  result.dtype.categories will have string dtype | ||||
|         result = ser.astype(nullable_string_dtype).astype(ser.dtype) | ||||
|         tm.assert_series_equal(result, ser) | ||||
|  | ||||
|  | ||||
| class TestAstypeCategorical: | ||||
|     def test_astype_categorical_to_other(self): | ||||
|         cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) | ||||
|         ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values() | ||||
|         ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) | ||||
|  | ||||
|         expected = ser | ||||
|         tm.assert_series_equal(ser.astype("category"), expected) | ||||
|         tm.assert_series_equal(ser.astype(CategoricalDtype()), expected) | ||||
|         msg = r"Cannot cast object|str dtype to float64" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.astype("float64") | ||||
|  | ||||
|         cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) | ||||
|         exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype="str") | ||||
|         tm.assert_series_equal(cat.astype("str"), exp) | ||||
|         s2 = Series(Categorical(["1", "2", "3", "4"])) | ||||
|         exp2 = Series([1, 2, 3, 4]).astype("int") | ||||
|         tm.assert_series_equal(s2.astype("int"), exp2) | ||||
|  | ||||
|         # object don't sort correctly, so just compare that we have the same | ||||
|         # values | ||||
|         def cmp(a, b): | ||||
|             tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b))) | ||||
|  | ||||
|         expected = Series(np.array(ser.values), name="value_group") | ||||
|         cmp(ser.astype("object"), expected) | ||||
|         cmp(ser.astype(np.object_), expected) | ||||
|  | ||||
|         # array conversion | ||||
|         tm.assert_almost_equal(np.array(ser), np.array(ser.values)) | ||||
|  | ||||
|         tm.assert_series_equal(ser.astype("category"), ser) | ||||
|         tm.assert_series_equal(ser.astype(CategoricalDtype()), ser) | ||||
|  | ||||
|         roundtrip_expected = ser.cat.set_categories( | ||||
|             ser.cat.categories.sort_values() | ||||
|         ).cat.remove_unused_categories() | ||||
|         result = ser.astype("object").astype("category") | ||||
|         tm.assert_series_equal(result, roundtrip_expected) | ||||
|         result = ser.astype("object").astype(CategoricalDtype()) | ||||
|         tm.assert_series_equal(result, roundtrip_expected) | ||||
|  | ||||
|     def test_astype_categorical_invalid_conversions(self): | ||||
|         # invalid conversion (these are NOT a dtype) | ||||
|         cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) | ||||
|         ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values() | ||||
|         ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) | ||||
|  | ||||
|         msg = ( | ||||
|             "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' " | ||||
|             "not understood" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser.astype(Categorical) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser.astype("object").astype(Categorical) | ||||
|  | ||||
|     def test_astype_categoricaldtype(self): | ||||
|         ser = Series(["a", "b", "a"]) | ||||
|         result = ser.astype(CategoricalDtype(["a", "b"], ordered=True)) | ||||
|         expected = Series(Categorical(["a", "b", "a"], ordered=True)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser.astype(CategoricalDtype(["a", "b"], ordered=False)) | ||||
|         expected = Series(Categorical(["a", "b", "a"], ordered=False)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False)) | ||||
|         expected = Series( | ||||
|             Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False) | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"])) | ||||
|  | ||||
|     @pytest.mark.parametrize("name", [None, "foo"]) | ||||
|     @pytest.mark.parametrize("dtype_ordered", [True, False]) | ||||
|     @pytest.mark.parametrize("series_ordered", [True, False]) | ||||
|     def test_astype_categorical_to_categorical( | ||||
|         self, name, dtype_ordered, series_ordered | ||||
|     ): | ||||
|         # GH#10696, GH#18593 | ||||
|         s_data = list("abcaacbab") | ||||
|         s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered) | ||||
|         ser = Series(s_data, dtype=s_dtype, name=name) | ||||
|  | ||||
|         # unspecified categories | ||||
|         dtype = CategoricalDtype(ordered=dtype_ordered) | ||||
|         result = ser.astype(dtype) | ||||
|         exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered) | ||||
|         expected = Series(s_data, name=name, dtype=exp_dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # different categories | ||||
|         dtype = CategoricalDtype(list("adc"), dtype_ordered) | ||||
|         result = ser.astype(dtype) | ||||
|         expected = Series(s_data, name=name, dtype=dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         if dtype_ordered is False: | ||||
|             # not specifying ordered, so only test once | ||||
|             expected = ser | ||||
|             result = ser.astype("category") | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_bool_missing_to_categorical(self): | ||||
|         # GH-19182 | ||||
|         ser = Series([True, False, np.nan]) | ||||
|         assert ser.dtypes == np.object_ | ||||
|  | ||||
|         result = ser.astype(CategoricalDtype(categories=[True, False])) | ||||
|         expected = Series(Categorical([True, False, np.nan], categories=[True, False])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_astype_categories_raises(self): | ||||
|         # deprecated GH#17636, removed in GH#27141 | ||||
|         ser = Series(["a", "b", "a"]) | ||||
|         with pytest.raises(TypeError, match="got an unexpected"): | ||||
|             ser.astype("category", categories=["a", "b"], ordered=True) | ||||
|  | ||||
|     @pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]]) | ||||
|     def test_astype_from_categorical(self, items): | ||||
|         ser = Series(items) | ||||
|         exp = Series(Categorical(items)) | ||||
|         res = ser.astype("category") | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|     def test_astype_from_categorical_with_keywords(self): | ||||
|         # with keywords | ||||
|         lst = ["a", "b", "c", "a"] | ||||
|         ser = Series(lst) | ||||
|         exp = Series(Categorical(lst, ordered=True)) | ||||
|         res = ser.astype(CategoricalDtype(None, ordered=True)) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True)) | ||||
|         res = ser.astype(CategoricalDtype(list("abcdef"), ordered=True)) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|     def test_astype_timedelta64_with_np_nan(self): | ||||
|         # GH45798 | ||||
|         result = Series([Timedelta(1), np.nan], dtype="timedelta64[ns]") | ||||
|         expected = Series([Timedelta(1), NaT], dtype="timedelta64[ns]") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @td.skip_if_no("pyarrow") | ||||
|     def test_astype_int_na_string(self): | ||||
|         # GH#57418 | ||||
|         ser = Series([12, NA], dtype="Int64[pyarrow]") | ||||
|         result = ser.astype("string[pyarrow]") | ||||
|         expected = Series(["12", NA], dtype="string[pyarrow]") | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,30 @@ | ||||
| import numpy as np | ||||
|  | ||||
|  | ||||
| class TestAutoCorr: | ||||
|     def test_autocorr(self, datetime_series): | ||||
|         # Just run the function | ||||
|         corr1 = datetime_series.autocorr() | ||||
|  | ||||
|         # Now run it with the lag parameter | ||||
|         corr2 = datetime_series.autocorr(lag=1) | ||||
|  | ||||
|         # corr() with lag needs Series of at least length 2 | ||||
|         if len(datetime_series) <= 2: | ||||
|             assert np.isnan(corr1) | ||||
|             assert np.isnan(corr2) | ||||
|         else: | ||||
|             assert corr1 == corr2 | ||||
|  | ||||
|         # Choose a random lag between 1 and length of Series - 2 | ||||
|         # and compare the result with the Series corr() function | ||||
|         n = 1 + np.random.default_rng(2).integers(max(1, len(datetime_series) - 2)) | ||||
|         corr1 = datetime_series.corr(datetime_series.shift(n)) | ||||
|         corr2 = datetime_series.autocorr(lag=n) | ||||
|  | ||||
|         # corr() with lag needs Series of at least length 2 | ||||
|         if len(datetime_series) <= 2: | ||||
|             assert np.isnan(corr1) | ||||
|             assert np.isnan(corr2) | ||||
|         else: | ||||
|             assert corr1 == corr2 | ||||
| @ -0,0 +1,75 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     bdate_range, | ||||
|     date_range, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestBetween: | ||||
|     def test_between(self): | ||||
|         series = Series(date_range("1/1/2000", periods=10)) | ||||
|         left, right = series[[2, 7]] | ||||
|  | ||||
|         result = series.between(left, right) | ||||
|         expected = (series >= left) & (series <= right) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_between_datetime_object_dtype(self): | ||||
|         ser = Series(bdate_range("1/1/2000", periods=20), dtype=object) | ||||
|         ser[::2] = np.nan | ||||
|  | ||||
|         result = ser[ser.between(ser[3], ser[17])] | ||||
|         expected = ser[3:18].dropna() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser[ser.between(ser[3], ser[17], inclusive="neither")] | ||||
|         expected = ser[5:16].dropna() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_between_period_values(self): | ||||
|         ser = Series(period_range("2000-01-01", periods=10, freq="D")) | ||||
|         left, right = ser[[2, 7]] | ||||
|         result = ser.between(left, right) | ||||
|         expected = (ser >= left) & (ser <= right) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_between_inclusive_string(self): | ||||
|         # GH 40628 | ||||
|         series = Series(date_range("1/1/2000", periods=10)) | ||||
|         left, right = series[[2, 7]] | ||||
|  | ||||
|         result = series.between(left, right, inclusive="both") | ||||
|         expected = (series >= left) & (series <= right) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.between(left, right, inclusive="left") | ||||
|         expected = (series >= left) & (series < right) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.between(left, right, inclusive="right") | ||||
|         expected = (series > left) & (series <= right) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.between(left, right, inclusive="neither") | ||||
|         expected = (series > left) & (series < right) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("inclusive", ["yes", True, False]) | ||||
|     def test_between_error_args(self, inclusive): | ||||
|         # GH 40628 | ||||
|         series = Series(date_range("1/1/2000", periods=10)) | ||||
|         left, right = series[[2, 7]] | ||||
|  | ||||
|         value_error_msg = ( | ||||
|             "Inclusive has to be either string of 'both'," | ||||
|             "'left', 'right', or 'neither'." | ||||
|         ) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=value_error_msg): | ||||
|             series = Series(date_range("1/1/2000", periods=10)) | ||||
|             series.between(left, right, inclusive=inclusive) | ||||
| @ -0,0 +1,148 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     array as pd_array, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def df(): | ||||
|     """ | ||||
|     base dataframe for testing | ||||
|     """ | ||||
|     return DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) | ||||
|  | ||||
|  | ||||
| def test_case_when_caselist_is_not_a_list(df): | ||||
|     """ | ||||
|     Raise ValueError if caselist is not a list. | ||||
|     """ | ||||
|     msg = "The caselist argument should be a list; " | ||||
|     msg += "instead got.+" | ||||
|     with pytest.raises(TypeError, match=msg):  # GH39154 | ||||
|         df["a"].case_when(caselist=()) | ||||
|  | ||||
|  | ||||
| def test_case_when_no_caselist(df): | ||||
|     """ | ||||
|     Raise ValueError if no caselist is provided. | ||||
|     """ | ||||
|     msg = "provide at least one boolean condition, " | ||||
|     msg += "with a corresponding replacement." | ||||
|     with pytest.raises(ValueError, match=msg):  # GH39154 | ||||
|         df["a"].case_when([]) | ||||
|  | ||||
|  | ||||
| def test_case_when_odd_caselist(df): | ||||
|     """ | ||||
|     Raise ValueError if no of caselist is odd. | ||||
|     """ | ||||
|     msg = "Argument 0 must have length 2; " | ||||
|     msg += "a condition and replacement; instead got length 3." | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df["a"].case_when([(df["a"].eq(1), 1, df.a.gt(1))]) | ||||
|  | ||||
|  | ||||
| def test_case_when_raise_error_from_mask(df): | ||||
|     """ | ||||
|     Raise Error from within Series.mask | ||||
|     """ | ||||
|     msg = "Failed to apply condition0 and replacement0." | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df["a"].case_when([(df["a"].eq(1), [1, 2])]) | ||||
|  | ||||
|  | ||||
| def test_case_when_single_condition(df): | ||||
|     """ | ||||
|     Test output on a single condition. | ||||
|     """ | ||||
|     result = Series([np.nan, np.nan, np.nan]).case_when([(df.a.eq(1), 1)]) | ||||
|     expected = Series([1, np.nan, np.nan]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_case_when_multiple_conditions(df): | ||||
|     """ | ||||
|     Test output when booleans are derived from a computation | ||||
|     """ | ||||
|     result = Series([np.nan, np.nan, np.nan]).case_when( | ||||
|         [(df.a.eq(1), 1), (Series([False, True, False]), 2)] | ||||
|     ) | ||||
|     expected = Series([1, 2, np.nan]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_case_when_multiple_conditions_replacement_list(df): | ||||
|     """ | ||||
|     Test output when replacement is a list | ||||
|     """ | ||||
|     result = Series([np.nan, np.nan, np.nan]).case_when( | ||||
|         [([True, False, False], 1), (df["a"].gt(1) & df["b"].eq(5), [1, 2, 3])] | ||||
|     ) | ||||
|     expected = Series([1, 2, np.nan]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_case_when_multiple_conditions_replacement_extension_dtype(df): | ||||
|     """ | ||||
|     Test output when replacement has an extension dtype | ||||
|     """ | ||||
|     result = Series([np.nan, np.nan, np.nan]).case_when( | ||||
|         [ | ||||
|             ([True, False, False], 1), | ||||
|             (df["a"].gt(1) & df["b"].eq(5), pd_array([1, 2, 3], dtype="Int64")), | ||||
|         ], | ||||
|     ) | ||||
|     expected = Series([1, 2, np.nan], dtype="Float64") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_case_when_multiple_conditions_replacement_series(df): | ||||
|     """ | ||||
|     Test output when replacement is a Series | ||||
|     """ | ||||
|     result = Series([np.nan, np.nan, np.nan]).case_when( | ||||
|         [ | ||||
|             (np.array([True, False, False]), 1), | ||||
|             (df["a"].gt(1) & df["b"].eq(5), Series([1, 2, 3])), | ||||
|         ], | ||||
|     ) | ||||
|     expected = Series([1, 2, np.nan]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_case_when_non_range_index(): | ||||
|     """ | ||||
|     Test output if index is not RangeIndex | ||||
|     """ | ||||
|     rng = np.random.default_rng(seed=123) | ||||
|     dates = date_range("1/1/2000", periods=8) | ||||
|     df = DataFrame( | ||||
|         rng.standard_normal(size=(8, 4)), index=dates, columns=["A", "B", "C", "D"] | ||||
|     ) | ||||
|     result = Series(5, index=df.index, name="A").case_when([(df.A.gt(0), df.B)]) | ||||
|     expected = df.A.mask(df.A.gt(0), df.B).where(df.A.gt(0), 5) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_case_when_callable(): | ||||
|     """ | ||||
|     Test output on a callable | ||||
|     """ | ||||
|     # https://numpy.org/doc/stable/reference/generated/numpy.piecewise.html | ||||
|     x = np.linspace(-2.5, 2.5, 6) | ||||
|     ser = Series(x) | ||||
|     result = ser.case_when( | ||||
|         caselist=[ | ||||
|             (lambda df: df < 0, lambda df: -df), | ||||
|             (lambda df: df >= 0, lambda df: df), | ||||
|         ] | ||||
|     ) | ||||
|     expected = np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x]) | ||||
|     tm.assert_series_equal(result, Series(expected)) | ||||
| @ -0,0 +1,146 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     isna, | ||||
|     notna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesClip: | ||||
|     """Tests for Series.clip with scalar, list-like, Series and NA bounds.""" | ||||
|  | ||||
|     def test_clip(self, datetime_series): | ||||
|         """Clipping at the median bounds min/max; scalar bounds match np.clip.""" | ||||
|         val = datetime_series.median() | ||||
|  | ||||
|         assert datetime_series.clip(lower=val).min() == val | ||||
|         assert datetime_series.clip(upper=val).max() == val | ||||
|  | ||||
|         result = datetime_series.clip(-0.5, 0.5) | ||||
|         expected = np.clip(datetime_series, -0.5, 0.5) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         # np.clip applied to a Series is expected to hand back a Series | ||||
|         assert isinstance(expected, Series) | ||||
|  | ||||
|     def test_clip_types_and_nulls(self): | ||||
|         """Clip float, object and datetime Series; null positions are kept.""" | ||||
|         sers = [ | ||||
|             Series([np.nan, 1.0, 2.0, 3.0]), | ||||
|             Series([None, "a", "b", "c"]), | ||||
|             Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")), | ||||
|         ] | ||||
|  | ||||
|         for s in sers: | ||||
|             # use one of the Series' own values as the threshold | ||||
|             thresh = s[2] | ||||
|             lower = s.clip(lower=thresh) | ||||
|             upper = s.clip(upper=thresh) | ||||
|             assert lower[notna(lower)].min() == thresh | ||||
|             assert upper[notna(upper)].max() == thresh | ||||
|             # the null mask must be unchanged by clipping | ||||
|             assert list(isna(s)) == list(isna(lower)) | ||||
|             assert list(isna(s)) == list(isna(upper)) | ||||
|  | ||||
|     def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture): | ||||
|         # Ensure that clipping method can handle NA values without failing | ||||
|         # GH#40581 | ||||
|  | ||||
|         if nulls_fixture is pd.NaT: | ||||
|             # constructor will raise, see | ||||
|             #  test_constructor_mismatched_null_nullable_dtype | ||||
|             pytest.skip("See test_constructor_mismatched_null_nullable_dtype") | ||||
|  | ||||
|         ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype) | ||||
|         s_clipped_upper = ser.clip(upper=2.0) | ||||
|         s_clipped_lower = ser.clip(lower=2.0) | ||||
|  | ||||
|         expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype) | ||||
|         expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype) | ||||
|  | ||||
|         tm.assert_series_equal(s_clipped_upper, expected_upper) | ||||
|         tm.assert_series_equal(s_clipped_lower, expected_lower) | ||||
|  | ||||
|     def test_clip_with_na_args(self): | ||||
|         """Should process np.nan argument as None""" | ||||
|         # GH#17276 | ||||
|         s = Series([1, 2, 3]) | ||||
|  | ||||
|         tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) | ||||
|         tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3])) | ||||
|  | ||||
|         # GH#19992 | ||||
|         msg = "Downcasting behavior in Series and DataFrame methods 'where'" | ||||
|         # TODO: avoid this warning here?  seems like we should never be upcasting | ||||
|         #  in the first place? | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             res = s.clip(lower=[0, 4, np.nan]) | ||||
|         tm.assert_series_equal(res, Series([1, 4, 3])) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             res = s.clip(upper=[1, np.nan, 1]) | ||||
|         tm.assert_series_equal(res, Series([1, 2, 1])) | ||||
|  | ||||
|         # GH#40420 | ||||
|         # an all-NaN upper bound is expected to leave the values untouched | ||||
|         s = Series([1, 2, 3]) | ||||
|         result = s.clip(0, [np.nan, np.nan, np.nan]) | ||||
|         tm.assert_series_equal(s, result) | ||||
|  | ||||
|     def test_clip_against_series(self): | ||||
|         """Bounds may be Series, or a scalar combined with a Series.""" | ||||
|         # GH#6966 | ||||
|  | ||||
|         s = Series([1.0, 1.0, 4.0]) | ||||
|  | ||||
|         lower = Series([1.0, 2.0, 3.0]) | ||||
|         upper = Series([1.5, 2.5, 3.5]) | ||||
|  | ||||
|         tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) | ||||
|         tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) | ||||
|  | ||||
|     @pytest.mark.parametrize("inplace", [True, False]) | ||||
|     @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])]) | ||||
|     def test_clip_against_list_like(self, inplace, upper): | ||||
|         """The upper bound may be a list or an ndarray, in place or not.""" | ||||
|         # GH#15390 | ||||
|         original = Series([5, 6, 7]) | ||||
|         result = original.clip(upper=upper, inplace=inplace) | ||||
|         expected = Series([1, 2, 3]) | ||||
|  | ||||
|         if inplace: | ||||
|             # with inplace=True the clipped values are expected on `original` | ||||
|             result = original | ||||
|         tm.assert_series_equal(result, expected, check_exact=True) | ||||
|  | ||||
|     def test_clip_with_datetimes(self): | ||||
|         """Clip Timestamp values against a Timestamp upper bound.""" | ||||
|         # GH#11838 | ||||
|         # naive and tz-aware datetimes | ||||
|  | ||||
|         t = Timestamp("2015-12-01 09:30:30") | ||||
|         s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")]) | ||||
|         result = s.clip(upper=t) | ||||
|         expected = Series( | ||||
|             [Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")] | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # same scenario with tz-aware values and bound | ||||
|         t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern") | ||||
|         s = Series( | ||||
|             [ | ||||
|                 Timestamp("2015-12-01 09:30:00", tz="US/Eastern"), | ||||
|                 Timestamp("2015-12-01 09:31:00", tz="US/Eastern"), | ||||
|             ] | ||||
|         ) | ||||
|         result = s.clip(upper=t) | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 Timestamp("2015-12-01 09:30:00", tz="US/Eastern"), | ||||
|                 Timestamp("2015-12-01 09:30:30", tz="US/Eastern"), | ||||
|             ] | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [object, "M8[us]"]) | ||||
|     def test_clip_with_timestamps_and_oob_datetimes(self, dtype): | ||||
|         """Datetimes outside the Timestamp.min/max range clip to those bounds.""" | ||||
|         # GH-42794 | ||||
|         ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype) | ||||
|  | ||||
|         result = ser.clip(lower=Timestamp.min, upper=Timestamp.max) | ||||
|         expected = Series([Timestamp.min, Timestamp.max], dtype=dtype) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,17 @@ | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCombine: | ||||
|     def test_combine_scalar(self): | ||||
|         # GH#21248 | ||||
|         # Note - combine() with another Series is tested elsewhere because | ||||
|         # it is used when testing operators | ||||
|         ser = Series([i * 10 for i in range(5)]) | ||||
|         result = ser.combine(3, lambda x, y: x + y) | ||||
|         expected = Series([i * 10 + 3 for i in range(5)]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser.combine(22, lambda x, y: min(x, y)) | ||||
|         expected = Series([min(i * 10, 22) for i in range(5)]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,150 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Period, | ||||
|     Series, | ||||
|     date_range, | ||||
|     period_range, | ||||
|     to_datetime, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCombineFirst: | ||||
|     """Tests for Series.combine_first across index types and dtypes.""" | ||||
|  | ||||
|     def test_combine_first_period_datetime(self): | ||||
|         """NaN holes are filled from the other Series for both index kinds.""" | ||||
|         # GH#3367 | ||||
|         didx = date_range(start="1950-01-31", end="1950-07-31", freq="ME") | ||||
|         pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M") | ||||
|         # check to be consistent with DatetimeIndex | ||||
|         for idx in [didx, pidx]: | ||||
|             a = Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) | ||||
|             b = Series([9, 9, 9, 9, 9, 9, 9], index=idx) | ||||
|  | ||||
|             result = a.combine_first(b) | ||||
|             expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64) | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_combine_first_name(self, datetime_series): | ||||
|         """The result carries the name of the calling Series.""" | ||||
|         result = datetime_series.combine_first(datetime_series[:5]) | ||||
|         assert result.name == datetime_series.name | ||||
|  | ||||
|     def test_combine_first(self, using_infer_string): | ||||
|         """Cover no-op, hole-filling, mixed-type and empty-other scenarios.""" | ||||
|         values = np.arange(20, dtype=np.float64) | ||||
|         series = Series(values, index=np.arange(20, dtype=np.int64)) | ||||
|  | ||||
|         # doubled copy with every other value blanked out | ||||
|         series_copy = series * 2 | ||||
|         series_copy[::2] = np.nan | ||||
|  | ||||
|         # nothing used from the input | ||||
|         combined = series.combine_first(series_copy) | ||||
|  | ||||
|         tm.assert_series_equal(combined, series) | ||||
|  | ||||
|         # Holes filled from input | ||||
|         combined = series_copy.combine_first(series) | ||||
|         assert np.isfinite(combined).all() | ||||
|  | ||||
|         tm.assert_series_equal(combined[::2], series[::2]) | ||||
|         tm.assert_series_equal(combined[1::2], series_copy[1::2]) | ||||
|  | ||||
|         # mixed types | ||||
|         index = pd.Index([str(i) for i in range(20)]) | ||||
|         floats = Series(np.random.default_rng(2).standard_normal(20), index=index) | ||||
|         strings = Series([str(i) for i in range(10)], index=index[::2], dtype=object) | ||||
|  | ||||
|         combined = strings.combine_first(floats) | ||||
|  | ||||
|         tm.assert_series_equal(strings, combined.loc[index[::2]]) | ||||
|         tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]]) | ||||
|  | ||||
|         # corner case | ||||
|         ser = Series([1.0, 2, 3], index=[0, 1, 2]) | ||||
|         empty = Series([], index=[], dtype=object) | ||||
|         msg = "The behavior of array concatenation with empty entries is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = ser.combine_first(empty) | ||||
|         # without infer_string the comparison is made against an object index | ||||
|         if not using_infer_string: | ||||
|             ser.index = ser.index.astype("O") | ||||
|         tm.assert_series_equal(ser, result) | ||||
|  | ||||
|     def test_combine_first_dt64(self, unit): | ||||
|         """Combine datetime64 Series with datetime and with string values.""" | ||||
|         s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit) | ||||
|         s1 = to_datetime(Series([np.nan, "2011"])).dt.as_unit(unit) | ||||
|         rs = s0.combine_first(s1) | ||||
|         xp = to_datetime(Series(["2010", "2011"])).dt.as_unit(unit) | ||||
|         tm.assert_series_equal(rs, xp) | ||||
|  | ||||
|         # second operand is left as raw strings here | ||||
|         s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit) | ||||
|         s1 = Series([np.nan, "2011"]) | ||||
|         rs = s0.combine_first(s1) | ||||
|  | ||||
|         xp = Series([datetime(2010, 1, 1), "2011"], dtype="datetime64[ns]") | ||||
|  | ||||
|         tm.assert_series_equal(rs, xp) | ||||
|  | ||||
|     def test_combine_first_dt_tz_values(self, tz_naive_fixture): | ||||
|         """Values from partially overlapping indexes keep the given tz.""" | ||||
|         ser1 = Series( | ||||
|             pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture), | ||||
|             name="ser1", | ||||
|         ) | ||||
|         ser2 = Series( | ||||
|             pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture), | ||||
|             index=[2, 3, 4], | ||||
|             name="ser2", | ||||
|         ) | ||||
|         result = ser1.combine_first(ser2) | ||||
|         # label 2 exists in both, so only labels 3 and 4 come from ser2 | ||||
|         exp_vals = pd.DatetimeIndex( | ||||
|             ["20150101", "20150102", "20150103", "20160515", "20160516"], | ||||
|             tz=tz_naive_fixture, | ||||
|         ) | ||||
|         exp = Series(exp_vals, name="ser1") | ||||
|         tm.assert_series_equal(exp, result) | ||||
|  | ||||
|     def test_combine_first_timezone_series_with_empty_series(self): | ||||
|         """Combining with a value-less Series on a tz-aware index.""" | ||||
|         # GH 41800 | ||||
|         time_index = date_range( | ||||
|             datetime(2021, 1, 1, 1), | ||||
|             datetime(2021, 1, 1, 10), | ||||
|             freq="h", | ||||
|             tz="Europe/Rome", | ||||
|         ) | ||||
|         s1 = Series(range(10), index=time_index) | ||||
|         s2 = Series(index=time_index) | ||||
|         msg = "The behavior of array concatenation with empty entries is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = s1.combine_first(s2) | ||||
|         tm.assert_series_equal(result, s1) | ||||
|  | ||||
|     def test_combine_first_preserves_dtype(self): | ||||
|         """Large integers must come through combine_first unchanged.""" | ||||
|         # GH51764 | ||||
|         s1 = Series([1666880195890293744, 1666880195890293837]) | ||||
|         s2 = Series([1, 2, 3]) | ||||
|         result = s1.combine_first(s2) | ||||
|         expected = Series([1666880195890293744, 1666880195890293837, 3]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_combine_mixed_timezone(self): | ||||
|         """Combine Series whose Timestamp index labels use different zones.""" | ||||
|         # GH 26283 | ||||
|         uniform_tz = Series({pd.Timestamp("2019-05-01", tz="UTC"): 1.0}) | ||||
|         multi_tz = Series( | ||||
|             { | ||||
|                 pd.Timestamp("2019-05-01 01:00:00+0100", tz="Europe/London"): 2.0, | ||||
|                 pd.Timestamp("2019-05-02", tz="UTC"): 3.0, | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         result = uniform_tz.combine_first(multi_tz) | ||||
|         # the expected index is an object-dtype Index of UTC Timestamps | ||||
|         expected = Series( | ||||
|             [1.0, 3.0], | ||||
|             index=pd.Index( | ||||
|                 [ | ||||
|                     pd.Timestamp("2019-05-01 00:00:00+00:00", tz="UTC"), | ||||
|                     pd.Timestamp("2019-05-02 00:00:00+00:00", tz="UTC"), | ||||
|                 ], | ||||
|                 dtype="object", | ||||
|             ), | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,141 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"]) | ||||
| def test_compare_axis(align_axis): | ||||
|     # GH#30429 | ||||
|     s1 = pd.Series(["a", "b", "c"]) | ||||
|     s2 = pd.Series(["x", "b", "z"]) | ||||
|  | ||||
|     result = s1.compare(s2, align_axis=align_axis) | ||||
|  | ||||
|     if align_axis in (1, "columns"): | ||||
|         indices = pd.Index([0, 2]) | ||||
|         columns = pd.Index(["self", "other"]) | ||||
|         expected = pd.DataFrame( | ||||
|             [["a", "x"], ["c", "z"]], index=indices, columns=columns | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|     else: | ||||
|         indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) | ||||
|         expected = pd.Series(["a", "x", "c", "z"], index=indices) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "keep_shape, keep_equal", | ||||
|     [ | ||||
|         (True, False), | ||||
|         (False, True), | ||||
|         (True, True), | ||||
|         # False, False case is already covered in test_compare_axis | ||||
|     ], | ||||
| ) | ||||
| def test_compare_various_formats(keep_shape, keep_equal): | ||||
|     s1 = pd.Series(["a", "b", "c"]) | ||||
|     s2 = pd.Series(["x", "b", "z"]) | ||||
|  | ||||
|     result = s1.compare(s2, keep_shape=keep_shape, keep_equal=keep_equal) | ||||
|  | ||||
|     if keep_shape: | ||||
|         indices = pd.Index([0, 1, 2]) | ||||
|         columns = pd.Index(["self", "other"]) | ||||
|         if keep_equal: | ||||
|             expected = pd.DataFrame( | ||||
|                 [["a", "x"], ["b", "b"], ["c", "z"]], index=indices, columns=columns | ||||
|             ) | ||||
|         else: | ||||
|             expected = pd.DataFrame( | ||||
|                 [["a", "x"], [np.nan, np.nan], ["c", "z"]], | ||||
|                 index=indices, | ||||
|                 columns=columns, | ||||
|             ) | ||||
|     else: | ||||
|         indices = pd.Index([0, 2]) | ||||
|         columns = pd.Index(["self", "other"]) | ||||
|         expected = pd.DataFrame( | ||||
|             [["a", "x"], ["c", "z"]], index=indices, columns=columns | ||||
|         ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_compare_with_equal_nulls(): | ||||
|     # We want to make sure two NaNs are considered the same | ||||
|     # and dropped where applicable | ||||
|     s1 = pd.Series(["a", "b", np.nan]) | ||||
|     s2 = pd.Series(["x", "b", np.nan]) | ||||
|  | ||||
|     result = s1.compare(s2) | ||||
|     expected = pd.DataFrame([["a", "x"]], columns=["self", "other"]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_compare_with_non_equal_nulls(): | ||||
|     # We want to make sure the relevant NaNs do not get dropped | ||||
|     s1 = pd.Series(["a", "b", "c"]) | ||||
|     s2 = pd.Series(["x", "b", np.nan]) | ||||
|  | ||||
|     result = s1.compare(s2, align_axis=0) | ||||
|  | ||||
|     indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) | ||||
|     expected = pd.Series(["a", "x", "c", np.nan], index=indices) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_compare_multi_index(): | ||||
|     index = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]]) | ||||
|     s1 = pd.Series(["a", "b", "c"], index=index) | ||||
|     s2 = pd.Series(["x", "b", "z"], index=index) | ||||
|  | ||||
|     result = s1.compare(s2, align_axis=0) | ||||
|  | ||||
|     indices = pd.MultiIndex.from_arrays( | ||||
|         [[0, 0, 1, 1], [0, 0, 2, 2], ["self", "other", "self", "other"]] | ||||
|     ) | ||||
|     expected = pd.Series(["a", "x", "c", "z"], index=indices) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_compare_unaligned_objects(): | ||||
|     # test Series with different indices | ||||
|     msg = "Can only compare identically-labeled Series objects" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"]) | ||||
|         ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"]) | ||||
|         ser1.compare(ser2) | ||||
|  | ||||
|     # test Series with different lengths | ||||
|     msg = "Can only compare identically-labeled Series objects" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ser1 = pd.Series([1, 2, 3]) | ||||
|         ser2 = pd.Series([1, 2, 3, 4]) | ||||
|         ser1.compare(ser2) | ||||
|  | ||||
|  | ||||
| def test_compare_datetime64_and_string(): | ||||
|     # Issue https://github.com/pandas-dev/pandas/issues/45506 | ||||
|     # Catch OverflowError when comparing datetime64 and string | ||||
|     data = [ | ||||
|         {"a": "2015-07-01", "b": "08335394550"}, | ||||
|         {"a": "2015-07-02", "b": "+49 (0) 0345 300033"}, | ||||
|         {"a": "2015-07-03", "b": "+49(0)2598 04457"}, | ||||
|         {"a": "2015-07-04", "b": "0741470003"}, | ||||
|         {"a": "2015-07-05", "b": "04181 83668"}, | ||||
|     ] | ||||
|     dtypes = {"a": "datetime64[ns]", "b": "string"} | ||||
|     df = pd.DataFrame(data=data).astype(dtypes) | ||||
|  | ||||
|     result_eq1 = df["a"].eq(df["b"]) | ||||
|     result_eq2 = df["a"] == df["b"] | ||||
|     result_neq = df["a"] != df["b"] | ||||
|  | ||||
|     expected_eq = pd.Series([False] * 5)  # For .eq and == | ||||
|     expected_neq = pd.Series([True] * 5)  # For != | ||||
|  | ||||
|     tm.assert_series_equal(result_eq1, expected_eq) | ||||
|     tm.assert_series_equal(result_eq2, expected_eq) | ||||
|     tm.assert_series_equal(result_neq, expected_neq) | ||||
| @ -0,0 +1,309 @@ | ||||
| from itertools import product | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| from pandas._libs import lib | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
| # Each test case consists of a tuple with the data and dtype to create the | ||||
| # test Series, the default dtype for the expected result (which is valid | ||||
| # for most cases), and the specific cases where the result deviates from | ||||
| # this default. Those overrides are defined as a dict with (keyword, val) as | ||||
| # dictionary key. In case of multiple items, the last override takes precedence. | ||||
|  | ||||
|  | ||||
| @pytest.fixture( | ||||
|     params=[ | ||||
|         ( | ||||
|             # data | ||||
|             [1, 2, 3], | ||||
|             # original dtype | ||||
|             np.dtype("int32"), | ||||
|             # default expected dtype | ||||
|             "Int32", | ||||
|             # exceptions on expected dtype | ||||
|             {("convert_integer", False): np.dtype("int32")}, | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2, 3], | ||||
|             np.dtype("int64"), | ||||
|             "Int64", | ||||
|             {("convert_integer", False): np.dtype("int64")}, | ||||
|         ), | ||||
|         ( | ||||
|             ["x", "y", "z"], | ||||
|             np.dtype("O"), | ||||
|             pd.StringDtype(), | ||||
|             {("convert_string", False): np.dtype("O")}, | ||||
|         ), | ||||
|         ( | ||||
|             [True, False, np.nan], | ||||
|             np.dtype("O"), | ||||
|             pd.BooleanDtype(), | ||||
|             {("convert_boolean", False): np.dtype("O")}, | ||||
|         ), | ||||
|         ( | ||||
|             ["h", "i", np.nan], | ||||
|             np.dtype("O"), | ||||
|             pd.StringDtype(), | ||||
|             {("convert_string", False): np.dtype("O")}, | ||||
|         ), | ||||
|         (  # GH32117 | ||||
|             ["h", "i", 1], | ||||
|             np.dtype("O"), | ||||
|             np.dtype("O"), | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             [10, np.nan, 20], | ||||
|             np.dtype("float"), | ||||
|             "Int64", | ||||
|             { | ||||
|                 ("convert_integer", False, "convert_floating", True): "Float64", | ||||
|                 ("convert_integer", False, "convert_floating", False): np.dtype( | ||||
|                     "float" | ||||
|                 ), | ||||
|             }, | ||||
|         ), | ||||
|         ( | ||||
|             [np.nan, 100.5, 200], | ||||
|             np.dtype("float"), | ||||
|             "Float64", | ||||
|             {("convert_floating", False): np.dtype("float")}, | ||||
|         ), | ||||
|         ( | ||||
|             [3, 4, 5], | ||||
|             "Int8", | ||||
|             "Int8", | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             [[1, 2], [3, 4], [5]], | ||||
|             None, | ||||
|             np.dtype("O"), | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             [4, 5, 6], | ||||
|             np.dtype("uint32"), | ||||
|             "UInt32", | ||||
|             {("convert_integer", False): np.dtype("uint32")}, | ||||
|         ), | ||||
|         ( | ||||
|             [-10, 12, 13], | ||||
|             np.dtype("i1"), | ||||
|             "Int8", | ||||
|             {("convert_integer", False): np.dtype("i1")}, | ||||
|         ), | ||||
|         ( | ||||
|             [1.2, 1.3], | ||||
|             np.dtype("float32"), | ||||
|             "Float32", | ||||
|             {("convert_floating", False): np.dtype("float32")}, | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2.0], | ||||
|             object, | ||||
|             "Int64", | ||||
|             { | ||||
|                 ("convert_integer", False): "Float64", | ||||
|                 ("convert_integer", False, "convert_floating", False): np.dtype( | ||||
|                     "float" | ||||
|                 ), | ||||
|                 ("infer_objects", False): np.dtype("object"), | ||||
|             }, | ||||
|         ), | ||||
|         ( | ||||
|             [1, 2.5], | ||||
|             object, | ||||
|             "Float64", | ||||
|             { | ||||
|                 ("convert_floating", False): np.dtype("float"), | ||||
|                 ("infer_objects", False): np.dtype("object"), | ||||
|             }, | ||||
|         ), | ||||
|         (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}), | ||||
|         ( | ||||
|             pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("s"), | ||||
|             pd.DatetimeTZDtype(tz="UTC"), | ||||
|             pd.DatetimeTZDtype(tz="UTC"), | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ms"), | ||||
|             pd.DatetimeTZDtype(tz="UTC"), | ||||
|             pd.DatetimeTZDtype(tz="UTC"), | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("us"), | ||||
|             pd.DatetimeTZDtype(tz="UTC"), | ||||
|             pd.DatetimeTZDtype(tz="UTC"), | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"), | ||||
|             pd.DatetimeTZDtype(tz="UTC"), | ||||
|             pd.DatetimeTZDtype(tz="UTC"), | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"), | ||||
|             "datetime64[ns]", | ||||
|             np.dtype("datetime64[ns]"), | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"), | ||||
|             object, | ||||
|             np.dtype("datetime64[ns]"), | ||||
|             {("infer_objects", False): np.dtype("object")}, | ||||
|         ), | ||||
|         ( | ||||
|             pd.period_range("1/1/2011", freq="M", periods=3), | ||||
|             None, | ||||
|             pd.PeriodDtype("M"), | ||||
|             {}, | ||||
|         ), | ||||
|         ( | ||||
|             pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]), | ||||
|             None, | ||||
|             pd.IntervalDtype("int64", "right"), | ||||
|             {}, | ||||
|         ), | ||||
|     ] | ||||
| ) | ||||
| def test_cases(request): | ||||
|     """ | ||||
|     Fixture yielding one parametrized case per test run. | ||||
|  | ||||
|     Each case is a 4-tuple: the data, the original dtype (or None), the | ||||
|     default expected dtype, and a dict of overrides keyed by flat | ||||
|     (keyword, value, ...) tuples mapping to the expected dtype instead. | ||||
|     """ | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| class TestSeriesConvertDtypes: | ||||
|     @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) | ||||
|     @pytest.mark.parametrize("params", product(*[(True, False)] * 5)) | ||||
|     def test_convert_dtypes( | ||||
|         self, | ||||
|         test_cases, | ||||
|         params, | ||||
|         using_infer_string, | ||||
|     ): | ||||
|         """ | ||||
|         Check convert_dtypes for every combination of its five boolean flags. | ||||
|  | ||||
|         ``params`` holds the flags positionally, in the order listed in | ||||
|         ``param_names`` below; ``test_cases`` supplies the data, the original | ||||
|         dtype, the default expected dtype and the per-flag overrides. | ||||
|         """ | ||||
|         data, maindtype, expected_default, expected_other = test_cases | ||||
|         if ( | ||||
|             hasattr(data, "dtype") | ||||
|             and lib.is_np_dtype(data.dtype, "M") | ||||
|             and isinstance(maindtype, pd.DatetimeTZDtype) | ||||
|         ): | ||||
|             # this astype is deprecated in favor of tz_localize | ||||
|             msg = "Cannot use .astype to convert from timezone-naive dtype" | ||||
|             with pytest.raises(TypeError, match=msg): | ||||
|                 pd.Series(data, dtype=maindtype) | ||||
|             # the Series cannot be built for this case, so nothing else to check | ||||
|             return | ||||
|  | ||||
|         if maindtype is not None: | ||||
|             series = pd.Series(data, dtype=maindtype) | ||||
|         else: | ||||
|             series = pd.Series(data) | ||||
|  | ||||
|         result = series.convert_dtypes(*params) | ||||
|  | ||||
|         param_names = [ | ||||
|             "infer_objects", | ||||
|             "convert_string", | ||||
|             "convert_integer", | ||||
|             "convert_boolean", | ||||
|             "convert_floating", | ||||
|         ] | ||||
|         params_dict = dict(zip(param_names, params)) | ||||
|  | ||||
|         expected_dtype = expected_default | ||||
|         for spec, dtype in expected_other.items(): | ||||
|             # spec is a flat (keyword, value, keyword, value, ...) tuple; an | ||||
|             # override applies when every pair matches, and a later match wins | ||||
|             if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])): | ||||
|                 expected_dtype = dtype | ||||
|         if ( | ||||
|             using_infer_string | ||||
|             and expected_default == "string" | ||||
|             and expected_dtype == object | ||||
|             and params[0] | ||||
|             and not params[1] | ||||
|         ): | ||||
|             # If convert_string=False and infer_objects=True, we end up with the | ||||
|             # default string dtype instead of preserving object for string data | ||||
|             expected_dtype = pd.StringDtype(na_value=np.nan) | ||||
|  | ||||
|         expected = pd.Series(data, dtype=expected_dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # Test that it is a copy | ||||
|         copy = series.copy(deep=True) | ||||
|  | ||||
|         # overwrite every non-null entry of the result with NaN | ||||
|         if result.notna().sum() > 0 and result.dtype in ["interval[int64, right]"]: | ||||
|             with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): | ||||
|                 result[result.notna()] = np.nan | ||||
|         else: | ||||
|             result[result.notna()] = np.nan | ||||
|  | ||||
|         # Make sure original not changed | ||||
|         tm.assert_series_equal(series, copy) | ||||
|  | ||||
|     def test_convert_string_dtype(self, nullable_string_dtype): | ||||
|         # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns | ||||
|         # that are already string dtype | ||||
|         df = pd.DataFrame( | ||||
|             {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype=nullable_string_dtype | ||||
|         ) | ||||
|         result = df.convert_dtypes() | ||||
|         tm.assert_frame_equal(df, result) | ||||
|  | ||||
|     def test_convert_bool_dtype(self): | ||||
|         # GH32287 | ||||
|         df = pd.DataFrame({"A": pd.array([True])}) | ||||
|         tm.assert_frame_equal(df, df.convert_dtypes()) | ||||
|  | ||||
|     def test_convert_byte_string_dtype(self): | ||||
|         # GH-43183 | ||||
|         byte_str = b"binary-string" | ||||
|  | ||||
|         df = pd.DataFrame(data={"A": byte_str}, index=[0]) | ||||
|         result = df.convert_dtypes() | ||||
|         expected = df | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "infer_objects, dtype", [(True, "Int64"), (False, "object")] | ||||
|     ) | ||||
|     def test_convert_dtype_object_with_na(self, infer_objects, dtype): | ||||
|         # GH#48791 | ||||
|         ser = pd.Series([1, pd.NA]) | ||||
|         result = ser.convert_dtypes(infer_objects=infer_objects) | ||||
|         expected = pd.Series([1, pd.NA], dtype=dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "infer_objects, dtype", [(True, "Float64"), (False, "object")] | ||||
|     ) | ||||
|     def test_convert_dtype_object_with_na_float(self, infer_objects, dtype): | ||||
|         # GH#48791 | ||||
|         ser = pd.Series([1.5, pd.NA]) | ||||
|         result = ser.convert_dtypes(infer_objects=infer_objects) | ||||
|         expected = pd.Series([1.5, pd.NA], dtype=dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_convert_dtypes_pyarrow_to_np_nullable(self): | ||||
|         # GH 53648 | ||||
|         pytest.importorskip("pyarrow") | ||||
|         ser = pd.Series(range(2), dtype="int32[pyarrow]") | ||||
|         result = ser.convert_dtypes(dtype_backend="numpy_nullable") | ||||
|         expected = pd.Series(range(2), dtype="Int32") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_convert_dtypes_pyarrow_null(self): | ||||
|         # GH#55346 | ||||
|         pa = pytest.importorskip("pyarrow") | ||||
|         ser = pd.Series([None, None]) | ||||
|         result = ser.convert_dtypes(dtype_backend="pyarrow") | ||||
|         expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null())) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,91 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestCopy: | ||||
|     @pytest.mark.parametrize("deep", ["default", None, False, True]) | ||||
|     def test_copy(self, deep, using_copy_on_write, warn_copy_on_write): | ||||
|         ser = Series(np.arange(10), dtype="float64") | ||||
|  | ||||
|         # default deep is True | ||||
|         if deep == "default": | ||||
|             ser2 = ser.copy() | ||||
|         else: | ||||
|             ser2 = ser.copy(deep=deep) | ||||
|  | ||||
|         if using_copy_on_write: | ||||
|             # INFO(CoW) a shallow copy doesn't yet copy the data | ||||
|             # but parent will not be modified (CoW) | ||||
|             if deep is None or deep is False: | ||||
|                 assert np.may_share_memory(ser.values, ser2.values) | ||||
|             else: | ||||
|                 assert not np.may_share_memory(ser.values, ser2.values) | ||||
|  | ||||
|         with tm.assert_cow_warning(warn_copy_on_write and deep is False): | ||||
|             ser2[::2] = np.nan | ||||
|  | ||||
|         if deep is not False or using_copy_on_write: | ||||
|             # Did not modify original Series | ||||
|             assert np.isnan(ser2[0]) | ||||
|             assert not np.isnan(ser[0]) | ||||
|         else: | ||||
|             # we DID modify the original Series | ||||
|             assert np.isnan(ser2[0]) | ||||
|             assert np.isnan(ser[0]) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") | ||||
|     @pytest.mark.parametrize("deep", ["default", None, False, True]) | ||||
|     def test_copy_tzaware(self, deep, using_copy_on_write): | ||||
|         # GH#11794 | ||||
|         # copy of tz-aware | ||||
|         expected = Series([Timestamp("2012/01/01", tz="UTC")]) | ||||
|         expected2 = Series([Timestamp("1999/01/01", tz="UTC")]) | ||||
|  | ||||
|         ser = Series([Timestamp("2012/01/01", tz="UTC")]) | ||||
|  | ||||
|         if deep == "default": | ||||
|             ser2 = ser.copy() | ||||
|         else: | ||||
|             ser2 = ser.copy(deep=deep) | ||||
|  | ||||
|         if using_copy_on_write: | ||||
|             # INFO(CoW) a shallow copy doesn't yet copy the data | ||||
|             # but parent will not be modified (CoW) | ||||
|             if deep is None or deep is False: | ||||
|                 assert np.may_share_memory(ser.values, ser2.values) | ||||
|             else: | ||||
|                 assert not np.may_share_memory(ser.values, ser2.values) | ||||
|  | ||||
|         ser2[0] = Timestamp("1999/01/01", tz="UTC") | ||||
|  | ||||
|         # default deep is True | ||||
|         if deep is not False or using_copy_on_write: | ||||
|             # Did not modify original Series | ||||
|             tm.assert_series_equal(ser2, expected2) | ||||
|             tm.assert_series_equal(ser, expected) | ||||
|         else: | ||||
|             # we DID modify the original Series | ||||
|             tm.assert_series_equal(ser2, expected2) | ||||
|             tm.assert_series_equal(ser, expected2) | ||||
|  | ||||
|     def test_copy_name(self, datetime_series): | ||||
|         result = datetime_series.copy() | ||||
|         assert result.name == datetime_series.name | ||||
|  | ||||
|     def test_copy_index_name_checking(self, datetime_series): | ||||
|         # don't want to be able to modify the index stored elsewhere after | ||||
|         # making a copy | ||||
|  | ||||
|         datetime_series.index.name = None | ||||
|         assert datetime_series.index.name is None | ||||
|         assert datetime_series is datetime_series | ||||
|  | ||||
|         cp = datetime_series.copy() | ||||
|         cp.index.name = "foo" | ||||
|         assert datetime_series.index.name is None | ||||
| @ -0,0 +1,34 @@ | ||||
| import numpy as np | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesCount: | ||||
|     def test_count(self, datetime_series): | ||||
|         assert datetime_series.count() == len(datetime_series) | ||||
|  | ||||
|         datetime_series[::2] = np.nan | ||||
|  | ||||
|         assert datetime_series.count() == np.isfinite(datetime_series).sum() | ||||
|  | ||||
|     def test_count_inf_as_na(self): | ||||
|         # GH#29478 | ||||
|         ser = Series([pd.Timestamp("1990/1/1")]) | ||||
|         msg = "use_inf_as_na option is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             with pd.option_context("use_inf_as_na", True): | ||||
|                 assert ser.count() == 1 | ||||
|  | ||||
|     def test_count_categorical(self): | ||||
|         ser = Series( | ||||
|             Categorical( | ||||
|                 [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True | ||||
|             ) | ||||
|         ) | ||||
|         result = ser.count() | ||||
|         assert result == 2 | ||||
| @ -0,0 +1,185 @@ | ||||
| import math | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     date_range, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesCov: | ||||
|     def test_cov(self, datetime_series): | ||||
|         # full overlap | ||||
|         tm.assert_almost_equal( | ||||
|             datetime_series.cov(datetime_series), datetime_series.std() ** 2 | ||||
|         ) | ||||
|  | ||||
|         # partial overlap | ||||
|         tm.assert_almost_equal( | ||||
|             datetime_series[:15].cov(datetime_series[5:]), | ||||
|             datetime_series[5:15].std() ** 2, | ||||
|         ) | ||||
|  | ||||
|         # No overlap | ||||
|         assert np.isnan(datetime_series[::2].cov(datetime_series[1::2])) | ||||
|  | ||||
|         # all NA | ||||
|         cp = datetime_series[:10].copy() | ||||
|         cp[:] = np.nan | ||||
|         assert isna(cp.cov(cp)) | ||||
|  | ||||
|         # min_periods | ||||
|         assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12)) | ||||
|  | ||||
|         ts1 = datetime_series[:15].reindex(datetime_series.index) | ||||
|         ts2 = datetime_series[5:].reindex(datetime_series.index) | ||||
|         assert isna(ts1.cov(ts2, min_periods=12)) | ||||
|  | ||||
|     @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3]) | ||||
|     @pytest.mark.parametrize("dtype", ["float64", "Float64"]) | ||||
|     def test_cov_ddof(self, test_ddof, dtype): | ||||
|         # GH#34611 | ||||
|         np_array1 = np.random.default_rng(2).random(10) | ||||
|         np_array2 = np.random.default_rng(2).random(10) | ||||
|  | ||||
|         s1 = Series(np_array1, dtype=dtype) | ||||
|         s2 = Series(np_array2, dtype=dtype) | ||||
|  | ||||
|         result = s1.cov(s2, ddof=test_ddof) | ||||
|         expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1] | ||||
|         assert math.isclose(expected, result) | ||||
|  | ||||
|  | ||||
| class TestSeriesCorr: | ||||
|     @pytest.mark.parametrize("dtype", ["float64", "Float64"]) | ||||
|     def test_corr(self, datetime_series, dtype): | ||||
|         # scipy supplies the reference value at the end; skip when it is missing | ||||
|         stats = pytest.importorskip("scipy.stats") | ||||
|  | ||||
|         datetime_series = datetime_series.astype(dtype) | ||||
|  | ||||
|         # full overlap | ||||
|         tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) | ||||
|  | ||||
|         # partial overlap | ||||
|         tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1) | ||||
|  | ||||
|         # rows 5..14 overlap (at most 10 observations), fewer than min_periods=12 | ||||
|         assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12)) | ||||
|  | ||||
|         # same check with both operands reindexed to the full index first | ||||
|         ts1 = datetime_series[:15].reindex(datetime_series.index) | ||||
|         ts2 = datetime_series[5:].reindex(datetime_series.index) | ||||
|         assert isna(ts1.corr(ts2, min_periods=12)) | ||||
|  | ||||
|         # No overlap | ||||
|         assert np.isnan(datetime_series[::2].corr(datetime_series[1::2])) | ||||
|  | ||||
|         # all NA | ||||
|         cp = datetime_series[:10].copy() | ||||
|         cp[:] = np.nan | ||||
|         assert isna(cp.corr(cp)) | ||||
|  | ||||
|         # compare the default method against scipy's pearsonr | ||||
|         A = Series( | ||||
|             np.arange(10, dtype=np.float64), | ||||
|             index=date_range("2020-01-01", periods=10), | ||||
|             name="ts", | ||||
|         ) | ||||
|         B = A.copy() | ||||
|         result = A.corr(B) | ||||
|         expected, _ = stats.pearsonr(A, B) | ||||
|         tm.assert_almost_equal(result, expected) | ||||
|  | ||||
|     def test_corr_rank(self): | ||||
|         # scipy supplies the reference values; skip when it is missing | ||||
|         stats = pytest.importorskip("scipy.stats") | ||||
|  | ||||
|         # kendall and spearman | ||||
|         A = Series( | ||||
|             np.arange(10, dtype=np.float64), | ||||
|             index=date_range("2020-01-01", periods=10), | ||||
|             name="ts", | ||||
|         ) | ||||
|         B = A.copy() | ||||
|         # overwrite the last five entries of A with a copy of its first five, | ||||
|         # so that A and B are no longer identical | ||||
|         A[-5:] = A[:5].copy() | ||||
|         result = A.corr(B, method="kendall") | ||||
|         expected = stats.kendalltau(A, B)[0] | ||||
|         tm.assert_almost_equal(result, expected) | ||||
|  | ||||
|         result = A.corr(B, method="spearman") | ||||
|         expected = stats.spearmanr(A, B)[0] | ||||
|         tm.assert_almost_equal(result, expected) | ||||
|  | ||||
|         # results from R | ||||
|         A = Series( | ||||
|             [ | ||||
|                 -0.89926396, | ||||
|                 0.94209606, | ||||
|                 -1.03289164, | ||||
|                 -0.95445587, | ||||
|                 0.76910310, | ||||
|                 -0.06430576, | ||||
|                 -2.09704447, | ||||
|                 0.40660407, | ||||
|                 -0.89926396, | ||||
|                 0.94209606, | ||||
|             ] | ||||
|         ) | ||||
|         B = Series( | ||||
|             [ | ||||
|                 -1.01270225, | ||||
|                 -0.62210117, | ||||
|                 -1.56895827, | ||||
|                 0.59592943, | ||||
|                 -0.01680292, | ||||
|                 1.17258718, | ||||
|                 -1.06009347, | ||||
|                 -0.10222060, | ||||
|                 -0.89076239, | ||||
|                 0.89372375, | ||||
|             ] | ||||
|         ) | ||||
|         # kexp / sexp: expected kendall and spearman coefficients for A and B | ||||
|         kexp = 0.4319297 | ||||
|         sexp = 0.5853767 | ||||
|         tm.assert_almost_equal(A.corr(B, method="kendall"), kexp) | ||||
|         tm.assert_almost_equal(A.corr(B, method="spearman"), sexp) | ||||
|  | ||||
|     def test_corr_invalid_method(self): | ||||
|         # GH PR #22298 | ||||
|         # an unknown method name has to be rejected with a ValueError | ||||
|         s1 = Series(np.random.default_rng(2).standard_normal(10)) | ||||
|         s2 = Series(np.random.default_rng(2).standard_normal(10)) | ||||
|         # msg ends in ", " so it only pins the start of the error message | ||||
|         msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s1.corr(s2, method="____") | ||||
|  | ||||
|     def test_corr_callable_method(self, datetime_series): | ||||
|         # simple correlation example | ||||
|         # returns 1 if exact equality, 0 otherwise | ||||
|         my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0 | ||||
|  | ||||
|         # simple example | ||||
|         s1 = Series([1, 2, 3, 4, 5]) | ||||
|         s2 = Series([5, 4, 3, 2, 1]) | ||||
|         expected = 0 | ||||
|         tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected) | ||||
|  | ||||
|         # full overlap | ||||
|         tm.assert_almost_equal( | ||||
|             datetime_series.corr(datetime_series, method=my_corr), 1.0 | ||||
|         ) | ||||
|  | ||||
|         # partial overlap | ||||
|         tm.assert_almost_equal( | ||||
|             datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0 | ||||
|         ) | ||||
|  | ||||
|         # No overlap | ||||
|         assert np.isnan( | ||||
|             datetime_series[::2].corr(datetime_series[1::2], method=my_corr) | ||||
|         ) | ||||
|  | ||||
|         # dataframe example | ||||
|         # s1 and s2 become the two columns after the transpose; each column | ||||
|         # equals itself (1.0) and differs from the other (0) | ||||
|         df = pd.DataFrame([s1, s2]) | ||||
|         expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}]) | ||||
|         tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected) | ||||
| @ -0,0 +1,203 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat.numpy import np_version_gte1p25 | ||||
|  | ||||
| from pandas.core.dtypes.common import ( | ||||
|     is_complex_dtype, | ||||
|     is_extension_array_dtype, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     Period, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesDescribe: | ||||
|     def test_describe_ints(self): | ||||
|         # integer input yields the numeric summary: | ||||
|         # count, mean, std, min, quartiles, max | ||||
|         ser = Series([0, 1, 2, 3, 4], name="int_data") | ||||
|         result = ser.describe() | ||||
|         expected = Series( | ||||
|             [5, 2, ser.std(), 0, 1, 2, 3, 4], | ||||
|             name="int_data", | ||||
|             index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_describe_bools(self): | ||||
|         # bool input yields count/unique/top/freq instead of numeric statistics | ||||
|         ser = Series([True, True, False, False, False], name="bool_data") | ||||
|         result = ser.describe() | ||||
|         expected = Series( | ||||
|             [5, 2, False, 3], name="bool_data", index=["count", "unique", "top", "freq"] | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_describe_strs(self): | ||||
|         # string input: "a" occurs twice and is therefore the top value | ||||
|         ser = Series(["a", "a", "b", "c", "d"], name="str_data") | ||||
|         result = ser.describe() | ||||
|         expected = Series( | ||||
|             [5, 4, "a", 2], name="str_data", index=["count", "unique", "top", "freq"] | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_describe_timedelta64(self): | ||||
|         # timedelta input yields the numeric summary; for the evenly spaced | ||||
|         # values 1..5 days the mean and the quartiles are elements of ser | ||||
|         ser = Series( | ||||
|             [ | ||||
|                 Timedelta("1 days"), | ||||
|                 Timedelta("2 days"), | ||||
|                 Timedelta("3 days"), | ||||
|                 Timedelta("4 days"), | ||||
|                 Timedelta("5 days"), | ||||
|             ], | ||||
|             name="timedelta_data", | ||||
|         ) | ||||
|         result = ser.describe() | ||||
|         expected = Series( | ||||
|             [5, ser[2], ser.std(), ser[0], ser[1], ser[2], ser[3], ser[4]], | ||||
|             name="timedelta_data", | ||||
|             index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_describe_period(self): | ||||
|         # Period input yields count/unique/top/freq; 2020-01 occurs twice | ||||
|         ser = Series( | ||||
|             [Period("2020-01", "M"), Period("2020-01", "M"), Period("2019-12", "M")], | ||||
|             name="period_data", | ||||
|         ) | ||||
|         result = ser.describe() | ||||
|         expected = Series( | ||||
|             [3, 2, ser[0], 2], | ||||
|             name="period_data", | ||||
|             index=["count", "unique", "top", "freq"], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_describe_empty_object(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/27183 | ||||
|         s = Series([None, None], dtype=object) | ||||
|         result = s.describe() | ||||
|         expected = Series( | ||||
|             [0, 0, np.nan, np.nan], | ||||
|             dtype=object, | ||||
|             index=["count", "unique", "top", "freq"], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # a zero-length slice must give the same summary | ||||
|         result = s[:0].describe() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         # ensure NaN, not None | ||||
|         assert np.isnan(result.iloc[2]) | ||||
|         assert np.isnan(result.iloc[3]) | ||||
|  | ||||
|     def test_describe_with_tz(self, tz_naive_fixture): | ||||
|         # GH 21332 | ||||
|         # datetime input yields count/mean/min/quartiles/max; note that the | ||||
|         # expected index has no "std" row | ||||
|         tz = tz_naive_fixture | ||||
|         name = str(tz_naive_fixture) | ||||
|         start = Timestamp(2018, 1, 1) | ||||
|         end = Timestamp(2018, 1, 5) | ||||
|         s = Series(date_range(start, end, tz=tz), name=name) | ||||
|         result = s.describe() | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 5, | ||||
|                 Timestamp(2018, 1, 3).tz_localize(tz), | ||||
|                 start.tz_localize(tz), | ||||
|                 s[1], | ||||
|                 s[2], | ||||
|                 s[3], | ||||
|                 end.tz_localize(tz), | ||||
|             ], | ||||
|             name=name, | ||||
|             index=["count", "mean", "min", "25%", "50%", "75%", "max"], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_describe_with_tz_numeric(self): | ||||
|         # same scenario as test_describe_with_tz, but with the fixed timezone | ||||
|         # "CET" and every expected Timestamp written out explicitly | ||||
|         name = tz = "CET" | ||||
|         start = Timestamp(2018, 1, 1) | ||||
|         end = Timestamp(2018, 1, 5) | ||||
|         s = Series(date_range(start, end, tz=tz), name=name) | ||||
|  | ||||
|         result = s.describe() | ||||
|  | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 5, | ||||
|                 Timestamp("2018-01-03 00:00:00", tz=tz), | ||||
|                 Timestamp("2018-01-01 00:00:00", tz=tz), | ||||
|                 Timestamp("2018-01-02 00:00:00", tz=tz), | ||||
|                 Timestamp("2018-01-03 00:00:00", tz=tz), | ||||
|                 Timestamp("2018-01-04 00:00:00", tz=tz), | ||||
|                 Timestamp("2018-01-05 00:00:00", tz=tz), | ||||
|             ], | ||||
|             name=name, | ||||
|             index=["count", "mean", "min", "25%", "50%", "75%", "max"], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_datetime_is_numeric_includes_datetime(self): | ||||
|         # tz-naive datetimes: with three daily values the 25% and 75% | ||||
|         # quartiles fall halfway between two days (12:00) | ||||
|         s = Series(date_range("2012", periods=3)) | ||||
|         result = s.describe() | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 3, | ||||
|                 Timestamp("2012-01-02"), | ||||
|                 Timestamp("2012-01-01"), | ||||
|                 Timestamp("2012-01-01T12:00:00"), | ||||
|                 Timestamp("2012-01-02"), | ||||
|                 Timestamp("2012-01-02T12:00:00"), | ||||
|                 Timestamp("2012-01-03"), | ||||
|             ], | ||||
|             index=["count", "mean", "min", "25%", "50%", "75%", "max"], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.filterwarnings("ignore:Casting complex values to real discards") | ||||
|     def test_numeric_result_dtype(self, any_numeric_dtype): | ||||
|         # GH#48340 - describe should always return float on non-complex numeric input | ||||
|         # Expected result dtype: Float64 for extension dtypes, complex128 for | ||||
|         # complex input, otherwise None so the expected Series infers its dtype | ||||
|         # from the float values listed below. | ||||
|         if is_extension_array_dtype(any_numeric_dtype): | ||||
|             dtype = "Float64" | ||||
|         else: | ||||
|             dtype = "complex128" if is_complex_dtype(any_numeric_dtype) else None | ||||
|  | ||||
|         ser = Series([0, 1], dtype=any_numeric_dtype) | ||||
|         if dtype == "complex128" and np_version_gte1p25: | ||||
|             # with numpy >= 1.25 describing complex data raises instead | ||||
|             with pytest.raises( | ||||
|                 TypeError, match=r"^a must be an array of real numbers$" | ||||
|             ): | ||||
|                 ser.describe() | ||||
|             return | ||||
|         result = ser.describe() | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 2.0, | ||||
|                 0.5, | ||||
|                 ser.std(), | ||||
|                 0, | ||||
|                 0.25, | ||||
|                 0.5, | ||||
|                 0.75, | ||||
|                 1.0, | ||||
|             ], | ||||
|             index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], | ||||
|             dtype=dtype, | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_describe_one_element_ea(self): | ||||
|         # GH#52515 | ||||
|         # a single-element Float64 Series: std is NA and describe() must not | ||||
|         # emit any warning while computing it | ||||
|         ser = Series([0.0], dtype="Float64") | ||||
|         with tm.assert_produces_warning(None): | ||||
|             result = ser.describe() | ||||
|         expected = Series( | ||||
|             [1, 0, NA, 0, 0, 0, 0, 0], | ||||
|             dtype="Float64", | ||||
|             index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,88 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     TimedeltaIndex, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesDiff: | ||||
|     def test_diff_np(self): | ||||
|         # TODO(__array_function__): could make np.diff return a Series | ||||
|         #  matching ser.diff() | ||||
|  | ||||
|         ser = Series(np.arange(5)) | ||||
|  | ||||
|         res = np.diff(ser) | ||||
|         expected = np.array([1, 1, 1, 1]) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|     def test_diff_int(self): | ||||
|         # int dtype | ||||
|         a = 10000000000000000 | ||||
|         b = a + 1 | ||||
|         ser = Series([a, b]) | ||||
|  | ||||
|         result = ser.diff() | ||||
|         assert result[1] == 1 | ||||
|  | ||||
|     def test_diff_tz(self): | ||||
|         # Combined datetime diff, normal diff and boolean diff test | ||||
|         ts = Series( | ||||
|             np.arange(10, dtype=np.float64), | ||||
|             index=date_range("2020-01-01", periods=10), | ||||
|             name="ts", | ||||
|         ) | ||||
|         ts.diff() | ||||
|  | ||||
|         # neg n | ||||
|         result = ts.diff(-1) | ||||
|         expected = ts - ts.shift(-1) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # 0 | ||||
|         result = ts.diff(0) | ||||
|         expected = ts - ts | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_diff_dt64(self): | ||||
|         # datetime diff (GH#3100) | ||||
|         ser = Series(date_range("20130102", periods=5)) | ||||
|         result = ser.diff() | ||||
|         expected = ser - ser.shift(1) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # timedelta diff | ||||
|         result = result - result.shift(1)  # previous result | ||||
|         expected = expected.diff()  # previously expected | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_diff_dt64tz(self): | ||||
|         # with tz | ||||
|         ser = Series( | ||||
|             date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo" | ||||
|         ) | ||||
|         result = ser.diff() | ||||
|         expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "input,output,diff", | ||||
|         [([False, True, True, False, False], [np.nan, True, False, True, False], 1)], | ||||
|     ) | ||||
|     def test_diff_bool(self, input, output, diff): | ||||
|         # boolean series (test for fixing #17294) | ||||
|         ser = Series(input) | ||||
|         result = ser.diff() | ||||
|         expected = Series(output) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_diff_object_dtype(self): | ||||
|         # object series | ||||
|         ser = Series([False, True, 5.0, np.nan, True, False]) | ||||
|         result = ser.diff() | ||||
|         expected = ser - ser.shift(1) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,99 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import is_bool_dtype | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, index, drop_labels, axis, expected_data, expected_index", | ||||
|     [ | ||||
|         # Unique Index | ||||
|         ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]), | ||||
|         ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]), | ||||
|         ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]), | ||||
|         # GH 5248 Non-Unique Index | ||||
|         ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]), | ||||
|         ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]), | ||||
|         ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]), | ||||
|     ], | ||||
| ) | ||||
| def test_drop_unique_and_non_unique_index( | ||||
|     data, index, axis, drop_labels, expected_data, expected_index | ||||
| ): | ||||
|     ser = Series(data=data, index=index) | ||||
|     result = ser.drop(drop_labels, axis=axis) | ||||
|     expected = Series(data=expected_data, index=expected_index) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, index, drop_labels, axis, error_type, error_desc", | ||||
|     [ | ||||
|         # single string/tuple-like | ||||
|         (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"), | ||||
|         # bad axis | ||||
|         (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"), | ||||
|         (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"), | ||||
|     ], | ||||
| ) | ||||
| def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc): | ||||
|     ser = Series(data, index=index) | ||||
|     with pytest.raises(error_type, match=error_desc): | ||||
|         ser.drop(drop_labels, axis=axis) | ||||
|  | ||||
|  | ||||
| def test_drop_with_ignore_errors(): | ||||
|     # errors='ignore' | ||||
|     ser = Series(range(3), index=list("abc")) | ||||
|     result = ser.drop("bc", errors="ignore") | ||||
|     tm.assert_series_equal(result, ser) | ||||
|     result = ser.drop(["a", "d"], errors="ignore") | ||||
|     expected = ser.iloc[1:] | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # GH 8522 | ||||
|     ser = Series([2, 3], index=[True, False]) | ||||
|     assert is_bool_dtype(ser.index) | ||||
|     assert ser.index.dtype == bool | ||||
|     result = ser.drop(True) | ||||
|     expected = Series([3], index=[False]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]]) | ||||
| @pytest.mark.parametrize("drop_labels", [[], [1], [3]]) | ||||
| def test_drop_empty_list(index, drop_labels): | ||||
|     # GH 21494 | ||||
|     expected_index = [i for i in index if i not in drop_labels] | ||||
|     series = Series(index=index, dtype=object).drop(drop_labels) | ||||
|     expected = Series(index=expected_index, dtype=object) | ||||
|     tm.assert_series_equal(series, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, index, drop_labels", | ||||
|     [ | ||||
|         (None, [1, 2, 3], [1, 4]), | ||||
|         (None, [1, 2, 2], [1, 4]), | ||||
|         ([2, 3], [0, 1], [False, True]), | ||||
|     ], | ||||
| ) | ||||
| def test_drop_non_empty_list(data, index, drop_labels): | ||||
|     # GH 21494 and GH 16877 | ||||
|     dtype = object if data is None else None | ||||
|     ser = Series(data=data, index=index, dtype=dtype) | ||||
|     with pytest.raises(KeyError, match="not found in axis"): | ||||
|         ser.drop(drop_labels) | ||||
|  | ||||
|  | ||||
| def test_drop_index_ea_dtype(any_numeric_ea_dtype): | ||||
|     # GH#45860 | ||||
|     df = Series(100, index=Index([1, 2, 2], dtype=any_numeric_ea_dtype)) | ||||
|     idx = Index([df.index[1]]) | ||||
|     result = df.drop(idx) | ||||
|     expected = Series(100, index=Index([1], dtype=any_numeric_ea_dtype)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,267 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "keep, expected", | ||||
|     [ | ||||
|         ("first", Series([False, False, False, False, True, True, False])), | ||||
|         ("last", Series([False, True, True, False, False, False, False])), | ||||
|         (False, Series([False, True, True, False, True, True, False])), | ||||
|     ], | ||||
| ) | ||||
| def test_drop_duplicates(any_numpy_dtype, keep, expected): | ||||
|     tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype)) | ||||
|  | ||||
|     if tc.dtype == "bool": | ||||
|         pytest.skip("tested separately in test_drop_duplicates_bool") | ||||
|  | ||||
|     tm.assert_series_equal(tc.duplicated(keep=keep), expected) | ||||
|     tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) | ||||
|     sc = tc.copy() | ||||
|     return_value = sc.drop_duplicates(keep=keep, inplace=True) | ||||
|     assert return_value is None | ||||
|     tm.assert_series_equal(sc, tc[~expected]) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "keep, expected", | ||||
|     [ | ||||
|         ("first", Series([False, False, True, True])), | ||||
|         ("last", Series([True, True, False, False])), | ||||
|         (False, Series([True, True, True, True])), | ||||
|     ], | ||||
| ) | ||||
| def test_drop_duplicates_bool(keep, expected): | ||||
|     tc = Series([True, False, True, False]) | ||||
|  | ||||
|     tm.assert_series_equal(tc.duplicated(keep=keep), expected) | ||||
|     tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) | ||||
|     sc = tc.copy() | ||||
|     return_value = sc.drop_duplicates(keep=keep, inplace=True) | ||||
|     tm.assert_series_equal(sc, tc[~expected]) | ||||
|     assert return_value is None | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("values", [[], list(range(5))]) | ||||
| def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values): | ||||
|     tc = Series(values, dtype=np.dtype(any_numpy_dtype)) | ||||
|     expected = Series([False] * len(tc), dtype="bool") | ||||
|  | ||||
|     if tc.dtype == "bool": | ||||
|         # 0 -> False and 1-> True | ||||
|         # any other value would be duplicated | ||||
|         tc = tc[:2] | ||||
|         expected = expected[:2] | ||||
|  | ||||
|     tm.assert_series_equal(tc.duplicated(keep=keep), expected) | ||||
|  | ||||
|     result_dropped = tc.drop_duplicates(keep=keep) | ||||
|     tm.assert_series_equal(result_dropped, tc) | ||||
|  | ||||
|     # validate shallow copy | ||||
|     assert result_dropped is not tc | ||||
|  | ||||
|  | ||||
| class TestSeriesDropDuplicates: | ||||
|     @pytest.fixture( | ||||
|         params=["int_", "uint", "float64", "str_", "timedelta64[h]", "datetime64[D]"] | ||||
|     ) | ||||
|     def dtype(self, request): | ||||
|         return request.param | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def cat_series_unused_category(self, dtype, ordered): | ||||
|         # Test case 1 | ||||
|         cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) | ||||
|  | ||||
|         input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype)) | ||||
|         cat = Categorical(input1, categories=cat_array, ordered=ordered) | ||||
|         tc1 = Series(cat) | ||||
|         return tc1 | ||||
|  | ||||
|     def test_drop_duplicates_categorical_non_bool(self, cat_series_unused_category): | ||||
|         tc1 = cat_series_unused_category | ||||
|  | ||||
|         expected = Series([False, False, False, True]) | ||||
|  | ||||
|         result = tc1.duplicated() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = tc1.drop_duplicates() | ||||
|         tm.assert_series_equal(result, tc1[~expected]) | ||||
|  | ||||
|         sc = tc1.copy() | ||||
|         return_value = sc.drop_duplicates(inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc1[~expected]) | ||||
|  | ||||
|     def test_drop_duplicates_categorical_non_bool_keeplast( | ||||
|         self, cat_series_unused_category | ||||
|     ): | ||||
|         tc1 = cat_series_unused_category | ||||
|  | ||||
|         expected = Series([False, False, True, False]) | ||||
|  | ||||
|         result = tc1.duplicated(keep="last") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = tc1.drop_duplicates(keep="last") | ||||
|         tm.assert_series_equal(result, tc1[~expected]) | ||||
|  | ||||
|         sc = tc1.copy() | ||||
|         return_value = sc.drop_duplicates(keep="last", inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc1[~expected]) | ||||
|  | ||||
|     def test_drop_duplicates_categorical_non_bool_keepfalse( | ||||
|         self, cat_series_unused_category | ||||
|     ): | ||||
|         tc1 = cat_series_unused_category | ||||
|  | ||||
|         expected = Series([False, False, True, True]) | ||||
|  | ||||
|         result = tc1.duplicated(keep=False) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = tc1.drop_duplicates(keep=False) | ||||
|         tm.assert_series_equal(result, tc1[~expected]) | ||||
|  | ||||
|         sc = tc1.copy() | ||||
|         return_value = sc.drop_duplicates(keep=False, inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc1[~expected]) | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def cat_series(self, dtype, ordered): | ||||
|         # no unused categories, unlike cat_series_unused_category | ||||
|         cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) | ||||
|  | ||||
|         input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) | ||||
|         cat = Categorical(input2, categories=cat_array, ordered=ordered) | ||||
|         tc2 = Series(cat) | ||||
|         return tc2 | ||||
|  | ||||
|     def test_drop_duplicates_categorical_non_bool2(self, cat_series): | ||||
|         tc2 = cat_series | ||||
|  | ||||
|         expected = Series([False, False, False, False, True, True, False]) | ||||
|  | ||||
|         result = tc2.duplicated() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = tc2.drop_duplicates() | ||||
|         tm.assert_series_equal(result, tc2[~expected]) | ||||
|  | ||||
|         sc = tc2.copy() | ||||
|         return_value = sc.drop_duplicates(inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc2[~expected]) | ||||
|  | ||||
|     def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series): | ||||
|         tc2 = cat_series | ||||
|  | ||||
|         expected = Series([False, True, True, False, False, False, False]) | ||||
|  | ||||
|         result = tc2.duplicated(keep="last") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = tc2.drop_duplicates(keep="last") | ||||
|         tm.assert_series_equal(result, tc2[~expected]) | ||||
|  | ||||
|         sc = tc2.copy() | ||||
|         return_value = sc.drop_duplicates(keep="last", inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc2[~expected]) | ||||
|  | ||||
|     def test_drop_duplicates_categorical_non_bool2_keepfalse(self, cat_series): | ||||
|         tc2 = cat_series | ||||
|  | ||||
|         expected = Series([False, True, True, False, True, True, False]) | ||||
|  | ||||
|         result = tc2.duplicated(keep=False) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = tc2.drop_duplicates(keep=False) | ||||
|         tm.assert_series_equal(result, tc2[~expected]) | ||||
|  | ||||
|         sc = tc2.copy() | ||||
|         return_value = sc.drop_duplicates(keep=False, inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc2[~expected]) | ||||
|  | ||||
|     def test_drop_duplicates_categorical_bool(self, ordered): | ||||
|         tc = Series( | ||||
|             Categorical( | ||||
|                 [True, False, True, False], categories=[True, False], ordered=ordered | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         expected = Series([False, False, True, True]) | ||||
|         tm.assert_series_equal(tc.duplicated(), expected) | ||||
|         tm.assert_series_equal(tc.drop_duplicates(), tc[~expected]) | ||||
|         sc = tc.copy() | ||||
|         return_value = sc.drop_duplicates(inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc[~expected]) | ||||
|  | ||||
|         expected = Series([True, True, False, False]) | ||||
|         tm.assert_series_equal(tc.duplicated(keep="last"), expected) | ||||
|         tm.assert_series_equal(tc.drop_duplicates(keep="last"), tc[~expected]) | ||||
|         sc = tc.copy() | ||||
|         return_value = sc.drop_duplicates(keep="last", inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc[~expected]) | ||||
|  | ||||
|         expected = Series([True, True, True, True]) | ||||
|         tm.assert_series_equal(tc.duplicated(keep=False), expected) | ||||
|         tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected]) | ||||
|         sc = tc.copy() | ||||
|         return_value = sc.drop_duplicates(keep=False, inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(sc, tc[~expected]) | ||||
|  | ||||
|     def test_drop_duplicates_categorical_bool_na(self, nulls_fixture): | ||||
|         # GH#44351 | ||||
|         ser = Series( | ||||
|             Categorical( | ||||
|                 [True, False, True, False, nulls_fixture], | ||||
|                 categories=[True, False], | ||||
|                 ordered=True, | ||||
|             ) | ||||
|         ) | ||||
|         result = ser.drop_duplicates() | ||||
|         expected = Series( | ||||
|             Categorical([True, False, np.nan], categories=[True, False], ordered=True), | ||||
|             index=[0, 1, 4], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_drop_duplicates_ignore_index(self): | ||||
|         # GH#48304 | ||||
|         ser = Series([1, 2, 2, 3]) | ||||
|         result = ser.drop_duplicates(ignore_index=True) | ||||
|         expected = Series([1, 2, 3]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_duplicated_arrow_dtype(self): | ||||
|         pytest.importorskip("pyarrow") | ||||
|         ser = Series([True, False, None, False], dtype="bool[pyarrow]") | ||||
|         result = ser.drop_duplicates() | ||||
|         expected = Series([True, False, None], dtype="bool[pyarrow]") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_drop_duplicates_arrow_strings(self): | ||||
|         # GH#54904 | ||||
|         pa = pytest.importorskip("pyarrow") | ||||
|         ser = Series(["a", "a"], dtype=pd.ArrowDtype(pa.string())) | ||||
|         result = ser.drop_duplicates() | ||||
|         expecetd = Series(["a"], dtype=pd.ArrowDtype(pa.string())) | ||||
|         tm.assert_series_equal(result, expecetd) | ||||
| @ -0,0 +1,117 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     IntervalIndex, | ||||
|     NaT, | ||||
|     Period, | ||||
|     Series, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestDropna: | ||||
|     def test_dropna_empty(self): | ||||
|         ser = Series([], dtype=object) | ||||
|  | ||||
|         assert len(ser.dropna()) == 0 | ||||
|         return_value = ser.dropna(inplace=True) | ||||
|         assert return_value is None | ||||
|         assert len(ser) == 0 | ||||
|  | ||||
|         # invalid axis | ||||
|         msg = "No axis named 1 for object type Series" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.dropna(axis=1) | ||||
|  | ||||
|     def test_dropna_preserve_name(self, datetime_series): | ||||
|         datetime_series[:5] = np.nan | ||||
|         result = datetime_series.dropna() | ||||
|         assert result.name == datetime_series.name | ||||
|         name = datetime_series.name | ||||
|         ts = datetime_series.copy() | ||||
|         return_value = ts.dropna(inplace=True) | ||||
|         assert return_value is None | ||||
|         assert ts.name == name | ||||
|  | ||||
|     def test_dropna_no_nan(self): | ||||
|         for ser in [ | ||||
|             Series([1, 2, 3], name="x"), | ||||
|             Series([False, True, False], name="x"), | ||||
|         ]: | ||||
|             result = ser.dropna() | ||||
|             tm.assert_series_equal(result, ser) | ||||
|             assert result is not ser | ||||
|  | ||||
|             s2 = ser.copy() | ||||
|             return_value = s2.dropna(inplace=True) | ||||
|             assert return_value is None | ||||
|             tm.assert_series_equal(s2, ser) | ||||
|  | ||||
|     def test_dropna_intervals(self): | ||||
|         ser = Series( | ||||
|             [np.nan, 1, 2, 3], | ||||
|             IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]), | ||||
|         ) | ||||
|  | ||||
|         result = ser.dropna() | ||||
|         expected = ser.iloc[1:] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_dropna_period_dtype(self): | ||||
|         # GH#13737 | ||||
|         ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")]) | ||||
|         result = ser.dropna() | ||||
|         expected = Series([Period("2011-01", freq="M")]) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_datetime64_tz_dropna(self, unit): | ||||
|         # DatetimeLikeBlock | ||||
|         ser = Series( | ||||
|             [ | ||||
|                 Timestamp("2011-01-01 10:00"), | ||||
|                 NaT, | ||||
|                 Timestamp("2011-01-03 10:00"), | ||||
|                 NaT, | ||||
|             ], | ||||
|             dtype=f"M8[{unit}]", | ||||
|         ) | ||||
|         result = ser.dropna() | ||||
|         expected = Series( | ||||
|             [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")], | ||||
|             index=[0, 2], | ||||
|             dtype=f"M8[{unit}]", | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # DatetimeTZBlock | ||||
|         idx = DatetimeIndex( | ||||
|             ["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz="Asia/Tokyo" | ||||
|         ).as_unit(unit) | ||||
|         ser = Series(idx) | ||||
|         assert ser.dtype == f"datetime64[{unit}, Asia/Tokyo]" | ||||
|         result = ser.dropna() | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), | ||||
|                 Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"), | ||||
|             ], | ||||
|             index=[0, 2], | ||||
|             dtype=f"datetime64[{unit}, Asia/Tokyo]", | ||||
|         ) | ||||
|         assert result.dtype == f"datetime64[{unit}, Asia/Tokyo]" | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("val", [1, 1.5]) | ||||
|     def test_dropna_ignore_index(self, val): | ||||
|         # GH#31725 | ||||
|         ser = Series([1, 2, val], index=[3, 2, 1]) | ||||
|         result = ser.dropna(ignore_index=True) | ||||
|         expected = Series([1, 2, val]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         ser.dropna(ignore_index=True, inplace=True) | ||||
|         tm.assert_series_equal(ser, expected) | ||||
| @ -0,0 +1,7 @@ | ||||
| import numpy as np | ||||
|  | ||||
|  | ||||
| class TestSeriesDtypes: | ||||
|     def test_dtype(self, datetime_series): | ||||
|         assert datetime_series.dtype == np.dtype("float64") | ||||
|         assert datetime_series.dtypes == np.dtype("float64") | ||||
| @ -0,0 +1,77 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     Categorical, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "keep, expected", | ||||
|     [ | ||||
|         ("first", Series([False, False, True, False, True], name="name")), | ||||
|         ("last", Series([True, True, False, False, False], name="name")), | ||||
|         (False, Series([True, True, True, False, True], name="name")), | ||||
|     ], | ||||
| ) | ||||
| def test_duplicated_keep(keep, expected): | ||||
|     ser = Series(["a", "b", "b", "c", "a"], name="name") | ||||
|  | ||||
|     result = ser.duplicated(keep=keep) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "keep, expected", | ||||
|     [ | ||||
|         ("first", Series([False, False, True, False, True])), | ||||
|         ("last", Series([True, True, False, False, False])), | ||||
|         (False, Series([True, True, True, False, True])), | ||||
|     ], | ||||
| ) | ||||
| def test_duplicated_nan_none(keep, expected): | ||||
|     ser = Series([np.nan, 3, 3, None, np.nan], dtype=object) | ||||
|  | ||||
|     result = ser.duplicated(keep=keep) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_duplicated_categorical_bool_na(nulls_fixture): | ||||
|     # GH#44351 | ||||
|     ser = Series( | ||||
|         Categorical( | ||||
|             [True, False, True, False, nulls_fixture], | ||||
|             categories=[True, False], | ||||
|             ordered=True, | ||||
|         ) | ||||
|     ) | ||||
|     result = ser.duplicated() | ||||
|     expected = Series([False, False, True, True, False]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "keep, vals", | ||||
|     [ | ||||
|         ("last", [True, True, False]), | ||||
|         ("first", [False, True, True]), | ||||
|         (False, [True, True, True]), | ||||
|     ], | ||||
| ) | ||||
| def test_duplicated_mask(keep, vals): | ||||
|     # GH#48150 | ||||
|     ser = Series([1, 2, NA, NA, NA], dtype="Int64") | ||||
|     result = ser.duplicated(keep=keep) | ||||
|     expected = Series([False, False] + vals) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_duplicated_mask_no_duplicated_na(keep): | ||||
|     # GH#48150 | ||||
|     ser = Series([1, 2, NA], dtype="Int64") | ||||
|     result = ser.duplicated(keep=keep) | ||||
|     expected = Series([False, False, False]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,145 @@ | ||||
| from contextlib import nullcontext | ||||
| import copy | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.missing import is_matching_na | ||||
| from pandas.compat.numpy import np_version_gte1p25 | ||||
|  | ||||
| from pandas.core.dtypes.common import is_float | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "arr, idx", | ||||
|     [ | ||||
|         ([1, 2, 3, 4], [0, 2, 1, 3]), | ||||
|         ([1, np.nan, 3, np.nan], [0, 2, 1, 3]), | ||||
|         ( | ||||
|             [1, np.nan, 3, np.nan], | ||||
|             MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c"), (3, "c")]), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_equals(arr, idx): | ||||
|     s1 = Series(arr, index=idx) | ||||
|     s2 = s1.copy() | ||||
|     assert s1.equals(s2) | ||||
|  | ||||
|     s1[1] = 9 | ||||
|     assert not s1.equals(s2) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None] | ||||
| ) | ||||
| def test_equals_list_array(val): | ||||
|     # GH20676 Verify equals operator for list of Numpy arrays | ||||
|     arr = np.array([1, 2]) | ||||
|     s1 = Series([arr, arr]) | ||||
|     s2 = s1.copy() | ||||
|     assert s1.equals(s2) | ||||
|  | ||||
|     s1[1] = val | ||||
|  | ||||
|     cm = ( | ||||
|         tm.assert_produces_warning(FutureWarning, check_stacklevel=False) | ||||
|         if isinstance(val, str) and not np_version_gte1p25 | ||||
|         else nullcontext() | ||||
|     ) | ||||
|     with cm: | ||||
|         assert not s1.equals(s2) | ||||
|  | ||||
|  | ||||
| def test_equals_false_negative(): | ||||
|     # GH8437 Verify false negative behavior of equals function for dtype object | ||||
|     arr = [False, np.nan] | ||||
|     s1 = Series(arr) | ||||
|     s2 = s1.copy() | ||||
|     s3 = Series(index=range(2), dtype=object) | ||||
|     s4 = s3.copy() | ||||
|     s5 = s3.copy() | ||||
|     s6 = s3.copy() | ||||
|  | ||||
|     s3[:-1] = s4[:-1] = s5[0] = s6[0] = False | ||||
|     assert s1.equals(s1) | ||||
|     assert s1.equals(s2) | ||||
|     assert s1.equals(s3) | ||||
|     assert s1.equals(s4) | ||||
|     assert s1.equals(s5) | ||||
|     assert s5.equals(s6) | ||||
|  | ||||
|  | ||||
| def test_equals_matching_nas(): | ||||
|     # matching but not identical NAs | ||||
|     left = Series([np.datetime64("NaT")], dtype=object) | ||||
|     right = Series([np.datetime64("NaT")], dtype=object) | ||||
|     assert left.equals(right) | ||||
|     with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): | ||||
|         assert Index(left).equals(Index(right)) | ||||
|     assert left.array.equals(right.array) | ||||
|  | ||||
|     left = Series([np.timedelta64("NaT")], dtype=object) | ||||
|     right = Series([np.timedelta64("NaT")], dtype=object) | ||||
|     assert left.equals(right) | ||||
|     with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): | ||||
|         assert Index(left).equals(Index(right)) | ||||
|     assert left.array.equals(right.array) | ||||
|  | ||||
|     left = Series([np.float64("NaN")], dtype=object) | ||||
|     right = Series([np.float64("NaN")], dtype=object) | ||||
|     assert left.equals(right) | ||||
|     assert Index(left, dtype=left.dtype).equals(Index(right, dtype=right.dtype)) | ||||
|     assert left.array.equals(right.array) | ||||
|  | ||||
|  | ||||
| def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2): | ||||
|     # GH#39650 | ||||
|     left = nulls_fixture | ||||
|     right = nulls_fixture2 | ||||
|     if hasattr(right, "copy"): | ||||
|         right = right.copy() | ||||
|     else: | ||||
|         right = copy.copy(right) | ||||
|  | ||||
|     ser = Series([left], dtype=object) | ||||
|     ser2 = Series([right], dtype=object) | ||||
|  | ||||
|     if is_matching_na(left, right): | ||||
|         assert ser.equals(ser2) | ||||
|     elif (left is None and is_float(right)) or (right is None and is_float(left)): | ||||
|         assert ser.equals(ser2) | ||||
|     else: | ||||
|         assert not ser.equals(ser2) | ||||
|  | ||||
|  | ||||
| def test_equals_none_vs_nan(): | ||||
|     # GH#39650 | ||||
|     ser = Series([1, None], dtype=object) | ||||
|     ser2 = Series([1, np.nan], dtype=object) | ||||
|  | ||||
|     assert ser.equals(ser2) | ||||
|     assert Index(ser, dtype=ser.dtype).equals(Index(ser2, dtype=ser2.dtype)) | ||||
|     assert ser.array.equals(ser2.array) | ||||
|  | ||||
|  | ||||
| def test_equals_None_vs_float(): | ||||
|     # GH#44190 | ||||
|     left = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object) | ||||
|     right = Series([None] * len(left)) | ||||
|  | ||||
|     # these series were found to be equal due to a bug, check that they are correctly | ||||
|     # found to not equal | ||||
|     assert not left.equals(right) | ||||
|     assert not right.equals(left) | ||||
|     assert not left.to_frame().equals(right.to_frame()) | ||||
|     assert not right.to_frame().equals(left.to_frame()) | ||||
|     assert not Index(left, dtype="object").equals(Index(right, dtype="object")) | ||||
|     assert not Index(right, dtype="object").equals(Index(left, dtype="object")) | ||||
| @ -0,0 +1,183 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_basic(): | ||||
|     s = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo") | ||||
|     result = s.explode() | ||||
|     expected = pd.Series( | ||||
|         [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo" | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_mixed_type(): | ||||
|     s = pd.Series( | ||||
|         [[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])], name="foo" | ||||
|     ) | ||||
|     result = s.explode() | ||||
|     expected = pd.Series( | ||||
|         [0, 1, 2, np.nan, None, np.nan, "a", "b"], | ||||
|         index=[0, 0, 0, 1, 2, 3, 4, 4], | ||||
|         dtype=object, | ||||
|         name="foo", | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_empty(): | ||||
|     s = pd.Series(dtype=object) | ||||
|     result = s.explode() | ||||
|     expected = s.copy() | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_nested_lists(): | ||||
|     s = pd.Series([[[1, 2, 3]], [1, 2], 1]) | ||||
|     result = s.explode() | ||||
|     expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_multi_index(): | ||||
|     s = pd.Series( | ||||
|         [[0, 1, 2], np.nan, [], (3, 4)], | ||||
|         name="foo", | ||||
|         index=pd.MultiIndex.from_product([list("ab"), range(2)], names=["foo", "bar"]), | ||||
|     ) | ||||
|     result = s.explode() | ||||
|     index = pd.MultiIndex.from_tuples( | ||||
|         [("a", 0), ("a", 0), ("a", 0), ("a", 1), ("b", 0), ("b", 1), ("b", 1)], | ||||
|         names=["foo", "bar"], | ||||
|     ) | ||||
|     expected = pd.Series( | ||||
|         [0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=object, name="foo" | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_large(): | ||||
|     s = pd.Series([range(256)]).explode() | ||||
|     result = s.explode() | ||||
|     tm.assert_series_equal(result, s) | ||||
|  | ||||
|  | ||||
| def test_invert_array(): | ||||
|     df = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")}) | ||||
|  | ||||
|     listify = df.apply(lambda x: x.array, axis=1) | ||||
|     result = listify.explode() | ||||
|     tm.assert_series_equal(result, df["a"].rename()) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))] | ||||
| ) | ||||
| def test_non_object_dtype(s): | ||||
|     result = s.explode() | ||||
|     tm.assert_series_equal(result, s) | ||||
|  | ||||
|  | ||||
| def test_typical_usecase(): | ||||
|     df = pd.DataFrame( | ||||
|         [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}], | ||||
|         columns=["var1", "var2"], | ||||
|     ) | ||||
|     exploded = df.var1.str.split(",").explode() | ||||
|     result = df[["var2"]].join(exploded) | ||||
|     expected = pd.DataFrame( | ||||
|         {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")}, | ||||
|         columns=["var2", "var1"], | ||||
|         index=[0, 0, 0, 1, 1, 1], | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_nested_EA(): | ||||
|     # a nested EA array | ||||
|     s = pd.Series( | ||||
|         [ | ||||
|             pd.date_range("20170101", periods=3, tz="UTC"), | ||||
|             pd.date_range("20170104", periods=3, tz="UTC"), | ||||
|         ] | ||||
|     ) | ||||
|     result = s.explode() | ||||
|     expected = pd.Series( | ||||
|         pd.date_range("20170101", periods=6, tz="UTC"), index=[0, 0, 0, 1, 1, 1] | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_duplicate_index(): | ||||
|     # GH 28005 | ||||
|     s = pd.Series([[1, 2], [3, 4]], index=[0, 0]) | ||||
|     result = s.explode() | ||||
|     expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_ignore_index(): | ||||
|     # GH 34932 | ||||
|     s = pd.Series([[1, 2], [3, 4]]) | ||||
|     result = s.explode(ignore_index=True) | ||||
|     expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_explode_sets(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/35614 | ||||
|     s = pd.Series([{"a", "b", "c"}], index=[1]) | ||||
|     result = s.explode().sort_values() | ||||
|     expected = pd.Series(["a", "b", "c"], index=[1, 1, 1]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_explode_scalars_can_ignore_index(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/40487 | ||||
|     s = pd.Series([1, 2, 3], index=["a", "b", "c"]) | ||||
|     result = s.explode(ignore_index=True) | ||||
|     expected = pd.Series([1, 2, 3]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ignore_index", [True, False]) | ||||
| def test_explode_pyarrow_list_type(ignore_index): | ||||
|     # GH 53602 | ||||
|     pa = pytest.importorskip("pyarrow") | ||||
|  | ||||
|     data = [ | ||||
|         [None, None], | ||||
|         [1], | ||||
|         [], | ||||
|         [2, 3], | ||||
|         None, | ||||
|     ] | ||||
|     ser = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64()))) | ||||
|     result = ser.explode(ignore_index=ignore_index) | ||||
|     expected = pd.Series( | ||||
|         data=[None, None, 1, None, 2, 3, None], | ||||
|         index=None if ignore_index else [0, 0, 1, 2, 3, 3, 4], | ||||
|         dtype=pd.ArrowDtype(pa.int64()), | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_non_list_type(ignore_index):
    """Exploding a non-list pyarrow Series gives back the same values."""
    pa = pytest.importorskip("pyarrow")
    values = [1, 2, 3]
    int_dtype = pd.ArrowDtype(pa.int64())
    exploded = pd.Series(values, dtype=int_dtype).explode(ignore_index=ignore_index)
    # The same expectation (labels 0, 1, 2) is used for both settings of
    # ``ignore_index``.
    expected = pd.Series([1, 2, 3], dtype="int64[pyarrow]", index=[0, 1, 2])
    tm.assert_series_equal(exploded, expected)
|  | ||||
|  | ||||
def test_str_dtype():
    """``explode`` on a ``str``-dtype Series returns an equal but distinct object."""
    # https://github.com/pandas-dev/pandas/pull/61623
    original = pd.Series(["x", "y"], dtype="str")
    exploded = original.explode()
    assert exploded is not original
    tm.assert_series_equal(exploded, original)
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,38 @@ | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestGetNumericData:
    """Checks for the private ``Series._get_numeric_data`` helper."""

    def test_get_numeric_data_preserve_dtype(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """Int and bool Series pass through; object and datetime come back empty."""
        # get the numeric data
        ints = Series([1, 2, 3])
        numeric = ints._get_numeric_data()
        tm.assert_series_equal(numeric, ints)

        # returned object is a shallow copy: whether the write below is visible
        # in the source depends on the copy-on-write fixture.
        with tm.assert_cow_warning(warn_copy_on_write):
            numeric.iloc[0] = 0
        first_after_write = 1 if using_copy_on_write else 0
        assert ints.iloc[0] == first_after_write

        # Mixed object data -> empty object-dtype Series.
        mixed = Series([1, "2", 3.0])
        empty_object = Series([], dtype=object, index=Index([], dtype=object))
        tm.assert_series_equal(mixed._get_numeric_data(), empty_object)

        # Boolean data is returned whole.
        flags = Series([True, False, True])
        tm.assert_series_equal(flags._get_numeric_data(), flags)

        # Datetime data -> empty Series that keeps the M8[ns] dtype.
        stamps = Series(date_range("20130101", periods=3))
        empty_datetime = Series([], dtype="M8[ns]", index=Index([], dtype=object))
        tm.assert_series_equal(stamps._get_numeric_data(), empty_datetime)
| @ -0,0 +1,8 @@ | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_head_tail(string_series): | ||||
|     tm.assert_series_equal(string_series.head(), string_series[:5]) | ||||
|     tm.assert_series_equal(string_series.head(0), string_series[0:0]) | ||||
|     tm.assert_series_equal(string_series.tail(), string_series[-5:]) | ||||
|     tm.assert_series_equal(string_series.tail(0), string_series[0:0]) | ||||
| @ -0,0 +1,56 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     interval_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestInferObjects: | ||||
|     def test_copy(self, index_or_series): | ||||
|         # GH#50096 | ||||
|         # case where we don't need to do inference because it is already non-object | ||||
|         obj = index_or_series(np.array([1, 2, 3], dtype="int64")) | ||||
|  | ||||
|         result = obj.infer_objects(copy=False) | ||||
|         assert tm.shares_memory(result, obj) | ||||
|  | ||||
|         # case where we try to do inference but can't do better than object | ||||
|         obj2 = index_or_series(np.array(["foo", 2], dtype=object)) | ||||
|         result2 = obj2.infer_objects(copy=False) | ||||
|         assert tm.shares_memory(result2, obj2) | ||||
|  | ||||
|     def test_infer_objects_series(self, index_or_series): | ||||
|         # GH#11221 | ||||
|         actual = index_or_series(np.array([1, 2, 3], dtype="O")).infer_objects() | ||||
|         expected = index_or_series([1, 2, 3]) | ||||
|         tm.assert_equal(actual, expected) | ||||
|  | ||||
|         actual = index_or_series(np.array([1, 2, 3, None], dtype="O")).infer_objects() | ||||
|         expected = index_or_series([1.0, 2.0, 3.0, np.nan]) | ||||
|         tm.assert_equal(actual, expected) | ||||
|  | ||||
|         # only soft conversions, unconvertible pass thru unchanged | ||||
|  | ||||
|         obj = index_or_series(np.array([1, 2, 3, None, "a"], dtype="O")) | ||||
|         actual = obj.infer_objects() | ||||
|         expected = index_or_series([1, 2, 3, None, "a"], dtype=object) | ||||
|  | ||||
|         assert actual.dtype == "object" | ||||
|         tm.assert_equal(actual, expected) | ||||
|  | ||||
|     def test_infer_objects_interval(self, index_or_series): | ||||
|         # GH#50090 | ||||
|         ii = interval_range(1, 10) | ||||
|         obj = index_or_series(ii) | ||||
|  | ||||
|         result = obj.astype(object).infer_objects() | ||||
|         tm.assert_equal(result, obj) | ||||
|  | ||||
|     def test_infer_objects_bytes(self): | ||||
|         # GH#49650 | ||||
|         ser = Series([b"a"], dtype="bytes") | ||||
|         expected = ser.copy() | ||||
|         result = ser.infer_objects() | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,192 @@ | ||||
| from io import StringIO | ||||
| from string import ascii_uppercase | ||||
| import textwrap | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._config import using_string_dtype | ||||
|  | ||||
| from pandas.compat import ( | ||||
|     HAS_PYARROW, | ||||
|     PYPY, | ||||
| ) | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_info_categorical_column_just_works():
    """Smoke test: ``info`` runs on a categorical Series and on a filtered one."""
    n = 2500
    letters = np.array(list("abcdefghij"))
    codes = np.random.default_rng(2).integers(0, 10, size=n, dtype=int)
    cat = Series(letters.take(codes)).astype("category")
    cat.isna()
    cat.info(buf=StringIO())

    # Repeat on the boolean-filtered subset equal to "d".
    only_d = cat[cat == "d"]
    only_d.info(buf=StringIO())
|  | ||||
|  | ||||
def test_info_categorical():
    """Smoke test: ``info`` runs on a Series indexed by a CategoricalIndex."""
    # GH14298
    ser = Series(np.zeros(2), index=CategoricalIndex(["a", "b"]))
    sink = StringIO()
    ser.info(buf=sink)
|  | ||||
|  | ||||
@pytest.mark.parametrize("verbose", [True, False])
def test_info_series(
    lexsorted_two_level_string_multiindex, verbose, using_infer_string
):
    """Compare the full text of ``Series.info`` for a MultiIndex-ed Series.

    The name/count/dtype table is only expected when ``verbose`` is true.
    """
    mi = lexsorted_two_level_string_multiindex
    ser = Series(range(len(mi)), index=mi, name="sth")

    expected = textwrap.dedent(
        """\
        <class 'pandas.core.series.Series'>
        MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
        """
    )
    if verbose:
        expected += textwrap.dedent(
            """\
            Series name: sth
            Non-Null Count  Dtype
            --------------  -----
            10 non-null     int64
            """
        )
    # No "+" suffix is expected when string inference is on and pyarrow is
    # available; otherwise the memory figure carries one.
    if using_infer_string and HAS_PYARROW:
        qualifier = ""
    else:
        qualifier = "+"
    expected += textwrap.dedent(
        f"""\
        dtypes: int64(1)
        memory usage: {ser.memory_usage()}.0{qualifier} bytes
        """
    )

    sink = StringIO()
    ser.info(verbose=verbose, buf=sink)
    assert sink.getvalue() == expected
|  | ||||
|  | ||||
def test_info_memory():
    """``info`` prints ``memory_usage()`` rendered as a float number of bytes."""
    ser = Series([1, 2], dtype="i8")
    sink = StringIO()
    ser.info(buf=sink)
    memory_bytes = float(ser.memory_usage())
    expected = textwrap.dedent(
        f"""\
    <class 'pandas.core.series.Series'>
    RangeIndex: 2 entries, 0 to 1
    Series name: None
    Non-Null Count  Dtype
    --------------  -----
    2 non-null      int64
    dtypes: int64(1)
    memory usage: {memory_bytes} bytes
    """
    )
    assert sink.getvalue() == expected
|  | ||||
|  | ||||
def test_info_wide():
    """Passing ``max_cols`` to ``Series.info`` raises ``ValueError``."""
    values = np.random.default_rng(2).standard_normal(101)
    ser = Series(values)
    msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
    with pytest.raises(ValueError, match=msg):
        ser.info(max_cols=1)
|  | ||||
|  | ||||
def test_info_shows_dtypes():
    """The dtype name appears next to the non-null count for several dtypes."""
    n = 10
    for dtype in (
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ):
        # Same seeded 0/1 integers every time, cast to the dtype under test.
        raw = np.random.default_rng(2).integers(2, size=n)
        sink = StringIO()
        Series(raw.astype(dtype)).info(buf=sink)
        name = f"{n:d} non-null     {dtype}"
        assert name in sink.getvalue()
|  | ||||
|  | ||||
@pytest.mark.xfail(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """``deep=True`` must report strictly more memory for object-holding data."""
    ser = Series({"a": [1]}, index=["foo"])
    deep_usage = ser.memory_usage(index=True, deep=True)
    shallow_usage = ser.memory_usage(index=True)
    assert deep_usage > shallow_usage

    s_object = Series({"a": ["a"]})
    deep_usage = s_object.memory_usage(deep=True)
    shallow_usage = s_object.memory_usage()
    assert deep_usage > shallow_usage
|  | ||||
|  | ||||
@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """``deep=True`` must report the same memory as the shallow measurement."""
    ser = Series({"a": [1]}, index=["foo"])
    deep_usage = ser.memory_usage(index=True, deep=True)
    shallow_usage = ser.memory_usage(index=True)
    assert deep_usage == shallow_usage

    s_object = Series({"a": ["a"]})
    deep_usage = s_object.memory_usage(deep=True)
    shallow_usage = s_object.memory_usage()
    assert deep_usage == shallow_usage
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "index, plus",
    [
        ([1, 2, 3], False),
        (Index(list("ABC"), dtype="str"), not (using_string_dtype() and HAS_PYARROW)),
        (Index(list("ABC"), dtype=object), True),
        (MultiIndex.from_product([range(3), range(3)]), False),
        (
            MultiIndex.from_product([range(3), ["foo", "bar"]]),
            not (using_string_dtype() and HAS_PYARROW),
        ),
    ],
)
def test_info_memory_usage_qualified(index, plus):
    """A "+" appears in the ``info`` output exactly when ``plus`` is true."""
    sink = StringIO()
    Series(1, index=index).info(buf=sink)
    has_plus = "+" in sink.getvalue()
    assert has_plus == plus
|  | ||||
|  | ||||
def test_info_memory_usage_bug_on_multiindex():
    """Compare deep memory usage of a long Series with its unstacked frame."""
    # GH 14308
    # memory usage introspection should not materialize .values
    N = 100
    M = len(ascii_uppercase)
    levels = [list(ascii_uppercase), date_range("20160101", periods=N)]
    index = MultiIndex.from_product(levels, names=["id", "date"])
    s = Series(np.random.default_rng(2).standard_normal(N * M), index=index)

    unstacked = s.unstack("id")
    # Same number of values either way, so the raw buffers match in size.
    assert s.values.nbytes == unstacked.values.nbytes
    assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()

    # high upper bound
    wide_total = unstacked.memory_usage(deep=True).sum()
    long_total = s.memory_usage(deep=True)
    assert wide_total - long_total < 2000
| @ -0,0 +1,868 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        "linear",
        "index",
        "values",
        "nearest",
        "slinear",
        "zero",
        "quadratic",
        "cubic",
        "barycentric",
        "krogh",
        "polynomial",
        "spline",
        "piecewise_polynomial",
        "from_derivatives",
        "pchip",
        "akima",
        "cubicspline",
    ]
)
def nontemporal_method(request):
    """Fixture that returns a (method name, required kwargs) pair.

    Method 'time' is deliberately left out of the parameterization: it needs
    a Series with a DatetimeIndex and is generally tested separately from
    these non-temporal methods.
    """
    name = request.param
    # Only 'spline' and 'polynomial' are given an ``order`` keyword.
    if name in ("spline", "polynomial"):
        return name, {"order": 1}
    return name, {}
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        "linear",
        "slinear",
        "zero",
        "quadratic",
        "cubic",
        "barycentric",
        "krogh",
        "polynomial",
        "spline",
        "piecewise_polynomial",
        "from_derivatives",
        "pchip",
        "akima",
        "cubicspline",
    ]
)
def interp_methods_ind(request):
    """Fixture that returns a (method name, required kwargs) pair to
    be tested for various Index types.

    The methods 'time', 'index', 'nearest' and 'values' are not part of
    the parameterization.
    """
    name = request.param
    # Only 'spline' and 'polynomial' are given an ``order`` keyword.
    if name in ("spline", "polynomial"):
        return name, {"order": 1}
    return name, {}
|  | ||||
|  | ||||
| class TestSeriesInterpolateData: | ||||
|     @pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method") | ||||
|     def test_interpolate_period_values(self): | ||||
|         orig = Series(date_range("2012-01-01", periods=5)) | ||||
|         ser = orig.copy() | ||||
|         ser[2] = pd.NaT | ||||
|  | ||||
|         # period cast | ||||
|         ser_per = ser.dt.to_period("D") | ||||
|         res_per = ser_per.interpolate() | ||||
|         expected_per = orig.dt.to_period("D") | ||||
|         tm.assert_series_equal(res_per, expected_per) | ||||
|  | ||||
|     def test_interpolate(self, datetime_series): | ||||
|         ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index) | ||||
|  | ||||
|         ts_copy = ts.copy() | ||||
|         ts_copy[5:10] = np.nan | ||||
|  | ||||
|         linear_interp = ts_copy.interpolate(method="linear") | ||||
|         tm.assert_series_equal(linear_interp, ts) | ||||
|  | ||||
|         ord_ts = Series( | ||||
|             [d.toordinal() for d in datetime_series.index], index=datetime_series.index | ||||
|         ).astype(float) | ||||
|  | ||||
|         ord_ts_copy = ord_ts.copy() | ||||
|         ord_ts_copy[5:10] = np.nan | ||||
|  | ||||
|         time_interp = ord_ts_copy.interpolate(method="time") | ||||
|         tm.assert_series_equal(time_interp, ord_ts) | ||||
|  | ||||
|     def test_interpolate_time_raises_for_non_timeseries(self): | ||||
|         # When method='time' is used on a non-TimeSeries that contains a null | ||||
|         # value, a ValueError should be raised. | ||||
|         non_ts = Series([0, 1, 2, np.nan]) | ||||
|         msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             non_ts.interpolate(method="time") | ||||
|  | ||||
|     def test_interpolate_cubicspline(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         ser = Series([10, 11, 12, 13]) | ||||
|  | ||||
|         expected = Series( | ||||
|             [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], | ||||
|             index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), | ||||
|         ) | ||||
|         # interpolate at new_index | ||||
|         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( | ||||
|             float | ||||
|         ) | ||||
|         result = ser.reindex(new_index).interpolate(method="cubicspline").loc[1:3] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interpolate_pchip(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         ser = Series(np.sort(np.random.default_rng(2).uniform(size=100))) | ||||
|  | ||||
|         # interpolate at new_index | ||||
|         new_index = ser.index.union( | ||||
|             Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]) | ||||
|         ).astype(float) | ||||
|         interp_s = ser.reindex(new_index).interpolate(method="pchip") | ||||
|         # does not blow up, GH5977 | ||||
|         interp_s.loc[49:51] | ||||
|  | ||||
|     def test_interpolate_akima(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         ser = Series([10, 11, 12, 13]) | ||||
|  | ||||
|         # interpolate at new_index where `der` is zero | ||||
|         expected = Series( | ||||
|             [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], | ||||
|             index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), | ||||
|         ) | ||||
|         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( | ||||
|             float | ||||
|         ) | ||||
|         interp_s = ser.reindex(new_index).interpolate(method="akima") | ||||
|         tm.assert_series_equal(interp_s.loc[1:3], expected) | ||||
|  | ||||
|         # interpolate at new_index where `der` is a non-zero int | ||||
|         expected = Series( | ||||
|             [11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0], | ||||
|             index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), | ||||
|         ) | ||||
|         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( | ||||
|             float | ||||
|         ) | ||||
|         interp_s = ser.reindex(new_index).interpolate(method="akima", der=1) | ||||
|         tm.assert_series_equal(interp_s.loc[1:3], expected) | ||||
|  | ||||
|     def test_interpolate_piecewise_polynomial(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         ser = Series([10, 11, 12, 13]) | ||||
|  | ||||
|         expected = Series( | ||||
|             [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], | ||||
|             index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), | ||||
|         ) | ||||
|         # interpolate at new_index | ||||
|         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( | ||||
|             float | ||||
|         ) | ||||
|         interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial") | ||||
|         tm.assert_series_equal(interp_s.loc[1:3], expected) | ||||
|  | ||||
|     def test_interpolate_from_derivatives(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         ser = Series([10, 11, 12, 13]) | ||||
|  | ||||
|         expected = Series( | ||||
|             [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], | ||||
|             index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), | ||||
|         ) | ||||
|         # interpolate at new_index | ||||
|         new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( | ||||
|             float | ||||
|         ) | ||||
|         interp_s = ser.reindex(new_index).interpolate(method="from_derivatives") | ||||
|         tm.assert_series_equal(interp_s.loc[1:3], expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "kwargs", | ||||
|         [ | ||||
|             {}, | ||||
|             pytest.param( | ||||
|                 {"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy") | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_interpolate_corners(self, kwargs): | ||||
|         s = Series([np.nan, np.nan]) | ||||
|         tm.assert_series_equal(s.interpolate(**kwargs), s) | ||||
|  | ||||
|         s = Series([], dtype=object).interpolate() | ||||
|         tm.assert_series_equal(s.interpolate(**kwargs), s) | ||||
|  | ||||
|     def test_interpolate_index_values(self): | ||||
|         s = Series(np.nan, index=np.sort(np.random.default_rng(2).random(30))) | ||||
|         s.loc[::3] = np.random.default_rng(2).standard_normal(10) | ||||
|  | ||||
|         vals = s.index.values.astype(float) | ||||
|  | ||||
|         result = s.interpolate(method="index") | ||||
|  | ||||
|         expected = s.copy() | ||||
|         bad = isna(expected.values) | ||||
|         good = ~bad | ||||
|         expected = Series( | ||||
|             np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad] | ||||
|         ) | ||||
|  | ||||
|         tm.assert_series_equal(result[bad], expected) | ||||
|  | ||||
|         # 'values' is synonymous with 'index' for the method kwarg | ||||
|         other_result = s.interpolate(method="values") | ||||
|  | ||||
|         tm.assert_series_equal(other_result, result) | ||||
|         tm.assert_series_equal(other_result[bad], expected) | ||||
|  | ||||
|     def test_interpolate_non_ts(self): | ||||
|         s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||||
|         msg = ( | ||||
|             "time-weighted interpolation only works on Series or DataFrames " | ||||
|             "with a DatetimeIndex" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method="time") | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "kwargs", | ||||
|         [ | ||||
|             {}, | ||||
|             pytest.param( | ||||
|                 {"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy") | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_nan_interpolate(self, kwargs): | ||||
|         s = Series([0, 1, np.nan, 3]) | ||||
|         result = s.interpolate(**kwargs) | ||||
|         expected = Series([0.0, 1.0, 2.0, 3.0]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_nan_irregular_index(self): | ||||
|         s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) | ||||
|         result = s.interpolate() | ||||
|         expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_nan_str_index(self): | ||||
|         s = Series([0, 1, 2, np.nan], index=list("abcd")) | ||||
|         result = s.interpolate() | ||||
|         expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd")) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_quad(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4]) | ||||
|         result = sq.interpolate(method="quadratic") | ||||
|         expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_scipy_basic(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series([1, 3, np.nan, 12, np.nan, 25]) | ||||
|         # slinear | ||||
|         expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0]) | ||||
|         result = s.interpolate(method="slinear") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         msg = "The 'downcast' keyword in Series.interpolate is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = s.interpolate(method="slinear", downcast="infer") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         # nearest | ||||
|         expected = Series([1, 3, 3, 12, 12, 25]) | ||||
|         result = s.interpolate(method="nearest") | ||||
|         tm.assert_series_equal(result, expected.astype("float")) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = s.interpolate(method="nearest", downcast="infer") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         # zero | ||||
|         expected = Series([1, 3, 3, 12, 12, 25]) | ||||
|         result = s.interpolate(method="zero") | ||||
|         tm.assert_series_equal(result, expected.astype("float")) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = s.interpolate(method="zero", downcast="infer") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         # quadratic | ||||
|         # GH #15662. | ||||
|         expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0]) | ||||
|         result = s.interpolate(method="quadratic") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = s.interpolate(method="quadratic", downcast="infer") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         # cubic | ||||
|         expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0]) | ||||
|         result = s.interpolate(method="cubic") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_limit(self): | ||||
|         s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||||
|  | ||||
|         expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) | ||||
|         result = s.interpolate(method="linear", limit=2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("limit", [-1, 0]) | ||||
|     def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit): | ||||
|         # GH 9217: make sure limit is greater than zero. | ||||
|         s = Series([1, 2, np.nan, 4]) | ||||
|         method, kwargs = nontemporal_method | ||||
|         with pytest.raises(ValueError, match="Limit must be greater than 0"): | ||||
|             s.interpolate(limit=limit, method=method, **kwargs) | ||||
|  | ||||
|     def test_interpolate_invalid_float_limit(self, nontemporal_method): | ||||
|         # GH 9217: make sure limit is an integer. | ||||
|         s = Series([1, 2, np.nan, 4]) | ||||
|         method, kwargs = nontemporal_method | ||||
|         limit = 2.0 | ||||
|         with pytest.raises(ValueError, match="Limit must be an integer"): | ||||
|             s.interpolate(limit=limit, method=method, **kwargs) | ||||
|  | ||||
|     @pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"]) | ||||
|     def test_interp_invalid_method(self, invalid_method): | ||||
|         s = Series([1, 3, np.nan, 12, np.nan, 25]) | ||||
|  | ||||
|         msg = f"method must be one of.* Got '{invalid_method}' instead" | ||||
|         if invalid_method is None: | ||||
|             msg = "'method' should be a string, not None" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method=invalid_method) | ||||
|  | ||||
|         # When an invalid method and invalid limit (such as -1) are | ||||
|         # provided, the error message reflects the invalid method. | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method=invalid_method, limit=-1) | ||||
|  | ||||
|     def test_interp_invalid_method_and_value(self): | ||||
|         # GH#36624 | ||||
|         ser = Series([1, 3, np.nan, 12, np.nan, 25]) | ||||
|  | ||||
|         msg = "'fill_value' is not a valid keyword for Series.interpolate" | ||||
|         msg2 = "Series.interpolate with method=pad" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|                 ser.interpolate(fill_value=3, method="pad") | ||||
|  | ||||
|     def test_interp_limit_forward(self): | ||||
|         s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||||
|  | ||||
|         # Provide 'forward' (the default) explicitly here. | ||||
|         expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) | ||||
|  | ||||
|         result = s.interpolate(method="linear", limit=2, limit_direction="forward") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_unlimited(self): | ||||
|         # these test are for issue #16282 default Limit=None is unlimited | ||||
|         s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan]) | ||||
|         expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) | ||||
|         result = s.interpolate(method="linear", limit_direction="both") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) | ||||
|         result = s.interpolate(method="linear", limit_direction="forward") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan]) | ||||
|         result = s.interpolate(method="linear", limit_direction="backward") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_limit_bad_direction(self): | ||||
|         s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||||
|  | ||||
|         msg = ( | ||||
|             r"Invalid limit_direction: expecting one of \['forward', " | ||||
|             r"'backward', 'both'\], got 'abc'" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method="linear", limit=2, limit_direction="abc") | ||||
|  | ||||
|         # raises an error even if no limit is specified. | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method="linear", limit_direction="abc") | ||||
|  | ||||
|     # limit_area introduced GH #16284 | ||||
|     def test_interp_limit_area(self): | ||||
|         # These tests are for issue #9218 -- fill NaNs in both directions. | ||||
|         s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) | ||||
|  | ||||
|         expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan]) | ||||
|         result = s.interpolate(method="linear", limit_area="inside") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series( | ||||
|             [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan] | ||||
|         ) | ||||
|         result = s.interpolate(method="linear", limit_area="inside", limit=1) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan]) | ||||
|         result = s.interpolate( | ||||
|             method="linear", limit_area="inside", limit_direction="both", limit=1 | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) | ||||
|         result = s.interpolate(method="linear", limit_area="outside") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series( | ||||
|             [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] | ||||
|         ) | ||||
|         result = s.interpolate(method="linear", limit_area="outside", limit=1) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]) | ||||
|         result = s.interpolate( | ||||
|             method="linear", limit_area="outside", limit_direction="both", limit=1 | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) | ||||
|         result = s.interpolate( | ||||
|             method="linear", limit_area="outside", limit_direction="backward" | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # raises an error even if limit type is wrong. | ||||
|         msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method="linear", limit_area="abc") | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "method, limit_direction, expected", | ||||
|         [ | ||||
|             ("pad", "backward", "forward"), | ||||
|             ("ffill", "backward", "forward"), | ||||
|             ("backfill", "forward", "backward"), | ||||
|             ("bfill", "forward", "backward"), | ||||
|             ("pad", "both", "forward"), | ||||
|             ("ffill", "both", "forward"), | ||||
|             ("backfill", "both", "backward"), | ||||
|             ("bfill", "both", "backward"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_interp_limit_direction_raises(self, method, limit_direction, expected): | ||||
|         # https://github.com/pandas-dev/pandas/pull/34746 | ||||
|         s = Series([1, 2, 3]) | ||||
|  | ||||
|         msg = f"`limit_direction` must be '{expected}' for method `{method}`" | ||||
|         msg2 = "Series.interpolate with method=" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|                 s.interpolate(method=method, limit_direction=limit_direction) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data, expected_data, kwargs", | ||||
|         ( | ||||
|             ( | ||||
|                 [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan], | ||||
|                 {"method": "pad", "limit_area": "inside"}, | ||||
|             ), | ||||
|             ( | ||||
|                 [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan], | ||||
|                 {"method": "pad", "limit_area": "inside", "limit": 1}, | ||||
|             ), | ||||
|             ( | ||||
|                 [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0], | ||||
|                 {"method": "pad", "limit_area": "outside"}, | ||||
|             ), | ||||
|             ( | ||||
|                 [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan], | ||||
|                 {"method": "pad", "limit_area": "outside", "limit": 1}, | ||||
|             ), | ||||
|             ( | ||||
|                 [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], | ||||
|                 {"method": "pad", "limit_area": "outside", "limit": 1}, | ||||
|             ), | ||||
|             ( | ||||
|                 range(5), | ||||
|                 range(5), | ||||
|                 {"method": "pad", "limit_area": "outside", "limit": 1}, | ||||
|             ), | ||||
|         ), | ||||
|     ) | ||||
|     def test_interp_limit_area_with_pad(self, data, expected_data, kwargs): | ||||
|         # GH26796 | ||||
|  | ||||
|         s = Series(data) | ||||
|         expected = Series(expected_data) | ||||
|         msg = "Series.interpolate with method=pad" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = s.interpolate(**kwargs) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data, expected_data, kwargs", | ||||
|         ( | ||||
|             ( | ||||
|                 [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan], | ||||
|                 {"method": "bfill", "limit_area": "inside"}, | ||||
|             ), | ||||
|             ( | ||||
|                 [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], | ||||
|                 [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan], | ||||
|                 {"method": "bfill", "limit_area": "inside", "limit": 1}, | ||||
|             ), | ||||
|             ( | ||||
|                 [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], | ||||
|                 [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], | ||||
|                 {"method": "bfill", "limit_area": "outside"}, | ||||
|             ), | ||||
|             ( | ||||
|                 [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan], | ||||
|                 [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan], | ||||
|                 {"method": "bfill", "limit_area": "outside", "limit": 1}, | ||||
|             ), | ||||
|         ), | ||||
|     ) | ||||
|     def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs): | ||||
|         # GH26796 | ||||
|  | ||||
|         s = Series(data) | ||||
|         expected = Series(expected_data) | ||||
|         msg = "Series.interpolate with method=bfill" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = s.interpolate(**kwargs) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_limit_direction(self): | ||||
|         # These tests are for issue #9218 -- fill NaNs in both directions. | ||||
|         s = Series([1, 3, np.nan, np.nan, np.nan, 11]) | ||||
|  | ||||
|         expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0]) | ||||
|         result = s.interpolate(method="linear", limit=2, limit_direction="backward") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0]) | ||||
|         result = s.interpolate(method="linear", limit=1, limit_direction="both") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # Check that this works on a longer series of nans. | ||||
|         s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan]) | ||||
|  | ||||
|         expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]) | ||||
|         result = s.interpolate(method="linear", limit=2, limit_direction="both") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series( | ||||
|             [1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0] | ||||
|         ) | ||||
|         result = s.interpolate(method="linear", limit=1, limit_direction="both") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_limit_to_ends(self): | ||||
|         # These test are for issue #10420 -- flow back to beginning. | ||||
|         s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) | ||||
|  | ||||
|         expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan]) | ||||
|         result = s.interpolate(method="linear", limit=2, limit_direction="backward") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0]) | ||||
|         result = s.interpolate(method="linear", limit=2, limit_direction="both") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_limit_before_ends(self): | ||||
|         # These test are for issue #11115 -- limit ends properly. | ||||
|         s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan]) | ||||
|  | ||||
|         expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan]) | ||||
|         result = s.interpolate(method="linear", limit=1, limit_direction="forward") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan]) | ||||
|         result = s.interpolate(method="linear", limit=1, limit_direction="backward") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan]) | ||||
|         result = s.interpolate(method="linear", limit=1, limit_direction="both") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_all_good(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series([1, 2, 3]) | ||||
|         result = s.interpolate(method="polynomial", order=1) | ||||
|         tm.assert_series_equal(result, s) | ||||
|  | ||||
|         # non-scipy | ||||
|         result = s.interpolate() | ||||
|         tm.assert_series_equal(result, s) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "check_scipy", [False, pytest.param(True, marks=td.skip_if_no("scipy"))] | ||||
|     ) | ||||
|     def test_interp_multiIndex(self, check_scipy): | ||||
|         idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")]) | ||||
|         s = Series([1, 2, np.nan], index=idx) | ||||
|  | ||||
|         expected = s.copy() | ||||
|         expected.loc[2] = 2 | ||||
|         result = s.interpolate() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         msg = "Only `method=linear` interpolation is supported on MultiIndexes" | ||||
|         if check_scipy: | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 s.interpolate(method="polynomial", order=1) | ||||
|  | ||||
|     def test_interp_nonmono_raise(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series([1, np.nan, 3], index=[0, 2, 1]) | ||||
|         msg = "krogh interpolation requires that the index be monotonic" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method="krogh") | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["nearest", "pad"]) | ||||
|     def test_interp_datetime64(self, method, tz_naive_fixture): | ||||
|         pytest.importorskip("scipy") | ||||
|         df = Series( | ||||
|             [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture) | ||||
|         ) | ||||
|         warn = None if method == "nearest" else FutureWarning | ||||
|         msg = "Series.interpolate with method=pad is deprecated" | ||||
|         with tm.assert_produces_warning(warn, match=msg): | ||||
|             result = df.interpolate(method=method) | ||||
|         if warn is not None: | ||||
|             # check the "use ffill instead" is equivalent | ||||
|             alt = df.ffill() | ||||
|             tm.assert_series_equal(result, alt) | ||||
|  | ||||
|         expected = Series( | ||||
|             [1.0, 1.0, 3.0], | ||||
|             index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture), | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_pad_datetime64tz_values(self): | ||||
|         # GH#27628 missing.interpolate_2d should handle datetimetz values | ||||
|         dti = date_range("2015-04-05", periods=3, tz="US/Central") | ||||
|         ser = Series(dti) | ||||
|         ser[1] = pd.NaT | ||||
|  | ||||
|         msg = "Series.interpolate with method=pad is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = ser.interpolate(method="pad") | ||||
|         # check the "use ffill instead" is equivalent | ||||
|         alt = ser.ffill() | ||||
|         tm.assert_series_equal(result, alt) | ||||
|  | ||||
|         expected = Series(dti) | ||||
|         expected[1] = expected[0] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interp_limit_no_nans(self): | ||||
|         # GH 7173 | ||||
|         s = Series([1.0, 2.0, 3.0]) | ||||
|         result = s.interpolate(limit=1) | ||||
|         expected = s | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["polynomial", "spline"]) | ||||
|     def test_no_order(self, method): | ||||
|         # see GH-10633, GH-24014 | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series([0, 1, np.nan, 3]) | ||||
|         msg = "You must specify the order of the spline or polynomial" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method=method) | ||||
|  | ||||
|     @pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan]) | ||||
|     def test_interpolate_spline_invalid_order(self, order): | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series([0, 1, np.nan, 3]) | ||||
|         msg = "order needs to be specified and greater than 0" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.interpolate(method="spline", order=order) | ||||
|  | ||||
|     def test_spline(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series([1, 2, np.nan, 4, 5, np.nan, 7]) | ||||
|         result = s.interpolate(method="spline", order=1) | ||||
|         expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_spline_extrapolate(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) | ||||
|         result3 = s.interpolate(method="spline", order=1, ext=3) | ||||
|         expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0]) | ||||
|         tm.assert_series_equal(result3, expected3) | ||||
|  | ||||
|         result1 = s.interpolate(method="spline", order=1, ext=0) | ||||
|         expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) | ||||
|         tm.assert_series_equal(result1, expected1) | ||||
|  | ||||
|     def test_spline_smooth(self): | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7]) | ||||
|         assert ( | ||||
|             s.interpolate(method="spline", order=3, s=0)[5] | ||||
|             != s.interpolate(method="spline", order=3)[5] | ||||
|         ) | ||||
|  | ||||
|     def test_spline_interpolation(self): | ||||
|         # Explicit cast to float to avoid implicit cast when setting np.nan | ||||
|         pytest.importorskip("scipy") | ||||
|         s = Series(np.arange(10) ** 2, dtype="float") | ||||
|         s[np.random.default_rng(2).integers(0, 9, 3)] = np.nan | ||||
|         result1 = s.interpolate(method="spline", order=1) | ||||
|         expected1 = s.interpolate(method="spline", order=1) | ||||
|         tm.assert_series_equal(result1, expected1) | ||||
|  | ||||
|     def test_interp_timedelta64(self): | ||||
|         # GH 6424 | ||||
|         df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3])) | ||||
|         result = df.interpolate(method="time") | ||||
|         expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # test for non uniform spacing | ||||
|         df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4])) | ||||
|         result = df.interpolate(method="time") | ||||
|         expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_series_interpolate_method_values(self): | ||||
|         # GH#1646 | ||||
|         rng = date_range("1/1/2000", "1/20/2000", freq="D") | ||||
|         ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) | ||||
|  | ||||
|         ts[::2] = np.nan | ||||
|  | ||||
|         result = ts.interpolate(method="values") | ||||
|         exp = ts.interpolate() | ||||
|         tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     def test_series_interpolate_intraday(self): | ||||
|         # #1698 | ||||
|         index = date_range("1/1/2012", periods=4, freq="12D") | ||||
|         ts = Series([0, 12, 24, 36], index) | ||||
|         new_index = index.append(index + pd.DateOffset(days=1)).sort_values() | ||||
|  | ||||
|         exp = ts.reindex(new_index).interpolate(method="time") | ||||
|  | ||||
|         index = date_range("1/1/2012", periods=4, freq="12h") | ||||
|         ts = Series([0, 12, 24, 36], index) | ||||
|         new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() | ||||
|         result = ts.reindex(new_index).interpolate(method="time") | ||||
|  | ||||
|         tm.assert_numpy_array_equal(result.values, exp.values) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ind", | ||||
|         [ | ||||
|             ["a", "b", "c", "d"], | ||||
|             pd.period_range(start="2019-01-01", periods=4), | ||||
|             pd.interval_range(start=0, end=4), | ||||
|         ], | ||||
|     ) | ||||
|     def test_interp_non_timedelta_index(self, interp_methods_ind, ind): | ||||
|         # gh 21662 | ||||
|         df = pd.DataFrame([0, 1, np.nan, 3], index=ind) | ||||
|  | ||||
|         method, kwargs = interp_methods_ind | ||||
|         if method == "pchip": | ||||
|             pytest.importorskip("scipy") | ||||
|  | ||||
|         if method == "linear": | ||||
|             result = df[0].interpolate(**kwargs) | ||||
|             expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) | ||||
|             tm.assert_series_equal(result, expected) | ||||
|         else: | ||||
|             expected_error = ( | ||||
|                 "Index column must be numeric or datetime type when " | ||||
|                 f"using {method} method other than linear. " | ||||
|                 "Try setting a numeric or datetime index column before " | ||||
|                 "interpolating." | ||||
|             ) | ||||
|             with pytest.raises(ValueError, match=expected_error): | ||||
|                 df[0].interpolate(method=method, **kwargs) | ||||
|  | ||||
|     def test_interpolate_timedelta_index(self, request, interp_methods_ind): | ||||
|         """ | ||||
|         Tests for non numerical index types  - object, period, timedelta | ||||
|         Note that all methods except time, index, nearest and values | ||||
|         are tested here. | ||||
|         """ | ||||
|         # gh 21662 | ||||
|         pytest.importorskip("scipy") | ||||
|         ind = pd.timedelta_range(start=1, periods=4) | ||||
|         df = pd.DataFrame([0, 1, np.nan, 3], index=ind) | ||||
|  | ||||
|         method, kwargs = interp_methods_ind | ||||
|  | ||||
|         if method in {"cubic", "zero"}: | ||||
|             request.applymarker( | ||||
|                 pytest.mark.xfail( | ||||
|                     reason=f"{method} interpolation is not supported for TimedeltaIndex" | ||||
|                 ) | ||||
|             ) | ||||
|         result = df[0].interpolate(method=method, **kwargs) | ||||
|         expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ascending, expected_values", | ||||
|         [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])], | ||||
|     ) | ||||
|     def test_interpolate_unsorted_index(self, ascending, expected_values): | ||||
|         # GH 21037 | ||||
|         ts = Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1]) | ||||
|         result = ts.sort_index(ascending=ascending).interpolate(method="index") | ||||
|         expected = Series(data=expected_values, index=expected_values, dtype=float) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_interpolate_asfreq_raises(self): | ||||
|         ser = Series(["a", None, "b"], dtype=object) | ||||
|         msg2 = "Series.interpolate with object dtype" | ||||
|         msg = "Invalid fill method" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|                 ser.interpolate(method="asfreq") | ||||
|  | ||||
|     def test_interpolate_fill_value(self): | ||||
|         # GH#54920 | ||||
|         pytest.importorskip("scipy") | ||||
|         ser = Series([np.nan, 0, 1, np.nan, 3, np.nan]) | ||||
|         result = ser.interpolate(method="nearest", fill_value=0) | ||||
|         expected = Series([np.nan, 0, 1, 1, 3, 0]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,26 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
class TestIsMonotonic:
    """Tests for ``Series.is_monotonic_increasing`` / ``is_monotonic_decreasing``."""

    def test_is_monotonic_numeric(self):
        """Check both properties on random, ascending and descending integers."""
        ser = Series(np.random.default_rng(2).integers(0, 10, size=1000))
        assert not ser.is_monotonic_increasing

        ser = Series(np.arange(1000))
        assert ser.is_monotonic_increasing is True
        # Strictly ascending data must not also report as decreasing.
        assert ser.is_monotonic_decreasing is False

        ser = Series(np.arange(1000, 0, -1))
        assert ser.is_monotonic_decreasing is True

    def test_is_monotonic_dt64(self):
        """Check both properties on ascending and reversed datetime64 values."""
        ser = Series(date_range("20130101", periods=10))
        assert ser.is_monotonic_increasing is True
        # Strictly ascending dates must not also report as decreasing.
        assert ser.is_monotonic_decreasing is False

        ser = Series(list(reversed(ser)))
        assert ser.is_monotonic_increasing is False
        assert ser.is_monotonic_decreasing is True
| @ -0,0 +1,40 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import Series | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data, expected",
    [
        (np.random.default_rng(2).integers(0, 10, size=1000), False),
        (np.arange(1000), True),
        ([], True),
        ([np.nan], True),
        (["foo", "bar", np.nan], True),
        (["foo", "foo", np.nan], False),
        (["foo", "bar", np.nan, np.nan], False),
    ],
)
def test_is_unique(data, expected):
    """``Series.is_unique`` returns exactly the expected bool for each input."""
    # GH#11946 / GH#25180
    assert Series(data).is_unique is expected
|  | ||||
|  | ||||
def test_is_unique_class_ne(capsys):
    """Accessing ``is_unique`` writes nothing to stderr when ``__ne__`` raises."""
    # GH#20661

    class Foo:
        def __init__(self, val) -> None:
            self._value = val

        def __ne__(self, other):
            raise Exception("NEQ not supported")

    # Build the Series with output capturing switched off.
    with capsys.disabled():
        objects = list(map(Foo, range(5)))
        ser = Series(objects, index=list(range(5)))

    # Only the attribute access matters here; its result is not checked.
    ser.is_unique
    stderr_text = capsys.readouterr().err
    assert len(stderr_text) == 0
| @ -0,0 +1,252 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core import algorithms | ||||
| from pandas.core.arrays import PeriodArray | ||||
|  | ||||
|  | ||||
| class TestSeriesIsIn: | ||||
|     def test_isin(self): | ||||
|         s = Series(["A", "B", "C", "a", "B", "B", "A", "C"]) | ||||
|  | ||||
|         result = s.isin(["A", "C"]) | ||||
|         expected = Series([True, False, True, False, False, False, True, True]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # GH#16012 | ||||
|         # This specific issue has to have a series over 1e6 in len, but the | ||||
|         # comparison array (in_list) must be large enough so that numpy doesn't | ||||
|         # do a manual masking trick that will avoid this issue altogether | ||||
|         s = Series(list("abcdefghijk" * 10**5)) | ||||
|         # If numpy doesn't do the manual comparison/mask, these | ||||
|         # unorderable mixed types are what cause the exception in numpy | ||||
|         in_list = [-1, "a", "b", "G", "Y", "Z", "E", "K", "E", "S", "I", "R", "R"] * 6 | ||||
|  | ||||
|         assert s.isin(in_list).sum() == 200000 | ||||
|  | ||||
|     def test_isin_with_string_scalar(self): | ||||
|         # GH#4763 | ||||
|         s = Series(["A", "B", "C", "a", "B", "B", "A", "C"]) | ||||
|         msg = ( | ||||
|             r"only list-like objects are allowed to be passed to isin\(\), " | ||||
|             r"you passed a `str`" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s.isin("a") | ||||
|  | ||||
|         s = Series(["aaa", "b", "c"]) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             s.isin("aaa") | ||||
|  | ||||
|     def test_isin_datetimelike_mismatched_reso(self): | ||||
|         expected = Series([True, True, False, False, False]) | ||||
|  | ||||
|         ser = Series(date_range("jan-01-2013", "jan-05-2013")) | ||||
|  | ||||
|         # fails on dtype conversion in the first place | ||||
|         day_values = np.asarray(ser[0:2].values).astype("datetime64[D]") | ||||
|         result = ser.isin(day_values) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         dta = ser[:2]._values.astype("M8[s]") | ||||
|         result = ser.isin(dta) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_isin_datetimelike_mismatched_reso_list(self): | ||||
|         expected = Series([True, True, False, False, False]) | ||||
|  | ||||
|         ser = Series(date_range("jan-01-2013", "jan-05-2013")) | ||||
|  | ||||
|         dta = ser[:2]._values.astype("M8[s]") | ||||
|         result = ser.isin(list(dta)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_isin_with_i8(self): | ||||
|         # GH#5021 | ||||
|  | ||||
|         expected = Series([True, True, False, False, False]) | ||||
|         expected2 = Series([False, True, False, False, False]) | ||||
|  | ||||
|         # datetime64[ns] | ||||
|         s = Series(date_range("jan-01-2013", "jan-05-2013")) | ||||
|  | ||||
|         result = s.isin(s[0:2]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.isin(s[0:2].values) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.isin([s[1]]) | ||||
|         tm.assert_series_equal(result, expected2) | ||||
|  | ||||
|         result = s.isin([np.datetime64(s[1])]) | ||||
|         tm.assert_series_equal(result, expected2) | ||||
|  | ||||
|         result = s.isin(set(s[0:2])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # timedelta64[ns] | ||||
|         s = Series(pd.to_timedelta(range(5), unit="d")) | ||||
|         result = s.isin(s[0:2]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) | ||||
|     def test_isin_empty(self, empty): | ||||
|         # see GH#16991 | ||||
|         s = Series(["a", "b"]) | ||||
|         expected = Series([False, False]) | ||||
|  | ||||
|         result = s.isin(empty) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     def test_isin_read_only(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/37174 | ||||
|         arr = np.array([1, 2, 3]) | ||||
|         arr.setflags(write=False) | ||||
|         s = Series([1, 2, 3]) | ||||
|         result = s.isin(arr) | ||||
|         expected = Series([True, True, True]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", [object, None]) | ||||
|     def test_isin_dt64_values_vs_ints(self, dtype): | ||||
|         # GH#36621 dont cast integers to datetimes for isin | ||||
|         dti = date_range("2013-01-01", "2013-01-05") | ||||
|         ser = Series(dti) | ||||
|  | ||||
|         comps = np.asarray([1356998400000000000], dtype=dtype) | ||||
|  | ||||
|         res = dti.isin(comps) | ||||
|         expected = np.array([False] * len(dti), dtype=bool) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|         res = ser.isin(comps) | ||||
|         tm.assert_series_equal(res, Series(expected)) | ||||
|  | ||||
|         res = pd.core.algorithms.isin(ser, comps) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|     def test_isin_tzawareness_mismatch(self): | ||||
|         dti = date_range("2013-01-01", "2013-01-05") | ||||
|         ser = Series(dti) | ||||
|  | ||||
|         other = dti.tz_localize("UTC") | ||||
|  | ||||
|         res = dti.isin(other) | ||||
|         expected = np.array([False] * len(dti), dtype=bool) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|         res = ser.isin(other) | ||||
|         tm.assert_series_equal(res, Series(expected)) | ||||
|  | ||||
|         res = pd.core.algorithms.isin(ser, other) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|     def test_isin_period_freq_mismatch(self): | ||||
|         dti = date_range("2013-01-01", "2013-01-05") | ||||
|         pi = dti.to_period("M") | ||||
|         ser = Series(pi) | ||||
|  | ||||
|         # We construct another PeriodIndex with the same i8 values | ||||
|         #  but different dtype | ||||
|         dtype = dti.to_period("Y").dtype | ||||
|         other = PeriodArray._simple_new(pi.asi8, dtype=dtype) | ||||
|  | ||||
|         res = pi.isin(other) | ||||
|         expected = np.array([False] * len(pi), dtype=bool) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|         res = ser.isin(other) | ||||
|         tm.assert_series_equal(res, Series(expected)) | ||||
|  | ||||
|         res = pd.core.algorithms.isin(ser, other) | ||||
|         tm.assert_numpy_array_equal(res, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("values", [[-9.0, 0.0], [-9, 0]]) | ||||
|     def test_isin_float_in_int_series(self, values): | ||||
|         # GH#19356 GH#21804 | ||||
|         ser = Series(values) | ||||
|         result = ser.isin([-9, -0.5]) | ||||
|         expected = Series([True, False]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "data,values,expected", | ||||
|         [ | ||||
|             ([0, 1, 0], [1], [False, True, False]), | ||||
|             ([0, 1, 0], [1, pd.NA], [False, True, False]), | ||||
|             ([0, pd.NA, 0], [1, 0], [True, False, True]), | ||||
|             ([0, 1, pd.NA], [1, pd.NA], [False, True, True]), | ||||
|             ([0, 1, pd.NA], [1, np.nan], [False, True, False]), | ||||
|             ([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_isin_masked_types(self, dtype, data, values, expected): | ||||
|         # GH#42405 | ||||
|         ser = Series(data, dtype=dtype) | ||||
|  | ||||
|         result = ser.isin(values) | ||||
|         expected = Series(expected, dtype="boolean") | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_isin_large_series_mixed_dtypes_and_nan(monkeypatch): | ||||
|     # https://github.com/pandas-dev/pandas/issues/37094 | ||||
|     # combination of object dtype for the values | ||||
|     # and > _MINIMUM_COMP_ARR_LEN elements | ||||
|     min_isin_comp = 5 | ||||
|     ser = Series([1, 2, np.nan] * min_isin_comp) | ||||
|     with monkeypatch.context() as m: | ||||
|         m.setattr(algorithms, "_MINIMUM_COMP_ARR_LEN", min_isin_comp) | ||||
|         result = ser.isin({"foo", "bar"}) | ||||
|     expected = Series([False] * 3 * min_isin_comp) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "array,expected", | ||||
|     [ | ||||
|         ( | ||||
|             [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j], | ||||
|             Series([False, True, True, False, True, True, True], dtype=bool), | ||||
|         ) | ||||
|     ], | ||||
| ) | ||||
| def test_isin_complex_numbers(array, expected): | ||||
|     # GH 17927 | ||||
|     result = Series(array).isin([1j, 1 + 1j, 1 + 2j]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data,is_in", | ||||
|     [([1, [2]], [1]), (["simple str", [{"values": 3}]], ["simple str"])], | ||||
| ) | ||||
| def test_isin_filtering_with_mixed_object_types(data, is_in): | ||||
|     # GH 20883 | ||||
|  | ||||
|     ser = Series(data) | ||||
|     result = ser.isin(is_in) | ||||
|     expected = Series([True, False]) | ||||
|  | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("data", [[1, 2, 3], [1.0, 2.0, 3.0]]) | ||||
| @pytest.mark.parametrize("isin", [[1, 2], [1.0, 2.0]]) | ||||
| def test_isin_filtering_on_iterable(data, isin): | ||||
|     # GH 50234 | ||||
|  | ||||
|     ser = Series(data) | ||||
|     result = ser.isin(i for i in isin) | ||||
|     expected_result = Series([True, True, False]) | ||||
|  | ||||
|     tm.assert_series_equal(result, expected_result) | ||||
| @ -0,0 +1,35 @@ | ||||
| """ | ||||
| We also test Series.notna in this file. | ||||
| """ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     Period, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestIsna: | ||||
|     def test_isna_period_dtype(self): | ||||
|         # GH#13737 | ||||
|         ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")]) | ||||
|  | ||||
|         expected = Series([False, True]) | ||||
|  | ||||
|         result = ser.isna() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser.notna() | ||||
|         tm.assert_series_equal(result, ~expected) | ||||
|  | ||||
|     def test_isna(self): | ||||
|         ser = Series([0, 5.4, 3, np.nan, -0.001]) | ||||
|         expected = Series([False, False, False, True, False]) | ||||
|         tm.assert_series_equal(ser.isna(), expected) | ||||
|         tm.assert_series_equal(ser.notna(), ~expected) | ||||
|  | ||||
|         ser = Series(["hi", "", np.nan]) | ||||
|         expected = Series([False, False, True]) | ||||
|         tm.assert_series_equal(ser.isna(), expected) | ||||
|         tm.assert_series_equal(ser.notna(), ~expected) | ||||
| @ -0,0 +1,59 @@ | ||||
| """ | ||||
| Series.item method, mainly testing that we get python scalars as opposed to | ||||
| numpy scalars. | ||||
| """ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestItem: | ||||
|     def test_item(self): | ||||
|         # We are testing that we get python scalars as opposed to numpy scalars | ||||
|         ser = Series([1]) | ||||
|         result = ser.item() | ||||
|         assert result == 1 | ||||
|         assert result == ser.iloc[0] | ||||
|         assert isinstance(result, int)  # i.e. not np.int64 | ||||
|  | ||||
|         ser = Series([0.5], index=[3]) | ||||
|         result = ser.item() | ||||
|         assert isinstance(result, float) | ||||
|         assert result == 0.5 | ||||
|  | ||||
|         ser = Series([1, 2]) | ||||
|         msg = "can only convert an array of size 1" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.item() | ||||
|  | ||||
|         dti = date_range("2016-01-01", periods=2) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             dti.item() | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Series(dti).item() | ||||
|  | ||||
|         val = dti[:1].item() | ||||
|         assert isinstance(val, Timestamp) | ||||
|         val = Series(dti)[:1].item() | ||||
|         assert isinstance(val, Timestamp) | ||||
|  | ||||
|         tdi = dti - dti | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             tdi.item() | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             Series(tdi).item() | ||||
|  | ||||
|         val = tdi[:1].item() | ||||
|         assert isinstance(val, Timedelta) | ||||
|         val = Series(tdi)[:1].item() | ||||
|         assert isinstance(val, Timedelta) | ||||
|  | ||||
|         # Case where ser[0] would not work | ||||
|         ser = Series(dti, index=[5, 6]) | ||||
|         val = ser.iloc[:1].item() | ||||
|         assert val == dti[0] | ||||
| @ -0,0 +1,604 @@ | ||||
| from collections import ( | ||||
|     Counter, | ||||
|     defaultdict, | ||||
| ) | ||||
| from decimal import Decimal | ||||
| import math | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     bdate_range, | ||||
|     date_range, | ||||
|     isna, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_series_map_box_timedelta(): | ||||
|     # GH#11349 | ||||
|     ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h")) | ||||
|  | ||||
|     def f(x): | ||||
|         return x.total_seconds() | ||||
|  | ||||
|     ser.map(f) | ||||
|  | ||||
|  | ||||
| def test_map_callable(datetime_series): | ||||
|     with np.errstate(all="ignore"): | ||||
|         tm.assert_series_equal(datetime_series.map(np.sqrt), np.sqrt(datetime_series)) | ||||
|  | ||||
|     # map function element-wise | ||||
|     tm.assert_series_equal(datetime_series.map(math.exp), np.exp(datetime_series)) | ||||
|  | ||||
|     # empty series | ||||
|     s = Series(dtype=object, name="foo", index=Index([], name="bar")) | ||||
|     rs = s.map(lambda x: x) | ||||
|     tm.assert_series_equal(s, rs) | ||||
|  | ||||
|     # check all metadata (GH 9322) | ||||
|     assert s is not rs | ||||
|     assert s.index is rs.index | ||||
|     assert s.dtype == rs.dtype | ||||
|     assert s.name == rs.name | ||||
|  | ||||
|     # index but no data | ||||
|     s = Series(index=[1, 2, 3], dtype=np.float64) | ||||
|     rs = s.map(lambda x: x) | ||||
|     tm.assert_series_equal(s, rs) | ||||
|  | ||||
|  | ||||
| def test_map_same_length_inference_bug(): | ||||
|     s = Series([1, 2]) | ||||
|  | ||||
|     def f(x): | ||||
|         return (x, x + 1) | ||||
|  | ||||
|     s = Series([1, 2, 3]) | ||||
|     result = s.map(f) | ||||
|     expected = Series([(1, 2), (2, 3), (3, 4)]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     s = Series(["foo,bar"]) | ||||
|     result = s.map(lambda x: x.split(",")) | ||||
|     expected = Series([("foo", "bar")]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_series_map_box_timestamps(): | ||||
|     # GH#2689, GH#2627 | ||||
|     ser = Series(date_range("1/1/2000", periods=3)) | ||||
|  | ||||
|     def func(x): | ||||
|         return (x.hour, x.day, x.month) | ||||
|  | ||||
|     result = ser.map(func) | ||||
|     expected = Series([(0, 1, 1), (0, 2, 1), (0, 3, 1)]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_series_stringdtype(any_string_dtype, using_infer_string): | ||||
|     # map test on StringDType, GH#40823 | ||||
|     ser1 = Series( | ||||
|         data=["cat", "dog", "rabbit"], | ||||
|         index=["id1", "id2", "id3"], | ||||
|         dtype=any_string_dtype, | ||||
|     ) | ||||
|     ser2 = Series(["id3", "id2", "id1", "id7000"], dtype=any_string_dtype) | ||||
|     result = ser2.map(ser1) | ||||
|  | ||||
|     item = pd.NA | ||||
|     if ser2.dtype == object: | ||||
|         item = np.nan | ||||
|  | ||||
|     expected = Series(data=["rabbit", "dog", "cat", item], dtype=any_string_dtype) | ||||
|     if using_infer_string and any_string_dtype == "object": | ||||
|         expected = expected.astype("str") | ||||
|  | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "data, expected_dtype", | ||||
|     [(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], "str")], | ||||
| ) | ||||
| def test_map_categorical_with_nan_values(data, expected_dtype): | ||||
|     # GH 20714 bug fixed in: GH 24275 | ||||
|     def func(val): | ||||
|         return val.split("-")[0] | ||||
|  | ||||
|     s = Series(data, dtype="category") | ||||
|  | ||||
|     result = s.map(func, na_action="ignore") | ||||
|     expected = Series(["1", "1", np.nan], dtype=expected_dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_empty_integer_series(): | ||||
|     # GH52384 | ||||
|     s = Series([], dtype=int) | ||||
|     result = s.map(lambda x: x) | ||||
|     tm.assert_series_equal(result, s) | ||||
|  | ||||
|  | ||||
| def test_map_empty_integer_series_with_datetime_index(): | ||||
|     # GH 21245 | ||||
|     s = Series([], index=date_range(start="2018-01-01", periods=0), dtype=int) | ||||
|     result = s.map(lambda x: x) | ||||
|     tm.assert_series_equal(result, s) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", [str, lambda x: str(x)]) | ||||
| def test_map_simple_str_callables_same_as_astype( | ||||
|     string_series, func, using_infer_string | ||||
| ): | ||||
|     # test that we are evaluating row-by-row first | ||||
|     # before vectorized evaluation | ||||
|     result = string_series.map(func) | ||||
|     expected = string_series.astype(str if not using_infer_string else "str") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_list_raises(string_series): | ||||
|     with pytest.raises(TypeError, match="'list' object is not callable"): | ||||
|         string_series.map([lambda x: x]) | ||||
|  | ||||
|  | ||||
| def test_map(): | ||||
|     data = { | ||||
|         "A": [0.0, 1.0, 2.0, 3.0, 4.0], | ||||
|         "B": [0.0, 1.0, 0.0, 1.0, 0.0], | ||||
|         "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], | ||||
|         "D": bdate_range("1/1/2009", periods=5), | ||||
|     } | ||||
|  | ||||
|     source = Series(data["B"], index=data["C"]) | ||||
|     target = Series(data["C"][:4], index=data["D"][:4]) | ||||
|  | ||||
|     merged = target.map(source) | ||||
|  | ||||
|     for k, v in merged.items(): | ||||
|         assert v == source[target[k]] | ||||
|  | ||||
|     # input could be a dict | ||||
|     merged = target.map(source.to_dict()) | ||||
|  | ||||
|     for k, v in merged.items(): | ||||
|         assert v == source[target[k]] | ||||
|  | ||||
|  | ||||
| def test_map_datetime(datetime_series): | ||||
|     # function | ||||
|     result = datetime_series.map(lambda x: x * 2) | ||||
|     tm.assert_series_equal(result, datetime_series * 2) | ||||
|  | ||||
|  | ||||
| def test_map_category(): | ||||
|     # GH 10324 | ||||
|     a = Series([1, 2, 3, 4]) | ||||
|     b = Series(["even", "odd", "even", "odd"], dtype="category") | ||||
|     c = Series(["even", "odd", "even", "odd"]) | ||||
|  | ||||
|     exp = Series(["odd", "even", "odd", np.nan], dtype="category") | ||||
|     tm.assert_series_equal(a.map(b), exp) | ||||
|     exp = Series(["odd", "even", "odd", np.nan]) | ||||
|     tm.assert_series_equal(a.map(c), exp) | ||||
|  | ||||
|  | ||||
| def test_map_category_numeric(): | ||||
|     a = Series(["a", "b", "c", "d"]) | ||||
|     b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"])) | ||||
|     c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"])) | ||||
|  | ||||
|     exp = Series([np.nan, 1, 2, 3]) | ||||
|     tm.assert_series_equal(a.map(b), exp) | ||||
|     exp = Series([np.nan, 1, 2, 3]) | ||||
|     tm.assert_series_equal(a.map(c), exp) | ||||
|  | ||||
|  | ||||
| def test_map_category_string(): | ||||
|     a = Series(["a", "b", "c", "d"]) | ||||
|     b = Series( | ||||
|         ["B", "C", "D", "E"], | ||||
|         dtype="category", | ||||
|         index=pd.CategoricalIndex(["b", "c", "d", "e"]), | ||||
|     ) | ||||
|     c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"])) | ||||
|  | ||||
|     exp = Series( | ||||
|         pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"]) | ||||
|     ) | ||||
|     tm.assert_series_equal(a.map(b), exp) | ||||
|     exp = Series([np.nan, "B", "C", "D"]) | ||||
|     tm.assert_series_equal(a.map(c), exp) | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning") | ||||
| def test_map_empty(request, index): | ||||
|     if isinstance(index, MultiIndex): | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail( | ||||
|                 reason="Initializing a Series from a MultiIndex is not supported" | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|     s = Series(index) | ||||
|     result = s.map({}) | ||||
|  | ||||
|     expected = Series(np.nan, index=s.index) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_compat(): | ||||
|     # related GH 8024 | ||||
|     s = Series([True, True, False], index=[1, 2, 3]) | ||||
|     result = s.map({True: "foo", False: "bar"}) | ||||
|     expected = Series(["foo", "foo", "bar"], index=[1, 2, 3]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_int(): | ||||
|     left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4}) | ||||
|     right = Series({1: 11, 2: 22, 3: 33}) | ||||
|  | ||||
|     assert left.dtype == np.float64 | ||||
|     assert issubclass(right.dtype.type, np.integer) | ||||
|  | ||||
|     merged = left.map(right) | ||||
|     assert merged.dtype == np.float64 | ||||
|     assert isna(merged["d"]) | ||||
|     assert not isna(merged["c"]) | ||||
|  | ||||
|  | ||||
| def test_map_type_inference(): | ||||
|     s = Series(range(3)) | ||||
|     s2 = s.map(lambda x: np.where(x == 0, 0, 1)) | ||||
|     assert issubclass(s2.dtype.type, np.integer) | ||||
|  | ||||
|  | ||||
| def test_map_decimal(string_series): | ||||
|     result = string_series.map(lambda x: Decimal(str(x))) | ||||
|     assert result.dtype == np.object_ | ||||
|     assert isinstance(result.iloc[0], Decimal) | ||||
|  | ||||
|  | ||||
| def test_map_na_exclusion(): | ||||
|     s = Series([1.5, np.nan, 3, np.nan, 5]) | ||||
|  | ||||
|     result = s.map(lambda x: x * 2, na_action="ignore") | ||||
|     exp = s * 2 | ||||
|     tm.assert_series_equal(result, exp) | ||||
|  | ||||
|  | ||||
| def test_map_dict_with_tuple_keys(): | ||||
|     """ | ||||
|     Due to new MultiIndex-ing behaviour in v0.14.0, | ||||
|     dicts with tuple keys passed to map were being | ||||
|     converted to a multi-index, preventing tuple values | ||||
|     from being mapped properly. | ||||
|     """ | ||||
|     # GH 18496 | ||||
|     df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]}) | ||||
|     label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"} | ||||
|  | ||||
|     df["labels"] = df["a"].map(label_mappings) | ||||
|     df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index) | ||||
|     # All labels should be filled now | ||||
|     tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False) | ||||
|  | ||||
|  | ||||
| def test_map_counter(): | ||||
|     s = Series(["a", "b", "c"], index=[1, 2, 3]) | ||||
|     counter = Counter() | ||||
|     counter["b"] = 5 | ||||
|     counter["c"] += 1 | ||||
|     result = s.map(counter) | ||||
|     expected = Series([0, 5, 1], index=[1, 2, 3]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_defaultdict(): | ||||
|     s = Series([1, 2, 3], index=["a", "b", "c"]) | ||||
|     default_dict = defaultdict(lambda: "blank") | ||||
|     default_dict[1] = "stuff" | ||||
|     result = s.map(default_dict) | ||||
|     expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_dict_na_key(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/17648 | ||||
|     # Checks that np.nan key is appropriately mapped | ||||
|     s = Series([1, 2, np.nan]) | ||||
|     expected = Series(["a", "b", "c"]) | ||||
|     result = s.map({1: "a", 2: "b", np.nan: "c"}) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("na_action", [None, "ignore"]) | ||||
| def test_map_defaultdict_na_key(na_action): | ||||
|     # GH 48813 | ||||
|     s = Series([1, 2, np.nan]) | ||||
|     default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"}) | ||||
|     result = s.map(default_map, na_action=na_action) | ||||
|     expected = Series({0: "a", 1: "b", 2: "c" if na_action is None else np.nan}) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("na_action", [None, "ignore"]) | ||||
| def test_map_defaultdict_missing_key(na_action): | ||||
|     # GH 48813 | ||||
|     s = Series([1, 2, np.nan]) | ||||
|     default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", 3: "c"}) | ||||
|     result = s.map(default_map, na_action=na_action) | ||||
|     expected = Series({0: "a", 1: "b", 2: "missing" if na_action is None else np.nan}) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("na_action", [None, "ignore"]) | ||||
| def test_map_defaultdict_unmutated(na_action): | ||||
|     # GH 48813 | ||||
|     s = Series([1, 2, np.nan]) | ||||
|     default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"}) | ||||
|     expected_default_map = default_map.copy() | ||||
|     s.map(default_map, na_action=na_action) | ||||
|     assert default_map == expected_default_map | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("arg_func", [dict, Series]) | ||||
| def test_map_dict_ignore_na(arg_func): | ||||
|     # GH#47527 | ||||
|     mapping = arg_func({1: 10, np.nan: 42}) | ||||
|     ser = Series([1, np.nan, 2]) | ||||
|     result = ser.map(mapping, na_action="ignore") | ||||
|     expected = Series([10, np.nan, np.nan]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_defaultdict_ignore_na(): | ||||
|     # GH#47527 | ||||
|     mapping = defaultdict(int, {1: 10, np.nan: 42}) | ||||
|     ser = Series([1, np.nan, 2]) | ||||
|     result = ser.map(mapping) | ||||
|     expected = Series([10, 42, 0]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "na_action, expected", | ||||
|     [(None, Series([10.0, 42.0, np.nan])), ("ignore", Series([10, np.nan, np.nan]))], | ||||
| ) | ||||
| def test_map_categorical_na_ignore(na_action, expected): | ||||
|     # GH#47527 | ||||
|     values = pd.Categorical([1, np.nan, 2], categories=[10, 1, 2]) | ||||
|     ser = Series(values) | ||||
|     result = ser.map({1: 10, np.nan: 42}, na_action=na_action) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_dict_subclass_with_missing(): | ||||
|     """ | ||||
|     Test Series.map with a dictionary subclass that defines __missing__, | ||||
|     i.e. sets a default value (GH #15999). | ||||
|     """ | ||||
|  | ||||
|     class DictWithMissing(dict): | ||||
|         def __missing__(self, key): | ||||
|             return "missing" | ||||
|  | ||||
|     s = Series([1, 2, 3]) | ||||
|     dictionary = DictWithMissing({3: "three"}) | ||||
|     result = s.map(dictionary) | ||||
|     expected = Series(["missing", "missing", "three"]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_dict_subclass_without_missing(): | ||||
|     class DictWithoutMissing(dict): | ||||
|         pass | ||||
|  | ||||
|     s = Series([1, 2, 3]) | ||||
|     dictionary = DictWithoutMissing({3: "three"}) | ||||
|     result = s.map(dictionary) | ||||
|     expected = Series([np.nan, np.nan, "three"]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_abc_mapping(non_dict_mapping_subclass): | ||||
|     # https://github.com/pandas-dev/pandas/issues/29733 | ||||
|     # Check collections.abc.Mapping support as mapper for Series.map | ||||
|     s = Series([1, 2, 3]) | ||||
|     not_a_dictionary = non_dict_mapping_subclass({3: "three"}) | ||||
|     result = s.map(not_a_dictionary) | ||||
|     expected = Series([np.nan, np.nan, "three"]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_abc_mapping_with_missing(non_dict_mapping_subclass): | ||||
|     # https://github.com/pandas-dev/pandas/issues/29733 | ||||
|     # Check collections.abc.Mapping support as mapper for Series.map | ||||
|     class NonDictMappingWithMissing(non_dict_mapping_subclass): | ||||
|         def __missing__(self, key): | ||||
|             return "missing" | ||||
|  | ||||
|     s = Series([1, 2, 3]) | ||||
|     not_a_dictionary = NonDictMappingWithMissing({3: "three"}) | ||||
|     result = s.map(not_a_dictionary) | ||||
|     # __missing__ is a dict concept, not a Mapping concept, | ||||
|     # so it should not change the result! | ||||
|     expected = Series([np.nan, np.nan, "three"]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_box_dt64(unit): | ||||
|     vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] | ||||
|     ser = Series(vals).dt.as_unit(unit) | ||||
|     assert ser.dtype == f"datetime64[{unit}]" | ||||
|     # boxed value must be Timestamp instance | ||||
|     res = ser.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") | ||||
|     exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) | ||||
|     tm.assert_series_equal(res, exp) | ||||
|  | ||||
|  | ||||
| def test_map_box_dt64tz(unit): | ||||
|     vals = [ | ||||
|         pd.Timestamp("2011-01-01", tz="US/Eastern"), | ||||
|         pd.Timestamp("2011-01-02", tz="US/Eastern"), | ||||
|     ] | ||||
|     ser = Series(vals).dt.as_unit(unit) | ||||
|     assert ser.dtype == f"datetime64[{unit}, US/Eastern]" | ||||
|     res = ser.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") | ||||
|     exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) | ||||
|     tm.assert_series_equal(res, exp) | ||||
|  | ||||
|  | ||||
| def test_map_box_td64(unit): | ||||
|     # timedelta | ||||
|     vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] | ||||
|     ser = Series(vals).dt.as_unit(unit) | ||||
|     assert ser.dtype == f"timedelta64[{unit}]" | ||||
|     res = ser.map(lambda x: f"{type(x).__name__}_{x.days}") | ||||
|     exp = Series(["Timedelta_1", "Timedelta_2"]) | ||||
|     tm.assert_series_equal(res, exp) | ||||
|  | ||||
|  | ||||
| def test_map_box_period(): | ||||
|     # period | ||||
|     vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] | ||||
|     ser = Series(vals) | ||||
|     assert ser.dtype == "Period[M]" | ||||
|     res = ser.map(lambda x: f"{type(x).__name__}_{x.freqstr}") | ||||
|     exp = Series(["Period_M", "Period_M"]) | ||||
|     tm.assert_series_equal(res, exp) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("na_action", [None, "ignore"]) | ||||
| def test_map_categorical(na_action, using_infer_string): | ||||
|     values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) | ||||
|     s = Series(values, name="XX", index=list("abcdefg")) | ||||
|  | ||||
|     result = s.map(lambda x: x.lower(), na_action=na_action) | ||||
|     exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) | ||||
|     exp = Series(exp_values, name="XX", index=list("abcdefg")) | ||||
|     tm.assert_series_equal(result, exp) | ||||
|     tm.assert_categorical_equal(result.values, exp_values) | ||||
|  | ||||
|     result = s.map(lambda x: "A", na_action=na_action) | ||||
|     exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) | ||||
|     tm.assert_series_equal(result, exp) | ||||
|     assert result.dtype == object if not using_infer_string else "str" | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "na_action, expected", | ||||
|     ( | ||||
|         [None, Series(["A", "B", "nan"], name="XX")], | ||||
|         [ | ||||
|             "ignore", | ||||
|             Series( | ||||
|                 ["A", "B", np.nan], | ||||
|                 name="XX", | ||||
|                 dtype=pd.CategoricalDtype(list("DCBA"), True), | ||||
|             ), | ||||
|         ], | ||||
|     ), | ||||
| ) | ||||
| def test_map_categorical_na_action(na_action, expected): | ||||
|     dtype = pd.CategoricalDtype(list("DCBA"), ordered=True) | ||||
|     values = pd.Categorical(list("AB") + [np.nan], dtype=dtype) | ||||
|     s = Series(values, name="XX") | ||||
|     result = s.map(str, na_action=na_action) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_map_datetimetz(): | ||||
|     values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize("Asia/Tokyo") | ||||
|     s = Series(values, name="XX") | ||||
|  | ||||
|     # keep tz | ||||
|     result = s.map(lambda x: x + pd.offsets.Day()) | ||||
|     exp_values = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize( | ||||
|         "Asia/Tokyo" | ||||
|     ) | ||||
|     exp = Series(exp_values, name="XX") | ||||
|     tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     result = s.map(lambda x: x.hour) | ||||
|     exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) | ||||
|     tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     # not vectorized | ||||
|     def f(x): | ||||
|         if not isinstance(x, pd.Timestamp): | ||||
|             raise ValueError | ||||
|         return str(x.tz) | ||||
|  | ||||
|     result = s.map(f) | ||||
|     exp = Series(["Asia/Tokyo"] * 25, name="XX") | ||||
|     tm.assert_series_equal(result, exp) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "vals,mapping,exp", | ||||
|     [ | ||||
|         (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]), | ||||
|         (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3), | ||||
|         (list(range(3)), {0: 42}, [42] + [np.nan] * 3), | ||||
|     ], | ||||
| ) | ||||
| def test_map_missing_mixed(vals, mapping, exp): | ||||
|     # GH20495 | ||||
|     s = Series(vals + [np.nan]) | ||||
|     result = s.map(mapping) | ||||
|     exp = Series(exp) | ||||
|     tm.assert_series_equal(result, exp) | ||||
|  | ||||
|  | ||||
| def test_map_scalar_on_date_time_index_aware_series(): | ||||
|     # GH 25959 | ||||
|     # Calling map on a localized time series should not cause an error | ||||
|     series = Series( | ||||
|         np.arange(10, dtype=np.float64), | ||||
|         index=date_range("2020-01-01", periods=10, tz="UTC"), | ||||
|         name="ts", | ||||
|     ) | ||||
|     result = Series(series.index).map(lambda x: 1) | ||||
|     tm.assert_series_equal(result, Series(np.ones(len(series)), dtype="int64")) | ||||
|  | ||||
|  | ||||
| def test_map_float_to_string_precision(): | ||||
|     # GH 13228 | ||||
|     ser = Series(1 / 3) | ||||
|     result = ser.map(lambda val: str(val)).to_dict() | ||||
|     expected = {0: "0.3333333333333333"} | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| def test_map_to_timedelta(): | ||||
|     list_of_valid_strings = ["00:00:01", "00:00:02"] | ||||
|     a = pd.to_timedelta(list_of_valid_strings) | ||||
|     b = Series(list_of_valid_strings).map(pd.to_timedelta) | ||||
|     tm.assert_series_equal(Series(a), b) | ||||
|  | ||||
|     list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT] | ||||
|  | ||||
|     a = pd.to_timedelta(list_of_strings) | ||||
|     ser = Series(list_of_strings) | ||||
|     b = ser.map(pd.to_timedelta) | ||||
|     tm.assert_series_equal(Series(a), b) | ||||
|  | ||||
|  | ||||
| def test_map_type(): | ||||
|     # GH 46719 | ||||
|     s = Series([3, "string", float], index=["a", "b", "c"]) | ||||
|     result = s.map(type) | ||||
|     expected = Series([int, str, type], index=["a", "b", "c"]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,82 @@ | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestMatmul:
    """Tests for ``operator.matmul`` (the ``@`` operator) involving a Series."""

    def test_matmul(self):
        """Compare matmul results against ``np.dot`` on the raw values.

        Covers Series/DataFrame/ndarray/list operands on either side, integer
        operands, and the errors raised for mismatched operands.
        """
        # matmul test is for GH#10259
        matmul = operator.matmul
        labels = ["p", "q", "r", "s"]
        keys = ["1", "2", "3"]

        vec = Series(np.random.default_rng(2).standard_normal(4), index=labels)
        tall = DataFrame(
            np.random.default_rng(2).standard_normal((3, 4)),
            index=keys,
            columns=labels,
        ).T
        # 3x4 orientation: rows keyed "1".."3", columns aligned with ``vec``.
        wide = tall.T

        # Series @ DataFrame -> Series
        tm.assert_series_equal(
            matmul(vec, tall), Series(np.dot(vec.values, tall.values), index=keys)
        )

        # DataFrame @ Series -> Series
        tm.assert_series_equal(
            matmul(wide, vec), Series(np.dot(wide.values, vec.T.values), index=keys)
        )

        # Series @ Series -> scalar
        tm.assert_almost_equal(matmul(vec, vec), np.dot(vec.values, vec.values))

        # GH#21530
        # vector (1D np.array) @ Series (__rmatmul__)
        tm.assert_almost_equal(
            matmul(vec.values, vec), np.dot(vec.values, vec.values)
        )

        # GH#21530
        # vector (1D list) @ Series (__rmatmul__)
        tm.assert_almost_equal(
            matmul(vec.values.tolist(), vec), np.dot(vec.values, vec.values)
        )

        # GH#21530
        # matrix (2D np.array) @ Series (__rmatmul__)
        tm.assert_almost_equal(
            matmul(wide.values, vec), np.dot(wide.values, vec.values)
        )

        # GH#21530
        # matrix (2D nested lists) @ Series (__rmatmul__)
        tm.assert_almost_equal(
            matmul(wide.values.tolist(), vec), np.dot(wide.values, vec.values)
        )

        # mixed dtype DataFrame @ Series
        vec["p"] = int(vec.p)
        tm.assert_series_equal(
            matmul(wide, vec), Series(np.dot(wide.values, vec.T.values), index=keys)
        )

        # different dtypes DataFrame @ Series
        vec = vec.astype(int)
        tm.assert_series_equal(
            matmul(wide, vec), Series(np.dot(wide.values, vec.T.values), index=keys)
        )

        shape_msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)"
        # exception raised is of type Exception
        with pytest.raises(Exception, match=shape_msg):
            vec.dot(vec.values[:3])

        align_msg = "matrices are not aligned"
        with pytest.raises(ValueError, match=align_msg):
            vec.dot(wide)
| @ -0,0 +1,248 @@ | ||||
| """ | ||||
| Note: for naming purposes, most tests are titled e.g. "test_nlargest_foo" | ||||
| but are implicitly also testing nsmallest_foo. | ||||
| """ | ||||
| from itertools import product | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Column labels of the frame built by the ``s_main_dtypes`` fixture; also used
# as the parameter list of the ``s_main_dtypes_split`` fixture.
main_dtypes = (
    ["datetime", "datetimetz", "timedelta"]
    + [f"int{bits}" for bits in (8, 16, 32, 64)]
    + ["float32", "float64"]
    + [f"uint{bits}" for bits in (8, 16, 32, 64)]
)
|  | ||||
|  | ||||
@pytest.fixture
def s_main_dtypes():
    """
    Five-row DataFrame holding one column per dtype under test.

    Columns, each named after its dtype:

    * datetime
    * datetimetz (localized to US/Eastern)
    * timedelta
    * [u]int{8,16,32,64}
    * float{32,64}

    Every column follows the same 3, 2, 1, 2, 5 pattern (years 2003..2005,
    days 3d..5d, or the plain integers).
    """
    years = ["2003", "2002", "2001", "2002", "2005"]
    frame = pd.DataFrame(
        {
            "datetime": pd.to_datetime(years),
            "datetimetz": pd.to_datetime(years).tz_localize("US/Eastern"),
            "timedelta": pd.to_timedelta(["3d", "2d", "1d", "2d", "5d"]),
        }
    )

    widths = (8, 16, 32, 64)
    numeric_names = (
        [f"int{w}" for w in widths]
        + ["float32", "float64"]
        + [f"uint{w}" for w in widths]
    )
    for name in numeric_names:
        frame[name] = Series([3, 2, 1, 2, 5], dtype=name)

    return frame
|  | ||||
|  | ||||
@pytest.fixture(params=main_dtypes)
def s_main_dtypes_split(request, s_main_dtypes):
    """Return one column of ``s_main_dtypes``, selected by the fixture param."""
    column = request.param
    return s_main_dtypes[column]
|  | ||||
|  | ||||
def assert_check_nselect_boundary(vals, dtype, method):
    """
    Helper for the 'test_boundary_{dtype}' tests.

    Builds a Series from ``vals`` with the given ``dtype``, calls the Series
    method named ``method`` with ``n=3`` and asserts the result equals the rows
    labelled ``[0, 1, 2]`` (for "nsmallest") or ``[3, 2, 1]`` (otherwise).
    """
    ser = Series(vals, dtype=dtype)
    if method == "nsmallest":
        expected_idxr = [0, 1, 2]
    else:
        expected_idxr = [3, 2, 1]

    select = getattr(ser, method)
    tm.assert_series_equal(select(3), ser.loc[expected_idxr])
|  | ||||
|  | ||||
class TestSeriesNLargestNSmallest:
    """Tests for ``Series.nlargest`` and ``Series.nsmallest``."""

    @pytest.mark.parametrize(
        "r",
        [
            Series([3.0, 2, 1, 2, "5"], dtype="object"),
            Series([3.0, 2, 1, 2, 5], dtype="object"),
            # not supported on some archs
            # Series([3., 2, 1, 2, 5], dtype='complex256'),
            Series([3.0, 2, 1, 2, 5], dtype="complex128"),
            Series(list("abcde")),
            Series(list("abcde"), dtype="category"),
        ],
    )
    def test_nlargest_error(self, r):
        """Both methods raise ``TypeError`` for these dtypes, whatever ``n`` is."""
        pattern = f"Cannot use method 'n(largest|smallest)' with dtype {r.dtype}"
        selectors = (r.nlargest, r.nsmallest)
        counts = (2, len(r), 0, -1)
        for select, n in product(selectors, counts):
            with pytest.raises(TypeError, match=pattern):
                select(n)

    def test_nsmallest_nlargest(self, s_main_dtypes_split):
        """Basic selection, empty selection and full-length selection."""
        # float, int, datetime64 (use i8), timedelta64 (same),
        # object that are numbers, object that are strings
        col = s_main_dtypes_split

        tm.assert_series_equal(col.nsmallest(2), col.iloc[[2, 1]])
        tm.assert_series_equal(col.nsmallest(2, keep="last"), col.iloc[[2, 3]])

        # n <= 0 selects nothing
        nothing = col.iloc[0:0]
        for name in ("nsmallest", "nlargest"):
            for n in (0, -1):
                tm.assert_series_equal(getattr(col, name)(n), nothing)

        # n >= len selects everything, in sorted order
        size = len(col)
        for n in (size, size + 1):
            tm.assert_series_equal(col.nsmallest(n), col.sort_values())
        for n in (size, size + 1):
            tm.assert_series_equal(col.nlargest(n), col.iloc[[4, 0, 1, 3, 2]])

    def test_nlargest_misc(self):
        """Default ``n``, invalid ``keep`` and all-equal values."""
        ser = Series([3.0, np.nan, 1, 2, 5])
        tm.assert_series_equal(ser.nlargest(), ser.iloc[[4, 0, 3, 2, 1]])
        tm.assert_series_equal(ser.nsmallest(), ser.iloc[[2, 3, 0, 4, 1]])

        msg = 'keep must be either "first", "last"'
        for select in (ser.nsmallest, ser.nlargest):
            with pytest.raises(ValueError, match=msg):
                select(keep="invalid")

        # GH#15297
        ones = Series([1] * 5, index=[1, 2, 3, 4, 5])
        leading = Series([1] * 3, index=[1, 2, 3])
        trailing = Series([1] * 3, index=[5, 4, 3])
        for select in (ones.nsmallest, ones.nlargest):
            tm.assert_series_equal(select(3), leading)
            tm.assert_series_equal(select(3, keep="last"), trailing)

    @pytest.mark.parametrize("n", range(1, 5))
    def test_nlargest_n(self, n):
        """With a duplicated index the result matches ``sort_values().head(n)``."""
        # GH 13412
        ser = Series([1, 4, 3, 2], index=[0, 0, 1, 1])

        largest_first = ser.sort_values(ascending=False)
        tm.assert_series_equal(ser.nlargest(n), largest_first.head(n))

        smallest_first = ser.sort_values()
        tm.assert_series_equal(ser.nsmallest(n), smallest_first.head(n))

    def test_nlargest_boundary_integer(self, nselect_method, any_int_numpy_dtype):
        """Values at the extremes of an integer dtype are ordered correctly."""
        # GH#21426
        info = np.iinfo(any_int_numpy_dtype)
        vals = [info.min, info.min + 1, info.max - 1, info.max]
        assert_check_nselect_boundary(vals, any_int_numpy_dtype, nselect_method)

    def test_nlargest_boundary_float(self, nselect_method, float_numpy_dtype):
        """Values at the extremes of a float dtype are ordered correctly."""
        # GH#21426
        info = np.finfo(float_numpy_dtype)
        lowest, highest = info.min, info.max
        # the representable neighbours of the extremes, stepping towards zero
        next_lowest, next_highest = np.nextafter(
            [lowest, highest], 0, dtype=float_numpy_dtype
        )
        vals = [lowest, next_lowest, next_highest, highest]
        assert_check_nselect_boundary(vals, float_numpy_dtype, nselect_method)

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_nlargest_boundary_datetimelike(self, nselect_method, dtype):
        """Values at the int64 extremes are ordered correctly for dt64/td64."""
        # GH#21426
        # use int64 bounds and +1 to min_val since true minimum is NaT
        # (include min_val/NaT at end to maintain same expected_idxr)
        bounds = np.iinfo("int64")
        lo, hi = bounds.min, bounds.max
        vals = [lo + 1, lo + 2, hi - 1, hi, lo]
        assert_check_nselect_boundary(vals, dtype, nselect_method)

    def test_nlargest_duplicate_keep_all_ties(self):
        """``keep="all"`` returns every value tied with the n-th one."""
        # see GH#16818
        ser = Series([10, 9, 8, 7, 7, 7, 7, 6])

        top = ser.nlargest(4, keep="all")
        tm.assert_series_equal(top, Series([10, 9, 8, 7, 7, 7, 7]))

        bottom = ser.nsmallest(2, keep="all")
        tm.assert_series_equal(
            bottom, Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6])
        )

    @pytest.mark.parametrize(
        "data,expected", [([True, False], [True]), ([True, False, True, True], [True])]
    )
    def test_nlargest_boolean(self, data, expected):
        """The largest boolean is ``True``."""
        # GH#26154 : ensure True > False
        largest = Series(data).nlargest(1)
        tm.assert_series_equal(largest, Series(expected))

    def test_nlargest_nullable(self, any_numeric_ea_dtype):
        """A masked entry is left out of the selection for nullable dtypes."""
        # GH#42816
        dtype = any_numeric_ea_dtype
        rng = np.random.default_rng(2)
        if dtype.startswith("UInt"):
            # Can't cast from negative float to uint on some platforms
            arr = rng.integers(1, 10, 10)
        else:
            arr = rng.standard_normal(10)
        arr = arr.astype(dtype.lower(), copy=False)

        ser = Series(arr.copy(), dtype=dtype)
        ser[1] = pd.NA
        result = ser.nlargest(5)

        # Same data with position 1 removed, selected and then cast to ``dtype``.
        without_na = Series(np.delete(arr, 1), index=ser.index.delete(1))
        expected = without_na.nlargest(5).astype(dtype)
        tm.assert_series_equal(result, expected)

    def test_nsmallest_nan_when_keep_is_all(self):
        """Behaviour of ``nsmallest(keep="all")`` when NaNs are present."""
        # GH#46589
        ser = Series([1, 2, 3, 3, 3, None])
        tm.assert_series_equal(
            ser.nsmallest(3, keep="all"), Series([1.0, 2.0, 3.0, 3.0, 3.0])
        )

        ser = Series([1, 2, None, None, None])
        tm.assert_series_equal(
            ser.nsmallest(3, keep="all"), Series([1, 2, None, None, None])
        )
| @ -0,0 +1,24 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     Series, | ||||
| ) | ||||
|  | ||||
|  | ||||
def test_nunique():
    """``nunique`` skips NaN and counts a repeated value once."""
    # basics.rst doc example
    values = Series(np.random.default_rng(2).standard_normal(500))
    values[20:500] = np.nan
    values[10:20] = 5000

    # positions 0-9 keep their random draws; positions 10-19 share one value
    assert values.nunique() == 11
|  | ||||
|  | ||||
def test_nunique_categorical():
    """An empty or all-NaN categorical Series has zero unique values."""
    # GH#18051
    for values in ([], [np.nan]):
        ser = Series(Categorical(values))
        assert ser.nunique() == 0
| @ -0,0 +1,128 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestSeriesPctChange:
    """Tests for ``Series.pct_change``."""

    def test_pct_change(self, datetime_series):
        """Compare ``pct_change`` with the equivalent shift-and-divide formula."""
        msg = (
            "The 'fill_method' keyword being not None and the 'limit' keyword in "
            "Series.pct_change are deprecated"
        )
        ts = datetime_series

        # no filling, one period
        change = ts.pct_change(fill_method=None)
        tm.assert_series_equal(change, ts / ts.shift(1) - 1)

        # two periods
        change = ts.pct_change(2)
        padded = ts.ffill()
        tm.assert_series_equal(change, padded / padded.shift(2) - 1)

        # explicit fill_method/limit emits a FutureWarning
        with tm.assert_produces_warning(FutureWarning, match=msg):
            change = ts.pct_change(fill_method="bfill", limit=1)
        backfilled = ts.bfill(limit=1)
        tm.assert_series_equal(change, backfilled / backfilled.shift(1) - 1)

        # shifting by frequency instead of by position
        change = ts.pct_change(freq="5D")
        padded = ts.ffill()
        by_freq = (padded / padded.shift(freq="5D") - 1).reindex_like(padded)
        tm.assert_series_equal(change, by_freq)

    def test_pct_change_with_duplicate_axis(self):
        """``freq="B"`` applied to a daily index."""
        # GH#28664
        common_idx = date_range("2019-11-14", periods=5, freq="D")
        result = Series(range(5), common_idx).pct_change(freq="B")

        # the reason that the expected should be like this is documented at PR 28681
        expected = Series([np.nan, np.inf, np.nan, np.nan, 3.0], common_idx)

        tm.assert_series_equal(result, expected)

    def test_pct_change_shift_over_nas(self):
        """The default call warns and fills the NaN before computing changes."""
        ser = Series([1.0, 1.5, np.nan, 2.5, 3.0])

        msg = "The default fill_method='pad' in Series.pct_change is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            change = ser.pct_change()

        tm.assert_series_equal(
            change, Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
        )

    @pytest.mark.parametrize(
        "freq, periods, fill_method, limit",
        [
            ("5B", 5, None, None),
            ("3B", 3, None, None),
            ("3B", 3, "bfill", None),
            ("7B", 7, "pad", 1),
            ("7B", 7, "bfill", 3),
            ("14B", 14, None, None),
        ],
    )
    def test_pct_change_periods_freq(
        self, freq, periods, fill_method, limit, datetime_series
    ):
        """Shifting by ``freq`` and by the matching ``periods`` give equal results."""
        msg = (
            "The 'fill_method' keyword being not None and the 'limit' keyword in "
            "Series.pct_change are deprecated"
        )
        empty_ts = Series(index=datetime_series.index, dtype=object)

        # GH#7292
        # Same comparison for the populated series and for the all-missing one.
        for target in (datetime_series, empty_ts):
            with tm.assert_produces_warning(FutureWarning, match=msg):
                by_freq = target.pct_change(
                    freq=freq, fill_method=fill_method, limit=limit
                )
            with tm.assert_produces_warning(FutureWarning, match=msg):
                by_periods = target.pct_change(
                    periods, fill_method=fill_method, limit=limit
                )
            tm.assert_series_equal(by_freq, by_periods)
|  | ||||
|  | ||||
@pytest.mark.parametrize("fill_method", ["pad", "ffill", None])
def test_pct_change_with_duplicated_indices(fill_method):
    """``pct_change`` works positionally when index labels repeat."""
    # GH30463
    labels = ["a", "b"] * 3
    ser = Series([np.nan, 1, 2, 3, 9, 18], index=labels)

    msg = (
        "The 'fill_method' keyword being not None and the 'limit' keyword in "
        "Series.pct_change are deprecated"
    )
    # Only a non-None fill_method is expected to warn.
    if fill_method is None:
        warn = None
    else:
        warn = FutureWarning

    with tm.assert_produces_warning(warn, match=msg):
        result = ser.pct_change(fill_method=fill_method)

    tm.assert_series_equal(
        result, Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=labels)
    )
|  | ||||
|  | ||||
def test_pct_change_no_warning_na_beginning():
    """Default ``pct_change`` on a Series whose first entries are missing."""
    # GH#54981
    values = Series([None, None, 1, 2, 3])

    change = values.pct_change()

    tm.assert_series_equal(change, Series([np.nan, np.nan, np.nan, 1, 0.5]))
|  | ||||
|  | ||||
def test_pct_change_empty():
    """``pct_change(periods=0)`` on an empty float Series returns it unchanged."""
    # GH 57056
    empty = Series([], dtype="float64")
    untouched = empty.copy()

    tm.assert_series_equal(untouched, empty.pct_change(periods=0))
| @ -0,0 +1,13 @@ | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
def test_pop():
    """``pop`` returns the value and removes its label, keeping the name."""
    # GH#6600
    ser = Series([0, 4, 0], index=["A", "B", "C"], name=4)

    popped = ser.pop("B")
    assert popped == 4

    # The Series itself is modified in place.
    remaining = Series([0, 0], index=["A", "C"], name=4)
    tm.assert_series_equal(ser, remaining)
| @ -0,0 +1,247 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_integer | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.indexes.datetimes import Timestamp | ||||
|  | ||||
|  | ||||
class TestSeriesQuantile:
    """Tests for ``Series.quantile``."""

    def test_quantile(self, datetime_series):
        """Scalar quantiles for several dtypes, plus rejection of invalid ``q``."""
        for q, pct in ((0.1, 10), (0.9, 90)):
            assert datetime_series.quantile(q) == np.percentile(
                datetime_series.dropna(), pct
            )

        # object dtype
        as_object = Series(datetime_series, dtype=object)
        assert as_object.quantile(0.9) == np.percentile(datetime_series.dropna(), 90)

        # datetime64[ns] dtype
        dts = datetime_series.index.to_series()
        assert dts.quantile(0.2) == Timestamp("2000-01-10 19:12:00")

        # timedelta64[ns] dtype
        tds = dts.diff()
        assert tds.quantile(0.25) == pd.to_timedelta("24:00:00")

        # GH7661
        nat_sum = Series([np.timedelta64("NaT")]).sum()
        assert nat_sum == pd.Timedelta(0)

        msg = "percentiles should all be in the interval \\[0, 1\\]"
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with pytest.raises(ValueError, match=msg):
                datetime_series.quantile(invalid)

        random_ser = Series(np.random.default_rng(2).standard_normal(100))
        out_of_range = [-0.5, 0.25, 1.5]
        with pytest.raises(ValueError, match=msg):
            random_ser.quantile(out_of_range)

    def test_quantile_multi(self, datetime_series, unit):
        """List-like ``q`` returns a Series indexed by the requested quantiles."""
        datetime_series.index = datetime_series.index.as_unit(unit)

        qs = [0.1, 0.9]
        expected = Series(
            [np.percentile(datetime_series.dropna(), pct) for pct in (10, 90)],
            index=qs,
            name=datetime_series.name,
        )
        tm.assert_series_equal(datetime_series.quantile(qs), expected)

        # repeated quantiles on datetime data keep the name and the unit
        dts = datetime_series.index.to_series()
        dts.name = "xxx"
        stamp = Timestamp("2000-01-10 19:12:00")
        expected = Series(
            [stamp, stamp],
            index=[0.2, 0.2],
            name="xxx",
            dtype=f"M8[{unit}]",
        )
        tm.assert_series_equal(dts.quantile((0.2, 0.2)), expected)

        # empty list of quantiles
        expected = Series(
            [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64"
        )
        tm.assert_series_equal(datetime_series.quantile([]), expected)

    def test_quantile_interpolation(self, datetime_series):
        """Explicit ``interpolation="linear"`` equals the default."""
        # see gh-10174
        reference = np.percentile(datetime_series.dropna(), 10)

        # interpolation = linear (default case)
        explicit = datetime_series.quantile(0.1, interpolation="linear")
        assert explicit == reference
        implicit = datetime_series.quantile(0.1)
        assert implicit == np.percentile(datetime_series.dropna(), 10)

        # test with and without interpolation keyword
        assert explicit == implicit

    def test_quantile_interpolation_dtype(self):
        """"lower" and "higher" interpolation return an integer for int data."""
        # GH #10174
        for interpolation in ("lower", "higher"):
            q = Series([1, 3, 4]).quantile(0.5, interpolation=interpolation)
            assert q == np.percentile(np.array([1, 3, 4]), 50)
            assert is_integer(q)

    def test_quantile_nan(self):
        """NaNs are skipped; all-NaN or empty input yields NaN."""
        # GH 13098
        with_nan = Series([1, 2, 3, 4, np.nan])
        assert with_nan.quantile(0.5) == 2.5

        # all nan/empty
        blanks = [Series([], dtype=object), Series([np.nan, np.nan])]
        for blank in blanks:
            assert np.isnan(blank.quantile(0.5))

            tm.assert_series_equal(
                blank.quantile([0.5]), Series([np.nan], index=[0.5])
            )
            tm.assert_series_equal(
                blank.quantile([0.2, 0.3]),
                Series([np.nan, np.nan], index=[0.2, 0.3]),
            )

    @pytest.mark.parametrize(
        "case",
        [
            # naive datetimes, tz-aware datetimes and timedeltas, first as-is
            # and then with a trailing NaT
            values + tail
            for tail in ([], [pd.NaT])
            for values in (
                [Timestamp(f"2011-01-0{day}") for day in (1, 2, 3)],
                [Timestamp(f"2011-01-0{day}", tz="US/Eastern") for day in (1, 2, 3)],
                [pd.Timedelta(f"{day} days") for day in (1, 2, 3)],
            )
        ],
    )
    def test_quantile_box(self, case):
        """The median of datetime-like data comes back as the boxed scalar."""
        ser = Series(case, name="XXX")
        median = case[1]

        assert ser.quantile(0.5) == median

        tm.assert_series_equal(
            ser.quantile([0.5]), Series([median], index=[0.5], name="XXX")
        )

    def test_datetime_timedelta_quantiles(self):
        """Empty datetime64/timedelta64 Series give a missing value."""
        # covers #9694
        for dtype in ("M8[ns]", "m8[ns]"):
            assert pd.isna(Series([], dtype=dtype).quantile(0.5))

    def test_quantile_nat(self):
        """All-NaT input yields NaT for scalar and list-like ``q``."""
        scalar = Series([pd.NaT, pd.NaT]).quantile(0.5)
        assert scalar is pd.NaT

        listlike = Series([pd.NaT, pd.NaT]).quantile([0.5])
        tm.assert_series_equal(listlike, Series([pd.NaT], index=[0.5]))

    @pytest.mark.parametrize(
        "values, dtype",
        [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
    )
    def test_quantile_sparse(self, values, dtype):
        """Sparse input matches the dense result cast to ``Sparse[float]``."""
        ser = Series(values, dtype=dtype)
        result = ser.quantile([0.5])

        dense = Series(np.asarray(ser))
        expected = dense.quantile([0.5]).astype("Sparse[float]")
        tm.assert_series_equal(result, expected)

    def test_quantile_empty_float64(self):
        """Empty float64 Series gives NaN."""
        # floats
        empty = Series([], dtype="float64")

        assert np.isnan(empty.quantile(0.5))

        tm.assert_series_equal(empty.quantile([0.5]), Series([np.nan], index=[0.5]))

    def test_quantile_empty_int64(self):
        """Empty int64 Series gives NaN."""
        # int
        empty = Series([], dtype="int64")

        assert np.isnan(empty.quantile(0.5))

        tm.assert_series_equal(empty.quantile([0.5]), Series([np.nan], index=[0.5]))

    def test_quantile_empty_dt64(self):
        """Empty datetime64 Series gives NaT and keeps its dtype."""
        # datetime
        empty = Series([], dtype="datetime64[ns]")

        assert empty.quantile(0.5) is pd.NaT

        tm.assert_series_equal(
            empty.quantile([0.5]),
            Series([pd.NaT], index=[0.5], dtype=empty.dtype),
        )

    @pytest.mark.parametrize("dtype", [int, float, "Int64"])
    def test_quantile_dtypes(self, dtype):
        """Result dtype is float64, or Float64 for the nullable Int64 input."""
        result = Series([1, 2, 3], dtype=dtype).quantile(np.arange(0, 1, 0.25))

        expected = Series(np.arange(1, 3, 0.5), index=np.arange(0, 1, 0.25))
        if dtype == "Int64":
            expected = expected.astype("Float64")
        tm.assert_series_equal(result, expected)

    def test_quantile_all_na(self, any_int_ea_dtype):
        """All-NA nullable integer input gives NA without any warning."""
        # GH#50681
        qs = [0.1, 0.5]
        ser = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
        with tm.assert_produces_warning(None):
            result = ser.quantile(qs)

        tm.assert_series_equal(
            result, Series([pd.NA, pd.NA], dtype=any_int_ea_dtype, index=qs)
        )

    def test_quantile_dtype_size(self, any_int_ea_dtype):
        """The nullable integer dtype is kept in the result."""
        # GH#50681
        qs = [0.1, 0.5]
        ser = Series([pd.NA, pd.NA, 1], dtype=any_int_ea_dtype)

        tm.assert_series_equal(
            ser.quantile(qs), Series([1, 1], dtype=any_int_ea_dtype, index=qs)
        )
| @ -0,0 +1,563 @@ | ||||
| from itertools import chain | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas._libs.algos import ( | ||||
|     Infinity, | ||||
|     NegInfinity, | ||||
| ) | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     NaT, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import CategoricalDtype | ||||
|  | ||||
|  | ||||
@pytest.fixture
def ser():
    """Ten-element Series with repeated values and two NaNs."""
    values = [1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]
    return Series(values)
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        [name, np.array(values)]
        for name, values in (
            ("average", [1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5]),
            ("min", [1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5]),
            ("max", [2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6]),
            ("first", [1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6]),
            ("dense", [1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]),
        )
    ],
    ids=lambda case: case[0],
)
def results(request):
    """
    Return a ``[name, array]`` pair; the name doubles as the test id.

    NOTE(review): the arrays look like the expected ranks of the ``ser``
    fixture for each tie-breaking method -- confirm against the tests using it.
    """
    return request.param
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        "object",
        "float64",
        "int64",
        "Float64",
        "Int64",
        # pyarrow-backed dtypes are skipped when pyarrow is not installed
        *(
            pytest.param(name, marks=td.skip_if_no("pyarrow"))
            for name in ("float64[pyarrow]", "int64[pyarrow]", "string[pyarrow]")
        ),
        "string[python]",
        "str",
    ]
)
def dtype(request):
    """Return the dtype name for the current parametrization."""
    return request.param
|  | ||||
|  | ||||
def expected_dtype(dtype, method, pct=False):
    """
    Map an input dtype name to the dtype name expected for the result.

    * ``"string[pyarrow]"`` -> ``"Float64"``
    * ``"float64[pyarrow]"`` / ``"int64[pyarrow]"`` -> ``"double[pyarrow]"``
      when ``method == "average"`` or ``pct`` is truthy, else
      ``"uint64[pyarrow]"``
    * anything else -> ``"float64"``
    """
    if dtype == "string[pyarrow]":
        return "Float64"

    if dtype in ("float64[pyarrow]", "int64[pyarrow]"):
        fractional = method == "average" or pct
        return "double[pyarrow]" if fractional else "uint64[pyarrow]"

    return "float64"
|  | ||||
|  | ||||
| class TestSeriesRank: | ||||
|     def test_rank(self, datetime_series): | ||||
|         """ | ||||
|         Check default ``rank`` output against scipy and a series of fixed cases. | ||||
|  | ||||
|         Skipped when scipy is not installed. ``iseries`` is rebound and mutated | ||||
|         from section to section, so the statements below are order-dependent. | ||||
|         """ | ||||
|         sp_stats = pytest.importorskip("scipy.stats") | ||||
|  | ||||
|         # inject missing values and repeated values into the fixture | ||||
|         datetime_series[::2] = np.nan | ||||
|         datetime_series[:10:3] = 4.0 | ||||
|  | ||||
|         ranks = datetime_series.rank() | ||||
|         oranks = datetime_series.astype("O").rank() | ||||
|  | ||||
|         # object-dtype data must rank the same as the float data | ||||
|         tm.assert_series_equal(ranks, oranks) | ||||
|  | ||||
|         # build the reference with NaN replaced by +inf, then mask those | ||||
|         # positions back to NaN | ||||
|         mask = np.isnan(datetime_series) | ||||
|         filled = datetime_series.fillna(np.inf) | ||||
|  | ||||
|         # rankdata returns a ndarray | ||||
|         exp = Series(sp_stats.rankdata(filled), index=filled.index, name="ts") | ||||
|         exp[mask] = np.nan | ||||
|  | ||||
|         tm.assert_series_equal(ranks, exp) | ||||
|  | ||||
|         # integer input ranks the same as its float cast | ||||
|         iseries = Series(np.arange(5).repeat(2)) | ||||
|  | ||||
|         iranks = iseries.rank() | ||||
|         exp = iseries.astype(float).rank() | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|         # pct=True: five distinct values give rank / 5 | ||||
|         iseries = Series(np.arange(5)) + 1.0 | ||||
|         exp = iseries / 5.0 | ||||
|         iranks = iseries.rank(pct=True) | ||||
|  | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # 100 tied values share one percentage rank | ||||
|         iseries = Series(np.repeat(1, 100)) | ||||
|         exp = Series(np.repeat(0.505, 100)) | ||||
|         iranks = iseries.rank(pct=True) | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # same data with one NaN: the NaN stays NaN and the rest share 50 / 99 | ||||
|         # Explicit cast to float to avoid implicit cast when setting nan | ||||
|         iseries = iseries.astype("float") | ||||
|         iseries[1] = np.nan | ||||
|         exp = Series(np.repeat(50.0 / 99.0, 100)) | ||||
|         exp[1] = np.nan | ||||
|         iranks = iseries.rank(pct=True) | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # a trailing NaN is excluded from the denominator (4 values remain) | ||||
|         iseries = Series(np.arange(5)) + 1.0 | ||||
|         iseries[4] = np.nan | ||||
|         exp = iseries / 4.0 | ||||
|         iranks = iseries.rank(pct=True) | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # all-NaN input ranks to all-NaN | ||||
|         iseries = Series(np.repeat(np.nan, 100)) | ||||
|         exp = iseries.copy() | ||||
|         iranks = iseries.rank(pct=True) | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # Explicit cast to float to avoid implicit cast when setting nan | ||||
|         iseries = Series(np.arange(5), dtype="float") + 1 | ||||
|         iseries[4] = np.nan | ||||
|         exp = iseries / 4.0 | ||||
|         iranks = iseries.rank(pct=True) | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # same case again, this time with a date index | ||||
|         rng = date_range("1/1/1990", periods=5) | ||||
|         # Explicit cast to float to avoid implicit cast when setting nan | ||||
|         iseries = Series(np.arange(5), rng, dtype="float") + 1 | ||||
|         iseries.iloc[4] = np.nan | ||||
|         exp = iseries / 4.0 | ||||
|         iranks = iseries.rank(pct=True) | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # values that differ only at very small magnitudes must not tie | ||||
|         iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) | ||||
|         exp = Series([2, 1, 3, 5, 4, 6.0]) | ||||
|         iranks = iseries.rank() | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # GH 5968 | ||||
|         # timedelta data: NaT is left unranked | ||||
|         iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]") | ||||
|         exp = Series([3, 2, 1, np.nan]) | ||||
|         iranks = iseries.rank() | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|         # ``values`` is sorted ascending, so after permuting it the expected | ||||
|         # rank of each element is its original position + 1 | ||||
|         values = np.array( | ||||
|             [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40], | ||||
|             dtype="float64", | ||||
|         ) | ||||
|         random_order = np.random.default_rng(2).permutation(len(values)) | ||||
|         iseries = Series(values[random_order]) | ||||
|         exp = Series(random_order + 1.0, dtype="float64") | ||||
|         iranks = iseries.rank() | ||||
|         tm.assert_series_equal(iranks, exp) | ||||
|  | ||||
|     def test_rank_categorical(self): | ||||
|         # GH issue #15420 rank incorrectly orders ordered categories | ||||
|  | ||||
|         # Test ascending/descending ranking for ordered categoricals | ||||
|         exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) | ||||
|         exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0]) | ||||
|         ordered = Series( | ||||
|             ["first", "second", "third", "fourth", "fifth", "sixth"] | ||||
|         ).astype( | ||||
|             CategoricalDtype( | ||||
|                 categories=["first", "second", "third", "fourth", "fifth", "sixth"], | ||||
|                 ordered=True, | ||||
|             ) | ||||
|         ) | ||||
|         tm.assert_series_equal(ordered.rank(), exp) | ||||
|         tm.assert_series_equal(ordered.rank(ascending=False), exp_desc) | ||||
|  | ||||
|         # Unordered categoricals should be ranked as objects | ||||
|         unordered = Series( | ||||
|             ["first", "second", "third", "fourth", "fifth", "sixth"] | ||||
|         ).astype( | ||||
|             CategoricalDtype( | ||||
|                 categories=["first", "second", "third", "fourth", "fifth", "sixth"], | ||||
|                 ordered=False, | ||||
|             ) | ||||
|         ) | ||||
|         exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0]) | ||||
|         res = unordered.rank() | ||||
|         tm.assert_series_equal(res, exp_unordered) | ||||
|  | ||||
|         unordered1 = Series([1, 2, 3, 4, 5, 6]).astype( | ||||
|             CategoricalDtype([1, 2, 3, 4, 5, 6], False) | ||||
|         ) | ||||
|         exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) | ||||
|         res1 = unordered1.rank() | ||||
|         tm.assert_series_equal(res1, exp_unordered1) | ||||
|  | ||||
|         # Test na_option for rank data | ||||
|         na_ser = Series( | ||||
|             ["first", "second", "third", "fourth", "fifth", "sixth", np.nan] | ||||
|         ).astype( | ||||
|             CategoricalDtype( | ||||
|                 ["first", "second", "third", "fourth", "fifth", "sixth", "seventh"], | ||||
|                 True, | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|         exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0]) | ||||
|         exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) | ||||
|         exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan]) | ||||
|  | ||||
|         tm.assert_series_equal(na_ser.rank(na_option="top"), exp_top) | ||||
|         tm.assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot) | ||||
|         tm.assert_series_equal(na_ser.rank(na_option="keep"), exp_keep) | ||||
|  | ||||
|         # Test na_option for rank data with ascending False | ||||
|         exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]) | ||||
|         exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0]) | ||||
|         exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.nan]) | ||||
|  | ||||
|         tm.assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top) | ||||
|         tm.assert_series_equal( | ||||
|             na_ser.rank(na_option="bottom", ascending=False), exp_bot | ||||
|         ) | ||||
|         tm.assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep) | ||||
|  | ||||
|         # Test invalid values for na_option | ||||
|         msg = "na_option must be one of 'keep', 'top', or 'bottom'" | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             na_ser.rank(na_option="bad", ascending=False) | ||||
|  | ||||
|         # invalid type | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             na_ser.rank(na_option=True, ascending=False) | ||||
|  | ||||
|         # Test with pct=True | ||||
|         na_ser = Series(["first", "second", "third", "fourth", np.nan]).astype( | ||||
|             CategoricalDtype(["first", "second", "third", "fourth"], True) | ||||
|         ) | ||||
|         exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2]) | ||||
|         exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0]) | ||||
|         exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.nan]) | ||||
|  | ||||
|         tm.assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top) | ||||
|         tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot) | ||||
|         tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep) | ||||
|  | ||||
|     def test_rank_signature(self): | ||||
|         s = Series([0, 1]) | ||||
|         s.rank(method="average") | ||||
|         msg = "No axis named average for object type Series" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             s.rank("average") | ||||
|  | ||||
|     def test_rank_tie_methods(self, ser, results, dtype, using_infer_string): | ||||
|         method, exp = results | ||||
|         if ( | ||||
|             dtype == "int64" | ||||
|             or dtype == "Int64" | ||||
|             or (not using_infer_string and dtype == "str") | ||||
|         ): | ||||
|             pytest.skip("int64/str does not support NaN") | ||||
|  | ||||
|         ser = ser if dtype is None else ser.astype(dtype) | ||||
|         result = ser.rank(method=method) | ||||
|         tm.assert_series_equal(result, Series(exp, dtype=expected_dtype(dtype, method))) | ||||
|  | ||||
|     @pytest.mark.parametrize("ascending", [True, False]) | ||||
|     @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) | ||||
|     @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype, na_value, pos_inf, neg_inf", | ||||
|         [ | ||||
|             ("object", None, Infinity(), NegInfinity()), | ||||
|             ("float64", np.nan, np.inf, -np.inf), | ||||
|             ("Float64", NA, np.inf, -np.inf), | ||||
|             pytest.param( | ||||
|                 "float64[pyarrow]", | ||||
|                 NA, | ||||
|                 np.inf, | ||||
|                 -np.inf, | ||||
|                 marks=td.skip_if_no("pyarrow"), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_rank_tie_methods_on_infs_nans( | ||||
|         self, method, na_option, ascending, dtype, na_value, pos_inf, neg_inf | ||||
|     ): | ||||
|         pytest.importorskip("scipy") | ||||
|         if dtype == "float64[pyarrow]": | ||||
|             if method == "average": | ||||
|                 exp_dtype = "float64[pyarrow]" | ||||
|             else: | ||||
|                 exp_dtype = "uint64[pyarrow]" | ||||
|         else: | ||||
|             exp_dtype = "float64" | ||||
|  | ||||
|         chunk = 3 | ||||
|         in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk | ||||
|         iseries = Series(in_arr, dtype=dtype) | ||||
|         exp_ranks = { | ||||
|             "average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]), | ||||
|             "min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]), | ||||
|             "max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]), | ||||
|             "first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]), | ||||
|             "dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]), | ||||
|         } | ||||
|         ranks = exp_ranks[method] | ||||
|         if na_option == "top": | ||||
|             order = [ranks[1], ranks[0], ranks[2]] | ||||
|         elif na_option == "bottom": | ||||
|             order = [ranks[0], ranks[2], ranks[1]] | ||||
|         else: | ||||
|             order = [ranks[0], [np.nan] * chunk, ranks[1]] | ||||
|         expected = order if ascending else order[::-1] | ||||
|         expected = list(chain.from_iterable(expected)) | ||||
|         result = iseries.rank(method=method, na_option=na_option, ascending=ascending) | ||||
|         tm.assert_series_equal(result, Series(expected, dtype=exp_dtype)) | ||||
|  | ||||
|     def test_rank_desc_mix_nans_infs(self): | ||||
|         # GH 19538 | ||||
|         # check descending ranking when mix nans and infs | ||||
|         iseries = Series([1, np.nan, np.inf, -np.inf, 25]) | ||||
|         result = iseries.rank(ascending=False) | ||||
|         exp = Series([3, np.nan, 1, 4, 2], dtype="float64") | ||||
|         tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "op, value", | ||||
|         [ | ||||
|             [operator.add, 0], | ||||
|             [operator.add, 1e6], | ||||
|             [operator.mul, 1e-6], | ||||
|         ], | ||||
|     ) | ||||
|     def test_rank_methods_series(self, method, op, value): | ||||
|         sp_stats = pytest.importorskip("scipy.stats") | ||||
|  | ||||
|         xs = np.random.default_rng(2).standard_normal(9) | ||||
|         xs = np.concatenate([xs[i:] for i in range(0, 9, 2)])  # add duplicates | ||||
|         np.random.default_rng(2).shuffle(xs) | ||||
|  | ||||
|         index = [chr(ord("a") + i) for i in range(len(xs))] | ||||
|         vals = op(xs, value) | ||||
|         ts = Series(vals, index=index) | ||||
|         result = ts.rank(method=method) | ||||
|         sprank = sp_stats.rankdata(vals, method if method != "first" else "ordinal") | ||||
|         expected = Series(sprank, index=index).astype("float64") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ser, exp", | ||||
|         [ | ||||
|             ([1], [1]), | ||||
|             ([2], [1]), | ||||
|             ([0], [1]), | ||||
|             ([2, 2], [1, 1]), | ||||
|             ([1, 2, 3], [1, 2, 3]), | ||||
|             ([4, 2, 1], [3, 2, 1]), | ||||
|             ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), | ||||
|             ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_rank_dense_method(self, dtype, ser, exp): | ||||
|         if ser[0] < 0 and dtype.startswith("str"): | ||||
|             exp = exp[::-1] | ||||
|         s = Series(ser).astype(dtype) | ||||
|         result = s.rank(method="dense") | ||||
|         expected = Series(exp).astype(expected_dtype(dtype, "dense")) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_rank_descending(self, ser, results, dtype, using_infer_string): | ||||
|         method, _ = results | ||||
|         if dtype == "int64" or (not using_infer_string and dtype == "str"): | ||||
|             s = ser.dropna() | ||||
|         else: | ||||
|             s = ser.astype(dtype) | ||||
|  | ||||
|         res = s.rank(ascending=False) | ||||
|         if dtype.startswith("str"): | ||||
|             expected = (s.astype("float64").max() - s.astype("float64")).rank() | ||||
|         else: | ||||
|             expected = (s.max() - s).rank() | ||||
|         tm.assert_series_equal(res, expected.astype(expected_dtype(dtype, "average"))) | ||||
|  | ||||
|         if dtype.startswith("str"): | ||||
|             expected = (s.astype("float64").max() - s.astype("float64")).rank( | ||||
|                 method=method | ||||
|             ) | ||||
|         else: | ||||
|             expected = (s.max() - s).rank(method=method) | ||||
|         res2 = s.rank(method=method, ascending=False) | ||||
|         tm.assert_series_equal(res2, expected.astype(expected_dtype(dtype, method))) | ||||
|  | ||||
|     def test_rank_int(self, ser, results): | ||||
|         method, exp = results | ||||
|         s = ser.dropna().astype("i8") | ||||
|  | ||||
|         result = s.rank(method=method) | ||||
|         expected = Series(exp).dropna() | ||||
|         expected.index = result.index | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_rank_object_bug(self): | ||||
|         # GH 13445 | ||||
|  | ||||
|         # smoke tests | ||||
|         Series([np.nan] * 32).astype(object).rank(ascending=True) | ||||
|         Series([np.nan] * 32).astype(object).rank(ascending=False) | ||||
|  | ||||
|     def test_rank_modify_inplace(self): | ||||
|         # GH 18521 | ||||
|         # Check rank does not mutate series | ||||
|         s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT]) | ||||
|         expected = s.copy() | ||||
|  | ||||
|         s.rank() | ||||
|         result = s | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_rank_ea_small_values(self): | ||||
|         # GH#52471 | ||||
|         ser = Series( | ||||
|             [5.4954145e29, -9.791984e-21, 9.3715776e-26, NA, 1.8790257e-28], | ||||
|             dtype="Float64", | ||||
|         ) | ||||
|         result = ser.rank(method="min") | ||||
|         expected = Series([4, 1, 3, np.nan, 2]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| # GH15630, pct should be on 100% basis when method='dense' | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ser, exp", | ||||
|     [ | ||||
|         ([1], [1.0]), | ||||
|         ([1, 2], [1.0 / 2, 2.0 / 2]), | ||||
|         ([2, 2], [1.0, 1.0]), | ||||
|         ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), | ||||
|         ([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]), | ||||
|         ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), | ||||
|         ([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]), | ||||
|         ([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]), | ||||
|         ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), | ||||
|     ], | ||||
| ) | ||||
| def test_rank_dense_pct(dtype, ser, exp): | ||||
|     if ser[0] < 0 and dtype.startswith("str"): | ||||
|         exp = exp[::-1] | ||||
|     s = Series(ser).astype(dtype) | ||||
|     result = s.rank(method="dense", pct=True) | ||||
|     expected = Series(exp).astype(expected_dtype(dtype, "dense", pct=True)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ser, exp", | ||||
|     [ | ||||
|         ([1], [1.0]), | ||||
|         ([1, 2], [1.0 / 2, 2.0 / 2]), | ||||
|         ([2, 2], [1.0 / 2, 1.0 / 2]), | ||||
|         ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), | ||||
|         ([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]), | ||||
|         ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), | ||||
|         ([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]), | ||||
|         ([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]), | ||||
|         ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), | ||||
|     ], | ||||
| ) | ||||
| def test_rank_min_pct(dtype, ser, exp): | ||||
|     if ser[0] < 0 and dtype.startswith("str"): | ||||
|         exp = exp[::-1] | ||||
|     s = Series(ser).astype(dtype) | ||||
|     result = s.rank(method="min", pct=True) | ||||
|     expected = Series(exp).astype(expected_dtype(dtype, "min", pct=True)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ser, exp", | ||||
|     [ | ||||
|         ([1], [1.0]), | ||||
|         ([1, 2], [1.0 / 2, 2.0 / 2]), | ||||
|         ([2, 2], [1.0, 1.0]), | ||||
|         ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), | ||||
|         ([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]), | ||||
|         ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), | ||||
|         ([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]), | ||||
|         ([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]), | ||||
|         ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), | ||||
|     ], | ||||
| ) | ||||
| def test_rank_max_pct(dtype, ser, exp): | ||||
|     if ser[0] < 0 and dtype.startswith("str"): | ||||
|         exp = exp[::-1] | ||||
|     s = Series(ser).astype(dtype) | ||||
|     result = s.rank(method="max", pct=True) | ||||
|     expected = Series(exp).astype(expected_dtype(dtype, "max", pct=True)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ser, exp", | ||||
|     [ | ||||
|         ([1], [1.0]), | ||||
|         ([1, 2], [1.0 / 2, 2.0 / 2]), | ||||
|         ([2, 2], [1.5 / 2, 1.5 / 2]), | ||||
|         ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), | ||||
|         ([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]), | ||||
|         ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), | ||||
|         ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]), | ||||
|         ([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]), | ||||
|         ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), | ||||
|     ], | ||||
| ) | ||||
| def test_rank_average_pct(dtype, ser, exp): | ||||
|     if ser[0] < 0 and dtype.startswith("str"): | ||||
|         exp = exp[::-1] | ||||
|     s = Series(ser).astype(dtype) | ||||
|     result = s.rank(method="average", pct=True) | ||||
|     expected = Series(exp).astype(expected_dtype(dtype, "average", pct=True)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ser, exp", | ||||
|     [ | ||||
|         ([1], [1.0]), | ||||
|         ([1, 2], [1.0 / 2, 2.0 / 2]), | ||||
|         ([2, 2], [1.0 / 2, 2.0 / 2.0]), | ||||
|         ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), | ||||
|         ([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]), | ||||
|         ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), | ||||
|         ([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]), | ||||
|         ([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]), | ||||
|         ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), | ||||
|     ], | ||||
| ) | ||||
| def test_rank_first_pct(dtype, ser, exp): | ||||
|     if ser[0] < 0 and dtype.startswith("str"): | ||||
|         exp = exp[::-1] | ||||
|     s = Series(ser).astype(dtype) | ||||
|     result = s.rank(method="first", pct=True) | ||||
|     expected = Series(exp).astype(expected_dtype(dtype, "first", pct=True)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.single_cpu | ||||
| def test_pct_max_many_rows(): | ||||
|     # GH 18271 | ||||
|     s = Series(np.arange(2**24 + 1)) | ||||
|     result = s.rank(pct=True).max() | ||||
|     assert result == 1 | ||||
| @ -0,0 +1,443 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     Categorical, | ||||
|     Float64Dtype, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     NaT, | ||||
|     Period, | ||||
|     PeriodIndex, | ||||
|     RangeIndex, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
|     isna, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_reindex(datetime_series, string_series): | ||||
|     identity = string_series.reindex(string_series.index) | ||||
|  | ||||
|     assert tm.shares_memory(string_series.index, identity.index) | ||||
|  | ||||
|     assert identity.index.is_(string_series.index) | ||||
|     assert identity.index.identical(string_series.index) | ||||
|  | ||||
|     subIndex = string_series.index[10:20] | ||||
|     subSeries = string_series.reindex(subIndex) | ||||
|  | ||||
|     for idx, val in subSeries.items(): | ||||
|         assert val == string_series[idx] | ||||
|  | ||||
|     subIndex2 = datetime_series.index[10:20] | ||||
|     subTS = datetime_series.reindex(subIndex2) | ||||
|  | ||||
|     for idx, val in subTS.items(): | ||||
|         assert val == datetime_series[idx] | ||||
|     stuffSeries = datetime_series.reindex(subIndex) | ||||
|  | ||||
|     assert np.isnan(stuffSeries).all() | ||||
|  | ||||
|     # This is extremely important for the Cython code to not screw up | ||||
|     nonContigIndex = datetime_series.index[::2] | ||||
|     subNonContig = datetime_series.reindex(nonContigIndex) | ||||
|     for idx, val in subNonContig.items(): | ||||
|         assert val == datetime_series[idx] | ||||
|  | ||||
|     # return a copy the same index here | ||||
|     result = datetime_series.reindex() | ||||
|     assert result is not datetime_series | ||||
|  | ||||
|  | ||||
| def test_reindex_nan(): | ||||
|     ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8]) | ||||
|  | ||||
|     i, j = [np.nan, 1, np.nan, 8, 4, np.nan], [2, 0, 2, 3, 1, 2] | ||||
|     tm.assert_series_equal(ts.reindex(i), ts.iloc[j]) | ||||
|  | ||||
|     ts.index = ts.index.astype("object") | ||||
|  | ||||
|     # reindex coerces index.dtype to float, loc/iloc doesn't | ||||
|     tm.assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) | ||||
|  | ||||
|  | ||||
| def test_reindex_series_add_nat(): | ||||
|     rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") | ||||
|     series = Series(rng) | ||||
|  | ||||
|     result = series.reindex(range(15)) | ||||
|     assert np.issubdtype(result.dtype, np.dtype("M8[ns]")) | ||||
|  | ||||
|     mask = result.isna() | ||||
|     assert mask[-5:].all() | ||||
|     assert not mask[:-5].any() | ||||
|  | ||||
|  | ||||
| def test_reindex_with_datetimes(): | ||||
|     rng = date_range("1/1/2000", periods=20) | ||||
|     ts = Series(np.random.default_rng(2).standard_normal(20), index=rng) | ||||
|  | ||||
|     result = ts.reindex(list(ts.index[5:10])) | ||||
|     expected = ts[5:10] | ||||
|     expected.index = expected.index._with_freq(None) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = ts[list(ts.index[5:10])] | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_reindex_corner(datetime_series): | ||||
|     # (don't forget to fix this) I think it's fixed | ||||
|     empty = Series(index=[]) | ||||
|     empty.reindex(datetime_series.index, method="pad")  # it works | ||||
|  | ||||
|     # corner case: pad empty series | ||||
|     reindexed = empty.reindex(datetime_series.index, method="pad") | ||||
|  | ||||
|     # pass non-Index | ||||
|     reindexed = datetime_series.reindex(list(datetime_series.index)) | ||||
|     datetime_series.index = datetime_series.index._with_freq(None) | ||||
|     tm.assert_series_equal(datetime_series, reindexed) | ||||
|  | ||||
|     # bad fill method | ||||
|     ts = datetime_series[::2] | ||||
|     msg = ( | ||||
|         r"Invalid fill method\. Expecting pad \(ffill\), backfill " | ||||
|         r"\(bfill\) or nearest\. Got foo" | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ts.reindex(datetime_series.index, method="foo") | ||||
|  | ||||
|  | ||||
| def test_reindex_pad(): | ||||
|     s = Series(np.arange(10), dtype="int64") | ||||
|     s2 = s[::2] | ||||
|  | ||||
|     reindexed = s2.reindex(s.index, method="pad") | ||||
|     reindexed2 = s2.reindex(s.index, method="ffill") | ||||
|     tm.assert_series_equal(reindexed, reindexed2) | ||||
|  | ||||
|     expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8]) | ||||
|     tm.assert_series_equal(reindexed, expected) | ||||
|  | ||||
|  | ||||
| def test_reindex_pad2(): | ||||
|     """Compare forward-filling after ``reindex`` with ``reindex(method="ffill")``.""" | ||||
|     # GH4604 | ||||
|     s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) | ||||
|     # "g" and "f" are not labels of s | ||||
|     new_index = ["a", "g", "c", "f"] | ||||
|     expected = Series([1, 1, 3, 3], index=new_index) | ||||
|  | ||||
|     # this changes dtype because the ffill happens after | ||||
|     result = s.reindex(new_index).ffill() | ||||
|     tm.assert_series_equal(result, expected.astype("float64")) | ||||
|  | ||||
|     # downcast="infer" is expected to restore the integer dtype, with a | ||||
|     # deprecation warning | ||||
|     msg = "The 'downcast' keyword in ffill is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = s.reindex(new_index).ffill(downcast="infer") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # filling inside reindex gives 5 (the value at "e") for both new labels | ||||
|     expected = Series([1, 5, 3, 5], index=new_index) | ||||
|     result = s.reindex(new_index, method="ffill") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_reindex_inference(): | ||||
|     """Forward-filling a reindexed bool Series is expected to yield bool again.""" | ||||
|     # inference of new dtype | ||||
|     s = Series([True, False, False, True], index=list("abcd")) | ||||
|     # "g" is not a label of s | ||||
|     new_index = "agc" | ||||
|     msg = "Downcasting object dtype arrays on" | ||||
|     # only the reindex + ffill call is expected to emit the warning | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = s.reindex(list(new_index)).ffill() | ||||
|     expected = Series([True, True, False], index=list(new_index)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_reindex_downcasting(): | ||||
|     """Back-filling a shifted bool Series is expected to yield bool again.""" | ||||
|     # GH4618 shifted series downcasting | ||||
|     s = Series(False, index=range(5)) | ||||
|     msg = "Downcasting object dtype arrays on" | ||||
|     # only the shift + bfill call is expected to emit the warning | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = s.shift(1).bfill() | ||||
|     expected = Series(False, index=range(5)) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_reindex_nearest(): | ||||
|     s = Series(np.arange(10, dtype="int64")) | ||||
|     target = [0.1, 0.9, 1.5, 2.0] | ||||
|     result = s.reindex(target, method="nearest") | ||||
|     expected = Series(np.around(target).astype("int64"), target) | ||||
|     tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     result = s.reindex(target, method="nearest", tolerance=0.2) | ||||
|     expected = Series([0, 1, np.nan, 2], target) | ||||
|     tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     result = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3]) | ||||
|     expected = Series([0, np.nan, np.nan, 2], target) | ||||
|     tm.assert_series_equal(expected, result) | ||||
|  | ||||
|  | ||||
| def test_reindex_int(datetime_series): | ||||
|     ts = datetime_series[::2] | ||||
|     int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index) | ||||
|  | ||||
|     # this should work fine | ||||
|     reindexed_int = int_ts.reindex(datetime_series.index) | ||||
|  | ||||
|     # if NaNs introduced | ||||
|     assert reindexed_int.dtype == np.float64 | ||||
|  | ||||
|     # NO NaNs introduced | ||||
|     reindexed_int = int_ts.reindex(int_ts.index[::2]) | ||||
|     assert reindexed_int.dtype == np.dtype(int) | ||||
|  | ||||
|  | ||||
| def test_reindex_bool(datetime_series): | ||||
|     # A series other than float, int, string, or object | ||||
|     ts = datetime_series[::2] | ||||
|     bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) | ||||
|  | ||||
|     # this should work fine | ||||
|     reindexed_bool = bool_ts.reindex(datetime_series.index) | ||||
|  | ||||
|     # if NaNs introduced | ||||
|     assert reindexed_bool.dtype == np.object_ | ||||
|  | ||||
|     # NO NaNs introduced | ||||
|     reindexed_bool = bool_ts.reindex(bool_ts.index[::2]) | ||||
|     assert reindexed_bool.dtype == np.bool_ | ||||
|  | ||||
|  | ||||
| def test_reindex_bool_pad(datetime_series): | ||||
|     # fail | ||||
|     ts = datetime_series[5:] | ||||
|     bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) | ||||
|     filled_bool = bool_ts.reindex(datetime_series.index, method="pad") | ||||
|     assert isna(filled_bool[:5]).all() | ||||
|  | ||||
|  | ||||
def test_reindex_categorical():
    """Check that reindexing a categorical Series keeps categories and adds NaN."""
    cats = ["a", "b", "c"]
    ser = Series(["a", "b", "c"], dtype="category")

    # reindexing to an index that shares no label with the original
    dates = date_range("20000101", periods=3)
    all_missing = Series(Categorical(values=[np.nan] * 3, categories=cats))
    all_missing.index = dates
    tm.assert_series_equal(ser.reindex(dates), all_missing)

    # partial reindexing: existing labels keep their value, new ones get NaN
    partial_cases = [
        ([1, 2], ["b", "c"]),
        ([2, 3], ["c", np.nan]),
    ]
    for labels, values in partial_cases:
        expected = Series(Categorical(values=values, categories=cats))
        expected.index = labels
        result = ser.reindex(labels)
        tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_reindex_astype_order_consistency():
    """Check that reindex-then-cast and cast-then-reindex give the same Series."""
    # GH#17444
    ser = Series([1, 2, 3], index=[2, 0, 1])
    target = [0, 1, 2]
    intermediate, final = "category", str

    reindex_first = ser.reindex(target).astype(intermediate).astype(final)
    cast_first = ser.astype(intermediate).reindex(target).astype(final)

    tm.assert_series_equal(reindex_first, cast_first)
|  | ||||
|  | ||||
def test_reindex_fill_value():
    """Check NaN filling by default and ``fill_value`` filling across dtypes."""
    # Every Series below uses the default 0..2 index, so label 3 is new.
    target = [1, 2, 3]

    # -----------------------------------------------------------
    # floats
    float_ser = Series([1.0, 2.0, 3.0])

    result = float_ser.reindex(target)
    tm.assert_series_equal(result, Series([2.0, 3.0, np.nan], index=target))

    result = float_ser.reindex(target, fill_value=0)
    tm.assert_series_equal(result, Series([2.0, 3.0, 0], index=target))

    # -----------------------------------------------------------
    # ints
    int_ser = Series([1, 2, 3])

    result = int_ser.reindex(target)
    tm.assert_series_equal(result, Series([2.0, 3.0, np.nan], index=target))

    # don't upcast
    result = int_ser.reindex(target, fill_value=0)
    assert issubclass(result.dtype.type, np.integer)
    tm.assert_series_equal(result, Series([2, 3, 0], index=target))

    # -----------------------------------------------------------
    # objects
    object_ser = Series([1, 2, 3], dtype=object)

    result = object_ser.reindex(target)
    tm.assert_series_equal(
        result, Series([2, 3, np.nan], index=target, dtype=object)
    )

    result = object_ser.reindex(target, fill_value="foo")
    tm.assert_series_equal(
        result, Series([2, 3, "foo"], index=target, dtype=object)
    )

    # ------------------------------------------------------------
    # bools
    bool_ser = Series([True, False, True])

    result = bool_ser.reindex(target)
    tm.assert_series_equal(
        result, Series([False, True, np.nan], index=target, dtype=object)
    )

    result = bool_ser.reindex(target, fill_value=False)
    tm.assert_series_equal(result, Series([False, True, False], index=target))
|  | ||||
|  | ||||
| @td.skip_array_manager_not_yet_implemented | ||||
| @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) | ||||
| @pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)]) | ||||
| def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager): | ||||
|     # https://github.com/pandas-dev/pandas/issues/42921 | ||||
|     if dtype == "timedelta64[ns]" and fill_value == Timedelta(0): | ||||
|         # use the scalar that is not compatible with the dtype for this test | ||||
|         fill_value = Timestamp(0) | ||||
|  | ||||
|     ser = Series([NaT], dtype=dtype) | ||||
|  | ||||
|     result = ser.reindex([0, 1], fill_value=fill_value) | ||||
|     expected = Series([NaT, fill_value], index=[0, 1], dtype=object) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_reindex_datetimeindexes_tz_naive_and_aware():
    """Check that a tz-aware Series rejects a tz-naive target with a fill method."""
    # GH 8306
    aware_index = date_range("20131101", tz="America/Chicago", periods=7)
    naive_index = date_range("20131103", periods=10, freq="h")
    ser = Series(range(7), index=aware_index)

    expected_error = (
        r"Cannot compare dtypes datetime64\[ns, America/Chicago\] "
        r"and datetime64\[ns\]"
    )
    with pytest.raises(TypeError, match=expected_error):
        ser.reindex(naive_index, method="ffill")
|  | ||||
|  | ||||
def test_reindex_empty_series_tz_dtype():
    """Check that reindexing an empty tz-aware Series keeps dtype and gives NaT."""
    # GH 20869
    utc_dtype = "datetime64[ns, UTC]"
    empty = Series(dtype=utc_dtype)

    result = empty.reindex([0, 1])

    expected = Series([NaT, NaT], dtype=utc_dtype)
    tm.assert_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "p_values, o_values, values, expected_values",
    [
        (
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"],
            [1.0, 1.0],
            [1.0, 1.0, np.nan],
        ),
        (
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [1.0, 1.0],
            [1.0, 1.0],
        ),
    ],
)
def test_reindex_periodindex_with_object(p_values, o_values, values, expected_values):
    """Check reindexing a PeriodIndex-ed Series against a generic ``Index``."""
    # GH#28337
    target = Index(o_values)
    ser = Series(values, index=PeriodIndex(p_values))

    result = ser.reindex(target)

    tm.assert_series_equal(result, Series(expected_values, index=target))
|  | ||||
|  | ||||
def test_reindex_too_many_args():
    """Check that a second positional argument to ``reindex`` raises TypeError."""
    # GH 40980
    expected_error = (
        r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given"
    )
    two_values = Series([1, 2])
    with pytest.raises(TypeError, match=expected_error):
        two_values.reindex([2, 3], False)
|  | ||||
|  | ||||
def test_reindex_double_index():
    """Check that passing the index positionally and by keyword raises TypeError."""
    # GH 40980
    expected_error = r"reindex\(\) got multiple values for argument 'index'"
    two_values = Series([1, 2])
    with pytest.raises(TypeError, match=expected_error):
        two_values.reindex([2, 3], index=[3, 4])
|  | ||||
|  | ||||
def test_reindex_no_posargs():
    """Check that ``reindex`` works when the index is given only by keyword."""
    # GH 40980
    reversed_labels = [1, 0]
    result = Series([1, 2]).reindex(index=reversed_labels)
    tm.assert_series_equal(result, Series([2, 1], index=reversed_labels))
|  | ||||
|  | ||||
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """Check that a level reindex matching nothing gives an empty MultiIndex."""
    # GH41170
    first_level, second_level = values
    source_index = MultiIndex.from_arrays(values)
    ser = Series(range(len(first_level)), index=source_index, dtype="object")

    result = ser.reindex(np.array(["b"]), level=0)

    # "b" never occurs on level 0, so no rows survive; the expected index
    # still carries "b" and the original second level as its levels.
    empty_index = MultiIndex(levels=[["b"], second_level], codes=[[], []])
    expected = Series(index=empty_index, dtype="object")
    tm.assert_series_equal(result, expected)
|  | ||||
|  | ||||
def test_reindex_missing_category():
    """Check that a ``fill_value`` outside the categories raises TypeError."""
    # GH#18185
    categorical = Series([1, 2, 3, 1], dtype="category")
    expected_error = r"Cannot setitem on a Categorical with a new category \(-1\)"
    with pytest.raises(TypeError, match=expected_error):
        categorical.reindex([1, 2, 3, 4, 5], fill_value=-1)
|  | ||||
|  | ||||
def test_reindexing_with_float64_NA_log():
    """Check that ``np.log`` on a reindexed masked Float64 Series does not warn."""
    # GH 47055
    masked = Series([1.0, NA], dtype=Float64Dtype())
    reindexed = masked.reindex(range(3))

    # Inspect the raw data buffer behind the masked array.
    raw_data = reindexed.values._data
    tm.assert_numpy_array_equal(raw_data, np.array([1, np.nan, np.nan]))

    with tm.assert_produces_warning(None):
        logged = np.log(reindexed)
        expected_logged = Series([0, np.nan, np.nan], dtype=Float64Dtype())
        tm.assert_series_equal(logged, expected_logged)
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", ["timedelta64", "datetime64"]) | ||||
| def test_reindex_expand_nonnano_nat(dtype): | ||||
|     # GH 53497 | ||||
|     ser = Series(np.array([1], dtype=f"{dtype}[s]")) | ||||
|     result = ser.reindex(RangeIndex(2)) | ||||
|     expected = Series( | ||||
|         np.array([1, getattr(np, dtype)("nat", "s")], dtype=f"{dtype}[s]") | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,41 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_reindex_like(datetime_series): | ||||
|     other = datetime_series[::2] | ||||
|     tm.assert_series_equal( | ||||
|         datetime_series.reindex(other.index), datetime_series.reindex_like(other) | ||||
|     ) | ||||
|  | ||||
|     # GH#7179 | ||||
|     day1 = datetime(2013, 3, 5) | ||||
|     day2 = datetime(2013, 5, 5) | ||||
|     day3 = datetime(2014, 3, 5) | ||||
|  | ||||
|     series1 = Series([5, None, None], [day1, day2, day3]) | ||||
|     series2 = Series([None, None], [day1, day3]) | ||||
|  | ||||
|     result = series1.reindex_like(series2, method="pad") | ||||
|     expected = Series([5, np.nan], index=[day1, day3]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
def test_reindex_like_nearest():
    """Check ``reindex_like`` with ``method="nearest"`` and tolerances."""
    ser = Series(np.arange(10, dtype="int64"))

    target = [0.1, 0.9, 1.5, 2.0]
    template = ser.reindex(target, method="nearest")
    expected = Series(np.around(target).astype("int64"), target)

    # No tolerance, a scalar tolerance and a per-label tolerance all agree.
    tolerance_options = ({}, {"tolerance": 1}, {"tolerance": [1, 2, 3, 4]})
    for extra in tolerance_options:
        result = ser.reindex_like(template, method="nearest", **extra)
        tm.assert_series_equal(expected, result)
| @ -0,0 +1,184 @@ | ||||
| from datetime import datetime | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     array, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestRename: | ||||
|     def test_rename(self, datetime_series): | ||||
|         ts = datetime_series | ||||
|         renamer = lambda x: x.strftime("%Y%m%d") | ||||
|         renamed = ts.rename(renamer) | ||||
|         assert renamed.index[0] == renamer(ts.index[0]) | ||||
|  | ||||
|         # dict | ||||
|         rename_dict = dict(zip(ts.index, renamed.index)) | ||||
|         renamed2 = ts.rename(rename_dict) | ||||
|         tm.assert_series_equal(renamed, renamed2) | ||||
|  | ||||
|     def test_rename_partial_dict(self): | ||||
|         # partial dict | ||||
|         ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64") | ||||
|         renamed = ser.rename({"b": "foo", "d": "bar"}) | ||||
|         tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"])) | ||||
|  | ||||
|     def test_rename_retain_index_name(self): | ||||
|         # index with name | ||||
|         renamer = Series( | ||||
|             np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64" | ||||
|         ) | ||||
|         renamed = renamer.rename({}) | ||||
|         assert renamed.index.name == renamer.index.name | ||||
|  | ||||
|     def test_rename_by_series(self): | ||||
|         ser = Series(range(5), name="foo") | ||||
|         renamer = Series({1: 10, 2: 20}) | ||||
|         result = ser.rename(renamer) | ||||
|         expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_rename_set_name(self, using_infer_string): | ||||
|         ser = Series(range(4), index=list("abcd")) | ||||
|         for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: | ||||
|             result = ser.rename(name) | ||||
|             assert result.name == name | ||||
|             if using_infer_string: | ||||
|                 tm.assert_extension_array_equal(result.index.values, ser.index.values) | ||||
|             else: | ||||
|                 tm.assert_numpy_array_equal(result.index.values, ser.index.values) | ||||
|             assert ser.name is None | ||||
|  | ||||
|     def test_rename_set_name_inplace(self, using_infer_string): | ||||
|         ser = Series(range(3), index=list("abc")) | ||||
|         for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: | ||||
|             ser.rename(name, inplace=True) | ||||
|             assert ser.name == name | ||||
|             exp = np.array(["a", "b", "c"], dtype=np.object_) | ||||
|             if using_infer_string: | ||||
|                 exp = array(exp, dtype="str") | ||||
|                 tm.assert_extension_array_equal(ser.index.values, exp) | ||||
|             else: | ||||
|                 tm.assert_numpy_array_equal(ser.index.values, exp) | ||||
|  | ||||
|     def test_rename_axis_supported(self): | ||||
|         # Supporting axis for compatibility, detailed in GH-18589 | ||||
|         ser = Series(range(5)) | ||||
|         ser.rename({}, axis=0) | ||||
|         ser.rename({}, axis="index") | ||||
|  | ||||
|         with pytest.raises(ValueError, match="No axis named 5"): | ||||
|             ser.rename({}, axis=5) | ||||
|  | ||||
|     def test_rename_inplace(self, datetime_series): | ||||
|         renamer = lambda x: x.strftime("%Y%m%d") | ||||
|         expected = renamer(datetime_series.index[0]) | ||||
|  | ||||
|         datetime_series.rename(renamer, inplace=True) | ||||
|         assert datetime_series.index[0] == expected | ||||
|  | ||||
|     def test_rename_with_custom_indexer(self): | ||||
|         # GH 27814 | ||||
|         class MyIndexer: | ||||
|             pass | ||||
|  | ||||
|         ix = MyIndexer() | ||||
|         ser = Series([1, 2, 3]).rename(ix) | ||||
|         assert ser.name is ix | ||||
|  | ||||
|     def test_rename_with_custom_indexer_inplace(self): | ||||
|         # GH 27814 | ||||
|         class MyIndexer: | ||||
|             pass | ||||
|  | ||||
|         ix = MyIndexer() | ||||
|         ser = Series([1, 2, 3]) | ||||
|         ser.rename(ix, inplace=True) | ||||
|         assert ser.name is ix | ||||
|  | ||||
|     def test_rename_callable(self): | ||||
|         # GH 17407 | ||||
|         ser = Series(range(1, 6), index=Index(range(2, 7), name="IntIndex")) | ||||
|         result = ser.rename(str) | ||||
|         expected = ser.rename(lambda i: str(i)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         assert result.name == expected.name | ||||
|  | ||||
|     def test_rename_none(self): | ||||
|         # GH 40977 | ||||
|         ser = Series([1, 2], name="foo") | ||||
|         result = ser.rename(None) | ||||
|         expected = Series([1, 2]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_rename_series_with_multiindex(self): | ||||
|         # issue #43659 | ||||
|         arrays = [ | ||||
|             ["bar", "baz", "baz", "foo", "qux"], | ||||
|             ["one", "one", "two", "two", "one"], | ||||
|         ] | ||||
|  | ||||
|         index = MultiIndex.from_arrays(arrays, names=["first", "second"]) | ||||
|         ser = Series(np.ones(5), index=index) | ||||
|         result = ser.rename(index={"one": "yes"}, level="second", errors="raise") | ||||
|  | ||||
|         arrays_expected = [ | ||||
|             ["bar", "baz", "baz", "foo", "qux"], | ||||
|             ["yes", "yes", "two", "two", "yes"], | ||||
|         ] | ||||
|  | ||||
|         index_expected = MultiIndex.from_arrays( | ||||
|             arrays_expected, names=["first", "second"] | ||||
|         ) | ||||
|         series_expected = Series(np.ones(5), index=index_expected) | ||||
|  | ||||
|         tm.assert_series_equal(result, series_expected) | ||||
|  | ||||
|     def test_rename_series_with_multiindex_keeps_ea_dtypes(self): | ||||
|         # GH21055 | ||||
|         arrays = [ | ||||
|             Index([1, 2, 3], dtype="Int64").astype("category"), | ||||
|             Index([1, 2, 3], dtype="Int64"), | ||||
|         ] | ||||
|         mi = MultiIndex.from_arrays(arrays, names=["A", "B"]) | ||||
|         ser = Series(1, index=mi) | ||||
|         result = ser.rename({1: 4}, level=1) | ||||
|  | ||||
|         arrays_expected = [ | ||||
|             Index([1, 2, 3], dtype="Int64").astype("category"), | ||||
|             Index([4, 2, 3], dtype="Int64"), | ||||
|         ] | ||||
|         mi_expected = MultiIndex.from_arrays(arrays_expected, names=["A", "B"]) | ||||
|         expected = Series(1, index=mi_expected) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_rename_error_arg(self): | ||||
|         # GH 46889 | ||||
|         ser = Series(["foo", "bar"]) | ||||
|         match = re.escape("[2] not found in axis") | ||||
|         with pytest.raises(KeyError, match=match): | ||||
|             ser.rename({2: 9}, errors="raise") | ||||
|  | ||||
|     def test_rename_copy_false(self, using_copy_on_write, warn_copy_on_write): | ||||
|         # GH 46889 | ||||
|         ser = Series(["foo", "bar"]) | ||||
|         ser_orig = ser.copy() | ||||
|         shallow_copy = ser.rename({1: 9}, copy=False) | ||||
|         with tm.assert_cow_warning(warn_copy_on_write): | ||||
|             ser[0] = "foobar" | ||||
|         if using_copy_on_write: | ||||
|             assert ser_orig[0] == shallow_copy[0] | ||||
|             assert ser_orig[1] == shallow_copy[9] | ||||
|         else: | ||||
|             assert ser[0] == shallow_copy[0] | ||||
|             assert ser[1] == shallow_copy[9] | ||||
| @ -0,0 +1,47 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesRenameAxis: | ||||
|     def test_rename_axis_mapper(self): | ||||
|         # GH 19978 | ||||
|         mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"]) | ||||
|         ser = Series(list(range(len(mi))), index=mi) | ||||
|  | ||||
|         result = ser.rename_axis(index={"ll": "foo"}) | ||||
|         assert result.index.names == ["foo", "nn"] | ||||
|  | ||||
|         result = ser.rename_axis(index=str.upper, axis=0) | ||||
|         assert result.index.names == ["LL", "NN"] | ||||
|  | ||||
|         result = ser.rename_axis(index=["foo", "goo"]) | ||||
|         assert result.index.names == ["foo", "goo"] | ||||
|  | ||||
|         with pytest.raises(TypeError, match="unexpected"): | ||||
|             ser.rename_axis(columns="wrong") | ||||
|  | ||||
|     def test_rename_axis_inplace(self, datetime_series): | ||||
|         # GH 15704 | ||||
|         expected = datetime_series.rename_axis("foo") | ||||
|         result = datetime_series | ||||
|         no_return = result.rename_axis("foo", inplace=True) | ||||
|  | ||||
|         assert no_return is None | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}]) | ||||
|     def test_rename_axis_none(self, kwargs): | ||||
|         # GH 25034 | ||||
|         index = Index(list("abc"), name="foo") | ||||
|         ser = Series([1, 2, 3], index=index) | ||||
|  | ||||
|         result = ser.rename_axis(**kwargs) | ||||
|         expected_index = index.rename(None) if kwargs else index | ||||
|         expected = Series([1, 2, 3], index=expected_index) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,40 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestRepeat:
    """Tests for ``Series.repeat`` and ``np.repeat`` applied to a Series."""

    def test_repeat(self):
        """Check that values and index labels repeat together."""
        ser = Series(np.random.default_rng(2).standard_normal(3), index=["a", "b", "c"])

        # A scalar count first, then one count per element.
        for counts in (5, [2, 3, 4]):
            repeated = ser.repeat(counts)
            expected = Series(
                ser.values.repeat(counts), index=ser.index.values.repeat(counts)
            )
            tm.assert_series_equal(repeated, expected)

    def test_numpy_repeat(self):
        """Check that ``np.repeat`` works on a Series and rejects ``axis``."""
        ser = Series(np.arange(3), name="x")
        doubled_index = ser.index.values.repeat(2)
        expected = Series(ser.values.repeat(2), name="x", index=doubled_index)
        tm.assert_series_equal(np.repeat(ser, 2), expected)

        unsupported = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=unsupported):
            np.repeat(ser, 2, axis=0)

    def test_repeat_with_multiindex(self):
        """Check the length of a repeated MultiIndex-ed Series."""
        # GH#9361, fixed by  GH#7891
        tuples_index = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)])
        letters = ["a", "b", "c", "d"]
        ser = Series(letters, index=tuples_index)
        assert ser.repeat(3).shape == (3 * len(letters),)
| @ -0,0 +1,819 @@ | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import IntervalArray | ||||
|  | ||||
|  | ||||
| class TestSeriesReplace: | ||||
|     def test_replace_explicit_none(self): | ||||
|         # GH#36984 if the user explicitly passes value=None, give it to them | ||||
|         ser = pd.Series([0, 0, ""], dtype=object) | ||||
|         result = ser.replace("", None) | ||||
|         expected = pd.Series([0, 0, None], dtype=object) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # Cast column 2 to object to avoid implicit cast when setting entry to "" | ||||
|         df = pd.DataFrame(np.zeros((3, 3))).astype({2: object}) | ||||
|         df.iloc[2, 2] = "" | ||||
|         result = df.replace("", None) | ||||
|         expected = pd.DataFrame( | ||||
|             { | ||||
|                 0: np.zeros(3), | ||||
|                 1: np.zeros(3), | ||||
|                 2: np.array([0.0, 0.0, None], dtype=object), | ||||
|             } | ||||
|         ) | ||||
|         assert expected.iloc[2, 2] is None | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # GH#19998 same thing with object dtype | ||||
|         ser = pd.Series([10, 20, 30, "a", "a", "b", "a"]) | ||||
|         result = ser.replace("a", None) | ||||
|         expected = pd.Series([10, 20, 30, None, None, "b", None]) | ||||
|         assert expected.iloc[-1] is None | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_noop_doesnt_downcast(self): | ||||
|         # GH#44498 | ||||
|         ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object) | ||||
|         res = ser.replace({np.nan: None})  # should be a no-op | ||||
|         tm.assert_series_equal(res, ser) | ||||
|         assert res.dtype == object | ||||
|  | ||||
|         # same thing but different calling convention | ||||
|         res = ser.replace(np.nan, None) | ||||
|         tm.assert_series_equal(res, ser) | ||||
|         assert res.dtype == object | ||||
|  | ||||
|     def test_replace(self): | ||||
|         N = 50 | ||||
|         ser = pd.Series(np.random.default_rng(2).standard_normal(N)) | ||||
|         ser[0:4] = np.nan | ||||
|         ser[6:10] = 0 | ||||
|  | ||||
|         # replace list with a single value | ||||
|         return_value = ser.replace([np.nan], -1, inplace=True) | ||||
|         assert return_value is None | ||||
|  | ||||
|         exp = ser.fillna(-1) | ||||
|         tm.assert_series_equal(ser, exp) | ||||
|  | ||||
|         rs = ser.replace(0.0, np.nan) | ||||
|         ser[ser == 0.0] = np.nan | ||||
|         tm.assert_series_equal(rs, ser) | ||||
|  | ||||
|         ser = pd.Series( | ||||
|             np.fabs(np.random.default_rng(2).standard_normal(N)), | ||||
|             pd.date_range("2020-01-01", periods=N), | ||||
|             dtype=object, | ||||
|         ) | ||||
|         ser[:5] = np.nan | ||||
|         ser[6:10] = "foo" | ||||
|         ser[20:30] = "bar" | ||||
|  | ||||
|         # replace list with a single value | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             rs = ser.replace([np.nan, "foo", "bar"], -1) | ||||
|  | ||||
|         assert (rs[:5] == -1).all() | ||||
|         assert (rs[6:10] == -1).all() | ||||
|         assert (rs[20:30] == -1).all() | ||||
|         assert (pd.isna(ser[:5])).all() | ||||
|  | ||||
|         # replace with different values | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) | ||||
|  | ||||
|         assert (rs[:5] == -1).all() | ||||
|         assert (rs[6:10] == -2).all() | ||||
|         assert (rs[20:30] == -3).all() | ||||
|         assert (pd.isna(ser[:5])).all() | ||||
|  | ||||
|         # replace with different values with 2 lists | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) | ||||
|         tm.assert_series_equal(rs, rs2) | ||||
|  | ||||
|         # replace inplace | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True) | ||||
|         assert return_value is None | ||||
|  | ||||
|         assert (ser[:5] == -1).all() | ||||
|         assert (ser[6:10] == -1).all() | ||||
|         assert (ser[20:30] == -1).all() | ||||
|  | ||||
|     def test_replace_nan_with_inf(self): | ||||
|         ser = pd.Series([np.nan, 0, np.inf]) | ||||
|         tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) | ||||
|  | ||||
|         ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT]) | ||||
|         tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) | ||||
|         filled = ser.copy() | ||||
|         filled[4] = 0 | ||||
|         tm.assert_series_equal(ser.replace(np.inf, 0), filled) | ||||
|  | ||||
|     def test_replace_listlike_value_listlike_target(self, datetime_series): | ||||
|         ser = pd.Series(datetime_series.index) | ||||
|         tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) | ||||
|  | ||||
|         # malformed | ||||
|         msg = r"Replacement lists must match in length\. Expecting 3 got 2" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.replace([1, 2, 3], [np.nan, 0]) | ||||
|  | ||||
|         # ser is dt64 so can't hold 1 or 2, so this replace is a no-op | ||||
|         result = ser.replace([1, 2], [np.nan, 0]) | ||||
|         tm.assert_series_equal(result, ser) | ||||
|  | ||||
|         ser = pd.Series([0, 1, 2, 3, 4]) | ||||
|         result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) | ||||
|         tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0])) | ||||
|  | ||||
|     def test_replace_gh5319(self): | ||||
|         # API change from 0.12? | ||||
|         # GH 5319 | ||||
|         ser = pd.Series([0, np.nan, 2, 3, 4]) | ||||
|         expected = ser.ffill() | ||||
|         msg = ( | ||||
|             "Series.replace without 'value' and with non-dict-like " | ||||
|             "'to_replace' is deprecated" | ||||
|         ) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = ser.replace([np.nan]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         ser = pd.Series([0, np.nan, 2, 3, 4]) | ||||
|         expected = ser.ffill() | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = ser.replace(np.nan) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_datetime64(self): | ||||
|         # GH 5797 | ||||
|         ser = pd.Series(pd.date_range("20130101", periods=5)) | ||||
|         expected = ser.copy() | ||||
|         expected.loc[2] = pd.Timestamp("20120101") | ||||
|         result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")}) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101")) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_nat_with_tz(self): | ||||
|         # GH 11792: Test with replacing NaT in a list with tz data | ||||
|         ts = pd.Timestamp("2015/01/01", tz="UTC") | ||||
|         s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")]) | ||||
|         result = s.replace([np.nan, pd.NaT], pd.Timestamp.min) | ||||
|         expected = pd.Series([pd.Timestamp.min, ts], dtype=object) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     def test_replace_timedelta_td64(self): | ||||
|         tdi = pd.timedelta_range(0, periods=5) | ||||
|         ser = pd.Series(tdi) | ||||
|  | ||||
|         # Using a single dict argument means we go through replace_list | ||||
|         result = ser.replace({ser[1]: ser[3]}) | ||||
|  | ||||
|         expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_with_single_list(self): | ||||
|         ser = pd.Series([0, 1, 2, 3, 4]) | ||||
|         msg2 = ( | ||||
|             "Series.replace without 'value' and with non-dict-like " | ||||
|             "'to_replace' is deprecated" | ||||
|         ) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|             result = ser.replace([1, 2, 3]) | ||||
|         tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4])) | ||||
|  | ||||
|         s = ser.copy() | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|             return_value = s.replace([1, 2, 3], inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4])) | ||||
|  | ||||
|         # make sure things don't get corrupted when fillna call fails | ||||
|         s = ser.copy() | ||||
|         msg = ( | ||||
|             r"Invalid fill method\. Expecting pad \(ffill\) or backfill " | ||||
|             r"\(bfill\)\. Got crash_cymbal" | ||||
|         ) | ||||
|         msg3 = "The 'method' keyword in Series.replace is deprecated" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             with tm.assert_produces_warning(FutureWarning, match=msg3): | ||||
|                 return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal") | ||||
|             assert return_value is None | ||||
|         tm.assert_series_equal(s, ser) | ||||
|  | ||||
|     def test_replace_mixed_types(self): | ||||
|         ser = pd.Series(np.arange(5), dtype="int64") | ||||
|  | ||||
|         def check_replace(to_rep, val, expected): | ||||
|             sc = ser.copy() | ||||
|             result = ser.replace(to_rep, val) | ||||
|             return_value = sc.replace(to_rep, val, inplace=True) | ||||
|             assert return_value is None | ||||
|             tm.assert_series_equal(expected, result) | ||||
|             tm.assert_series_equal(expected, sc) | ||||
|  | ||||
|         # 3.0 can still be held in our int64 series, so we do not upcast GH#44940 | ||||
|         tr, v = [3], [3.0] | ||||
|         check_replace(tr, v, ser) | ||||
|         # Note this matches what we get with the scalars 3 and 3.0 | ||||
|         check_replace(tr[0], v[0], ser) | ||||
|  | ||||
|         # MUST upcast to float | ||||
|         e = pd.Series([0, 1, 2, 3.5, 4]) | ||||
|         tr, v = [3], [3.5] | ||||
|         check_replace(tr, v, e) | ||||
|  | ||||
|         # casts to object | ||||
|         e = pd.Series([0, 1, 2, 3.5, "a"]) | ||||
|         tr, v = [3, 4], [3.5, "a"] | ||||
|         check_replace(tr, v, e) | ||||
|  | ||||
|         # again casts to object | ||||
|         e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")]) | ||||
|         tr, v = [3, 4], [3.5, pd.Timestamp("20130101")] | ||||
|         check_replace(tr, v, e) | ||||
|  | ||||
|         # casts to object | ||||
|         e = pd.Series([0, 1, 2, 3.5, True], dtype="object") | ||||
|         tr, v = [3, 4], [3.5, True] | ||||
|         check_replace(tr, v, e) | ||||
|  | ||||
|         # test an object with dates + floats + integers + strings | ||||
|         dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D")) | ||||
|         result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"]) | ||||
|         expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_bool_with_string_no_op(self): | ||||
|         s = pd.Series([True, False, True]) | ||||
|         result = s.replace("fun", "in-the-sun") | ||||
|         tm.assert_series_equal(s, result) | ||||
|  | ||||
|     def test_replace_bool_with_string(self): | ||||
|         # nonexistent elements | ||||
|         s = pd.Series([True, False, True]) | ||||
|         result = s.replace(True, "2u") | ||||
|         expected = pd.Series(["2u", False, "2u"]) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     def test_replace_bool_with_bool(self): | ||||
|         s = pd.Series([True, False, True]) | ||||
|         result = s.replace(True, False) | ||||
|         expected = pd.Series([False] * len(s)) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     def test_replace_with_dict_with_bool_keys(self): | ||||
|         s = pd.Series([True, False, True]) | ||||
|         result = s.replace({"asdf": "asdb", True: "yes"}) | ||||
|         expected = pd.Series(["yes", False, "yes"]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_Int_with_na(self, any_int_ea_dtype): | ||||
|         # GH 38267 | ||||
|         result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA) | ||||
|         expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA) | ||||
|         result.replace(1, pd.NA, inplace=True) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace2(self): | ||||
|         N = 50 | ||||
|         ser = pd.Series( | ||||
|             np.fabs(np.random.default_rng(2).standard_normal(N)), | ||||
|             pd.date_range("2020-01-01", periods=N), | ||||
|             dtype=object, | ||||
|         ) | ||||
|         ser[:5] = np.nan | ||||
|         ser[6:10] = "foo" | ||||
|         ser[20:30] = "bar" | ||||
|  | ||||
|         # replace list with a single value | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             rs = ser.replace([np.nan, "foo", "bar"], -1) | ||||
|  | ||||
|         assert (rs[:5] == -1).all() | ||||
|         assert (rs[6:10] == -1).all() | ||||
|         assert (rs[20:30] == -1).all() | ||||
|         assert (pd.isna(ser[:5])).all() | ||||
|  | ||||
|         # replace with different values | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) | ||||
|  | ||||
|         assert (rs[:5] == -1).all() | ||||
|         assert (rs[6:10] == -2).all() | ||||
|         assert (rs[20:30] == -3).all() | ||||
|         assert (pd.isna(ser[:5])).all() | ||||
|  | ||||
|         # replace with different values with 2 lists | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) | ||||
|         tm.assert_series_equal(rs, rs2) | ||||
|  | ||||
|         # replace inplace | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True) | ||||
|         assert return_value is None | ||||
|         assert (ser[:5] == -1).all() | ||||
|         assert (ser[6:10] == -1).all() | ||||
|         assert (ser[20:30] == -1).all() | ||||
|  | ||||
|     @pytest.mark.parametrize("inplace", [True, False]) | ||||
|     def test_replace_cascade(self, inplace): | ||||
|         # Test that replaced values are not replaced again | ||||
|         # GH #50778 | ||||
|         ser = pd.Series([1, 2, 3]) | ||||
|         expected = pd.Series([2, 3, 4]) | ||||
|  | ||||
|         res = ser.replace([1, 2, 3], [2, 3, 4], inplace=inplace) | ||||
|         if inplace: | ||||
|             tm.assert_series_equal(ser, expected) | ||||
|         else: | ||||
|             tm.assert_series_equal(res, expected) | ||||
|  | ||||
|     def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype): | ||||
|         # GH 32621, GH#44940 | ||||
|         ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype) | ||||
|         expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype) | ||||
|         result = ser.replace({"one": "1", "two": "2"}) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     def test_replace_with_empty_dictlike(self): | ||||
|         # GH 15289 | ||||
|         s = pd.Series(list("abcd")) | ||||
|         tm.assert_series_equal(s, s.replace({})) | ||||
|  | ||||
|         empty_series = pd.Series([]) | ||||
|         tm.assert_series_equal(s, s.replace(empty_series)) | ||||
|  | ||||
|     def test_replace_string_with_number(self): | ||||
|         # GH 15743 | ||||
|         s = pd.Series([1, 2, 3]) | ||||
|         result = s.replace("2", np.nan) | ||||
|         expected = pd.Series([1, 2, 3]) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     def test_replace_replacer_equals_replacement(self): | ||||
|         # GH 20656 | ||||
|         # make sure all replacers are matching against original values | ||||
|         s = pd.Series(["a", "b"]) | ||||
|         expected = pd.Series(["b", "a"]) | ||||
|         result = s.replace({"a": "b", "b": "a"}) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     def test_replace_unicode_with_number(self): | ||||
|         # GH 15743 | ||||
|         s = pd.Series([1, 2, 3]) | ||||
|         result = s.replace("2", np.nan) | ||||
|         expected = pd.Series([1, 2, 3]) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     def test_replace_mixed_types_with_string(self): | ||||
|         # Testing mixed | ||||
|         s = pd.Series([1, 2, 3, "4", 4, 5]) | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = s.replace([2, "4"], np.nan) | ||||
|         expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "categorical, numeric", | ||||
|         [ | ||||
|             (pd.Categorical(["A"], categories=["A", "B"]), [1]), | ||||
|             (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_replace_categorical(self, categorical, numeric, using_infer_string): | ||||
|         # GH 24971, GH#23305 | ||||
|         ser = pd.Series(categorical) | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         msg = "with CategoricalDtype is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = ser.replace({"A": 1, "B": 2}) | ||||
|         expected = pd.Series(numeric).astype("category") | ||||
|         if 2 not in expected.cat.categories: | ||||
|             # i.e. categories should be [1, 2] even if there are no "B"s present | ||||
|             # GH#44940 | ||||
|             expected = expected.cat.add_categories(2) | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data, data_exp", [(["a", "b", "c"], ["b", "b", "c"]), (["a"], ["b"])] | ||||
|     ) | ||||
|     def test_replace_categorical_inplace(self, data, data_exp): | ||||
|         # GH 53358 | ||||
|         result = pd.Series(data, dtype="category") | ||||
|         msg = "with CategoricalDtype is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result.replace(to_replace="a", value="b", inplace=True) | ||||
|         expected = pd.Series(data_exp, dtype="category") | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_categorical_single(self): | ||||
|         # GH 26988 | ||||
|         dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") | ||||
|         s = pd.Series(dti) | ||||
|         c = s.astype("category") | ||||
|  | ||||
|         expected = c.copy() | ||||
|         expected = expected.cat.add_categories("foo") | ||||
|         expected[2] = "foo" | ||||
|         expected = expected.cat.remove_unused_categories() | ||||
|         assert c[2] != "foo" | ||||
|  | ||||
|         msg = "with CategoricalDtype is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = c.replace(c[2], "foo") | ||||
|         tm.assert_series_equal(expected, result) | ||||
|         assert c[2] != "foo"  # ensure non-inplace call does not alter original | ||||
|  | ||||
|         msg = "with CategoricalDtype is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             return_value = c.replace(c[2], "foo", inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(expected, c) | ||||
|  | ||||
|         first_value = c[0] | ||||
|         msg = "with CategoricalDtype is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             return_value = c.replace(c[1], c[0], inplace=True) | ||||
|         assert return_value is None | ||||
|         assert c[0] == c[1] == first_value  # test replacing with existing value | ||||
|  | ||||
|     def test_replace_with_no_overflowerror(self): | ||||
|         # GH 25616 | ||||
|         # casts to object without Exception from OverflowError | ||||
|         s = pd.Series([0, 1, 2, 3, 4]) | ||||
|         result = s.replace([3], ["100000000000000000000"]) | ||||
|         expected = pd.Series([0, 1, 2, "100000000000000000000", 4]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         s = pd.Series([0, "100000000000000000000", "100000000000000000001"]) | ||||
|         result = s.replace(["100000000000000000000"], [1]) | ||||
|         expected = pd.Series([0, 1, "100000000000000000001"]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ser, to_replace, exp", | ||||
|         [ | ||||
|             ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]), | ||||
|             (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_replace_commutative(self, ser, to_replace, exp): | ||||
|         # GH 16051 | ||||
|         # DataFrame.replace() overwrites when values are non-numeric | ||||
|  | ||||
|         series = pd.Series(ser) | ||||
|  | ||||
|         expected = pd.Series(exp) | ||||
|         result = series.replace(to_replace) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])] | ||||
|     ) | ||||
|     def test_replace_no_cast(self, ser, exp): | ||||
|         # GH 9113 | ||||
|         # BUG: replace int64 dtype with bool coerces to int64 | ||||
|  | ||||
|         series = pd.Series(ser) | ||||
|         result = series.replace(2, True) | ||||
|         expected = pd.Series(exp) | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_invalid_to_replace(self): | ||||
|         # GH 18634 | ||||
|         # API: replace() should raise an exception if invalid argument is given | ||||
|         series = pd.Series(["a", "b", "c "]) | ||||
|         msg = ( | ||||
|             r"Expecting 'to_replace' to be either a scalar, array-like, " | ||||
|             r"dict or None, got invalid type.*" | ||||
|         ) | ||||
|         msg2 = ( | ||||
|             "Series.replace without 'value' and with non-dict-like " | ||||
|             "'to_replace' is deprecated" | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|                 series.replace(lambda x: x.strip()) | ||||
|  | ||||
|     @pytest.mark.parametrize("frame", [False, True]) | ||||
|     def test_replace_nonbool_regex(self, frame): | ||||
|         obj = pd.Series(["a", "b", "c "]) | ||||
|         if frame: | ||||
|             obj = obj.to_frame() | ||||
|  | ||||
|         msg = "'to_replace' must be 'None' if 'regex' is not a bool" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             obj.replace(to_replace=["a"], regex="foo") | ||||
|  | ||||
|     @pytest.mark.parametrize("frame", [False, True]) | ||||
|     def test_replace_empty_copy(self, frame): | ||||
|         obj = pd.Series([], dtype=np.float64) | ||||
|         if frame: | ||||
|             obj = obj.to_frame() | ||||
|  | ||||
|         res = obj.replace(4, 5, inplace=True) | ||||
|         assert res is None | ||||
|  | ||||
|         res = obj.replace(4, 5, inplace=False) | ||||
|         tm.assert_equal(res, obj) | ||||
|         assert res is not obj | ||||
|  | ||||
|     def test_replace_only_one_dictlike_arg(self, fixed_now_ts): | ||||
|         # GH#33340 | ||||
|  | ||||
|         ser = pd.Series([1, 2, "A", fixed_now_ts, True]) | ||||
|         to_replace = {0: 1, 2: "A"} | ||||
|         value = "foo" | ||||
|         msg = "Series.replace cannot use dict-like to_replace and non-None value" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.replace(to_replace, value) | ||||
|  | ||||
|         to_replace = 1 | ||||
|         value = {0: "foo", 2: "bar"} | ||||
|         msg = "Series.replace cannot use dict-value and non-None to_replace" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.replace(to_replace, value) | ||||
|  | ||||
|     def test_replace_extension_other(self, frame_or_series): | ||||
|         # https://github.com/pandas-dev/pandas/issues/34530 | ||||
|         obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64")) | ||||
|         result = obj.replace("", "")  # no exception | ||||
|         # should not have changed dtype | ||||
|         tm.assert_equal(obj, result) | ||||
|  | ||||
|     def _check_replace_with_method(self, ser: pd.Series): | ||||
|         df = ser.to_frame() | ||||
|  | ||||
|         msg1 = "The 'method' keyword in Series.replace is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg1): | ||||
|             res = ser.replace(ser[1], method="pad") | ||||
|         expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype) | ||||
|         tm.assert_series_equal(res, expected) | ||||
|  | ||||
|         msg2 = "The 'method' keyword in DataFrame.replace is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|             res_df = df.replace(ser[1], method="pad") | ||||
|         tm.assert_frame_equal(res_df, expected.to_frame()) | ||||
|  | ||||
|         ser2 = ser.copy() | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg1): | ||||
|             res2 = ser2.replace(ser[1], method="pad", inplace=True) | ||||
|         assert res2 is None | ||||
|         tm.assert_series_equal(ser2, expected) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg2): | ||||
|             res_df2 = df.replace(ser[1], method="pad", inplace=True) | ||||
|         assert res_df2 is None | ||||
|         tm.assert_frame_equal(df, expected.to_frame()) | ||||
|  | ||||
|     def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype): | ||||
|         arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype) | ||||
|         ser = pd.Series(arr) | ||||
|  | ||||
|         self._check_replace_with_method(ser) | ||||
|  | ||||
|     @pytest.mark.parametrize("as_categorical", [True, False]) | ||||
|     def test_replace_interval_with_method(self, as_categorical): | ||||
|         # in particular interval that can't hold NA | ||||
|  | ||||
|         idx = pd.IntervalIndex.from_breaks(range(4)) | ||||
|         ser = pd.Series(idx) | ||||
|         if as_categorical: | ||||
|             ser = ser.astype("category") | ||||
|  | ||||
|         self._check_replace_with_method(ser) | ||||
|  | ||||
|     @pytest.mark.parametrize("as_period", [True, False]) | ||||
|     @pytest.mark.parametrize("as_categorical", [True, False]) | ||||
|     def test_replace_datetimelike_with_method(self, as_period, as_categorical): | ||||
|         idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific") | ||||
|         if as_period: | ||||
|             idx = idx.tz_localize(None).to_period("D") | ||||
|  | ||||
|         ser = pd.Series(idx) | ||||
|         ser.iloc[-2] = pd.NaT | ||||
|         if as_categorical: | ||||
|             ser = ser.astype("category") | ||||
|  | ||||
|         self._check_replace_with_method(ser) | ||||
|  | ||||
|     def test_replace_with_compiled_regex(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/35680 | ||||
|         s = pd.Series(["a", "b", "c"]) | ||||
|         regex = re.compile("^a$") | ||||
|         result = s.replace({regex: "z"}, regex=True) | ||||
|         expected = pd.Series(["z", "b", "c"]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_pandas_replace_na(self): | ||||
|         # GH#43344 | ||||
|         # GH#56599 | ||||
|         ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA, "AA"], dtype="string") | ||||
|         regex_mapping = { | ||||
|             "AA": "CC", | ||||
|             "BB": "CC", | ||||
|             "EE": "CC", | ||||
|             "CC": "CC-REPL", | ||||
|         } | ||||
|         result = ser.replace(regex_mapping, regex=True) | ||||
|         exp = pd.Series( | ||||
|             ["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA, "CC"], dtype="string" | ||||
|         ) | ||||
|         tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype, input_data, to_replace, expected_data", | ||||
|         [ | ||||
|             ("bool", [True, False], {True: False}, [False, False]), | ||||
|             ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]), | ||||
|             ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]), | ||||
|             ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), | ||||
|             ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]), | ||||
|             ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]), | ||||
|             ( | ||||
|                 pd.IntervalDtype("int64"), | ||||
|                 IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]), | ||||
|                 {pd.Interval(1, 2): pd.Interval(10, 20)}, | ||||
|                 IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]), | ||||
|             ), | ||||
|             ( | ||||
|                 pd.IntervalDtype("float64"), | ||||
|                 IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]), | ||||
|                 {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)}, | ||||
|                 IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]), | ||||
|             ), | ||||
|             ( | ||||
|                 pd.PeriodDtype("M"), | ||||
|                 [pd.Period("2020-05", freq="M")], | ||||
|                 {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")}, | ||||
|                 [pd.Period("2020-06", freq="M")], | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_replace_dtype(self, dtype, input_data, to_replace, expected_data): | ||||
|         # GH#33484 | ||||
|         ser = pd.Series(input_data, dtype=dtype) | ||||
|         result = ser.replace(to_replace) | ||||
|         expected = pd.Series(expected_data, dtype=dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_string_dtype(self): | ||||
|         # GH#40732, GH#44940 | ||||
|         ser = pd.Series(["one", "two", np.nan], dtype="string") | ||||
|         res = ser.replace({"one": "1", "two": "2"}) | ||||
|         expected = pd.Series(["1", "2", np.nan], dtype="string") | ||||
|         tm.assert_series_equal(res, expected) | ||||
|  | ||||
|         # GH#31644 | ||||
|         ser2 = pd.Series(["A", np.nan], dtype="string") | ||||
|         res2 = ser2.replace("A", "B") | ||||
|         expected2 = pd.Series(["B", np.nan], dtype="string") | ||||
|         tm.assert_series_equal(res2, expected2) | ||||
|  | ||||
|         ser3 = pd.Series(["A", "B"], dtype="string") | ||||
|         res3 = ser3.replace("A", pd.NA) | ||||
|         expected3 = pd.Series([pd.NA, "B"], dtype="string") | ||||
|         tm.assert_series_equal(res3, expected3) | ||||
|  | ||||
|     def test_replace_string_dtype_list_to_replace(self): | ||||
|         # GH#41215, GH#44940 | ||||
|         ser = pd.Series(["abc", "def"], dtype="string") | ||||
|         res = ser.replace(["abc", "any other string"], "xyz") | ||||
|         expected = pd.Series(["xyz", "def"], dtype="string") | ||||
|         tm.assert_series_equal(res, expected) | ||||
|  | ||||
|     def test_replace_string_dtype_regex(self): | ||||
|         # GH#31644 | ||||
|         ser = pd.Series(["A", "B"], dtype="string") | ||||
|         res = ser.replace(r".", "C", regex=True) | ||||
|         expected = pd.Series(["C", "C"], dtype="string") | ||||
|         tm.assert_series_equal(res, expected) | ||||
|  | ||||
|     def test_replace_nullable_numeric(self): | ||||
|         # GH#40732, GH#44940 | ||||
|  | ||||
|         floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype()) | ||||
|         assert floats.replace({1.0: 9}).dtype == floats.dtype | ||||
|         assert floats.replace(1.0, 9).dtype == floats.dtype | ||||
|         assert floats.replace({1.0: 9.0}).dtype == floats.dtype | ||||
|         assert floats.replace(1.0, 9.0).dtype == floats.dtype | ||||
|  | ||||
|         res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0]) | ||||
|         assert res.dtype == floats.dtype | ||||
|  | ||||
|         ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype()) | ||||
|         assert ints.replace({1: 9}).dtype == ints.dtype | ||||
|         assert ints.replace(1, 9).dtype == ints.dtype | ||||
|         assert ints.replace({1: 9.0}).dtype == ints.dtype | ||||
|         assert ints.replace(1, 9.0).dtype == ints.dtype | ||||
|  | ||||
|         # nullable (for now) raises instead of casting | ||||
|         with pytest.raises(TypeError, match="Invalid value"): | ||||
|             ints.replace({1: 9.5}) | ||||
|         with pytest.raises(TypeError, match="Invalid value"): | ||||
|             ints.replace(1, 9.5) | ||||
|  | ||||
|     @pytest.mark.parametrize("regex", [False, True]) | ||||
|     def test_replace_regex_dtype_series(self, regex): | ||||
|         # GH-48644 | ||||
|         series = pd.Series(["0"], dtype=object) | ||||
|         expected = pd.Series([1]) | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = series.replace(to_replace="0", value=1, regex=regex) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("regex", [False, True]) | ||||
|     def test_replace_regex_dtype_series_string(self, regex): | ||||
|         series = pd.Series(["0"], dtype="str") | ||||
|         expected = pd.Series([1], dtype="int64") | ||||
|         msg = "Downcasting behavior in `replace`" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = series.replace(to_replace="0", value=1, regex=regex) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_different_int_types(self, any_int_numpy_dtype): | ||||
|         # GH#45311 | ||||
|         labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype) | ||||
|  | ||||
|         maps = pd.Series([0, 2, 1], dtype=any_int_numpy_dtype) | ||||
|         map_dict = dict(zip(maps.values, maps.index)) | ||||
|  | ||||
|         result = labs.replace(map_dict) | ||||
|         expected = labs.replace({0: 0, 2: 1, 1: 2}) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("val", [2, np.nan, 2.0]) | ||||
|     def test_replace_value_none_dtype_numeric(self, val): | ||||
|         # GH#48231 | ||||
|         ser = pd.Series([1, val]) | ||||
|         result = ser.replace(val, None) | ||||
|         expected = pd.Series([1, None], dtype=object) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_replace_change_dtype_series(self): | ||||
|         # GH#25797 | ||||
|         df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object) | ||||
|         df["Test"] = df["Test"].replace([True], [np.nan]) | ||||
|         expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object) | ||||
|         df["Test"] = df["Test"].replace([None], [np.nan]) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|         df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object) | ||||
|         df["Test"] = df["Test"].fillna(np.nan) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("dtype", ["object", "Int64"]) | ||||
|     def test_replace_na_in_obj_column(self, dtype): | ||||
|         # GH#47480 | ||||
|         ser = pd.Series([0, 1, pd.NA], dtype=dtype) | ||||
|         expected = pd.Series([0, 2, pd.NA], dtype=dtype) | ||||
|         result = ser.replace(to_replace=1, value=2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         ser.replace(to_replace=1, value=2, inplace=True) | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("val", [0, 0.5]) | ||||
|     def test_replace_numeric_column_with_na(self, val): | ||||
|         # GH#50758 | ||||
|         ser = pd.Series([val, 1]) | ||||
|         expected = pd.Series([val, pd.NA]) | ||||
|         result = ser.replace(to_replace=1, value=pd.NA) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         ser.replace(to_replace=1, value=pd.NA, inplace=True) | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     def test_replace_ea_float_with_bool(self): | ||||
|         # GH#55398 | ||||
|         ser = pd.Series([0.0], dtype="Float64") | ||||
|         expected = ser.copy() | ||||
|         result = ser.replace(False, 1.0) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         ser = pd.Series([False], dtype="boolean") | ||||
|         expected = ser.copy() | ||||
|         result = ser.replace(0.0, True) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,225 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     RangeIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
|     option_context, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestResetIndex: | ||||
|     def test_reset_index_dti_round_trip(self): | ||||
|         dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None) | ||||
|         d1 = DataFrame({"v": np.random.default_rng(2).random(len(dti))}, index=dti) | ||||
|         d2 = d1.reset_index() | ||||
|         assert d2.dtypes.iloc[0] == np.dtype("M8[ns]") | ||||
|         d3 = d2.set_index("index") | ||||
|         tm.assert_frame_equal(d1, d3, check_names=False) | ||||
|  | ||||
|         # GH#2329 | ||||
|         stamp = datetime(2012, 11, 22) | ||||
|         df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"]) | ||||
|         df = df.set_index("Date") | ||||
|  | ||||
|         assert df.index[0] == stamp | ||||
|         assert df.reset_index()["Date"].iloc[0] == stamp | ||||
|  | ||||
|     def test_reset_index(self): | ||||
|         df = DataFrame( | ||||
|             1.1 * np.arange(120).reshape((30, 4)), | ||||
|             columns=Index(list("ABCD"), dtype=object), | ||||
|             index=Index([f"i-{i}" for i in range(30)], dtype=object), | ||||
|         )[:5] | ||||
|         ser = df.stack(future_stack=True) | ||||
|         ser.index.names = ["hash", "category"] | ||||
|  | ||||
|         ser.name = "value" | ||||
|         df = ser.reset_index() | ||||
|         assert "value" in df | ||||
|  | ||||
|         df = ser.reset_index(name="value2") | ||||
|         assert "value2" in df | ||||
|  | ||||
|         # check inplace | ||||
|         s = ser.reset_index(drop=True) | ||||
|         s2 = ser | ||||
|         return_value = s2.reset_index(drop=True, inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(s, s2) | ||||
|  | ||||
|         # level | ||||
|         index = MultiIndex( | ||||
|             levels=[["bar"], ["one", "two", "three"], [0, 1]], | ||||
|             codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], | ||||
|         ) | ||||
|         s = Series(np.random.default_rng(2).standard_normal(6), index=index) | ||||
|         rs = s.reset_index(level=1) | ||||
|         assert len(rs.columns) == 2 | ||||
|  | ||||
|         rs = s.reset_index(level=[0, 2], drop=True) | ||||
|         tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) | ||||
|         assert isinstance(rs, Series) | ||||
|  | ||||
|     def test_reset_index_name(self): | ||||
|         s = Series([1, 2, 3], index=Index(range(3), name="x")) | ||||
|         assert s.reset_index().index.name is None | ||||
|         assert s.reset_index(drop=True).index.name is None | ||||
|  | ||||
|     def test_reset_index_level(self): | ||||
|         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) | ||||
|  | ||||
|         for levels in ["A", "B"], [0, 1]: | ||||
|             # With MultiIndex | ||||
|             s = df.set_index(["A", "B"])["C"] | ||||
|  | ||||
|             result = s.reset_index(level=levels[0]) | ||||
|             tm.assert_frame_equal(result, df.set_index("B")) | ||||
|  | ||||
|             result = s.reset_index(level=levels[:1]) | ||||
|             tm.assert_frame_equal(result, df.set_index("B")) | ||||
|  | ||||
|             result = s.reset_index(level=levels) | ||||
|             tm.assert_frame_equal(result, df) | ||||
|  | ||||
|             result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) | ||||
|             tm.assert_frame_equal(result, df[["C"]]) | ||||
|  | ||||
|             with pytest.raises(KeyError, match="Level E "): | ||||
|                 s.reset_index(level=["A", "E"]) | ||||
|  | ||||
|             # With single-level Index | ||||
|             s = df.set_index("A")["B"] | ||||
|  | ||||
|             result = s.reset_index(level=levels[0]) | ||||
|             tm.assert_frame_equal(result, df[["A", "B"]]) | ||||
|  | ||||
|             result = s.reset_index(level=levels[:1]) | ||||
|             tm.assert_frame_equal(result, df[["A", "B"]]) | ||||
|  | ||||
|             result = s.reset_index(level=levels[0], drop=True) | ||||
|             tm.assert_series_equal(result, df["B"]) | ||||
|  | ||||
|             with pytest.raises(IndexError, match="Too many levels"): | ||||
|                 s.reset_index(level=[0, 1, 2]) | ||||
|  | ||||
|         # Check that .reset_index([],drop=True) doesn't fail | ||||
|         result = Series(range(4)).reset_index([], drop=True) | ||||
|         expected = Series(range(4)) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_reset_index_range(self): | ||||
|         # GH 12071 | ||||
|         s = Series(range(2), name="A", dtype="int64") | ||||
|         series_result = s.reset_index() | ||||
|         assert isinstance(series_result.index, RangeIndex) | ||||
|         series_expected = DataFrame( | ||||
|             [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2) | ||||
|         ) | ||||
|         tm.assert_frame_equal(series_result, series_expected) | ||||
|  | ||||
|     def test_reset_index_drop_errors(self): | ||||
|         #  GH 20925 | ||||
|  | ||||
|         # KeyError raised for series index when passed level name is missing | ||||
|         s = Series(range(4)) | ||||
|         with pytest.raises(KeyError, match="does not match index name"): | ||||
|             s.reset_index("wrong", drop=True) | ||||
|         with pytest.raises(KeyError, match="does not match index name"): | ||||
|             s.reset_index("wrong") | ||||
|  | ||||
|         # KeyError raised for series when level to be dropped is missing | ||||
|         s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) | ||||
|         with pytest.raises(KeyError, match="not found"): | ||||
|             s.reset_index("wrong", drop=True) | ||||
|  | ||||
|     def test_reset_index_with_drop(self): | ||||
|         arrays = [ | ||||
|             ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], | ||||
|             ["one", "two", "one", "two", "one", "two", "one", "two"], | ||||
|         ] | ||||
|         tuples = zip(*arrays) | ||||
|         index = MultiIndex.from_tuples(tuples) | ||||
|         data = np.random.default_rng(2).standard_normal(8) | ||||
|         ser = Series(data, index=index) | ||||
|         ser.iloc[3] = np.nan | ||||
|  | ||||
|         deleveled = ser.reset_index() | ||||
|         assert isinstance(deleveled, DataFrame) | ||||
|         assert len(deleveled.columns) == len(ser.index.levels) + 1 | ||||
|         assert deleveled.index.name == ser.index.name | ||||
|  | ||||
|         deleveled = ser.reset_index(drop=True) | ||||
|         assert isinstance(deleveled, Series) | ||||
|         assert deleveled.index.name == ser.index.name | ||||
|  | ||||
|     def test_reset_index_inplace_and_drop_ignore_name(self): | ||||
|         # GH#44575 | ||||
|         ser = Series(range(2), name="old") | ||||
|         ser.reset_index(name="new", drop=True, inplace=True) | ||||
|         expected = Series(range(2), name="old") | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     def test_reset_index_drop_infer_string(self): | ||||
|         # GH#56160 | ||||
|         pytest.importorskip("pyarrow") | ||||
|         ser = Series(["a", "b", "c"], dtype=object) | ||||
|         with option_context("future.infer_string", True): | ||||
|             result = ser.reset_index(drop=True) | ||||
|         tm.assert_series_equal(result, ser) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "array, dtype", | ||||
|     [ | ||||
|         (["a", "b"], object), | ||||
|         ( | ||||
|             pd.period_range("12-1-2000", periods=2, freq="Q-DEC"), | ||||
|             pd.PeriodDtype(freq="Q-DEC"), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_reset_index_dtypes_on_empty_series_with_multiindex( | ||||
|     array, dtype, using_infer_string | ||||
| ): | ||||
|     # GH 19602 - Preserve dtype on empty Series with MultiIndex | ||||
|     idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array]) | ||||
|     result = Series(dtype=object, index=idx)[:0].reset_index().dtypes | ||||
|     exp = "str" if using_infer_string else object | ||||
|     expected = Series( | ||||
|         { | ||||
|             "level_0": np.int64, | ||||
|             "level_1": np.float64, | ||||
|             "level_2": exp if dtype == object else dtype, | ||||
|             0: object, | ||||
|         } | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "names, expected_names",
    [
        (["A", "A"], ["A", "A"]),
        (["level_1", None], ["level_1", "level_1"]),
    ],
)
@pytest.mark.parametrize("allow_duplicates", [False, True])
def test_column_name_duplicates(names, expected_names, allow_duplicates):
    """Duplicate column labels are rejected unless ``allow_duplicates`` is set."""
    # GH#44755 reset_index with duplicate column labels
    ser = Series([1], index=MultiIndex.from_arrays([[1], [1]], names=names))

    if not allow_duplicates:
        with pytest.raises(ValueError, match="cannot insert"):
            ser.reset_index()
        return

    expected = DataFrame([[1, 1, 1]], columns=expected_names + [0])
    result = ser.reset_index(allow_duplicates=True)
    tm.assert_frame_equal(result, expected)
| @ -0,0 +1,81 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesRound: | ||||
|     def test_round(self, datetime_series): | ||||
|         datetime_series.index.name = "index_name" | ||||
|         result = datetime_series.round(2) | ||||
|         expected = Series( | ||||
|             np.round(datetime_series.values, 2), index=datetime_series.index, name="ts" | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         assert result.name == datetime_series.name | ||||
|  | ||||
|     def test_round_numpy(self, any_float_dtype): | ||||
|         # See GH#12600 | ||||
|         ser = Series([1.53, 1.36, 0.06], dtype=any_float_dtype) | ||||
|         out = np.round(ser, decimals=0) | ||||
|         expected = Series([2.0, 1.0, 0.0], dtype=any_float_dtype) | ||||
|         tm.assert_series_equal(out, expected) | ||||
|  | ||||
|         msg = "the 'out' parameter is not supported" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             np.round(ser, decimals=0, out=ser) | ||||
|  | ||||
|     def test_round_numpy_with_nan(self, any_float_dtype): | ||||
|         # See GH#14197 | ||||
|         ser = Series([1.53, np.nan, 0.06], dtype=any_float_dtype) | ||||
|         with tm.assert_produces_warning(None): | ||||
|             result = ser.round() | ||||
|         expected = Series([2.0, np.nan, 0.0], dtype=any_float_dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_round_builtin(self, any_float_dtype): | ||||
|         ser = Series( | ||||
|             [1.123, 2.123, 3.123], | ||||
|             index=range(3), | ||||
|             dtype=any_float_dtype, | ||||
|         ) | ||||
|         result = round(ser) | ||||
|         expected_rounded0 = Series( | ||||
|             [1.0, 2.0, 3.0], index=range(3), dtype=any_float_dtype | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected_rounded0) | ||||
|  | ||||
|         decimals = 2 | ||||
|         expected_rounded = Series( | ||||
|             [1.12, 2.12, 3.12], index=range(3), dtype=any_float_dtype | ||||
|         ) | ||||
|         result = round(ser, decimals) | ||||
|         tm.assert_series_equal(result, expected_rounded) | ||||
|  | ||||
|     @pytest.mark.parametrize("method", ["round", "floor", "ceil"]) | ||||
|     @pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"]) | ||||
|     def test_round_nat(self, method, freq, unit): | ||||
|         # GH14940, GH#56158 | ||||
|         ser = Series([pd.NaT], dtype=f"M8[{unit}]") | ||||
|         expected = Series(pd.NaT, dtype=f"M8[{unit}]") | ||||
|         round_method = getattr(ser.dt, method) | ||||
|         result = round_method(freq) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_round_ea_boolean(self): | ||||
|         # GH#55936 | ||||
|         ser = Series([True, False], dtype="boolean") | ||||
|         expected = ser.copy() | ||||
|         result = ser.round(2) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|         result.iloc[0] = False | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     def test_round_dtype_object(self): | ||||
|         # GH#61206 | ||||
|         ser = Series([0.2], dtype="object") | ||||
|         msg = "Expected numeric dtype, got object instead." | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser.round() | ||||
| @ -0,0 +1,77 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.api.types import is_scalar | ||||
|  | ||||
|  | ||||
class TestSeriesSearchSorted:
    """Tests for ``Series.searchsorted`` over numeric and datetime data."""

    def test_searchsorted(self):
        """``side`` selects the insertion point before or after an equal value."""
        ser = Series([1, 2, 3])

        for side, position in (("left", 0), ("right", 1)):
            found = ser.searchsorted(1, side=side)
            assert is_scalar(found)
            assert found == position

    def test_searchsorted_numeric_dtypes_scalar(self):
        """A scalar needle gives a scalar; a one-element list gives an array."""
        ser = Series([1, 2, 90, 1000, 3e9])

        scalar_result = ser.searchsorted(30)
        assert is_scalar(scalar_result)
        assert scalar_result == 2

        array_result = ser.searchsorted([30])
        tm.assert_numpy_array_equal(array_result, np.array([2], dtype=np.intp))

    def test_searchsorted_numeric_dtypes_vector(self):
        """A list of needles gives an intp array of insertion points."""
        ser = Series([1, 2, 90, 1000, 3e9])
        expected = np.array([3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(ser.searchsorted([91, 2e6]), expected)

    def test_searchsorted_datetime64_scalar(self):
        """A Timestamp needle is located within datetime64 data."""
        ser = Series(date_range("20120101", periods=10, freq="2D"))
        position = ser.searchsorted(Timestamp("20120102"))
        assert is_scalar(position)
        assert position == 1

    def test_searchsorted_datetime64_scalar_mixed_timezones(self):
        """The needle may carry a different timezone than the tz-aware data."""
        # GH 30086
        ser = Series(date_range("20120101", periods=10, freq="2D", tz="UTC"))
        needle = Timestamp("20120102", tz="America/New_York")
        position = ser.searchsorted(needle)
        assert is_scalar(position)
        assert position == 1

    def test_searchsorted_datetime64_list(self):
        """A list of Timestamps gives an array of insertion points."""
        ser = Series(date_range("20120101", periods=10, freq="2D"))
        needles = [Timestamp("20120102"), Timestamp("20120104")]
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(ser.searchsorted(needles), expected)

    def test_searchsorted_sorter(self):
        """``sorter`` allows searching data that is not stored in sorted order."""
        # GH8490
        ser = Series([3, 1, 2])
        order = np.argsort(ser)
        expected = np.array([0, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(ser.searchsorted([0, 3], sorter=order), expected)

    def test_searchsorted_dataframe_fail(self):
        """Passing a DataFrame as the needle raises ValueError."""
        # GH#49620
        ser = Series([1, 2, 3, 4, 5])
        needles = pd.DataFrame([[1, 2], [3, 4]])
        with pytest.raises(
            ValueError,
            match="Value must be 1-D array-like or scalar, DataFrame is not supported",
        ):
            ser.searchsorted(needles)
| @ -0,0 +1,21 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| from pandas import Series | ||||
|  | ||||
|  | ||||
class TestSetName:
    """Tests for naming a Series via ``_set_name`` and the ``name`` attribute."""

    def test_set_name(self):
        """``_set_name`` returns a renamed new object and leaves the input alone."""
        original = Series([1, 2, 3])
        renamed = original._set_name("foo")
        assert renamed.name == "foo"
        assert original.name is None
        assert original is not renamed

    def test_set_name_attribute(self):
        """Several kinds of hashable value can be assigned as the name."""
        unnamed = Series([1, 2, 3])
        named = Series([1, 2, 3], name="bar")
        candidates = [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]
        for candidate in candidates:
            for ser in (unnamed, named):
                ser.name = candidate
                assert ser.name == candidate
| @ -0,0 +1,22 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import Series | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data, index, expected",
    [
        # each case is listed once; the original list repeated two of them
        ([1, 2, 3], None, 3),
        ({"a": 1, "b": 2, "c": 3}, None, 3),
        ([1, 2, 3], ["x", "y", "z"], 3),
        ([1, 2, 3, 4], ["x", "y", "z", "w"], 4),
        ([1, 2, 3, 4, 5], ["x", "y", "z", "w", "n"], 5),
    ],
)
def test_series(data, index, expected):
    """``Series.size`` equals the number of elements and is a builtin ``int``.

    Parameters
    ----------
    data : list or dict
        Values used to construct the Series.
    index : list or None
        Explicit index labels, or None for the default index.
    expected : int
        Expected value of ``Series.size``.
    """
    # GH#52897
    ser = Series(data, index=index)
    assert ser.size == expected
    # must be a builtin int, not just something that compares equal to one
    assert isinstance(ser.size, int)
| @ -0,0 +1,337 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     IntervalIndex, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=["quicksort", "mergesort", "heapsort", "stable"])
def sort_kind(request):
    """Yield, one per test run, each algorithm name passed as ``kind=`` below."""
    algorithm = request.param
    return algorithm
|  | ||||
|  | ||||
| class TestSeriesSortIndex: | ||||
|     def test_sort_index_name(self, datetime_series): | ||||
|         result = datetime_series.sort_index(ascending=False) | ||||
|         assert result.name == datetime_series.name | ||||
|  | ||||
|     def test_sort_index(self, datetime_series): | ||||
|         datetime_series.index = datetime_series.index._with_freq(None) | ||||
|  | ||||
|         rindex = list(datetime_series.index) | ||||
|         np.random.default_rng(2).shuffle(rindex) | ||||
|  | ||||
|         random_order = datetime_series.reindex(rindex) | ||||
|         sorted_series = random_order.sort_index() | ||||
|         tm.assert_series_equal(sorted_series, datetime_series) | ||||
|  | ||||
|         # descending | ||||
|         sorted_series = random_order.sort_index(ascending=False) | ||||
|         tm.assert_series_equal( | ||||
|             sorted_series, datetime_series.reindex(datetime_series.index[::-1]) | ||||
|         ) | ||||
|  | ||||
|         # compat on level | ||||
|         sorted_series = random_order.sort_index(level=0) | ||||
|         tm.assert_series_equal(sorted_series, datetime_series) | ||||
|  | ||||
|         # compat on axis | ||||
|         sorted_series = random_order.sort_index(axis=0) | ||||
|         tm.assert_series_equal(sorted_series, datetime_series) | ||||
|  | ||||
|         msg = "No axis named 1 for object type Series" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             random_order.sort_values(axis=1) | ||||
|  | ||||
|         sorted_series = random_order.sort_index(level=0, axis=0) | ||||
|         tm.assert_series_equal(sorted_series, datetime_series) | ||||
|  | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             random_order.sort_index(level=0, axis=1) | ||||
|  | ||||
|     def test_sort_index_inplace(self, datetime_series): | ||||
|         datetime_series.index = datetime_series.index._with_freq(None) | ||||
|  | ||||
|         # For GH#11402 | ||||
|         rindex = list(datetime_series.index) | ||||
|         np.random.default_rng(2).shuffle(rindex) | ||||
|  | ||||
|         # descending | ||||
|         random_order = datetime_series.reindex(rindex) | ||||
|         result = random_order.sort_index(ascending=False, inplace=True) | ||||
|  | ||||
|         assert result is None | ||||
|         expected = datetime_series.reindex(datetime_series.index[::-1]) | ||||
|         expected.index = expected.index._with_freq(None) | ||||
|         tm.assert_series_equal(random_order, expected) | ||||
|  | ||||
|         # ascending | ||||
|         random_order = datetime_series.reindex(rindex) | ||||
|         result = random_order.sort_index(ascending=True, inplace=True) | ||||
|  | ||||
|         assert result is None | ||||
|         expected = datetime_series.copy() | ||||
|         expected.index = expected.index._with_freq(None) | ||||
|         tm.assert_series_equal(random_order, expected) | ||||
|  | ||||
|     def test_sort_index_level(self): | ||||
|         mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) | ||||
|         s = Series([1, 2], mi) | ||||
|         backwards = s.iloc[[1, 0]] | ||||
|  | ||||
|         res = s.sort_index(level="A") | ||||
|         tm.assert_series_equal(backwards, res) | ||||
|  | ||||
|         res = s.sort_index(level=["A", "B"]) | ||||
|         tm.assert_series_equal(backwards, res) | ||||
|  | ||||
|         res = s.sort_index(level="A", sort_remaining=False) | ||||
|         tm.assert_series_equal(s, res) | ||||
|  | ||||
|         res = s.sort_index(level=["A", "B"], sort_remaining=False) | ||||
|         tm.assert_series_equal(s, res) | ||||
|  | ||||
|     @pytest.mark.parametrize("level", ["A", 0])  # GH#21052 | ||||
|     def test_sort_index_multiindex(self, level): | ||||
|         mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) | ||||
|         s = Series([1, 2], mi) | ||||
|         backwards = s.iloc[[1, 0]] | ||||
|  | ||||
|         # implicit sort_remaining=True | ||||
|         res = s.sort_index(level=level) | ||||
|         tm.assert_series_equal(backwards, res) | ||||
|  | ||||
|         # GH#13496 | ||||
|         # sort has no effect without remaining lvls | ||||
|         res = s.sort_index(level=level, sort_remaining=False) | ||||
|         tm.assert_series_equal(s, res) | ||||
|  | ||||
|     def test_sort_index_kind(self, sort_kind): | ||||
|         # GH#14444 & GH#13589:  Add support for sort algo choosing | ||||
|         series = Series(index=[3, 2, 1, 4, 3], dtype=object) | ||||
|         expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object) | ||||
|  | ||||
|         index_sorted_series = series.sort_index(kind=sort_kind) | ||||
|         tm.assert_series_equal(expected_series, index_sorted_series) | ||||
|  | ||||
|     def test_sort_index_na_position(self): | ||||
|         series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object) | ||||
|         expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object) | ||||
|  | ||||
|         index_sorted_series = series.sort_index(na_position="first") | ||||
|         tm.assert_series_equal(expected_series_first, index_sorted_series) | ||||
|  | ||||
|         expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object) | ||||
|  | ||||
|         index_sorted_series = series.sort_index(na_position="last") | ||||
|         tm.assert_series_equal(expected_series_last, index_sorted_series) | ||||
|  | ||||
|     def test_sort_index_intervals(self): | ||||
|         s = Series( | ||||
|             [np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4]) | ||||
|         ) | ||||
|  | ||||
|         result = s.sort_index() | ||||
|         expected = s | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.sort_index(ascending=False) | ||||
|         expected = Series( | ||||
|             [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1]) | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("inplace", [True, False]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "original_list, sorted_list, ascending, ignore_index, output_index", | ||||
|         [ | ||||
|             ([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]), | ||||
|             ([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]), | ||||
|             ([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]), | ||||
|             ([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_sort_index_ignore_index( | ||||
|         self, inplace, original_list, sorted_list, ascending, ignore_index, output_index | ||||
|     ): | ||||
|         # GH 30114 | ||||
|         ser = Series(original_list) | ||||
|         expected = Series(sorted_list, index=output_index) | ||||
|         kwargs = { | ||||
|             "ascending": ascending, | ||||
|             "ignore_index": ignore_index, | ||||
|             "inplace": inplace, | ||||
|         } | ||||
|  | ||||
|         if inplace: | ||||
|             result_ser = ser.copy() | ||||
|             result_ser.sort_index(**kwargs) | ||||
|         else: | ||||
|             result_ser = ser.sort_index(**kwargs) | ||||
|  | ||||
|         tm.assert_series_equal(result_ser, expected) | ||||
|         tm.assert_series_equal(ser, Series(original_list)) | ||||
|  | ||||
|     def test_sort_index_ascending_list(self): | ||||
|         # GH#16934 | ||||
|  | ||||
|         # Set up a Series with a three level MultiIndex | ||||
|         arrays = [ | ||||
|             ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], | ||||
|             ["one", "two", "one", "two", "one", "two", "one", "two"], | ||||
|             [4, 3, 2, 1, 4, 3, 2, 1], | ||||
|         ] | ||||
|         tuples = zip(*arrays) | ||||
|         mi = MultiIndex.from_tuples(tuples, names=["first", "second", "third"]) | ||||
|         ser = Series(range(8), index=mi) | ||||
|  | ||||
|         # Sort with boolean ascending | ||||
|         result = ser.sort_index(level=["third", "first"], ascending=False) | ||||
|         expected = ser.iloc[[4, 0, 5, 1, 6, 2, 7, 3]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # Sort with list of boolean ascending | ||||
|         result = ser.sort_index(level=["third", "first"], ascending=[False, True]) | ||||
|         expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ascending", | ||||
|         [ | ||||
|             None, | ||||
|             (True, None), | ||||
|             (False, "True"), | ||||
|         ], | ||||
|     ) | ||||
|     def test_sort_index_ascending_bad_value_raises(self, ascending): | ||||
|         ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9]) | ||||
|         match = 'For argument "ascending" expected type bool' | ||||
|         with pytest.raises(ValueError, match=match): | ||||
|             ser.sort_index(ascending=ascending) | ||||
|  | ||||
|  | ||||
| class TestSeriesSortIndexKey: | ||||
|     def test_sort_index_multiindex_key(self): | ||||
|         mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) | ||||
|         s = Series([1, 2], mi) | ||||
|         backwards = s.iloc[[1, 0]] | ||||
|  | ||||
|         result = s.sort_index(level="C", key=lambda x: -x) | ||||
|         tm.assert_series_equal(s, result) | ||||
|  | ||||
|         result = s.sort_index(level="C", key=lambda x: x)  # nothing happens | ||||
|         tm.assert_series_equal(backwards, result) | ||||
|  | ||||
|     def test_sort_index_multiindex_key_multi_level(self): | ||||
|         mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) | ||||
|         s = Series([1, 2], mi) | ||||
|         backwards = s.iloc[[1, 0]] | ||||
|  | ||||
|         result = s.sort_index(level=["A", "C"], key=lambda x: -x) | ||||
|         tm.assert_series_equal(s, result) | ||||
|  | ||||
|         result = s.sort_index(level=["A", "C"], key=lambda x: x)  # nothing happens | ||||
|         tm.assert_series_equal(backwards, result) | ||||
|  | ||||
|     def test_sort_index_key(self): | ||||
|         series = Series(np.arange(6, dtype="int64"), index=list("aaBBca")) | ||||
|  | ||||
|         result = series.sort_index() | ||||
|         expected = series.iloc[[2, 3, 0, 1, 5, 4]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.sort_index(key=lambda x: x.str.lower()) | ||||
|         expected = series.iloc[[0, 1, 5, 2, 3, 4]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.sort_index(key=lambda x: x.str.lower(), ascending=False) | ||||
|         expected = series.iloc[[4, 2, 3, 0, 1, 5]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_sort_index_key_int(self): | ||||
|         series = Series(np.arange(6, dtype="int64"), index=np.arange(6, dtype="int64")) | ||||
|  | ||||
|         result = series.sort_index() | ||||
|         tm.assert_series_equal(result, series) | ||||
|  | ||||
|         result = series.sort_index(key=lambda x: -x) | ||||
|         expected = series.sort_index(ascending=False) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.sort_index(key=lambda x: 2 * x) | ||||
|         tm.assert_series_equal(result, series) | ||||
|  | ||||
|     def test_sort_index_kind_key(self, sort_kind, sort_by_key): | ||||
|         # GH #14444 & #13589:  Add support for sort algo choosing | ||||
|         series = Series(index=[3, 2, 1, 4, 3], dtype=object) | ||||
|         expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object) | ||||
|  | ||||
|         index_sorted_series = series.sort_index(kind=sort_kind, key=sort_by_key) | ||||
|         tm.assert_series_equal(expected_series, index_sorted_series) | ||||
|  | ||||
|     def test_sort_index_kind_neg_key(self, sort_kind): | ||||
|         # GH #14444 & #13589:  Add support for sort algo choosing | ||||
|         series = Series(index=[3, 2, 1, 4, 3], dtype=object) | ||||
|         expected_series = Series(index=[4, 3, 3, 2, 1], dtype=object) | ||||
|  | ||||
|         index_sorted_series = series.sort_index(kind=sort_kind, key=lambda x: -x) | ||||
|         tm.assert_series_equal(expected_series, index_sorted_series) | ||||
|  | ||||
|     def test_sort_index_na_position_key(self, sort_by_key): | ||||
|         series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object) | ||||
|         expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object) | ||||
|  | ||||
|         index_sorted_series = series.sort_index(na_position="first", key=sort_by_key) | ||||
|         tm.assert_series_equal(expected_series_first, index_sorted_series) | ||||
|  | ||||
|         expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object) | ||||
|  | ||||
|         index_sorted_series = series.sort_index(na_position="last", key=sort_by_key) | ||||
|         tm.assert_series_equal(expected_series_last, index_sorted_series) | ||||
|  | ||||
|     def test_changes_length_raises(self): | ||||
|         s = Series([1, 2, 3]) | ||||
|         with pytest.raises(ValueError, match="change the shape"): | ||||
|             s.sort_index(key=lambda x: x[:1]) | ||||
|  | ||||
|     def test_sort_values_key_type(self): | ||||
|         s = Series([1, 2, 3], DatetimeIndex(["2008-10-24", "2008-11-23", "2007-12-22"])) | ||||
|  | ||||
|         result = s.sort_index(key=lambda x: x.month) | ||||
|         expected = s.iloc[[0, 1, 2]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.sort_index(key=lambda x: x.day) | ||||
|         expected = s.iloc[[2, 1, 0]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.sort_index(key=lambda x: x.year) | ||||
|         expected = s.iloc[[2, 0, 1]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = s.sort_index(key=lambda x: x.month_name()) | ||||
|         expected = s.iloc[[2, 1, 0]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ascending", | ||||
|         [ | ||||
|             [True, False], | ||||
|             [False, True], | ||||
|         ], | ||||
|     ) | ||||
|     def test_sort_index_multi_already_monotonic(self, ascending): | ||||
|         # GH 56049 | ||||
|         mi = MultiIndex.from_product([[1, 2], [3, 4]]) | ||||
|         ser = Series(range(len(mi)), index=mi) | ||||
|         result = ser.sort_index(ascending=ascending) | ||||
|         if ascending == [True, False]: | ||||
|             expected = ser.take([1, 0, 3, 2]) | ||||
|         elif ascending == [False, True]: | ||||
|             expected = ser.take([2, 3, 0, 1]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,246 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesSortValues: | ||||
|     def test_sort_values(self, datetime_series, using_copy_on_write): | ||||
|         # check indexes are reordered corresponding with the values | ||||
|         ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"]) | ||||
|         expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"]) | ||||
|         result = ser.sort_values() | ||||
|         tm.assert_series_equal(expected, result) | ||||
|  | ||||
|         ts = datetime_series.copy() | ||||
|         ts[:5] = np.nan | ||||
|         vals = ts.values | ||||
|  | ||||
|         result = ts.sort_values() | ||||
|         assert np.isnan(result[-5:]).all() | ||||
|         tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) | ||||
|  | ||||
|         # na_position | ||||
|         result = ts.sort_values(na_position="first") | ||||
|         assert np.isnan(result[:5]).all() | ||||
|         tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) | ||||
|  | ||||
|         # something object-type | ||||
|         ser = Series(["A", "B"], [1, 2]) | ||||
|         # no failure | ||||
|         ser.sort_values() | ||||
|  | ||||
|         # ascending=False | ||||
|         ordered = ts.sort_values(ascending=False) | ||||
|         expected = np.sort(ts.dropna().values)[::-1] | ||||
|         tm.assert_almost_equal(expected, ordered.dropna().values) | ||||
|         ordered = ts.sort_values(ascending=False, na_position="first") | ||||
|         tm.assert_almost_equal(expected, ordered.dropna().values) | ||||
|  | ||||
|         # ascending=[False] should behave the same as ascending=False | ||||
|         ordered = ts.sort_values(ascending=[False]) | ||||
|         expected = ts.sort_values(ascending=False) | ||||
|         tm.assert_series_equal(expected, ordered) | ||||
|         ordered = ts.sort_values(ascending=[False], na_position="first") | ||||
|         expected = ts.sort_values(ascending=False, na_position="first") | ||||
|         tm.assert_series_equal(expected, ordered) | ||||
|  | ||||
|         msg = 'For argument "ascending" expected type bool, received type NoneType.' | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ts.sort_values(ascending=None) | ||||
|         msg = r"Length of ascending \(0\) must be 1 for Series" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ts.sort_values(ascending=[]) | ||||
|         msg = r"Length of ascending \(3\) must be 1 for Series" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ts.sort_values(ascending=[1, 2, 3]) | ||||
|         msg = r"Length of ascending \(2\) must be 1 for Series" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ts.sort_values(ascending=[False, False]) | ||||
|         msg = 'For argument "ascending" expected type bool, received type str.' | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ts.sort_values(ascending="foobar") | ||||
|  | ||||
|         # inplace=True | ||||
|         ts = datetime_series.copy() | ||||
|         return_value = ts.sort_values(ascending=False, inplace=True) | ||||
|         assert return_value is None | ||||
|         tm.assert_series_equal(ts, datetime_series.sort_values(ascending=False)) | ||||
|         tm.assert_index_equal( | ||||
|             ts.index, datetime_series.sort_values(ascending=False).index | ||||
|         ) | ||||
|  | ||||
|         # GH#5856/5853 | ||||
|         # Series.sort_values operating on a view | ||||
|         df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) | ||||
|         s = df.iloc[:, 0] | ||||
|  | ||||
|         msg = ( | ||||
|             "This Series is a view of some other array, to sort in-place " | ||||
|             "you must create a copy" | ||||
|         ) | ||||
|         if using_copy_on_write: | ||||
|             s.sort_values(inplace=True) | ||||
|             tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) | ||||
|         else: | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 s.sort_values(inplace=True) | ||||
|  | ||||
|     def test_sort_values_categorical(self): | ||||
|         c = Categorical(["a", "b", "b", "a"], ordered=False) | ||||
|         cat = Series(c.copy()) | ||||
|  | ||||
|         # sort in the categories order | ||||
|         expected = Series( | ||||
|             Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2] | ||||
|         ) | ||||
|         result = cat.sort_values() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         cat = Series(Categorical(["a", "c", "b", "d"], ordered=True)) | ||||
|         res = cat.sort_values() | ||||
|         exp = np.array(["a", "b", "c", "d"], dtype=np.object_) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|  | ||||
|         cat = Series( | ||||
|             Categorical( | ||||
|                 ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True | ||||
|             ) | ||||
|         ) | ||||
|         res = cat.sort_values() | ||||
|         exp = np.array(["a", "b", "c", "d"], dtype=np.object_) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|  | ||||
|         res = cat.sort_values(ascending=False) | ||||
|         exp = np.array(["d", "c", "b", "a"], dtype=np.object_) | ||||
|         tm.assert_numpy_array_equal(res.__array__(), exp) | ||||
|  | ||||
|         raw_cat1 = Categorical( | ||||
|             ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False | ||||
|         ) | ||||
|         raw_cat2 = Categorical( | ||||
|             ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True | ||||
|         ) | ||||
|         s = ["a", "b", "c", "d"] | ||||
|         df = DataFrame( | ||||
|             {"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]} | ||||
|         ) | ||||
|  | ||||
|         # Cats must be sorted in a dataframe | ||||
|         res = df.sort_values(by=["string"], ascending=False) | ||||
|         exp = np.array(["d", "c", "b", "a"], dtype=np.object_) | ||||
|         tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp) | ||||
|         assert res["sort"].dtype == "category" | ||||
|  | ||||
|         res = df.sort_values(by=["sort"], ascending=False) | ||||
|         exp = df.sort_values(by=["string"], ascending=True) | ||||
|         tm.assert_series_equal(res["values"], exp["values"]) | ||||
|         assert res["sort"].dtype == "category" | ||||
|         assert res["unsort"].dtype == "category" | ||||
|  | ||||
|         # unordered cat, but we allow this | ||||
|         df.sort_values(by=["unsort"], ascending=False) | ||||
|  | ||||
|         # multi-columns sort | ||||
|         # GH#7848 | ||||
|         df = DataFrame( | ||||
|             {"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]} | ||||
|         ) | ||||
|         df["grade"] = Categorical(df["raw_grade"], ordered=True) | ||||
|         df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"]) | ||||
|  | ||||
|         # sorts 'grade' according to the order of the categories | ||||
|         result = df.sort_values(by=["grade"]) | ||||
|         expected = df.iloc[[1, 2, 5, 0, 3, 4]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # multi | ||||
|         result = df.sort_values(by=["grade", "id"]) | ||||
|         expected = df.iloc[[2, 1, 5, 4, 3, 0]] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("inplace", [True, False]) | ||||
|     @pytest.mark.parametrize( | ||||
|         "original_list, sorted_list, ignore_index, output_index", | ||||
|         [ | ||||
|             ([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]), | ||||
|             ([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_sort_values_ignore_index( | ||||
|         self, inplace, original_list, sorted_list, ignore_index, output_index | ||||
|     ): | ||||
|         # GH 30114 | ||||
|         ser = Series(original_list) | ||||
|         expected = Series(sorted_list, index=output_index) | ||||
|         kwargs = {"ignore_index": ignore_index, "inplace": inplace} | ||||
|  | ||||
|         if inplace: | ||||
|             result_ser = ser.copy() | ||||
|             result_ser.sort_values(ascending=False, **kwargs) | ||||
|         else: | ||||
|             result_ser = ser.sort_values(ascending=False, **kwargs) | ||||
|  | ||||
|         tm.assert_series_equal(result_ser, expected) | ||||
|         tm.assert_series_equal(ser, Series(original_list)) | ||||
|  | ||||
|     def test_mergesort_descending_stability(self): | ||||
|         # GH 28697 | ||||
|         s = Series([1, 2, 1, 3], ["first", "b", "second", "c"]) | ||||
|         result = s.sort_values(ascending=False, kind="mergesort") | ||||
|         expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_sort_values_validate_ascending_for_value_error(self): | ||||
|         # GH41634 | ||||
|         ser = Series([23, 7, 21]) | ||||
|  | ||||
|         msg = 'For argument "ascending" expected type bool, received type str.' | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser.sort_values(ascending="False") | ||||
|  | ||||
|     @pytest.mark.parametrize("ascending", [False, 0, 1, True]) | ||||
|     def test_sort_values_validate_ascending_functional(self, ascending): | ||||
|         # GH41634 | ||||
|         ser = Series([23, 7, 21]) | ||||
|         expected = np.sort(ser.values) | ||||
|  | ||||
|         sorted_ser = ser.sort_values(ascending=ascending) | ||||
|         if not ascending: | ||||
|             expected = expected[::-1] | ||||
|  | ||||
|         result = sorted_ser.values | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestSeriesSortingKey: | ||||
|     def test_sort_values_key(self): | ||||
|         series = Series(np.array(["Hello", "goodbye"])) | ||||
|  | ||||
|         result = series.sort_values(axis=0) | ||||
|         expected = series | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.sort_values(axis=0, key=lambda x: x.str.lower()) | ||||
|         expected = series[::-1] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_sort_values_key_nan(self): | ||||
|         series = Series(np.array([0, 5, np.nan, 3, 2, np.nan])) | ||||
|  | ||||
|         result = series.sort_values(axis=0) | ||||
|         expected = series.iloc[[0, 4, 3, 1, 2, 5]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.sort_values(axis=0, key=lambda x: x + 5) | ||||
|         expected = series.iloc[[0, 4, 3, 1, 2, 5]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = series.sort_values(axis=0, key=lambda x: -x, ascending=False) | ||||
|         expected = series.iloc[[0, 4, 3, 1, 2, 5]] | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,179 @@ | ||||
| from datetime import datetime | ||||
| from io import StringIO | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.io.common import get_handle | ||||
|  | ||||
|  | ||||
| class TestSeriesToCSV: | ||||
|     def read_csv(self, path, **kwargs): | ||||
|         params = {"index_col": 0, "header": None} | ||||
|         params.update(**kwargs) | ||||
|  | ||||
|         header = params.get("header") | ||||
|         out = pd.read_csv(path, **params).squeeze("columns") | ||||
|  | ||||
|         if header is None: | ||||
|             out.name = out.index.name = None | ||||
|  | ||||
|         return out | ||||
|  | ||||
|     def test_from_csv(self, datetime_series, string_series): | ||||
|         # freq doesn't round-trip | ||||
|         datetime_series.index = datetime_series.index._with_freq(None) | ||||
|  | ||||
|         with tm.ensure_clean() as path: | ||||
|             datetime_series.to_csv(path, header=False) | ||||
|             ts = self.read_csv(path, parse_dates=True) | ||||
|             tm.assert_series_equal(datetime_series, ts, check_names=False) | ||||
|  | ||||
|             assert ts.name is None | ||||
|             assert ts.index.name is None | ||||
|  | ||||
|             # see gh-10483 | ||||
|             datetime_series.to_csv(path, header=True) | ||||
|             ts_h = self.read_csv(path, header=0) | ||||
|             assert ts_h.name == "ts" | ||||
|  | ||||
|             string_series.to_csv(path, header=False) | ||||
|             series = self.read_csv(path) | ||||
|             tm.assert_series_equal(string_series, series, check_names=False) | ||||
|  | ||||
|             assert series.name is None | ||||
|             assert series.index.name is None | ||||
|  | ||||
|             string_series.to_csv(path, header=True) | ||||
|             series_h = self.read_csv(path, header=0) | ||||
|             assert series_h.name == "series" | ||||
|  | ||||
|             with open(path, "w", encoding="utf-8") as outfile: | ||||
|                 outfile.write("1998-01-01|1.0\n1999-01-01|2.0") | ||||
|  | ||||
|             series = self.read_csv(path, sep="|", parse_dates=True) | ||||
|             check_series = Series( | ||||
|                 {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0} | ||||
|             ) | ||||
|             tm.assert_series_equal(check_series, series) | ||||
|  | ||||
|             series = self.read_csv(path, sep="|", parse_dates=False) | ||||
|             check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0}) | ||||
|             tm.assert_series_equal(check_series, series) | ||||
|  | ||||
|     def test_to_csv(self, datetime_series): | ||||
|         with tm.ensure_clean() as path: | ||||
|             datetime_series.to_csv(path, header=False) | ||||
|  | ||||
|             with open(path, newline=None, encoding="utf-8") as f: | ||||
|                 lines = f.readlines() | ||||
|             assert lines[1] != "\n" | ||||
|  | ||||
|             datetime_series.to_csv(path, index=False, header=False) | ||||
|             arr = np.loadtxt(path) | ||||
|             tm.assert_almost_equal(arr, datetime_series.values) | ||||
|  | ||||
|     def test_to_csv_unicode_index(self): | ||||
|         buf = StringIO() | ||||
|         s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"]) | ||||
|  | ||||
|         s.to_csv(buf, encoding="UTF-8", header=False) | ||||
|         buf.seek(0) | ||||
|  | ||||
|         s2 = self.read_csv(buf, index_col=0, encoding="UTF-8") | ||||
|         tm.assert_series_equal(s, s2) | ||||
|  | ||||
|     def test_to_csv_float_format(self): | ||||
|         with tm.ensure_clean() as filename: | ||||
|             ser = Series([0.123456, 0.234567, 0.567567]) | ||||
|             ser.to_csv(filename, float_format="%.2f", header=False) | ||||
|  | ||||
|             rs = self.read_csv(filename) | ||||
|             xp = Series([0.12, 0.23, 0.57]) | ||||
|             tm.assert_series_equal(rs, xp) | ||||
|  | ||||
|     def test_to_csv_list_entries(self): | ||||
|         s = Series(["jack and jill", "jesse and frank"]) | ||||
|  | ||||
|         split = s.str.split(r"\s+and\s+") | ||||
|  | ||||
|         buf = StringIO() | ||||
|         split.to_csv(buf, header=False) | ||||
|  | ||||
|     def test_to_csv_path_is_none(self): | ||||
|         # GH 8215 | ||||
|         # Series.to_csv() was returning None, inconsistent with | ||||
|         # DataFrame.to_csv() which returned string | ||||
|         s = Series([1, 2, 3]) | ||||
|         csv_str = s.to_csv(path_or_buf=None, header=False) | ||||
|         assert isinstance(csv_str, str) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "s,encoding", | ||||
|         [ | ||||
|             ( | ||||
|                 Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"), | ||||
|                 None, | ||||
|             ), | ||||
|             # GH 21241, 21118 | ||||
|             (Series(["abc", "def", "ghi"], name="X"), "ascii"), | ||||
|             (Series(["123", "你好", "世界"], name="中文"), "gb2312"), | ||||
|             ( | ||||
|                 Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"),  # noqa: RUF001 | ||||
|                 "cp737", | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_to_csv_compression(self, s, encoding, compression): | ||||
|         with tm.ensure_clean() as filename: | ||||
|             s.to_csv(filename, compression=compression, encoding=encoding, header=True) | ||||
|             # test the round trip - to_csv -> read_csv | ||||
|             result = pd.read_csv( | ||||
|                 filename, | ||||
|                 compression=compression, | ||||
|                 encoding=encoding, | ||||
|                 index_col=0, | ||||
|             ).squeeze("columns") | ||||
|             tm.assert_series_equal(s, result) | ||||
|  | ||||
|             # test the round trip using file handle - to_csv -> read_csv | ||||
|             with get_handle( | ||||
|                 filename, "w", compression=compression, encoding=encoding | ||||
|             ) as handles: | ||||
|                 s.to_csv(handles.handle, encoding=encoding, header=True) | ||||
|  | ||||
|             result = pd.read_csv( | ||||
|                 filename, | ||||
|                 compression=compression, | ||||
|                 encoding=encoding, | ||||
|                 index_col=0, | ||||
|             ).squeeze("columns") | ||||
|             tm.assert_series_equal(s, result) | ||||
|  | ||||
|             # explicitly ensure file was compressed | ||||
|             with tm.decompress_file(filename, compression) as fh: | ||||
|                 text = fh.read().decode(encoding or "utf8") | ||||
|                 assert s.name in text | ||||
|  | ||||
|             with tm.decompress_file(filename, compression) as fh: | ||||
|                 tm.assert_series_equal( | ||||
|                     s, | ||||
|                     pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"), | ||||
|                 ) | ||||
|  | ||||
|     def test_to_csv_interval_index(self, using_infer_string): | ||||
|         # GH 28210 | ||||
|         s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3)) | ||||
|  | ||||
|         with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path: | ||||
|             s.to_csv(path, header=False) | ||||
|             result = self.read_csv(path, index_col=0) | ||||
|  | ||||
|             # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) | ||||
|             expected = s | ||||
|             expected.index = expected.index.astype("str") | ||||
|             tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,38 @@ | ||||
| import collections | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesToDict: | ||||
|     @pytest.mark.parametrize( | ||||
|         "mapping", (dict, collections.defaultdict(list), collections.OrderedDict) | ||||
|     ) | ||||
|     def test_to_dict(self, mapping, datetime_series): | ||||
|         # GH#16122 | ||||
|         result = Series(datetime_series.to_dict(into=mapping), name="ts") | ||||
|         expected = datetime_series.copy() | ||||
|         expected.index = expected.index._with_freq(None) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         from_method = Series(datetime_series.to_dict(into=collections.Counter)) | ||||
|         from_constructor = Series(collections.Counter(datetime_series.items())) | ||||
|         tm.assert_series_equal(from_method, from_constructor) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "input", | ||||
|         ( | ||||
|             {"a": np.int64(64), "b": 10}, | ||||
|             {"a": np.int64(64), "b": 10, "c": "ABC"}, | ||||
|             {"a": np.uint64(64), "b": 10, "c": "ABC"}, | ||||
|         ), | ||||
|     ) | ||||
|     def test_to_dict_return_types(self, input): | ||||
|         # GH25969 | ||||
|  | ||||
|         d = Series(input).to_dict() | ||||
|         assert isinstance(d["a"], int) | ||||
|         assert isinstance(d["b"], int) | ||||
| @ -0,0 +1,63 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestToFrame: | ||||
|     def test_to_frame_respects_name_none(self): | ||||
|         # GH#44212 if we explicitly pass name=None, then that should be respected, | ||||
|         #  not changed to 0 | ||||
|         # GH-45448 this is first deprecated & enforced in 2.0 | ||||
|         ser = Series(range(3)) | ||||
|         result = ser.to_frame(None) | ||||
|  | ||||
|         exp_index = Index([None], dtype=object) | ||||
|         tm.assert_index_equal(result.columns, exp_index) | ||||
|  | ||||
|         result = ser.rename("foo").to_frame(None) | ||||
|         exp_index = Index([None], dtype=object) | ||||
|         tm.assert_index_equal(result.columns, exp_index) | ||||
|  | ||||
|     def test_to_frame(self, datetime_series): | ||||
|         datetime_series.name = None | ||||
|         rs = datetime_series.to_frame() | ||||
|         xp = DataFrame(datetime_series.values, index=datetime_series.index) | ||||
|         tm.assert_frame_equal(rs, xp) | ||||
|  | ||||
|         datetime_series.name = "testname" | ||||
|         rs = datetime_series.to_frame() | ||||
|         xp = DataFrame( | ||||
|             {"testname": datetime_series.values}, index=datetime_series.index | ||||
|         ) | ||||
|         tm.assert_frame_equal(rs, xp) | ||||
|  | ||||
|         rs = datetime_series.to_frame(name="testdifferent") | ||||
|         xp = DataFrame( | ||||
|             {"testdifferent": datetime_series.values}, index=datetime_series.index | ||||
|         ) | ||||
|         tm.assert_frame_equal(rs, xp) | ||||
|  | ||||
|     @pytest.mark.filterwarnings( | ||||
|         "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" | ||||
|     ) | ||||
|     def test_to_frame_expanddim(self): | ||||
|         # GH#9762 | ||||
|  | ||||
|         class SubclassedSeries(Series): | ||||
|             @property | ||||
|             def _constructor_expanddim(self): | ||||
|                 return SubclassedFrame | ||||
|  | ||||
|         class SubclassedFrame(DataFrame): | ||||
|             pass | ||||
|  | ||||
|         ser = SubclassedSeries([1, 2, 3], name="X") | ||||
|         result = ser.to_frame() | ||||
|         assert isinstance(result, SubclassedFrame) | ||||
|         expected = SubclassedFrame({"X": [1, 2, 3]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,49 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     Series, | ||||
|     Timedelta, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("dtype", ["int64", "float64"]) | ||||
| def test_to_numpy_na_value(dtype): | ||||
|     # GH#48951 | ||||
|     ser = Series([1, 2, NA, 4]) | ||||
|     result = ser.to_numpy(dtype=dtype, na_value=0) | ||||
|     expected = np.array([1, 2, 0, 4], dtype=dtype) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_numpy_cast_before_setting_na(): | ||||
|     # GH#50600 | ||||
|     ser = Series([1]) | ||||
|     result = ser.to_numpy(dtype=np.float64, na_value=np.nan) | ||||
|     expected = np.array([1.0]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @td.skip_if_no("pyarrow") | ||||
| def test_to_numpy_arrow_dtype_given(): | ||||
|     # GH#57121 | ||||
|     ser = Series([1, NA], dtype="int64[pyarrow]") | ||||
|     result = ser.to_numpy(dtype="float64") | ||||
|     expected = np.array([1.0, np.nan]) | ||||
|     tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_astype_ea_int_to_td_ts(): | ||||
|     # GH#57093 | ||||
|     ser = Series([1, None], dtype="Int64") | ||||
|     result = ser.astype("m8[ns]") | ||||
|     expected = Series([1, Timedelta("nat")], dtype="m8[ns]") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = ser.astype("M8[ns]") | ||||
|     expected = Series([1, Timedelta("nat")], dtype="M8[ns]") | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,36 @@ | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     Interval, | ||||
|     Period, | ||||
|     Series, | ||||
|     Timedelta, | ||||
|     Timestamp, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "values, dtype, expected_dtype", | ||||
|     ( | ||||
|         ([1], "int64", int), | ||||
|         ([1], "Int64", int), | ||||
|         ([1.0], "float64", float), | ||||
|         ([1.0], "Float64", float), | ||||
|         (["abc"], "object", str), | ||||
|         (["abc"], "string", str), | ||||
|         ([Interval(1, 3)], "interval", Interval), | ||||
|         ([Period("2000-01-01", "D")], "period[D]", Period), | ||||
|         ([Timedelta(days=1)], "timedelta64[ns]", Timedelta), | ||||
|         ([Timestamp("2000-01-01")], "datetime64[ns]", Timestamp), | ||||
|         pytest.param([1], "int64[pyarrow]", int, marks=td.skip_if_no("pyarrow")), | ||||
|         pytest.param([1.0], "float64[pyarrow]", float, marks=td.skip_if_no("pyarrow")), | ||||
|         pytest.param(["abc"], "string[pyarrow]", str, marks=td.skip_if_no("pyarrow")), | ||||
|     ), | ||||
| ) | ||||
| def test_tolist_scalar_dtype(values, dtype, expected_dtype): | ||||
|     # GH49890 | ||||
|     ser = Series(values, dtype=dtype) | ||||
|     result_dtype = type(ser.tolist()[0]) | ||||
|     assert result_dtype == expected_dtype | ||||
| @ -0,0 +1,67 @@ | ||||
| from datetime import datetime | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestTruncate: | ||||
|     def test_truncate_datetimeindex_tz(self): | ||||
|         # GH 9243 | ||||
|         idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") | ||||
|         s = Series(range(len(idx)), index=idx) | ||||
|         with pytest.raises(TypeError, match="Cannot compare tz-naive"): | ||||
|             # GH#36148 as of 2.0 we require tzawareness compat | ||||
|             s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) | ||||
|  | ||||
|         lb = idx[1] | ||||
|         ub = idx[3] | ||||
|         result = s.truncate(lb.to_pydatetime(), ub.to_pydatetime()) | ||||
|         expected = Series([1, 2, 3], index=idx[1:4]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_truncate_periodindex(self): | ||||
|         # GH 17717 | ||||
|         idx1 = pd.PeriodIndex( | ||||
|             [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] | ||||
|         ) | ||||
|         series1 = Series([1, 2, 3], index=idx1) | ||||
|         result1 = series1.truncate(after="2017-09-02") | ||||
|  | ||||
|         expected_idx1 = pd.PeriodIndex( | ||||
|             [pd.Period("2017-09-02"), pd.Period("2017-09-02")] | ||||
|         ) | ||||
|         tm.assert_series_equal(result1, Series([1, 2], index=expected_idx1)) | ||||
|  | ||||
|         idx2 = pd.PeriodIndex( | ||||
|             [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] | ||||
|         ) | ||||
|         series2 = Series([1, 2, 3], index=idx2) | ||||
|         result2 = series2.sort_index().truncate(after="2017-09-02") | ||||
|  | ||||
|         expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")]) | ||||
|         tm.assert_series_equal(result2, Series([2], index=expected_idx2)) | ||||
|  | ||||
|     def test_truncate_one_element_series(self): | ||||
|         # GH 35544 | ||||
|         series = Series([0.1], index=pd.DatetimeIndex(["2020-08-04"])) | ||||
|         before = pd.Timestamp("2020-08-02") | ||||
|         after = pd.Timestamp("2020-08-04") | ||||
|  | ||||
|         result = series.truncate(before=before, after=after) | ||||
|  | ||||
|         # the input Series and the expected Series are the same | ||||
|         tm.assert_series_equal(result, series) | ||||
|  | ||||
|     def test_truncate_index_only_one_unique_value(self): | ||||
|         # GH 42365 | ||||
|         obj = Series(0, index=date_range("2021-06-30", "2021-06-30")).repeat(5) | ||||
|  | ||||
|         truncated = obj.truncate("2021-06-28", "2021-07-01") | ||||
|  | ||||
|         tm.assert_series_equal(truncated, obj) | ||||
| @ -0,0 +1,123 @@ | ||||
| from datetime import timezone | ||||
|  | ||||
| import pytest | ||||
| import pytz | ||||
|  | ||||
| from pandas._libs.tslibs import timezones | ||||
|  | ||||
| from pandas import ( | ||||
|     DatetimeIndex, | ||||
|     NaT, | ||||
|     Series, | ||||
|     Timestamp, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestTZLocalize: | ||||
|     def test_series_tz_localize_ambiguous_bool(self): | ||||
|         # make sure that we are correctly accepting bool values as ambiguous | ||||
|  | ||||
|         # GH#14402 | ||||
|         ts = Timestamp("2015-11-01 01:00:03") | ||||
|         expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") | ||||
|         expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") | ||||
|  | ||||
|         ser = Series([ts]) | ||||
|         expected0 = Series([expected0]) | ||||
|         expected1 = Series([expected1]) | ||||
|  | ||||
|         with tm.external_error_raised(pytz.AmbiguousTimeError): | ||||
|             ser.dt.tz_localize("US/Central") | ||||
|  | ||||
|         result = ser.dt.tz_localize("US/Central", ambiguous=True) | ||||
|         tm.assert_series_equal(result, expected0) | ||||
|  | ||||
|         result = ser.dt.tz_localize("US/Central", ambiguous=[True]) | ||||
|         tm.assert_series_equal(result, expected0) | ||||
|  | ||||
|         result = ser.dt.tz_localize("US/Central", ambiguous=False) | ||||
|         tm.assert_series_equal(result, expected1) | ||||
|  | ||||
|         result = ser.dt.tz_localize("US/Central", ambiguous=[False]) | ||||
|         tm.assert_series_equal(result, expected1) | ||||
|  | ||||
|     def test_series_tz_localize_matching_index(self): | ||||
|         # Matching the index of the result with that of the original series | ||||
|         # GH 43080 | ||||
|         dt_series = Series( | ||||
|             date_range(start="2021-01-01T02:00:00", periods=5, freq="1D"), | ||||
|             index=[2, 6, 7, 8, 11], | ||||
|             dtype="category", | ||||
|         ) | ||||
|         result = dt_series.dt.tz_localize("Europe/Berlin") | ||||
|         expected = Series( | ||||
|             date_range( | ||||
|                 start="2021-01-01T02:00:00", periods=5, freq="1D", tz="Europe/Berlin" | ||||
|             ), | ||||
|             index=[2, 6, 7, 8, 11], | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "method, exp", | ||||
|         [ | ||||
|             ["shift_forward", "2015-03-29 03:00:00"], | ||||
|             ["shift_backward", "2015-03-29 01:59:59.999999999"], | ||||
|             ["NaT", NaT], | ||||
|             ["raise", None], | ||||
|             ["foo", "invalid"], | ||||
|         ], | ||||
|     ) | ||||
|     def test_tz_localize_nonexistent(self, warsaw, method, exp, unit): | ||||
|         # GH 8917 | ||||
|         tz = warsaw | ||||
|         n = 60 | ||||
|         dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min", unit=unit) | ||||
|         ser = Series(1, index=dti) | ||||
|         df = ser.to_frame() | ||||
|  | ||||
|         if method == "raise": | ||||
|             with tm.external_error_raised(pytz.NonExistentTimeError): | ||||
|                 dti.tz_localize(tz, nonexistent=method) | ||||
|             with tm.external_error_raised(pytz.NonExistentTimeError): | ||||
|                 ser.tz_localize(tz, nonexistent=method) | ||||
|             with tm.external_error_raised(pytz.NonExistentTimeError): | ||||
|                 df.tz_localize(tz, nonexistent=method) | ||||
|  | ||||
|         elif exp == "invalid": | ||||
|             msg = ( | ||||
|                 "The nonexistent argument must be one of " | ||||
|                 "'raise', 'NaT', 'shift_forward', 'shift_backward' " | ||||
|                 "or a timedelta object" | ||||
|             ) | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 dti.tz_localize(tz, nonexistent=method) | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 ser.tz_localize(tz, nonexistent=method) | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 df.tz_localize(tz, nonexistent=method) | ||||
|  | ||||
|         else: | ||||
|             result = ser.tz_localize(tz, nonexistent=method) | ||||
|             expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz).as_unit(unit)) | ||||
|             tm.assert_series_equal(result, expected) | ||||
|  | ||||
|             result = df.tz_localize(tz, nonexistent=method) | ||||
|             expected = expected.to_frame() | ||||
|             tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|             res_index = dti.tz_localize(tz, nonexistent=method) | ||||
|             tm.assert_index_equal(res_index, expected.index) | ||||
|  | ||||
|     @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) | ||||
|     def test_series_tz_localize_empty(self, tzstr): | ||||
|         # GH#2248 | ||||
|         ser = Series(dtype=object) | ||||
|  | ||||
|         ser2 = ser.tz_localize("utc") | ||||
|         assert ser2.index.tz == timezone.utc | ||||
|  | ||||
|         ser2 = ser.tz_localize(tzstr) | ||||
|         timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) | ||||
| @ -0,0 +1,76 @@ | ||||
| import numpy as np | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     IntervalIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestUnique: | ||||
|     def test_unique_uint64(self): | ||||
|         ser = Series([1, 2, 2**63, 2**63], dtype=np.uint64) | ||||
|         res = ser.unique() | ||||
|         exp = np.array([1, 2, 2**63], dtype=np.uint64) | ||||
|         tm.assert_numpy_array_equal(res, exp) | ||||
|  | ||||
|     def test_unique_data_ownership(self): | ||||
|         # it works! GH#1807 | ||||
|         Series(Series(["a", "c", "b"]).unique()).sort_values() | ||||
|  | ||||
|     def test_unique(self): | ||||
|         # GH#714 also, dtype=float | ||||
|         ser = Series([1.2345] * 100) | ||||
|         ser[::2] = np.nan | ||||
|         result = ser.unique() | ||||
|         assert len(result) == 2 | ||||
|  | ||||
|         # explicit f4 dtype | ||||
|         ser = Series([1.2345] * 100, dtype="f4") | ||||
|         ser[::2] = np.nan | ||||
|         result = ser.unique() | ||||
|         assert len(result) == 2 | ||||
|  | ||||
|     def test_unique_nan_object_dtype(self): | ||||
|         # NAs in object arrays GH#714 | ||||
|         ser = Series(["foo"] * 100, dtype="O") | ||||
|         ser[::2] = np.nan | ||||
|         result = ser.unique() | ||||
|         assert len(result) == 2 | ||||
|  | ||||
|     def test_unique_none(self): | ||||
|         # decision about None | ||||
|         ser = Series([1, 2, 3, None, None, None], dtype=object) | ||||
|         result = ser.unique() | ||||
|         expected = np.array([1, 2, 3, None], dtype=object) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_unique_categorical(self): | ||||
|         # GH#18051 | ||||
|         cat = Categorical([]) | ||||
|         ser = Series(cat) | ||||
|         result = ser.unique() | ||||
|         tm.assert_categorical_equal(result, cat) | ||||
|  | ||||
|         cat = Categorical([np.nan]) | ||||
|         ser = Series(cat) | ||||
|         result = ser.unique() | ||||
|         tm.assert_categorical_equal(result, cat) | ||||
|  | ||||
|     def test_tz_unique(self): | ||||
|         # GH 46128 | ||||
|         dti1 = date_range("2016-01-01", periods=3) | ||||
|         ii1 = IntervalIndex.from_breaks(dti1) | ||||
|         ser1 = Series(ii1) | ||||
|         uni1 = ser1.unique() | ||||
|         tm.assert_interval_array_equal(ser1.array, uni1) | ||||
|  | ||||
|         dti2 = date_range("2016-01-01", periods=3, tz="US/Eastern") | ||||
|         ii2 = IntervalIndex.from_breaks(dti2) | ||||
|         ser2 = Series(ii2) | ||||
|         uni2 = ser2.unique() | ||||
|         tm.assert_interval_array_equal(ser2.array, uni2) | ||||
|  | ||||
|         assert uni1.dtype != uni2.dtype | ||||
| @ -0,0 +1,169 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_unstack_preserves_object(): | ||||
|     mi = MultiIndex.from_product([["bar", "foo"], ["one", "two"]]) | ||||
|  | ||||
|     ser = Series(np.arange(4.0), index=mi, dtype=object) | ||||
|  | ||||
|     res1 = ser.unstack() | ||||
|     assert (res1.dtypes == object).all() | ||||
|  | ||||
|     res2 = ser.unstack(level=0) | ||||
|     assert (res2.dtypes == object).all() | ||||
|  | ||||
|  | ||||
| def test_unstack(): | ||||
|     index = MultiIndex( | ||||
|         levels=[["bar", "foo"], ["one", "three", "two"]], | ||||
|         codes=[[1, 1, 0, 0], [0, 1, 0, 2]], | ||||
|     ) | ||||
|  | ||||
|     s = Series(np.arange(4.0), index=index) | ||||
|     unstacked = s.unstack() | ||||
|  | ||||
|     expected = DataFrame( | ||||
|         [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]], | ||||
|         index=["bar", "foo"], | ||||
|         columns=["one", "three", "two"], | ||||
|     ) | ||||
|  | ||||
|     tm.assert_frame_equal(unstacked, expected) | ||||
|  | ||||
|     unstacked = s.unstack(level=0) | ||||
|     tm.assert_frame_equal(unstacked, expected.T) | ||||
|  | ||||
|     index = MultiIndex( | ||||
|         levels=[["bar"], ["one", "two", "three"], [0, 1]], | ||||
|         codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], | ||||
|     ) | ||||
|     s = Series(np.random.default_rng(2).standard_normal(6), index=index) | ||||
|     exp_index = MultiIndex( | ||||
|         levels=[["one", "two", "three"], [0, 1]], | ||||
|         codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], | ||||
|     ) | ||||
|     expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0) | ||||
|     unstacked = s.unstack(0).sort_index() | ||||
|     tm.assert_frame_equal(unstacked, expected) | ||||
|  | ||||
|     # GH5873 | ||||
|     idx = MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) | ||||
|     ts = Series([1, 2], index=idx) | ||||
|     left = ts.unstack() | ||||
|     right = DataFrame( | ||||
|         [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5] | ||||
|     ) | ||||
|     tm.assert_frame_equal(left, right) | ||||
|  | ||||
|     idx = MultiIndex.from_arrays( | ||||
|         [ | ||||
|             ["cat", "cat", "cat", "dog", "dog"], | ||||
|             ["a", "a", "b", "a", "b"], | ||||
|             [1, 2, 1, 1, np.nan], | ||||
|         ] | ||||
|     ) | ||||
|     ts = Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) | ||||
|     right = DataFrame( | ||||
|         [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]], | ||||
|         columns=["cat", "dog"], | ||||
|     ) | ||||
|     tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)] | ||||
|     right.index = MultiIndex.from_tuples(tpls) | ||||
|     tm.assert_frame_equal(ts.unstack(level=0), right) | ||||
|  | ||||
|  | ||||
| def test_unstack_tuplename_in_multiindex(): | ||||
|     # GH 19966 | ||||
|     idx = MultiIndex.from_product( | ||||
|         [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] | ||||
|     ) | ||||
|     ser = Series(1, index=idx) | ||||
|     result = ser.unstack(("A", "a")) | ||||
|  | ||||
|     expected = DataFrame( | ||||
|         [[1, 1, 1], [1, 1, 1], [1, 1, 1]], | ||||
|         columns=MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]), | ||||
|         index=Index([1, 2, 3], name=("B", "b")), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "unstack_idx, expected_values, expected_index, expected_columns", | ||||
|     [ | ||||
|         ( | ||||
|             ("A", "a"), | ||||
|             [[1, 1], [1, 1], [1, 1], [1, 1]], | ||||
|             MultiIndex.from_tuples([(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]), | ||||
|             MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), | ||||
|         ), | ||||
|         ( | ||||
|             (("A", "a"), "B"), | ||||
|             [[1, 1, 1, 1], [1, 1, 1, 1]], | ||||
|             Index([3, 4], name="C"), | ||||
|             MultiIndex.from_tuples( | ||||
|                 [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] | ||||
|             ), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_unstack_mixed_type_name_in_multiindex( | ||||
|     unstack_idx, expected_values, expected_index, expected_columns | ||||
| ): | ||||
|     # GH 19966 | ||||
|     idx = MultiIndex.from_product( | ||||
|         [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] | ||||
|     ) | ||||
|     ser = Series(1, index=idx) | ||||
|     result = ser.unstack(unstack_idx) | ||||
|  | ||||
|     expected = DataFrame( | ||||
|         expected_values, columns=expected_columns, index=expected_index | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_unstack_multi_index_categorical_values(): | ||||
|     df = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 4)), | ||||
|         columns=Index(list("ABCD")), | ||||
|         index=date_range("2000-01-01", periods=10, freq="B"), | ||||
|     ) | ||||
|     mi = df.stack(future_stack=True).index.rename(["major", "minor"]) | ||||
|     ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category") | ||||
|  | ||||
|     result = ser.unstack() | ||||
|  | ||||
|     dti = ser.index.levels[0] | ||||
|     c = pd.Categorical(["foo"] * len(dti)) | ||||
|     expected = DataFrame( | ||||
|         {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()}, | ||||
|         columns=Index(list("ABCD"), name="minor"), | ||||
|         index=dti.rename("major"), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_unstack_mixed_level_names(): | ||||
|     # GH#48763 | ||||
|     arrays = [["a", "a"], [1, 2], ["red", "blue"]] | ||||
|     idx = MultiIndex.from_arrays(arrays, names=("x", 0, "y")) | ||||
|     ser = Series([1, 2], index=idx) | ||||
|     result = ser.unstack("x") | ||||
|     expected = DataFrame( | ||||
|         [[1], [2]], | ||||
|         columns=Index(["a"], name="x"), | ||||
|         index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,139 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| from pandas import ( | ||||
|     CategoricalDtype, | ||||
|     DataFrame, | ||||
|     NaT, | ||||
|     Series, | ||||
|     Timestamp, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestUpdate: | ||||
|     def test_update(self, using_copy_on_write): | ||||
|         s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) | ||||
|         s2 = Series([np.nan, 3.5, np.nan, 5.0]) | ||||
|         s.update(s2) | ||||
|  | ||||
|         expected = Series([1.5, 3.5, 3.0, 5.0, np.nan]) | ||||
|         tm.assert_series_equal(s, expected) | ||||
|  | ||||
|         # GH 3217 | ||||
|         df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) | ||||
|         df["c"] = np.nan | ||||
|         # Cast to object to avoid upcast when setting "foo" | ||||
|         df["c"] = df["c"].astype(object) | ||||
|         df_orig = df.copy() | ||||
|  | ||||
|         if using_copy_on_write: | ||||
|             with tm.raises_chained_assignment_error(): | ||||
|                 df["c"].update(Series(["foo"], index=[0])) | ||||
|             expected = df_orig | ||||
|         else: | ||||
|             with tm.assert_produces_warning(FutureWarning, match="inplace method"): | ||||
|                 df["c"].update(Series(["foo"], index=[0])) | ||||
|             expected = DataFrame( | ||||
|                 [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"] | ||||
|             ) | ||||
|             expected["c"] = expected["c"].astype(object) | ||||
|         tm.assert_frame_equal(df, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "other, dtype, expected, warn", | ||||
|         [ | ||||
|             # other is int | ||||
|             ([61, 63], "int32", Series([10, 61, 12], dtype="int32"), None), | ||||
|             ([61, 63], "int64", Series([10, 61, 12]), None), | ||||
|             ([61, 63], float, Series([10.0, 61.0, 12.0]), None), | ||||
|             ([61, 63], object, Series([10, 61, 12], dtype=object), None), | ||||
|             # other is float, but can be cast to int | ||||
|             ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32"), None), | ||||
|             ([61.0, 63.0], "int64", Series([10, 61, 12]), None), | ||||
|             ([61.0, 63.0], float, Series([10.0, 61.0, 12.0]), None), | ||||
|             ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object), None), | ||||
|             # others is float, cannot be cast to int | ||||
|             ([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0]), FutureWarning), | ||||
|             ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0]), FutureWarning), | ||||
|             ([61.1, 63.1], float, Series([10.0, 61.1, 12.0]), None), | ||||
|             ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object), None), | ||||
|             # other is object, cannot be cast | ||||
|             ([(61,), (63,)], "int32", Series([10, (61,), 12]), FutureWarning), | ||||
|             ([(61,), (63,)], "int64", Series([10, (61,), 12]), FutureWarning), | ||||
|             ([(61,), (63,)], float, Series([10.0, (61,), 12.0]), FutureWarning), | ||||
|             ([(61,), (63,)], object, Series([10, (61,), 12]), None), | ||||
|         ], | ||||
|     ) | ||||
|     def test_update_dtypes(self, other, dtype, expected, warn): | ||||
|         ser = Series([10, 11, 12], dtype=dtype) | ||||
|         other = Series(other, index=[1, 3]) | ||||
|         with tm.assert_produces_warning(warn, match="item of incompatible dtype"): | ||||
|             ser.update(other) | ||||
|  | ||||
|         tm.assert_series_equal(ser, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "series, other, expected", | ||||
|         [ | ||||
|             # update by key | ||||
|             ( | ||||
|                 Series({"a": 1, "b": 2, "c": 3, "d": 4}), | ||||
|                 {"b": 5, "c": np.nan}, | ||||
|                 Series({"a": 1, "b": 5, "c": 3, "d": 4}), | ||||
|             ), | ||||
|             # update by position | ||||
|             (Series([1, 2, 3, 4]), [np.nan, 5, 1], Series([1, 5, 1, 4])), | ||||
|         ], | ||||
|     ) | ||||
|     def test_update_from_non_series(self, series, other, expected): | ||||
|         # GH 33215 | ||||
|         series.update(other) | ||||
|         tm.assert_series_equal(series, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data, other, expected, dtype", | ||||
|         [ | ||||
|             (["a", None], [None, "b"], ["a", "b"], "string[python]"), | ||||
|             pytest.param( | ||||
|                 ["a", None], | ||||
|                 [None, "b"], | ||||
|                 ["a", "b"], | ||||
|                 "string[pyarrow]", | ||||
|                 marks=td.skip_if_no("pyarrow"), | ||||
|             ), | ||||
|             ([1, None], [None, 2], [1, 2], "Int64"), | ||||
|             ([True, None], [None, False], [True, False], "boolean"), | ||||
|             ( | ||||
|                 ["a", None], | ||||
|                 [None, "b"], | ||||
|                 ["a", "b"], | ||||
|                 CategoricalDtype(categories=["a", "b"]), | ||||
|             ), | ||||
|             ( | ||||
|                 [Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT], | ||||
|                 [NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")], | ||||
|                 [Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2, | ||||
|                 "datetime64[ns, Europe/London]", | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_update_extension_array_series(self, data, other, expected, dtype): | ||||
|         result = Series(data, dtype=dtype) | ||||
|         other = Series(other, dtype=dtype) | ||||
|         expected = Series(expected, dtype=dtype) | ||||
|  | ||||
|         result.update(other) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_update_with_categorical_type(self): | ||||
|         # GH 25744 | ||||
|         dtype = CategoricalDtype(["a", "b", "c", "d"]) | ||||
|         s1 = Series(["a", "b", "c"], index=[1, 2, 3], dtype=dtype) | ||||
|         s2 = Series(["b", "a"], index=[1, 2], dtype=dtype) | ||||
|         s1.update(s2) | ||||
|         result = s1 | ||||
|         expected = Series(["b", "a", "c"], index=[1, 2, 3], dtype=dtype) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,271 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     CategoricalIndex, | ||||
|     Index, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeriesValueCounts: | ||||
|     def test_value_counts_datetime(self, unit): | ||||
|         # most dtypes are tested in tests/base | ||||
|         values = [ | ||||
|             pd.Timestamp("2011-01-01 09:00"), | ||||
|             pd.Timestamp("2011-01-01 10:00"), | ||||
|             pd.Timestamp("2011-01-01 11:00"), | ||||
|             pd.Timestamp("2011-01-01 09:00"), | ||||
|             pd.Timestamp("2011-01-01 09:00"), | ||||
|             pd.Timestamp("2011-01-01 11:00"), | ||||
|         ] | ||||
|  | ||||
|         exp_idx = pd.DatetimeIndex( | ||||
|             ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"], | ||||
|             name="xxx", | ||||
|         ).as_unit(unit) | ||||
|         exp = Series([3, 2, 1], index=exp_idx, name="count") | ||||
|  | ||||
|         ser = Series(values, name="xxx").dt.as_unit(unit) | ||||
|         tm.assert_series_equal(ser.value_counts(), exp) | ||||
|         # check DatetimeIndex outputs the same result | ||||
|         idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit) | ||||
|         tm.assert_series_equal(idx.value_counts(), exp) | ||||
|  | ||||
|         # normalize | ||||
|         exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion") | ||||
|         tm.assert_series_equal(ser.value_counts(normalize=True), exp) | ||||
|         tm.assert_series_equal(idx.value_counts(normalize=True), exp) | ||||
|  | ||||
|     def test_value_counts_datetime_tz(self, unit): | ||||
|         values = [ | ||||
|             pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), | ||||
|             pd.Timestamp("2011-01-01 10:00", tz="US/Eastern"), | ||||
|             pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"), | ||||
|             pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), | ||||
|             pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), | ||||
|             pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"), | ||||
|         ] | ||||
|  | ||||
|         exp_idx = pd.DatetimeIndex( | ||||
|             ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"], | ||||
|             tz="US/Eastern", | ||||
|             name="xxx", | ||||
|         ).as_unit(unit) | ||||
|         exp = Series([3, 2, 1], index=exp_idx, name="count") | ||||
|  | ||||
|         ser = Series(values, name="xxx").dt.as_unit(unit) | ||||
|         tm.assert_series_equal(ser.value_counts(), exp) | ||||
|         idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit) | ||||
|         tm.assert_series_equal(idx.value_counts(), exp) | ||||
|  | ||||
|         exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion") | ||||
|         tm.assert_series_equal(ser.value_counts(normalize=True), exp) | ||||
|         tm.assert_series_equal(idx.value_counts(normalize=True), exp) | ||||
|  | ||||
|     def test_value_counts_period(self): | ||||
|         values = [ | ||||
|             pd.Period("2011-01", freq="M"), | ||||
|             pd.Period("2011-02", freq="M"), | ||||
|             pd.Period("2011-03", freq="M"), | ||||
|             pd.Period("2011-01", freq="M"), | ||||
|             pd.Period("2011-01", freq="M"), | ||||
|             pd.Period("2011-03", freq="M"), | ||||
|         ] | ||||
|  | ||||
|         exp_idx = pd.PeriodIndex( | ||||
|             ["2011-01", "2011-03", "2011-02"], freq="M", name="xxx" | ||||
|         ) | ||||
|         exp = Series([3, 2, 1], index=exp_idx, name="count") | ||||
|  | ||||
|         ser = Series(values, name="xxx") | ||||
|         tm.assert_series_equal(ser.value_counts(), exp) | ||||
|         # check DatetimeIndex outputs the same result | ||||
|         idx = pd.PeriodIndex(values, name="xxx") | ||||
|         tm.assert_series_equal(idx.value_counts(), exp) | ||||
|  | ||||
|         # normalize | ||||
|         exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion") | ||||
|         tm.assert_series_equal(ser.value_counts(normalize=True), exp) | ||||
|         tm.assert_series_equal(idx.value_counts(normalize=True), exp) | ||||
|  | ||||
|     def test_value_counts_categorical_ordered(self): | ||||
|         # most dtypes are tested in tests/base | ||||
|         values = Categorical([1, 2, 3, 1, 1, 3], ordered=True) | ||||
|  | ||||
|         exp_idx = CategoricalIndex( | ||||
|             [1, 3, 2], categories=[1, 2, 3], ordered=True, name="xxx" | ||||
|         ) | ||||
|         exp = Series([3, 2, 1], index=exp_idx, name="count") | ||||
|  | ||||
|         ser = Series(values, name="xxx") | ||||
|         tm.assert_series_equal(ser.value_counts(), exp) | ||||
|         # check CategoricalIndex outputs the same result | ||||
|         idx = CategoricalIndex(values, name="xxx") | ||||
|         tm.assert_series_equal(idx.value_counts(), exp) | ||||
|  | ||||
|         # normalize | ||||
|         exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion") | ||||
|         tm.assert_series_equal(ser.value_counts(normalize=True), exp) | ||||
|         tm.assert_series_equal(idx.value_counts(normalize=True), exp) | ||||
|  | ||||
|     def test_value_counts_categorical_not_ordered(self): | ||||
|         values = Categorical([1, 2, 3, 1, 1, 3], ordered=False) | ||||
|  | ||||
|         exp_idx = CategoricalIndex( | ||||
|             [1, 3, 2], categories=[1, 2, 3], ordered=False, name="xxx" | ||||
|         ) | ||||
|         exp = Series([3, 2, 1], index=exp_idx, name="count") | ||||
|  | ||||
|         ser = Series(values, name="xxx") | ||||
|         tm.assert_series_equal(ser.value_counts(), exp) | ||||
|         # check CategoricalIndex outputs the same result | ||||
|         idx = CategoricalIndex(values, name="xxx") | ||||
|         tm.assert_series_equal(idx.value_counts(), exp) | ||||
|  | ||||
|         # normalize | ||||
|         exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion") | ||||
|         tm.assert_series_equal(ser.value_counts(normalize=True), exp) | ||||
|         tm.assert_series_equal(idx.value_counts(normalize=True), exp) | ||||
|  | ||||
|     def test_value_counts_categorical(self): | ||||
|         # GH#12835 | ||||
|         cats = Categorical(list("abcccb"), categories=list("cabd")) | ||||
|         ser = Series(cats, name="xxx") | ||||
|         res = ser.value_counts(sort=False) | ||||
|  | ||||
|         exp_index = CategoricalIndex( | ||||
|             list("cabd"), categories=cats.categories, name="xxx" | ||||
|         ) | ||||
|         exp = Series([3, 1, 2, 0], name="count", index=exp_index) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         res = ser.value_counts(sort=True) | ||||
|  | ||||
|         exp_index = CategoricalIndex( | ||||
|             list("cbad"), categories=cats.categories, name="xxx" | ||||
|         ) | ||||
|         exp = Series([3, 2, 1, 0], name="count", index=exp_index) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         # check object dtype handles the Series.name as the same | ||||
|         # (tested in tests/base) | ||||
|         ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx") | ||||
|         res = ser.value_counts() | ||||
|         exp = Series([3, 2, 1], name="count", index=Index(["c", "b", "a"], name="xxx")) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|     def test_value_counts_categorical_with_nan(self): | ||||
|         # see GH#9443 | ||||
|  | ||||
|         # sanity check | ||||
|         ser = Series(["a", "b", "a"], dtype="category") | ||||
|         exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count") | ||||
|  | ||||
|         res = ser.value_counts(dropna=True) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         res = ser.value_counts(dropna=True) | ||||
|         tm.assert_series_equal(res, exp) | ||||
|  | ||||
|         # same Series via two different constructions --> same behaviour | ||||
|         series = [ | ||||
|             Series(["a", "b", None, "a", None, None], dtype="category"), | ||||
|             Series( | ||||
|                 Categorical(["a", "b", None, "a", None, None], categories=["a", "b"]) | ||||
|             ), | ||||
|         ] | ||||
|  | ||||
|         for ser in series: | ||||
|             # None is a NaN value, so we exclude its count here | ||||
|             exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count") | ||||
|             res = ser.value_counts(dropna=True) | ||||
|             tm.assert_series_equal(res, exp) | ||||
|  | ||||
|             # we don't exclude the count of None and sort by counts | ||||
|             exp = Series( | ||||
|                 [3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"]), name="count" | ||||
|             ) | ||||
|             res = ser.value_counts(dropna=False) | ||||
|             tm.assert_series_equal(res, exp) | ||||
|  | ||||
|             # When we aren't sorting by counts, and np.nan isn't a | ||||
|             # category, it should be last. | ||||
|             exp = Series( | ||||
|                 [2, 1, 3], index=CategoricalIndex(["a", "b", np.nan]), name="count" | ||||
|             ) | ||||
|             res = ser.value_counts(dropna=False, sort=False) | ||||
|             tm.assert_series_equal(res, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "ser, dropna, exp", | ||||
|         [ | ||||
|             ( | ||||
|                 Series([False, True, True, pd.NA]), | ||||
|                 False, | ||||
|                 Series([2, 1, 1], index=[True, False, pd.NA], name="count"), | ||||
|             ), | ||||
|             ( | ||||
|                 Series([False, True, True, pd.NA]), | ||||
|                 True, | ||||
|                 Series([2, 1], index=Index([True, False], dtype=object), name="count"), | ||||
|             ), | ||||
|             ( | ||||
|                 Series(range(3), index=[True, False, np.nan]).index, | ||||
|                 False, | ||||
|                 Series([1, 1, 1], index=[True, False, np.nan], name="count"), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_value_counts_bool_with_nan(self, ser, dropna, exp): | ||||
|         # GH32146 | ||||
|         out = ser.value_counts(dropna=dropna) | ||||
|         tm.assert_series_equal(out, exp) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "input_array,expected", | ||||
|         [ | ||||
|             ( | ||||
|                 [1 + 1j, 1 + 1j, 1, 3j, 3j, 3j], | ||||
|                 Series( | ||||
|                     [3, 2, 1], | ||||
|                     index=Index([3j, 1 + 1j, 1], dtype=np.complex128), | ||||
|                     name="count", | ||||
|                 ), | ||||
|             ), | ||||
|             ( | ||||
|                 np.array([1 + 1j, 1 + 1j, 1, 3j, 3j, 3j], dtype=np.complex64), | ||||
|                 Series( | ||||
|                     [3, 2, 1], | ||||
|                     index=Index([3j, 1 + 1j, 1], dtype=np.complex64), | ||||
|                     name="count", | ||||
|                 ), | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_value_counts_complex_numbers(self, input_array, expected): | ||||
|         # GH 17927 | ||||
|         result = Series(input_array).value_counts() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_value_counts_masked(self): | ||||
|         # GH#54984 | ||||
|         dtype = "Int64" | ||||
|         ser = Series([1, 2, None, 2, None, 3], dtype=dtype) | ||||
|         result = ser.value_counts(dropna=False) | ||||
|         expected = Series( | ||||
|             [2, 2, 1, 1], | ||||
|             index=Index([2, None, 1, 3], dtype=dtype), | ||||
|             dtype=dtype, | ||||
|             name="count", | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = ser.value_counts(dropna=True) | ||||
|         expected = Series( | ||||
|             [2, 1, 1], index=Index([2, 1, 3], dtype=dtype), dtype=dtype, name="count" | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,29 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     IntervalIndex, | ||||
|     Series, | ||||
|     period_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestValues: | ||||
|     @pytest.mark.parametrize( | ||||
|         "data", | ||||
|         [ | ||||
|             period_range("2000", periods=4), | ||||
|             IntervalIndex.from_breaks([1, 2, 3, 4]), | ||||
|         ], | ||||
|     ) | ||||
|     def test_values_object_extension_dtypes(self, data): | ||||
|         # https://github.com/pandas-dev/pandas/issues/23995 | ||||
|         result = Series(data).values | ||||
|         expected = np.array(data.astype(object)) | ||||
|         tm.assert_numpy_array_equal(result, expected) | ||||
|  | ||||
|     def test_values(self, datetime_series): | ||||
|         tm.assert_almost_equal( | ||||
|             datetime_series.values, list(datetime_series), check_dtype=False | ||||
|         ) | ||||
| @ -0,0 +1,61 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Index, | ||||
|     Series, | ||||
|     array, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| # Silence the Series.view deprecation FutureWarning for the tests in this module. | ||||
| pytestmark = pytest.mark.filterwarnings( | ||||
|     "ignore:Series.view is deprecated and will be removed in a future version.:FutureWarning"  # noqa: E501 | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestView: | ||||
|     def test_view_i8_to_datetimelike(self): | ||||
|         dti = date_range("2000", periods=4, tz="US/Central") | ||||
|         ser = Series(dti.asi8) | ||||
|  | ||||
|         result = ser.view(dti.dtype) | ||||
|         tm.assert_datetime_array_equal(result._values, dti._data._with_freq(None)) | ||||
|  | ||||
|         pi = dti.tz_localize(None).to_period("D") | ||||
|         ser = Series(pi.asi8) | ||||
|         result = ser.view(pi.dtype) | ||||
|         tm.assert_period_array_equal(result._values, pi._data) | ||||
|  | ||||
|     def test_view_tz(self): | ||||
|         # GH#24024 | ||||
|         ser = Series(date_range("2000", periods=4, tz="US/Central")) | ||||
|         result = ser.view("i8") | ||||
|         expected = Series( | ||||
|             [ | ||||
|                 946706400000000000, | ||||
|                 946792800000000000, | ||||
|                 946879200000000000, | ||||
|                 946965600000000000, | ||||
|             ] | ||||
|         ) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "first", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"] | ||||
|     ) | ||||
|     @pytest.mark.parametrize( | ||||
|         "second", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"] | ||||
|     ) | ||||
|     @pytest.mark.parametrize("box", [Series, Index, array]) | ||||
|     def test_view_between_datetimelike(self, first, second, box): | ||||
|         dti = date_range("2016-01-01", periods=3) | ||||
|  | ||||
|         orig = box(dti) | ||||
|         obj = orig.view(first) | ||||
|         assert obj.dtype == first | ||||
|         tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8) | ||||
|  | ||||
|         res = obj.view(second) | ||||
|         assert res.dtype == second | ||||
|         tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8) | ||||
		Reference in New Issue
	
	Block a user