done
This commit is contained in:
		| @ -0,0 +1,413 @@ | ||||
| """Tests dealing with the NDFrame.allows_duplicates.""" | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
# Shared xfail marker for tests whose target behaviour is not implemented yet.
not_implemented = pytest.mark.xfail(reason="Not implemented.")
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Preservation | ||||
|  | ||||
|  | ||||
class TestPreserves:
    """Operations whose result should keep ``allows_duplicate_labels=False``.

    Tests carrying the ``not_implemented`` marker are expected failures.
    """

    @pytest.mark.parametrize(
        "cls, data",
        [
            (pd.Series, np.array([])),
            (pd.Series, [1, 2]),
            (pd.DataFrame, {}),
            (pd.DataFrame, {"A": [1, 2]}),
        ],
    )
    def test_construction_ok(self, cls, data):
        """New objects allow duplicates; ``set_flags`` can switch that off."""
        result = cls(data)
        assert result.flags.allows_duplicate_labels is True

        result = cls(data).set_flags(allows_duplicate_labels=False)
        assert result.flags.allows_duplicate_labels is False

    @pytest.mark.parametrize(
        "func",
        [
            operator.itemgetter(["a"]),
            operator.methodcaller("add", 1),
            operator.methodcaller("rename", str.upper),
            operator.methodcaller("rename", "name"),
            operator.methodcaller("abs"),
            np.abs,
        ],
    )
    def test_preserved_series(self, func):
        """Selection, arithmetic, rename and abs on a Series keep the flag."""
        s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
        assert func(s).flags.allows_duplicate_labels is False

    @pytest.mark.parametrize(
        "other", [pd.Series(0, index=["a", "b", "c"]), pd.Series(0, index=["a", "b"])]
    )
    # TODO: frame
    @not_implemented
    def test_align(self, other):
        """Both outputs of ``Series.align`` should disallow duplicates (xfail)."""
        s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
        a, b = s.align(other)
        assert a.flags.allows_duplicate_labels is False
        assert b.flags.allows_duplicate_labels is False

    def test_preserved_frame(self):
        """Row and column selection through ``.loc`` keeps the flag."""
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        assert df.loc[["a"]].flags.allows_duplicate_labels is False
        assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False

    def test_to_frame(self):
        """``Series.to_frame`` carries the flag over to the DataFrame."""
        ser = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False)
        assert ser.to_frame().flags.allows_duplicate_labels is False

    @pytest.mark.parametrize("func", ["add", "sub"])
    @pytest.mark.parametrize("frame", [False, True])
    @pytest.mark.parametrize("other", [1, pd.Series([1, 2], name="A")])
    def test_binops(self, func, other, frame):
        """``add``/``sub`` with a scalar or a pandas operand keep the flag."""
        df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        if frame:
            df = df.to_frame()
        # Match the operand's dimensionality to the object under test.
        if isinstance(other, pd.Series) and frame:
            other = other.to_frame()
        func = operator.methodcaller(func, other)
        assert df.flags.allows_duplicate_labels is False
        assert func(df).flags.allows_duplicate_labels is False

    def test_preserve_getitem(self):
        """``[]`` and ``.loc`` lookups on a frame return objects with the flag."""
        df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)
        assert df[["A"]].flags.allows_duplicate_labels is False
        assert df["A"].flags.allows_duplicate_labels is False
        assert df.loc[0].flags.allows_duplicate_labels is False
        assert df.loc[[0]].flags.allows_duplicate_labels is False
        assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False

    def test_ndframe_getitem_caching_issue(
        self, request, using_copy_on_write, warn_copy_on_write
    ):
        """A column fetched after the frame's flag changes should see the new flag.

        Marked xfail unless one of the copy-on-write fixtures is truthy.
        """
        if not (using_copy_on_write or warn_copy_on_write):
            request.applymarker(pytest.mark.xfail(reason="Unclear behavior."))
        # NDFrame.__getitem__ will cache the first df['A']. May need to
        # invalidate that cache? Update the cached entries?
        df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False)
        assert df["A"].flags.allows_duplicate_labels is False
        df.flags.allows_duplicate_labels = True
        assert df["A"].flags.allows_duplicate_labels is True

    @pytest.mark.parametrize(
        "objs, kwargs",
        [
            # Series
            (
                [
                    pd.Series(1, index=["a", "b"]),
                    pd.Series(2, index=["c", "d"]),
                ],
                {},
            ),
            (
                [
                    pd.Series(1, index=["a", "b"]),
                    pd.Series(2, index=["a", "b"]),
                ],
                {"ignore_index": True},
            ),
            (
                [
                    pd.Series(1, index=["a", "b"]),
                    pd.Series(2, index=["a", "b"]),
                ],
                {"axis": 1},
            ),
            # Frame
            (
                [
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
                    pd.DataFrame({"A": [1, 2]}, index=["c", "d"]),
                ],
                {},
            ),
            (
                [
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
                ],
                {"ignore_index": True},
            ),
            (
                [
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
                    pd.DataFrame({"B": [1, 2]}, index=["a", "b"]),
                ],
                {"axis": 1},
            ),
            # Series / Frame
            (
                [
                    pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
                    pd.Series([1, 2], index=["a", "b"], name="B"),
                ],
                {"axis": 1},
            ),
        ],
    )
    def test_concat(self, objs, kwargs):
        """``pd.concat`` of inputs that all disallow duplicates keeps the flag."""
        objs = [x.set_flags(allows_duplicate_labels=False) for x in objs]
        result = pd.concat(objs, **kwargs)
        assert result.flags.allows_duplicate_labels is False

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            # false false false
            pytest.param(
                pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags(
                    allows_duplicate_labels=False
                ),
                pd.DataFrame({"B": [0, 1]}, index=["a", "d"]).set_flags(
                    allows_duplicate_labels=False
                ),
                False,
                marks=not_implemented,
            ),
            # false true false
            pytest.param(
                pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags(
                    allows_duplicate_labels=False
                ),
                pd.DataFrame({"B": [0, 1]}, index=["a", "d"]),
                False,
                marks=not_implemented,
            ),
            # true true true
            (
                pd.DataFrame({"A": [0, 1]}, index=["a", "b"]),
                pd.DataFrame({"B": [0, 1]}, index=["a", "d"]),
                True,
            ),
        ],
    )
    def test_merge(self, left, right, expected):
        """Index-on-index merge yields ``expected`` for each pair of input flags.

        The comment above each case reads: left flag, right flag, expected flag.
        """
        result = pd.merge(left, right, left_index=True, right_index=True)
        assert result.flags.allows_duplicate_labels is expected

    @not_implemented
    def test_groupby(self):
        """A groupby aggregation should keep the flag (xfail)."""
        # XXX: This is under tested
        # TODO:
        #  - apply
        #  - transform
        #  - Should passing a grouper that disallows duplicates propagate?
        df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False)
        result = df.groupby([0, 0, 1]).agg("count")
        assert result.flags.allows_duplicate_labels is False

    @pytest.mark.parametrize("frame", [True, False])
    @not_implemented
    def test_window(self, frame):
        """Rolling, EWM and expanding means should keep the flag (xfail)."""
        # The Series constructor has no ``allows_duplicate_labels`` parameter;
        # passing it as a keyword raised TypeError, so the xfail fired before
        # any window operation ran. Set the flag through ``set_flags`` instead.
        df = pd.Series(
            1,
            index=pd.date_range("2000", periods=12),
            name="A",
        ).set_flags(allows_duplicate_labels=False)
        if frame:
            df = df.to_frame()
        assert df.rolling(3).mean().flags.allows_duplicate_labels is False
        assert df.ewm(3).mean().flags.allows_duplicate_labels is False
        assert df.expanding(3).mean().flags.allows_duplicate_labels is False
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Raises | ||||
|  | ||||
|  | ||||
class TestRaises:
    """Operations that must fail once duplicate labels are disallowed."""

    @staticmethod
    def _duplicate_error():
        """Return a ``pytest.raises`` context for the duplicate-label error."""
        return pytest.raises(
            pd.errors.DuplicateLabelError, match="Index has duplicates."
        )

    @pytest.mark.parametrize(
        "cls, axes",
        [
            (pd.Series, {"index": ["a", "a"], "dtype": float}),
            (pd.DataFrame, {"index": ["a", "a"]}),
            (pd.DataFrame, {"index": ["a", "a"], "columns": ["b", "b"]}),
            (pd.DataFrame, {"columns": ["b", "b"]}),
        ],
    )
    def test_set_flags_with_duplicates(self, cls, axes):
        """``set_flags`` refuses to disallow duplicates on duplicated axes."""
        permissive = cls(**axes)
        assert permissive.flags.allows_duplicate_labels is True

        with self._duplicate_error():
            cls(**axes).set_flags(allows_duplicate_labels=False)

    @pytest.mark.parametrize(
        "data",
        [
            pd.Series(index=[0, 0], dtype=float),
            pd.DataFrame(index=[0, 0]),
            pd.DataFrame(columns=[0, 0]),
        ],
    )
    def test_setting_allows_duplicate_labels_raises(self, data):
        """Assigning the flag on a duplicated object raises and changes nothing."""
        with self._duplicate_error():
            data.flags.allows_duplicate_labels = False

        assert data.flags.allows_duplicate_labels is True

    def test_series_raises(self):
        """Concatenating overlapping labels fails if one input disallows them."""
        permissive = pd.Series(0, index=["a", "b"])
        strict = pd.Series([0, 1], index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        with self._duplicate_error():
            pd.concat([permissive, strict])

    @pytest.mark.parametrize(
        "getter, target",
        [
            (operator.itemgetter(["A", "A"]), None),
            # loc
            (operator.itemgetter(["a", "a"]), "loc"),
            (operator.itemgetter(("a", ["A", "A"])), "loc"),
            (operator.itemgetter((["a", "a"], "A")), "loc"),
            # iloc
            (operator.itemgetter([0, 0]), "iloc"),
            (operator.itemgetter((0, [0, 0])), "iloc"),
            (operator.itemgetter(([0, 0], 0)), "iloc"),
        ],
    )
    def test_getitem_raises(self, getter, target):
        """Selecting the same label or position twice raises."""
        frame = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"])
        frame = frame.set_flags(allows_duplicate_labels=False)

        # ``target`` names an indexer attribute ("loc"/"iloc"); a falsy value
        # means the getter is applied to the frame itself.
        indexed = getattr(frame, target) if target else frame

        with self._duplicate_error():
            getter(indexed)

    @pytest.mark.parametrize(
        "objs, kwargs",
        [
            (
                [
                    pd.Series(1, index=[0, 1], name="a"),
                    pd.Series(2, index=[0, 1], name="a"),
                ],
                {"axis": 1},
            )
        ],
    )
    def test_concat_raises(self, objs, kwargs):
        """Column-wise concat of same-named Series raises."""
        strict_objs = [obj.set_flags(allows_duplicate_labels=False) for obj in objs]
        with self._duplicate_error():
            pd.concat(strict_objs, **kwargs)

    @not_implemented
    def test_merge_raises(self):
        """Merging against a duplicated index should raise (xfail)."""
        strict = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags(
            allows_duplicate_labels=False
        )
        duplicated = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"])
        with self._duplicate_error():
            pd.merge(strict, duplicated, left_index=True, right_index=True)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "idx",
    [
        pd.Index([1, 1]),
        pd.Index(["a", "a"]),
        pd.Index([1.1, 1.1]),
        pd.PeriodIndex([pd.Period("2000", "D")] * 2),
        pd.DatetimeIndex([pd.Timestamp("2000")] * 2),
        pd.TimedeltaIndex([pd.Timedelta("1D")] * 2),
        pd.CategoricalIndex(["a", "a"]),
        pd.IntervalIndex([pd.Interval(0, 1)] * 2),
        pd.MultiIndex.from_tuples([("a", 1), ("a", 1)]),
    ],
    ids=lambda x: type(x).__name__,
)
def test_raises_basic(idx):
    """Disallowing duplicates raises for a duplicated index of each kind.

    The duplicated ``idx`` is tried as a Series index, a DataFrame index and
    as DataFrame columns.
    """
    builders = (
        lambda: pd.Series(1, index=idx),
        lambda: pd.DataFrame({"A": [1, 1]}, index=idx),
        lambda: pd.DataFrame([[1, 2]], columns=idx),
    )
    for build in builders:
        # Construction stays inside the context, as in the three explicit cases.
        with pytest.raises(
            pd.errors.DuplicateLabelError, match="Index has duplicates."
        ):
            build().set_flags(allows_duplicate_labels=False)
|  | ||||
|  | ||||
def test_format_duplicate_labels_message():
    """``Index._format_duplicate_message`` reports positions per repeated label."""
    labels = pd.Index(["a", "b", "a", "b", "c"])

    # "c" occurs once, so only "a" and "b" are expected in the report.
    expected_index = pd.Index(["a", "b"], name="label")
    expected = pd.DataFrame({"positions": [[0, 2], [1, 3]]}, index=expected_index)

    tm.assert_frame_equal(labels._format_duplicate_message(), expected)
|  | ||||
|  | ||||
def test_format_duplicate_labels_message_multi():
    """Same check as the flat-index case, for a two-level MultiIndex."""
    outer = ["A"]
    labels = pd.MultiIndex.from_product([outer, ["a", "b", "a", "b", "c"]])

    expected_index = pd.MultiIndex.from_product([outer, ["a", "b"]])
    expected = pd.DataFrame({"positions": [[0, 2], [1, 3]]}, index=expected_index)

    tm.assert_frame_equal(labels._format_duplicate_message(), expected)
|  | ||||
|  | ||||
def test_dataframe_insert_raises():
    """``insert(..., allow_duplicates=True)`` is rejected when the flag is off."""
    frame = pd.DataFrame({"A": [1, 2]})
    frame = frame.set_flags(allows_duplicate_labels=False)
    with pytest.raises(ValueError, match="Cannot specify"):
        frame.insert(0, "A", [3, 4], allow_duplicates=True)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "method, frame_only",
    [
        (operator.methodcaller("set_index", "A", inplace=True), True),
        (operator.methodcaller("reset_index", inplace=True), True),
        (operator.methodcaller("rename", lambda x: x, inplace=True), False),
    ],
)
def test_inplace_raises(method, frame_only):
    """``inplace=True`` calls raise ValueError when duplicates are disallowed.

    ``frame_only`` marks calls that are exercised on the DataFrame only; the
    others are also run against a Series taken from it.
    """
    frame = pd.DataFrame({"A": [0, 0], "B": [1, 2]})
    frame = frame.set_flags(allows_duplicate_labels=False)
    column = frame["A"]
    column.flags.allows_duplicate_labels = False

    targets = [frame] if frame_only else [frame, column]
    for obj in targets:
        with pytest.raises(ValueError, match="Cannot specify"):
            method(obj)
|  | ||||
|  | ||||
def test_pickle():
    """A pickle round trip gives back an equal Series and an equal DataFrame."""
    ser = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False)
    tm.assert_series_equal(ser, tm.round_trip_pickle(ser))

    frame = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False)
    tm.assert_frame_equal(frame, tm.round_trip_pickle(frame))
| @ -0,0 +1,767 @@ | ||||
| """ | ||||
| An exhaustive list of pandas methods exercising NDFrame.__finalize__. | ||||
| """ | ||||
| import operator | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
| # TODO: | ||||
| # * Binary methods (mul, div, etc.) | ||||
| # * Binary outputs (align, etc.) | ||||
| # * top-level methods (concat, merge, get_dummies, etc.) | ||||
| # * window | ||||
| # * cumulative reductions | ||||
|  | ||||
# xfail marker attached to ``_all_methods`` entries that are not implemented yet.
not_implemented_mark = pytest.mark.xfail(reason="not implemented")

# Two-level index named ("A", "B"): the product of ["a", "b"] and [0, 1].
mi = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["A", "B"])

# Constructor-argument tuples shared by the DataFrame entries below:
# a single-column, single-row frame ...
frame_data = ({"A": [1]},)
# ... and a four-row frame indexed by ``mi``.
frame_mi_data = ({"A": [1, 2, 3, 4]}, mi)
|  | ||||
|  | ||||
# Each entry is a tuple of
# - Callable: the constructor (Series or DataFrame)
# - Tuple: the positional arguments passed to the constructor
# - Callable: the operation under test; the constructed value, with attrs
#   set, is passed to it.
|  | ||||
| _all_methods = [ | ||||
|     (pd.Series, ([0],), operator.methodcaller("take", [])), | ||||
|     (pd.Series, ([0],), operator.methodcaller("__getitem__", [True])), | ||||
|     (pd.Series, ([0],), operator.methodcaller("repeat", 2)), | ||||
|     (pd.Series, ([0],), operator.methodcaller("reset_index")), | ||||
|     (pd.Series, ([0],), operator.methodcaller("reset_index", drop=True)), | ||||
|     (pd.Series, ([0],), operator.methodcaller("to_frame")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("drop_duplicates")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("duplicated")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("round")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("rename", lambda x: x + 1)), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("rename", "name")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("set_axis", ["a", "b"])), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("reindex", [1, 0])), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("drop", [0])), | ||||
|     (pd.Series, (pd.array([0, pd.NA]),), operator.methodcaller("fillna", 0)), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("replace", {0: 1})), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("shift")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("isin", [0, 1])), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("between", 0, 2)), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("isna")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("isnull")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("notna")), | ||||
|     (pd.Series, ([0, 0],), operator.methodcaller("notnull")), | ||||
|     (pd.Series, ([1],), operator.methodcaller("add", pd.Series([1]))), | ||||
|     # TODO: mul, div, etc. | ||||
|     ( | ||||
|         pd.Series, | ||||
|         ([0], pd.period_range("2000", periods=1)), | ||||
|         operator.methodcaller("to_timestamp"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.Series, | ||||
|         ([0], pd.date_range("2000", periods=1)), | ||||
|         operator.methodcaller("to_period"), | ||||
|     ), | ||||
|     pytest.param( | ||||
|         ( | ||||
|             pd.DataFrame, | ||||
|             frame_data, | ||||
|             operator.methodcaller("dot", pd.DataFrame(index=["A"])), | ||||
|         ), | ||||
|         marks=pytest.mark.xfail(reason="Implement binary finalize"), | ||||
|     ), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("transpose")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", ["A"])), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))), | ||||
|     (pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1", engine="python")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("reindex", [0, 1])), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("drop", columns=["A"])), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("drop", index=[0])), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("rename", columns={"A": "a"})), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("rename", index=lambda x: x)), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("fillna", "A")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("fillna", method="ffill")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("set_index", "A")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("reset_index")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("isna")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("isnull")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("notna")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("notnull")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("dropna")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("drop_duplicates")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("duplicated")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("sort_values", by="A")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("sort_index")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("nlargest", 1, "A")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("nsmallest", 1, "A")), | ||||
|     (pd.DataFrame, frame_mi_data, operator.methodcaller("swaplevel")), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_data, | ||||
|         operator.methodcaller("add", pd.DataFrame(*frame_data)), | ||||
|     ), | ||||
|     # TODO: div, mul, etc. | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_data, | ||||
|         operator.methodcaller("combine", pd.DataFrame(*frame_data), operator.add), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_data, | ||||
|         operator.methodcaller("combine_first", pd.DataFrame(*frame_data)), | ||||
|     ), | ||||
|     pytest.param( | ||||
|         ( | ||||
|             pd.DataFrame, | ||||
|             frame_data, | ||||
|             operator.methodcaller("update", pd.DataFrame(*frame_data)), | ||||
|         ), | ||||
|         marks=not_implemented_mark, | ||||
|     ), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1], "B": [1]},), | ||||
|         operator.methodcaller("pivot_table", columns="A"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1], "B": [1]},), | ||||
|         operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]), | ||||
|     ), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("stack")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("explode", "A")), | ||||
|     (pd.DataFrame, frame_mi_data, operator.methodcaller("unstack")), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]},), | ||||
|         operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]), | ||||
|     ), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("map", lambda x: x)), | ||||
|     pytest.param( | ||||
|         ( | ||||
|             pd.DataFrame, | ||||
|             frame_data, | ||||
|             operator.methodcaller("merge", pd.DataFrame({"A": [1]})), | ||||
|         ), | ||||
|         marks=not_implemented_mark, | ||||
|     ), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("round", 2)), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("corr")), | ||||
|     pytest.param( | ||||
|         (pd.DataFrame, frame_data, operator.methodcaller("cov")), | ||||
|         marks=[ | ||||
|             pytest.mark.filterwarnings("ignore::RuntimeWarning"), | ||||
|         ], | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_data, | ||||
|         operator.methodcaller("corrwith", pd.DataFrame(*frame_data)), | ||||
|     ), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("count")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("nunique")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("idxmin")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("idxmax")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("mode")), | ||||
|     (pd.Series, [0], operator.methodcaller("mode")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("median")), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_data, | ||||
|         operator.methodcaller("quantile", numeric_only=True), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_data, | ||||
|         operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [pd.Timedelta(days=1), pd.Timedelta(days=2)]},), | ||||
|         operator.methodcaller("quantile", numeric_only=False), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [np.datetime64("2022-01-01"), np.datetime64("2022-01-02")]},), | ||||
|         operator.methodcaller("quantile", numeric_only=True), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1]}, [pd.Period("2000", "D")]), | ||||
|         operator.methodcaller("to_timestamp"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1]}, [pd.Timestamp("2000")]), | ||||
|         operator.methodcaller("to_period", freq="D"), | ||||
|     ), | ||||
|     (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])), | ||||
|     (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", pd.Series([1]))), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_mi_data, | ||||
|         operator.methodcaller("isin", pd.DataFrame({"A": [1]})), | ||||
|     ), | ||||
|     (pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("pop", "A")), | ||||
|     # Squeeze on columns, otherwise we'll end up with a scalar | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("squeeze", axis="columns")), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("squeeze")), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("rename_axis", columns="a")), | ||||
|     # Unary ops | ||||
|     (pd.DataFrame, frame_data, operator.neg), | ||||
|     (pd.Series, [1], operator.neg), | ||||
|     (pd.DataFrame, frame_data, operator.pos), | ||||
|     (pd.Series, [1], operator.pos), | ||||
|     (pd.DataFrame, frame_data, operator.inv), | ||||
|     (pd.Series, [1], operator.inv), | ||||
|     (pd.DataFrame, frame_data, abs), | ||||
|     (pd.Series, [1], abs), | ||||
|     (pd.DataFrame, frame_data, round), | ||||
|     (pd.Series, [1], round), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])), | ||||
|     (pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")), | ||||
|     (pd.Series, (1, mi), operator.methodcaller("xs", "a")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("get", "A")), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_data, | ||||
|         operator.methodcaller("reindex_like", pd.DataFrame({"A": [1, 2, 3]})), | ||||
|     ), | ||||
|     ( | ||||
|         pd.Series, | ||||
|         frame_data, | ||||
|         operator.methodcaller("reindex_like", pd.Series([0, 1, 2])), | ||||
|     ), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("add_prefix", "_")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("add_suffix", "_")), | ||||
|     (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_prefix", "_")), | ||||
|     (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_suffix", "_")), | ||||
|     (pd.Series, ([3, 2],), operator.methodcaller("sort_values")), | ||||
|     (pd.Series, ([1] * 10,), operator.methodcaller("head")), | ||||
|     (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("head")), | ||||
|     (pd.Series, ([1] * 10,), operator.methodcaller("tail")), | ||||
|     (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("tail")), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("sample", n=2, replace=True)), | ||||
|     (pd.DataFrame, (frame_data,), operator.methodcaller("sample", n=2, replace=True)), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("astype", float)), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("astype", float)), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("copy")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("copy")), | ||||
|     (pd.Series, ([1, 2], None, object), operator.methodcaller("infer_objects")), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": np.array([1, 2], dtype=object)},), | ||||
|         operator.methodcaller("infer_objects"), | ||||
|     ), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")), | ||||
|     (pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")), | ||||
|     (pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("clip", lower=1)), | ||||
|     ( | ||||
|         pd.Series, | ||||
|         (1, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("asfreq", "h"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("asfreq", "h"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.Series, | ||||
|         (1, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("at_time", "12:00"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("at_time", "12:00"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.Series, | ||||
|         (1, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("between_time", "12:00", "13:00"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("between_time", "12:00", "13:00"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.Series, | ||||
|         (1, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("last", "3D"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("last", "3D"), | ||||
|     ), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("rank")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("rank")), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)), | ||||
|     ( | ||||
|         pd.Series, | ||||
|         (1, pd.date_range("2000", periods=4, tz="UTC")), | ||||
|         operator.methodcaller("tz_convert", "CET"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4, tz="UTC")), | ||||
|         operator.methodcaller("tz_convert", "CET"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.Series, | ||||
|         (1, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("tz_localize", "CET"), | ||||
|     ), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), | ||||
|         operator.methodcaller("tz_localize", "CET"), | ||||
|     ), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("describe")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("describe")), | ||||
|     (pd.Series, ([1, 2],), operator.methodcaller("pct_change")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("pct_change")), | ||||
|     (pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())), | ||||
|     ( | ||||
|         pd.DataFrame, | ||||
|         frame_mi_data, | ||||
|         operator.methodcaller("transform", lambda x: x - x.min()), | ||||
|     ), | ||||
|     (pd.Series, ([1],), operator.methodcaller("apply", lambda x: x)), | ||||
|     (pd.DataFrame, frame_mi_data, operator.methodcaller("apply", lambda x: x)), | ||||
|     # Cumulative reductions | ||||
|     (pd.Series, ([1],), operator.methodcaller("cumsum")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("cumsum")), | ||||
|     (pd.Series, ([1],), operator.methodcaller("cummin")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("cummin")), | ||||
|     (pd.Series, ([1],), operator.methodcaller("cummax")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("cummax")), | ||||
|     (pd.Series, ([1],), operator.methodcaller("cumprod")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("cumprod")), | ||||
|     # Reductions | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("any")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("all")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("min")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("max")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("sum")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("std")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("mean")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("prod")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("sem")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("skew")), | ||||
|     (pd.DataFrame, frame_data, operator.methodcaller("kurt")), | ||||
| ] | ||||
|  | ||||
|  | ||||
def idfn(x):
    """
    Build a short, readable pytest id from a parametrized value.

    ``str(x)`` is searched with the greedy pattern ``'(.*)?'``; when it
    matches, the captured text between the single quotes is returned,
    otherwise the full string form of ``x`` is returned.
    """
    text = str(x)
    quoted = re.search(r"'(.*)?'", text)
    return quoted.group(1) if quoted else text
|  | ||||
|  | ||||
@pytest.fixture(params=_all_methods, ids=lambda case: idfn(case[-1]))
def ndframe_method(request):
    """
    Yield one entry of ``_all_methods`` per test invocation.

    The test id is derived from the last element of each entry via ``idfn``.
    """
    case = request.param
    return case
|  | ||||
|  | ||||
@pytest.mark.filterwarnings(
    "ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning",
    "ignore:last is deprecated:FutureWarning",
)
def test_finalize_called(ndframe_method):
    """Check that ``attrs`` set on the input are present on the method result."""
    klass, ctor_args, call = ndframe_method

    obj = klass(*ctor_args)
    obj.attrs = {"a": 1}

    assert call(obj).attrs == {"a": 1}
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data",
    [
        pd.Series(1, index=pd.date_range("2000", periods=4)),
        pd.DataFrame({"A": [1, 1, 1, 1]}, index=pd.date_range("2000", periods=4)),
    ],
)
def test_finalize_first(data):
    """``first`` must warn about its deprecation and keep ``attrs``."""
    data.attrs = {"a": 1}
    with tm.assert_produces_warning(FutureWarning, match="first is deprecated"):
        subset = data.first("3D")
        assert subset.attrs == {"a": 1}
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data",
    [
        pd.Series(1, index=pd.date_range("2000", periods=4)),
        pd.DataFrame({"A": [1, 1, 1, 1]}, index=pd.date_range("2000", periods=4)),
    ],
)
def test_finalize_last(data):
    """``last`` must warn about its deprecation and keep ``attrs``."""
    # GH 53710
    data.attrs = {"a": 1}
    with tm.assert_produces_warning(FutureWarning, match="last is deprecated"):
        subset = data.last("3D")
        assert subset.attrs == {"a": 1}
|  | ||||
|  | ||||
@not_implemented_mark
def test_finalize_called_eval_numexpr():
    """``DataFrame.eval`` with the numexpr engine should carry ``attrs`` over."""
    # Skipped entirely when numexpr is not installed.
    pytest.importorskip("numexpr")
    frame = pd.DataFrame({"A": [1, 2]})
    frame.attrs["A"] = 1
    evaluated = frame.eval("A + 1", engine="numexpr")
    assert evaluated.attrs == {"A": 1}
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Binary operations | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize("annotate", ["left", "right", "both"])
@pytest.mark.parametrize(
    "args",
    [
        (1, pd.Series([1])),
        (1, pd.DataFrame({"A": [1]})),
        (pd.Series([1]), 1),
        (pd.DataFrame({"A": [1]}), 1),
        (pd.Series([1]), pd.Series([1])),
        (pd.DataFrame({"A": [1]}), pd.DataFrame({"A": [1]})),
        (pd.Series([1]), pd.DataFrame({"A": [1]})),
        (pd.DataFrame({"A": [1]}), pd.Series([1])),
    ],
    ids=lambda x: f"({type(x[0]).__name__},{type(x[1]).__name__})",
)
def test_binops(request, args, annotate, all_binary_operators):
    """
    Check that binary operators propagate ``attrs`` from the annotated operand(s).

    ``annotate`` selects which operand ("left", "right" or "both") receives
    ``attrs`` before the operator is applied. Combinations known not to
    propagate ``attrs`` are marked as expected failures.
    """
    # This generates 624 tests... Is that needed?
    left, right = args

    # The parametrized objects are shared between invocations, so start
    # every run from empty attrs.
    for operand in (left, right):
        if isinstance(operand, (pd.DataFrame, pd.Series)):
            operand.attrs = {}

    if annotate == "left" and isinstance(left, int):
        pytest.skip("left is an int and doesn't support .attrs")
    if annotate == "right" and isinstance(right, int):
        pytest.skip("right is an int and doesn't support .attrs")

    neither_is_int = not (isinstance(left, int) or isinstance(right, int))
    if neither_is_int and annotate != "both":
        reflected = all_binary_operators.__name__.startswith("r")
        if isinstance(left, type(right)):
            # Same container type: the non-reflected operators fail when only
            # the right operand is annotated, the reflected ones when only
            # the left operand is.
            failing_side = "left" if reflected else "right"
            if annotate == failing_side:
                request.applymarker(
                    pytest.mark.xfail(
                        reason=f"{all_binary_operators} doesn't work when "
                        f"{failing_side} has "
                        f"attrs and both are {type(left)}"
                    )
                )
        else:
            # Mixed Series/DataFrame operands: fails whenever the annotated
            # operand is the Series, regardless of operator direction.
            series_is_annotated = (
                annotate == "left" and isinstance(left, pd.Series)
            ) or (annotate == "right" and isinstance(right, pd.Series))
            if series_is_annotated:
                request.applymarker(
                    pytest.mark.xfail(
                        reason=f"{all_binary_operators} doesn't work when the "
                        "objects are different Series has attrs"
                    )
                )

    if annotate in {"left", "both"} and not isinstance(left, int):
        left.attrs = {"a": 1}
    if annotate in {"right", "both"} and not isinstance(right, int):
        right.attrs = {"a": 1}

    comparison_ops = [
        operator.eq,
        operator.ne,
        operator.gt,
        operator.ge,
        operator.lt,
        operator.le,
    ]
    if all_binary_operators in comparison_ops:
        if isinstance(left, pd.DataFrame) and isinstance(right, pd.Series):
            # in 2.0 silent alignment on comparisons was removed xref GH#28759
            left, right = left.align(right, axis=1, copy=False)
        elif isinstance(left, pd.Series) and isinstance(right, pd.DataFrame):
            right, left = right.align(left, axis=1, copy=False)

    outcome = all_binary_operators(left, right)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Accessors | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "method",
    [
        operator.methodcaller("capitalize"),
        operator.methodcaller("casefold"),
        operator.methodcaller("cat", ["a"]),
        operator.methodcaller("contains", "a"),
        operator.methodcaller("count", "a"),
        operator.methodcaller("encode", "utf-8"),
        operator.methodcaller("endswith", "a"),
        operator.methodcaller("extract", r"(\w)(\d)"),
        operator.methodcaller("extract", r"(\w)(\d)", expand=False),
        operator.methodcaller("find", "a"),
        operator.methodcaller("findall", "a"),
        operator.methodcaller("get", 0),
        operator.methodcaller("index", "a"),
        operator.methodcaller("len"),
        operator.methodcaller("ljust", 4),
        operator.methodcaller("lower"),
        operator.methodcaller("lstrip"),
        operator.methodcaller("match", r"\w"),
        operator.methodcaller("normalize", "NFC"),
        operator.methodcaller("pad", 4),
        operator.methodcaller("partition", "a"),
        operator.methodcaller("repeat", 2),
        operator.methodcaller("replace", "a", "b"),
        operator.methodcaller("rfind", "a"),
        operator.methodcaller("rindex", "a"),
        operator.methodcaller("rjust", 4),
        operator.methodcaller("rpartition", "a"),
        operator.methodcaller("rstrip"),
        operator.methodcaller("slice", 4),
        operator.methodcaller("slice_replace", 1, repl="a"),
        operator.methodcaller("startswith", "a"),
        operator.methodcaller("strip"),
        operator.methodcaller("swapcase"),
        operator.methodcaller("translate", {"a": "b"}),
        operator.methodcaller("upper"),
        operator.methodcaller("wrap", 4),
        operator.methodcaller("zfill", 4),
        operator.methodcaller("isalnum"),
        operator.methodcaller("isalpha"),
        operator.methodcaller("isdigit"),
        operator.methodcaller("isspace"),
        operator.methodcaller("islower"),
        operator.methodcaller("isupper"),
        operator.methodcaller("istitle"),
        operator.methodcaller("isnumeric"),
        operator.methodcaller("isdecimal"),
        operator.methodcaller("get_dummies"),
    ],
    ids=idfn,
)
def test_string_method(method):
    """Each ``.str`` accessor method must keep the Series ``attrs`` on its result."""
    ser = pd.Series(["a1"])
    ser.attrs = {"a": 1}
    outcome = method(ser.str)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "method",
    [
        operator.methodcaller("to_period"),
        operator.methodcaller("tz_localize", "CET"),
        operator.methodcaller("normalize"),
        operator.methodcaller("strftime", "%Y"),
        operator.methodcaller("round", "h"),
        operator.methodcaller("floor", "h"),
        operator.methodcaller("ceil", "h"),
        operator.methodcaller("month_name"),
        operator.methodcaller("day_name"),
    ],
    ids=idfn,
)
def test_datetime_method(method):
    """Each ``.dt`` accessor method must keep the Series ``attrs`` on its result."""
    ser = pd.Series(pd.date_range("2000", periods=4))
    ser.attrs = {"a": 1}
    outcome = method(ser.dt)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "attr",
    [
        "date",
        "time",
        "timetz",
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "microsecond",
        "nanosecond",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
        "daysinmonth",
        "days_in_month",
    ],
)
def test_datetime_property(attr):
    """Each ``.dt`` property of a datetime Series must keep the Series ``attrs``."""
    ser = pd.Series(pd.date_range("2000", periods=4))
    ser.attrs = {"a": 1}
    outcome = getattr(ser.dt, attr)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "attr", ["days", "seconds", "microseconds", "nanoseconds", "components"]
)
def test_timedelta_property(attr):
    """Each ``.dt`` property of a timedelta Series must keep the Series ``attrs``."""
    ser = pd.Series(pd.timedelta_range("2000", periods=4))
    ser.attrs = {"a": 1}
    outcome = getattr(ser.dt, attr)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
@pytest.mark.parametrize("method", [operator.methodcaller("total_seconds")])
def test_timedelta_methods(method):
    """``.dt`` methods of a timedelta Series must keep the Series ``attrs``."""
    ser = pd.Series(pd.timedelta_range("2000", periods=4))
    ser.attrs = {"a": 1}
    outcome = method(ser.dt)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "method",
    [
        operator.methodcaller("add_categories", ["c"]),
        operator.methodcaller("as_ordered"),
        operator.methodcaller("as_unordered"),
        lambda accessor: accessor.codes,
        operator.methodcaller("remove_categories", "a"),
        operator.methodcaller("remove_unused_categories"),
        operator.methodcaller("rename_categories", {"a": "A", "b": "B"}),
        operator.methodcaller("reorder_categories", ["b", "a"]),
        operator.methodcaller("set_categories", ["A", "B"]),
    ],
)
@not_implemented_mark
def test_categorical_accessor(method):
    """``.cat`` accessor results are expected to carry the Series ``attrs``."""
    ser = pd.Series(["a", "b"], dtype="category")
    ser.attrs = {"a": 1}
    outcome = method(ser.cat)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
| # ---------------------------------------------------------------------------- | ||||
| # Groupby | ||||
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
)
@pytest.mark.parametrize(
    "method",
    [
        operator.methodcaller("sum"),
        lambda x: x.apply(lambda y: y),
        lambda x: x.agg("sum"),
        lambda x: x.agg("mean"),
        lambda x: x.agg("median"),
    ],
)
def test_groupby_finalize(obj, method):
    """Groupby operations listed above must keep the ``attrs`` of the grouped object."""
    obj.attrs = {"a": 1}
    grouped = obj.groupby([0, 0], group_keys=False)
    outcome = method(grouped)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
)
@pytest.mark.parametrize(
    "method",
    [
        lambda x: x.agg(["sum", "count"]),
        lambda x: x.agg("std"),
        lambda x: x.agg("var"),
        lambda x: x.agg("sem"),
        lambda x: x.agg("size"),
        lambda x: x.agg("ohlc"),
    ],
)
@not_implemented_mark
def test_groupby_finalize_not_implemented(obj, method):
    """Groupby aggregations that are expected to carry ``attrs`` over to the result."""
    obj.attrs = {"a": 1}
    grouped = obj.groupby([0, 0])
    outcome = method(grouped)
    assert outcome.attrs == {"a": 1}
|  | ||||
|  | ||||
def test_finalize_frame_series_name():
    """Finalizing a Series from a frame with a "name" column leaves ``name`` unset."""
    # https://github.com/pandas-dev/pandas/pull/37186/files#r506978889
    # ensure we don't copy the column `name` to the Series.
    frame = pd.DataFrame({"name": [1, 2]})
    ser = pd.Series([1, 2])
    finalized = ser.__finalize__(frame)
    assert finalized.name is None
							
								
								
									
										209
									
								
								lib/python3.11/site-packages/pandas/tests/generic/test_frame.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										209
									
								
								lib/python3.11/site-packages/pandas/tests/generic/test_frame.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,209 @@ | ||||
| from copy import deepcopy | ||||
| from operator import methodcaller | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
class TestDataFrame:
    """DataFrame-specific tests for axis naming, truthiness, metadata and copying."""

    @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"])
    def test_set_axis_name(self, func):
        """Naming an axis returns a renamed object and leaves the original alone."""
        frame = DataFrame([[1, 2], [3, 4]])

        renamed = getattr(frame, func)("foo")
        assert frame.index.name is None
        assert renamed.index.name == "foo"

        renamed = getattr(frame, func)("cols", axis=1)
        assert frame.columns.name is None
        assert renamed.columns.name == "cols"

    @pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"])
    def test_set_axis_name_mi(self, func):
        """Level names are applied only to the MultiIndex of the requested axis."""
        row_index = MultiIndex.from_tuples([("A", x) for x in "aBc"])
        col_index = MultiIndex.from_tuples([("C", x) for x in "xyz"])
        frame = DataFrame(np.empty((3, 3)), index=row_index, columns=col_index)

        level_names = ["L1", "L2"]

        renamed = getattr(frame, func)(level_names)
        assert renamed.index.names == level_names
        assert renamed.columns.names == [None, None]

        renamed = getattr(frame, func)(level_names, axis=1)
        assert renamed.columns.names == ["L1", "L2"]
        assert renamed.index.names == [None, None]

    def test_nonzero_single_element(self):
        """``DataFrame.bool`` warns, works for one element and raises otherwise."""
        # allow single item via bool method
        msg_warn = (
            "DataFrame.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        true_frame = DataFrame([[True]])
        false_frame = DataFrame([[False]])
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            assert true_frame.bool()

        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            assert not false_frame.bool()

        two_elements = DataFrame([[False, False]])
        msg_err = "The truth value of a DataFrame is ambiguous"
        with pytest.raises(ValueError, match=msg_err):
            bool(two_elements)

        with tm.assert_produces_warning(
            FutureWarning, match=msg_warn
        ), pytest.raises(ValueError, match=msg_err):
            two_elements.bool()

    def test_metadata_propagation_indiv_groupby(self):
        """Metadata of a frame and of its groupby-sum result must be equivalent."""
        # groupby
        columns = {
            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
            "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
            "C": np.random.default_rng(2).standard_normal(8),
            "D": np.random.default_rng(2).standard_normal(8),
        }
        frame = DataFrame(columns)
        summed = frame.groupby("A").sum()
        tm.assert_metadata_equivalent(frame, summed)

    def test_metadata_propagation_indiv_resample(self):
        """Metadata of a frame and of its resample object must be equivalent."""
        # resample
        values = np.random.default_rng(2).standard_normal((1000, 2))
        stamps = date_range("20130101", periods=1000, freq="s")
        frame = DataFrame(values, index=stamps)
        resampled = frame.resample("1min")
        tm.assert_metadata_equivalent(frame, resampled)

    def test_metadata_propagation_indiv(self, monkeypatch):
        """A custom ``__finalize__`` can combine metadata on merge and concat."""
        # merging with override
        # GH 6923

        def finalize(self, other, method=None, **kwargs):
            # Compute the propagated value per metadata name, then store it.
            for name in self._metadata:
                if method == "merge":
                    left, right = other.left, other.right
                    value = getattr(left, name, "") + "|" + getattr(right, name, "")
                elif method == "concat":
                    value = "+".join(
                        [getattr(o, name) for o in other.objs if getattr(o, name, None)]
                    )
                else:
                    value = getattr(other, name, "")
                object.__setattr__(self, name, value)

            return self

        with monkeypatch.context() as patcher:
            patcher.setattr(DataFrame, "_metadata", ["filename"])
            patcher.setattr(DataFrame, "__finalize__", finalize)

            left_frame = DataFrame(
                np.random.default_rng(2).integers(0, 4, (3, 2)), columns=["a", "b"]
            )
            right_frame = DataFrame(
                np.random.default_rng(2).integers(0, 4, (3, 2)), columns=["c", "d"]
            )
            DataFrame._metadata = ["filename"]
            left_frame.filename = "fname1.csv"
            right_frame.filename = "fname2.csv"

            merged = left_frame.merge(
                right_frame, left_on=["a"], right_on=["c"], how="inner"
            )
            assert merged.filename == "fname1.csv|fname2.csv"

            # concat
            # GH#6927
            single = DataFrame(
                np.random.default_rng(2).integers(0, 4, (3, 2)), columns=list("ab")
            )
            single.filename = "foo"

            stacked = pd.concat([single, single])
            assert stacked.filename == "foo+foo"

    def test_set_attribute(self):
        """Attribute and column of the same name stay independent of each other."""
        # Test for consistent setattr behavior when an attribute and a column
        # have the same name (Issue #8994)
        frame = DataFrame({"x": [1, 2, 3]})

        frame.y = 2
        frame["y"] = [2, 4, 6]
        frame.y = 5

        assert frame.y == 5
        expected_column = Series([2, 4, 6], name="y")
        tm.assert_series_equal(frame["y"], expected_column)

    def test_deepcopy_empty(self):
        """Deep-copying an empty frame that has columns yields an equal frame."""
        # This test covers empty frame copying with non-empty column sets
        # as reported in issue GH15370
        original = DataFrame(data=[], index=[], columns=["A"])
        duplicate = deepcopy(original)

        tm.assert_frame_equal(duplicate, original)
|  | ||||
|  | ||||
| # formerly in Generic but only test DataFrame | ||||
class TestDataFrame2:
    """Argument-validation tests for DataFrame methods."""

    @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
    def test_validate_bool_args(self, value):
        """A non-bool ``inplace`` argument must raise ``ValueError``."""
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

        msg = 'For argument "inplace" expected type bool, received type'

        # Every call works on a fresh copy and is expected to reject ``value``.
        calls = [
            lambda: df.copy().rename_axis(
                mapper={"a": "x", "b": "y"}, axis=1, inplace=value
            ),
            lambda: df.copy().drop("a", axis=1, inplace=value),
            lambda: df.copy().fillna(value=0, inplace=value),
            lambda: df.copy().replace(to_replace=1, value=7, inplace=value),
            lambda: df.copy().interpolate(inplace=value),
            lambda: df.copy()._where(cond=df.a > 2, inplace=value),
            lambda: df.copy().mask(cond=df.a > 2, inplace=value),
        ]
        for call in calls:
            with pytest.raises(ValueError, match=msg):
                call()

    def test_unexpected_keyword(self):
        """Unknown or unsupported keyword arguments must raise ``TypeError``."""
        # GH8597
        df = DataFrame(
            np.random.default_rng(2).standard_normal((5, 2)), columns=["jim", "joe"]
        )
        ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
        ts = df["joe"].copy()
        ts[2] = np.nan

        msg = "unexpected keyword"
        calls = [
            lambda: df.drop("joe", axis=1, in_place=True),
            lambda: df.reindex([1, 0], inplace=True),
            lambda: ca.fillna(0, inplace=True),
            lambda: ts.fillna(0, in_place=True),
        ]
        for call in calls:
            with pytest.raises(TypeError, match=msg):
                call()
| @ -0,0 +1,504 @@ | ||||
| from copy import ( | ||||
|     copy, | ||||
|     deepcopy, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_scalar | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| # ---------------------------------------------------------------------- | ||||
| # Generic types test cases | ||||
|  | ||||
|  | ||||
def construct(box, shape, value=None, dtype=None, **kwargs):
    """
    Construct a ``box`` object (e.g. Series or DataFrame) of the given shape.

    Parameters
    ----------
    box : type
        Constructor to call. It must expose ``_AXIS_LEN`` and
        ``_info_axis_name``.
    shape : int or sequence of int
        Target shape. An int is expanded to ``box._AXIS_LEN`` equal dimensions.
    value : scalar, ndarray or None, default None
        * ``None``: fill with standard-normal draws from a generator seeded
          with 2.
        * ``"empty"``: pass no data, force ``float64`` and drop the info-axis
          keyword from ``kwargs``.
        * any other scalar: fill an array of ``shape`` with it.
        * array: flatten it and repeat every element the same number of
          times so that the result has exactly ``prod(shape)`` elements.
    dtype : optional
        Forwarded to ``np.empty`` (scalar fill) and to ``box``.
    **kwargs
        Forwarded to ``box``.

    Returns
    -------
    object
        The result of ``box(arr, dtype=dtype, **kwargs)``.

    Raises
    ------
    ValueError
        If an array ``value`` is empty or its size does not evenly divide the
        number of elements implied by ``shape``.
    """
    if isinstance(shape, int):
        shape = tuple([shape] * box._AXIS_LEN)

    if value is None:
        arr = np.random.default_rng(2).standard_normal(shape)
    elif is_scalar(value):
        if value == "empty":
            arr = None
            dtype = np.float64

            # remove the info axis
            kwargs.pop(box._info_axis_name, None)
        else:
            arr = np.empty(shape, dtype=dtype)
            arr.fill(value)
    else:
        flat = value.ravel()
        total = int(np.prod(shape))
        # Validate before dividing so a bad input fails with a clear message
        # instead of a division-by-zero warning or a numpy casting error.
        if flat.size == 0 or total % flat.size != 0:
            raise ValueError("invalid value passed in construct")

        # Integer repeat count; no reliance on float -> int coercion.
        arr = np.repeat(flat, total // flat.size).reshape(shape)

    return box(arr, dtype=dtype, **kwargs)
|  | ||||
|  | ||||
class TestGeneric:
    """
    Generic behaviour checks that take a ``frame_or_series`` argument.

    NOTE(review): ``frame_or_series`` is presumably a pytest fixture supplying
    the Series and DataFrame classes; it is defined outside this file.
    Objects under test are built with the module-level ``construct`` helper.
    """

    @pytest.mark.parametrize(
        "func",
        [
            str.lower,
            {x: x.lower() for x in list("ABCD")},
            Series({x: x.lower() for x in list("ABCD")}),
        ],
    )
    def test_rename(self, frame_or_series, func):
        """Renaming one axis with a function, dict or Series mapper lower-cases it."""
        # single axis
        idx = list("ABCD")

        for axis in frame_or_series._AXIS_ORDERS:
            kwargs = {axis: idx}
            obj = construct(frame_or_series, 4, **kwargs)

            # rename a single axis
            result = obj.rename(**{axis: func})
            expected = obj.copy()
            setattr(expected, axis, list("abcd"))
            tm.assert_equal(result, expected)

    def test_get_numeric_data(self, frame_or_series):
        """Check ``_get_numeric_data`` and ``_get_bool_data`` on float and bool data."""
        n = 4
        kwargs = {
            frame_or_series._get_axis_name(i): list(range(n))
            for i in range(frame_or_series._AXIS_LEN)
        }

        # get the numeric data
        o = construct(frame_or_series, n, **kwargs)
        result = o._get_numeric_data()
        tm.assert_equal(result, o)

        # non-inclusion
        result = o._get_bool_data()
        expected = construct(frame_or_series, n, value="empty", **kwargs)
        if isinstance(o, DataFrame):
            # preserve columns dtype
            expected.columns = o.columns[:0]
        # https://github.com/pandas-dev/pandas/issues/50862
        tm.assert_equal(result.reset_index(drop=True), expected)

        # get the bool data
        arr = np.array([True, True, False, True])
        o = construct(frame_or_series, n, value=arr, **kwargs)
        result = o._get_numeric_data()
        tm.assert_equal(result, o)

    def test_nonzero(self, frame_or_series):
        """Truth-testing an object raises ValueError whatever its contents."""
        # GH 4633
        # look at the boolean/nonzero behavior for objects
        obj = construct(frame_or_series, shape=4)
        msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"
        with pytest.raises(ValueError, match=msg):
            bool(obj == 0)
        with pytest.raises(ValueError, match=msg):
            bool(obj == 1)
        with pytest.raises(ValueError, match=msg):
            bool(obj)

        obj = construct(frame_or_series, shape=4, value=1)
        with pytest.raises(ValueError, match=msg):
            bool(obj == 0)
        with pytest.raises(ValueError, match=msg):
            bool(obj == 1)
        with pytest.raises(ValueError, match=msg):
            bool(obj)

        obj = construct(frame_or_series, shape=4, value=np.nan)
        with pytest.raises(ValueError, match=msg):
            bool(obj == 0)
        with pytest.raises(ValueError, match=msg):
            bool(obj == 1)
        with pytest.raises(ValueError, match=msg):
            bool(obj)

        # empty
        obj = construct(frame_or_series, shape=0)
        with pytest.raises(ValueError, match=msg):
            bool(obj)

        # invalid behaviors

        obj1 = construct(frame_or_series, shape=4, value=1)
        obj2 = construct(frame_or_series, shape=4, value=1)

        with pytest.raises(ValueError, match=msg):
            if obj1:
                pass

        # The bare expressions below are intentional: evaluating them is what
        # triggers the truth-value check.
        with pytest.raises(ValueError, match=msg):
            obj1 and obj2
        with pytest.raises(ValueError, match=msg):
            obj1 or obj2
        with pytest.raises(ValueError, match=msg):
            not obj1

    def test_frame_or_series_compound_dtypes(self, frame_or_series):
        """A compound dtype raises NotImplementedError; simple dtypes construct."""
        # see gh-5191
        # Compound dtypes should raise NotImplementedError.

        def f(dtype):
            return construct(frame_or_series, shape=3, value=1, dtype=dtype)

        msg = (
            "compound dtypes are not implemented "
            f"in the {frame_or_series.__name__} constructor"
        )

        with pytest.raises(NotImplementedError, match=msg):
            f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])

        # these work (though results may be unexpected)
        f("int64")
        f("float64")
        f("M8[ns]")

    def test_metadata_propagation(self, frame_or_series):
        """Check metadata equivalence of results for the operations exercised below."""
        # check that the metadata matches up on the resulting ops

        o = construct(frame_or_series, shape=3)
        o.name = "foo"
        o2 = construct(frame_or_series, shape=3)
        o2.name = "bar"

        # ----------
        # preserving
        # ----------

        # simple ops with scalars
        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
            result = getattr(o, op)(1)
            tm.assert_metadata_equivalent(o, result)

        # ops with like
        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
            result = getattr(o, op)(o)
            tm.assert_metadata_equivalent(o, result)

        # simple boolean
        for op in ["__eq__", "__le__", "__ge__"]:
            v1 = getattr(o, op)(o)
            tm.assert_metadata_equivalent(o, v1)
            tm.assert_metadata_equivalent(o, v1 & v1)
            tm.assert_metadata_equivalent(o, v1 | v1)

        # combine_first
        result = o.combine_first(o2)
        tm.assert_metadata_equivalent(o, result)

        # ---------------------------
        # non-preserving (by default)
        # ---------------------------

        # add non-like
        result = o + o2
        tm.assert_metadata_equivalent(result)

        # simple boolean
        for op in ["__eq__", "__le__", "__ge__"]:
            # this is a name matching op
            v1 = getattr(o, op)(o)
            v2 = getattr(o, op)(o2)
            tm.assert_metadata_equivalent(v2)
            tm.assert_metadata_equivalent(v1 & v2)
            tm.assert_metadata_equivalent(v1 | v2)

    def test_size_compat(self, frame_or_series):
        """``size`` equals the product of ``shape`` (GH8846)."""
        # GH8846
        # size property should be defined

        o = construct(frame_or_series, shape=10)
        assert o.size == np.prod(o.shape)
        assert o.size == 10 ** len(o.axes)

    def test_split_compat(self, frame_or_series):
        """``np.array_split`` yields the requested number of pieces and warns."""
        # xref GH8846
        o = construct(frame_or_series, shape=10)
        with tm.assert_produces_warning(
            FutureWarning, match=".swapaxes' is deprecated", check_stacklevel=False
        ):
            assert len(np.array_split(o, 5)) == 5
            assert len(np.array_split(o, 2)) == 2

    # See gh-12301
    def test_stat_unexpected_keyword(self, frame_or_series):
        """Reductions reject an unknown keyword argument with TypeError."""
        obj = construct(frame_or_series, 5)
        starwars = "Star Wars"
        errmsg = "unexpected keyword"

        with pytest.raises(TypeError, match=errmsg):
            obj.max(epic=starwars)  # stat_function
        with pytest.raises(TypeError, match=errmsg):
            obj.var(epic=starwars)  # stat_function_ddof
        with pytest.raises(TypeError, match=errmsg):
            obj.sum(epic=starwars)  # cum_function
        with pytest.raises(TypeError, match=errmsg):
            obj.any(epic=starwars)  # logical_function

    @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
    def test_api_compat(self, func, frame_or_series):
        """Bound methods expose matching ``__name__`` and ``__qualname__``."""
        # GH 12021
        # compat for __name__, __qualname__

        obj = construct(frame_or_series, 5)
        f = getattr(obj, func)
        assert f.__name__ == func
        assert f.__qualname__.endswith(func)

    def test_stat_non_defaults_args(self, frame_or_series):
        """Passing ``out`` to a reduction raises ValueError."""
        obj = construct(frame_or_series, 5)
        out = np.array([0])
        errmsg = "the 'out' parameter is not supported"

        with pytest.raises(ValueError, match=errmsg):
            obj.max(out=out)  # stat_function
        with pytest.raises(ValueError, match=errmsg):
            obj.var(out=out)  # stat_function_ddof
        with pytest.raises(ValueError, match=errmsg):
            obj.sum(out=out)  # cum_function
        with pytest.raises(ValueError, match=errmsg):
            obj.any(out=out)  # logical_function

    def test_truncate_out_of_bounds(self, frame_or_series):
        """``truncate`` with bounds outside the index returns the object unchanged."""
        # GH11382

        # small
        shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1))
        small = construct(frame_or_series, shape, dtype="int8", value=1)
        tm.assert_equal(small.truncate(), small)
        tm.assert_equal(small.truncate(before=0, after=3e3), small)
        tm.assert_equal(small.truncate(before=-1, after=2e3), small)

        # big
        shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1))
        big = construct(frame_or_series, shape, dtype="int8", value=1)
        tm.assert_equal(big.truncate(), big)
        tm.assert_equal(big.truncate(before=0, after=3e6), big)
        tm.assert_equal(big.truncate(before=-1, after=2e6), big)

    @pytest.mark.parametrize(
        "func",
        [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)],
    )
    @pytest.mark.parametrize("shape", [0, 1, 2])
    def test_copy_and_deepcopy(self, frame_or_series, shape, func):
        """Every copy flavour returns a distinct but equal object (GH 15444)."""
        # GH 15444
        obj = construct(frame_or_series, shape)
        obj_copy = func(obj)
        assert obj_copy is not obj
        tm.assert_equal(obj_copy, obj)

    def test_data_deprecated(self, frame_or_series):
        """Accessing ``_data`` warns and returns the same object as ``_mgr``."""
        obj = frame_or_series()
        msg = "(Series|DataFrame)._data is deprecated"
        with tm.assert_produces_warning(DeprecationWarning, match=msg):
            mgr = obj._data
        assert mgr is obj._mgr
|  | ||||
|  | ||||
class TestNDFrame:
    """Squeeze, transpose, take, axis-helper, flags and ``bool`` checks."""

    # tests that don't fit elsewhere

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_squeeze_series_noop(self, ser):
        """Squeezing a ten-element Series returns an equal Series."""
        # noop
        tm.assert_series_equal(ser.squeeze(), ser)

    def test_squeeze_frame_noop(self):
        """Squeezing a 2x2 frame returns an equal frame."""
        # noop
        df = DataFrame(np.eye(2))
        tm.assert_frame_equal(df.squeeze(), df)

    def test_squeeze_frame_reindex(self):
        """Squeezing a single-column frame yields that column as a Series."""
        # squeezing
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        ).reindex(columns=["A"])
        tm.assert_series_equal(df.squeeze(), df["A"])

    def test_squeeze_0_len_dim(self):
        """Squeeze works on objects that have a zero-length dimension."""
        # don't fail with 0 length dimensions GH11229 & GH8999
        empty_series = Series([], name="five", dtype=np.float64)
        empty_frame = DataFrame([empty_series])
        tm.assert_series_equal(empty_series, empty_series.squeeze())
        tm.assert_series_equal(empty_series, empty_frame.squeeze())

    def test_squeeze_axis(self):
        """``squeeze(axis=...)`` on a 1x1 frame, including invalid axis values."""
        # axis argument
        df = DataFrame(
            np.random.default_rng(2).standard_normal((1, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=1, freq="B"),
        ).iloc[:, :1]
        assert df.shape == (1, 1)
        tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
        tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
        tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
        tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
        # With no axis given, the 1x1 frame collapses to its single element.
        assert df.squeeze() == df.iloc[0, 0]
        msg = "No axis named 2 for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.squeeze(axis=2)
        msg = "No axis named x for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.squeeze(axis="x")

    def test_squeeze_axis_len_3(self):
        """Squeezing axis 0 of a three-row frame leaves it unchanged."""
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=3, freq="B"),
        )
        tm.assert_frame_equal(df.squeeze(axis=0), df)

    def test_numpy_squeeze(self):
        """``np.squeeze`` on a Series and on a single-column frame."""
        s = Series(range(2), dtype=np.float64)
        tm.assert_series_equal(np.squeeze(s), s)

        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        ).reindex(columns=["A"])
        tm.assert_series_equal(np.squeeze(df), df["A"])

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_transpose_series(self, ser):
        """Transposing a Series returns an equal Series."""
        # calls implementation in pandas/core/base.py
        tm.assert_series_equal(ser.transpose(), ser)

    def test_transpose_frame(self):
        """Transposing a frame twice round-trips to the original."""
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        tm.assert_frame_equal(df.transpose().transpose(), df)

    def test_numpy_transpose(self, frame_or_series):
        """``np.transpose`` round-trips and rejects the ``axes`` argument."""
        obj = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        obj = tm.get_obj(obj, frame_or_series)

        if frame_or_series is Series:
            # 1D -> np.transpose is no-op
            tm.assert_series_equal(np.transpose(obj), obj)

        # round-trip preserved
        tm.assert_equal(np.transpose(np.transpose(obj)), obj)

        msg = "the 'axes' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.transpose(obj, axes=1)

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_take_series(self, ser):
        """``Series.take`` matches taking from the values and index separately."""
        indices = [1, 5, -2, 6, 3, -1]
        out = ser.take(indices)
        expected = Series(
            data=ser.values.take(indices),
            index=ser.index.take(indices),
            dtype=ser.dtype,
        )
        tm.assert_series_equal(out, expected)

    def test_take_frame(self):
        """``DataFrame.take`` matches taking rows from the values and index."""
        indices = [1, 5, -2, 6, 3, -1]
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        out = df.take(indices)
        expected = DataFrame(
            data=df.values.take(indices, axis=0),
            index=df.index.take(indices),
            columns=df.columns,
        )
        tm.assert_frame_equal(out, expected)

    def test_take_invalid_kwargs(self, frame_or_series):
        """``take`` rejects unknown keywords and the ``out``/``mode`` arguments."""
        indices = [-3, 2, 0, 1]

        obj = DataFrame(range(5))
        obj = tm.get_obj(obj, frame_or_series)

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            obj.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, mode="clip")

    def test_axis_classmethods(self, frame_or_series):
        """Axis helpers give the same answer on an instance as on the class."""
        box = frame_or_series
        obj = box(dtype=object)
        values = box._AXIS_TO_AXIS_NUMBER.keys()
        for v in values:
            assert obj._get_axis_number(v) == box._get_axis_number(v)
            assert obj._get_axis_name(v) == box._get_axis_name(v)
            assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)

    def test_flags_identity(self, frame_or_series):
        """``flags`` is stable per object and is not shared with a copy."""
        obj = Series([1, 2])
        if frame_or_series is DataFrame:
            obj = obj.to_frame()

        assert obj.flags is obj.flags
        obj2 = obj.copy()
        assert obj2.flags is not obj.flags

    def test_bool_dep(self) -> None:
        """``DataFrame.bool`` emits the expected FutureWarning (GH-51749)."""
        # GH-51749
        msg_warn = (
            "DataFrame.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            DataFrame({"col": [False]}).bool()
| @ -0,0 +1,336 @@ | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.missing import array_equivalent | ||||
|  | ||||
| import pandas as pd | ||||
|  | ||||
|  | ||||
| # Fixtures | ||||
| # ======== | ||||
@pytest.fixture
def df():
    """Three-row DataFrame whose columns are 'L1', 'L2' and 'L3'."""
    columns = {
        "L1": [1, 2, 3],
        "L2": [11, 12, 13],
        "L3": ["A", "B", "C"],
    }
    return pd.DataFrame(columns)
|  | ||||
|  | ||||
@pytest.fixture(params=[[], ["L1"], ["L1", "L2"], ["L1", "L2", "L3"]])
def df_levels(request, df):
    """``df`` with the parametrized subset of 'L1', 'L2', 'L3' moved into the index."""
    index_keys = request.param
    # An empty parameter leaves every name as a plain column.
    return df.set_index(index_keys) if index_keys else df
|  | ||||
|  | ||||
@pytest.fixture
def df_ambig(df):
    """DataFrame with index levels 'L1' and 'L2' and column labels 'L1' and 'L3'."""
    indexed = df.set_index(["L1", "L2"])
    # Re-add 'L1' as a column so the name is both an index level and a label.
    indexed["L1"] = indexed["L3"]
    return indexed
|  | ||||
|  | ||||
@pytest.fixture
def df_duplabels(df):
    """DataFrame with index level 'L1' and column labels 'L2', 'L3' and 'L2' again."""
    indexed = df.set_index(["L1"])
    # Appending the 'L2' column a second time creates the duplicate label.
    return pd.concat([indexed, indexed["L2"]], axis=1)
|  | ||||
|  | ||||
| # Test is label/level reference | ||||
| # ============================= | ||||
def get_labels_levels(df_levels):
    """Return ``(column labels, named index levels)`` of ``df_levels`` as two lists."""
    index_names = df_levels.index.names
    # Unnamed index levels (name is None) are not counted as levels.
    named_levels = list(filter(lambda name: name is not None, index_names))
    return list(df_levels.columns), named_levels
|  | ||||
|  | ||||
def assert_label_reference(frame, labels, axis):
    """Assert every key in ``labels`` is a label, not a level, reference on ``axis``."""
    for key in labels:
        is_label = frame._is_label_reference(key, axis=axis)
        assert is_label
        is_level = frame._is_level_reference(key, axis=axis)
        assert not is_level
        is_either = frame._is_label_or_level_reference(key, axis=axis)
        assert is_either
|  | ||||
|  | ||||
def assert_level_reference(frame, levels, axis):
    """Assert every key in ``levels`` is a level, not a label, reference on ``axis``."""
    for key in levels:
        is_level = frame._is_level_reference(key, axis=axis)
        assert is_level
        is_label = frame._is_label_reference(key, axis=axis)
        assert not is_label
        is_either = frame._is_label_or_level_reference(key, axis=axis)
        assert is_either
|  | ||||
|  | ||||
| # DataFrame | ||||
| # --------- | ||||
def test_is_level_or_label_reference_df_simple(df_levels, axis):
    """Column names are label references and index names are level references."""
    axis_no = df_levels._get_axis_number(axis)

    # Collect the expected names before any transpose, while labels are
    # still columns and levels are still index names.
    labels, levels = get_labels_levels(df_levels)

    # For axis 1 the same checks run against the transposed frame.
    frame = df_levels.T if axis_no == 1 else df_levels

    assert_level_reference(frame, levels, axis=axis_no)
    assert_label_reference(frame, labels, axis=axis_no)
|  | ||||
|  | ||||
def test_is_level_reference_df_ambig(df_ambig, axis):
    """A name that is both a level and a label is classified as a label."""
    axis_no = df_ambig._get_axis_number(axis)

    # For axis 1 the same checks run against the transposed frame.
    frame = df_ambig.T if axis_no == 1 else df_ambig

    # 'L1' names both an on-axis level and an off-axis label, so it must
    # resolve to the label rather than the level.
    assert_label_reference(frame, ["L1"], axis=axis_no)

    # 'L2' is only an on-axis level, so it is a level reference.
    assert_level_reference(frame, ["L2"], axis=axis_no)

    # 'L3' is only an off-axis label, so it is not a level reference.
    assert_label_reference(frame, ["L3"], axis=axis_no)
|  | ||||
|  | ||||
| # Series | ||||
| # ------ | ||||
def test_is_level_reference_series_simple_axis0(df):
    """Index names of a Series are level references; other names are not."""
    # Series indexed by 'L1' only
    single = df.set_index("L1")["L2"]
    assert_level_reference(single, ["L1"], axis=0)
    assert not single._is_level_reference("L2")

    # Series indexed by both 'L1' and 'L2'
    multi = df.set_index(["L1", "L2"])["L3"]
    assert_level_reference(multi, ["L1", "L2"], axis=0)
    assert not multi._is_level_reference("L3")
|  | ||||
|  | ||||
def test_is_level_reference_series_axis1_error(df):
    """Asking a Series about axis 1 raises ValueError."""
    # Series indexed by 'L1'
    ser = df.set_index("L1")["L2"]

    expected = "No axis named 1"
    with pytest.raises(ValueError, match=expected):
        ser._is_level_reference("L1", axis=1)
|  | ||||
|  | ||||
| # Test _check_label_or_level_ambiguity_df | ||||
| # ======================================= | ||||
|  | ||||
|  | ||||
| # DataFrame | ||||
| # --------- | ||||
def test_check_label_or_level_ambiguity_df(df_ambig, axis):
    """Only a name that is both a level and a label is reported as ambiguous."""
    axis_no = df_ambig._get_axis_number(axis)

    # For axis 1 the frame is transposed, which swaps the wording of the error.
    if axis_no == 1:
        frame = df_ambig.T
        msg = "'L1' is both a column level and an index label"
    else:
        frame = df_ambig
        msg = "'L1' is both an index level and a column label"

    # 'L1' is both an on-axis level and an off-axis label: ambiguous.
    with pytest.raises(ValueError, match=msg):
        frame._check_label_or_level_ambiguity("L1", axis=axis_no)

    # 'L2' is only an on-axis level: not ambiguous.
    frame._check_label_or_level_ambiguity("L2", axis=axis_no)

    # 'L3' is only an off-axis label: not ambiguous.
    assert not frame._check_label_or_level_ambiguity("L3", axis=axis_no)
|  | ||||
|  | ||||
| # Series | ||||
| # ------ | ||||
def test_check_label_or_level_ambiguity_series(df):
    """The ambiguity check passes for every name on a Series."""
    # A Series has no columns, so a reference can never be ambiguous.

    # Series indexed by 'L1'
    ser = df.set_index("L1")["L2"]
    for key in ("L1", "L2"):
        ser._check_label_or_level_ambiguity(key, axis=0)

    # Series indexed by 'L1' and 'L2'
    ser = df.set_index(["L1", "L2"])["L3"]
    for key in ("L1", "L2", "L3"):
        ser._check_label_or_level_ambiguity(key, axis=0)
|  | ||||
|  | ||||
def test_check_label_or_level_ambiguity_series_axis1_error(df):
    """The ambiguity check on a Series raises ValueError for axis 1."""
    # Series indexed by 'L1'
    ser = df.set_index("L1")["L2"]

    expected = "No axis named 1"
    with pytest.raises(ValueError, match=expected):
        ser._check_label_or_level_ambiguity("L1", axis=1)
|  | ||||
|  | ||||
| # Test _get_label_or_level_values | ||||
| # =============================== | ||||
def assert_label_values(frame, labels, axis):
    """Assert ``_get_label_or_level_values`` matches direct lookup for each label."""
    axis_no = frame._get_axis_number(axis)
    for key in labels:
        # Axis 0 looks the label up among the columns, otherwise among the rows.
        selected = frame[key] if axis_no == 0 else frame.loc[key]
        expected = selected._values

        actual = frame._get_label_or_level_values(key, axis=axis_no)
        assert array_equivalent(expected, actual)
|  | ||||
|  | ||||
def assert_level_values(frame, levels, axis):
    """Assert ``_get_label_or_level_values`` matches the axis index for each level."""
    axis_no = frame._get_axis_number(axis)
    for name in levels:
        # Levels live on the index for axis 0 and on the columns otherwise.
        axis_index = frame.index if axis_no == 0 else frame.columns
        expected = axis_index.get_level_values(level=name)._values

        actual = frame._get_label_or_level_values(name, axis=axis_no)
        assert array_equivalent(expected, actual)
|  | ||||
|  | ||||
| # DataFrame | ||||
| # --------- | ||||
def test_get_label_or_level_values_df_simple(df_levels, axis):
    """Values can be fetched for every column label and every index level."""
    # Collect the expected names before any transpose.
    labels, levels = get_labels_levels(df_levels)

    axis_no = df_levels._get_axis_number(axis)
    # For axis 1 the same checks run against the transposed frame.
    frame = df_levels.T if axis_no == 1 else df_levels

    assert_label_values(frame, labels, axis=axis_no)
    assert_level_values(frame, levels, axis=axis_no)
|  | ||||
|  | ||||
def test_get_label_or_level_values_df_ambig(df_ambig, axis):
    """Unambiguous names on the ambiguous frame still resolve to their values."""
    axis_no = df_ambig._get_axis_number(axis)
    # For axis 1 the same checks run against the transposed frame.
    frame = df_ambig.T if axis_no == 1 else df_ambig

    # 'L2' is only an on-axis level: not ambiguous.
    assert_level_values(frame, ["L2"], axis=axis_no)

    # 'L3' is only an off-axis label: not ambiguous.
    assert_label_values(frame, ["L3"], axis=axis_no)
|  | ||||
|  | ||||
def test_get_label_or_level_values_df_duplabels(df_duplabels, axis):
    """A duplicated label raises ValueError; unique names still resolve."""
    axis_no = df_duplabels._get_axis_number(axis)
    # For axis 1 the same checks run against the transposed frame.
    frame = df_duplabels.T if axis_no == 1 else df_duplabels

    # 'L1' is an unambiguous level.
    assert_level_values(frame, ["L1"], axis=axis_no)

    # 'L3' is a unique label.
    assert_label_values(frame, ["L3"], axis=axis_no)

    # 'L2' appears twice; the message names whichever axis holds the labels.
    expected_msg = (
        "The column label 'L2' is not unique"
        if axis_no == 0
        else "The index label 'L2' is not unique"
    )
    with pytest.raises(ValueError, match=expected_msg):
        assert_label_values(frame, ["L2"], axis=axis_no)
|  | ||||
|  | ||||
| # Series | ||||
| # ------ | ||||
def test_get_label_or_level_values_series_axis0(df):
    """Level values can be fetched from a Series with one or two index levels."""
    # Series indexed by 'L1'
    single = df.set_index("L1")["L2"]
    assert_level_values(single, ["L1"], axis=0)

    # Series indexed by 'L1' and 'L2'
    multi = df.set_index(["L1", "L2"])["L3"]
    assert_level_values(multi, ["L1", "L2"], axis=0)
|  | ||||
|  | ||||
| def test_get_label_or_level_values_series_axis1_error(df): | ||||
|     # Make series with L1 as index | ||||
|     s = df.set_index("L1").L2 | ||||
|  | ||||
|     with pytest.raises(ValueError, match="No axis named 1"): | ||||
|         s._get_label_or_level_values("L1", axis=1) | ||||
|  | ||||
|  | ||||
| # Test _drop_labels_or_levels | ||||
| # =========================== | ||||
| def assert_labels_dropped(frame, labels, axis): | ||||
|     axis = frame._get_axis_number(axis) | ||||
|     for label in labels: | ||||
|         df_dropped = frame._drop_labels_or_levels(label, axis=axis) | ||||
|  | ||||
|         if axis == 0: | ||||
|             assert label in frame.columns | ||||
|             assert label not in df_dropped.columns | ||||
|         else: | ||||
|             assert label in frame.index | ||||
|             assert label not in df_dropped.index | ||||
|  | ||||
|  | ||||
| def assert_levels_dropped(frame, levels, axis): | ||||
|     axis = frame._get_axis_number(axis) | ||||
|     for level in levels: | ||||
|         df_dropped = frame._drop_labels_or_levels(level, axis=axis) | ||||
|  | ||||
|         if axis == 0: | ||||
|             assert level in frame.index.names | ||||
|             assert level not in df_dropped.index.names | ||||
|         else: | ||||
|             assert level in frame.columns.names | ||||
|             assert level not in df_dropped.columns.names | ||||
|  | ||||
|  | ||||
| # DataFrame | ||||
| # --------- | ||||
| def test_drop_labels_or_levels_df(df_levels, axis): | ||||
|     # Compute expected labels and levels | ||||
|     expected_labels, expected_levels = get_labels_levels(df_levels) | ||||
|  | ||||
|     axis = df_levels._get_axis_number(axis) | ||||
|     # Transpose frame if axis == 1 | ||||
|     if axis == 1: | ||||
|         df_levels = df_levels.T | ||||
|  | ||||
|     # Perform checks | ||||
|     assert_labels_dropped(df_levels, expected_labels, axis=axis) | ||||
|     assert_levels_dropped(df_levels, expected_levels, axis=axis) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="not valid labels or levels"): | ||||
|         df_levels._drop_labels_or_levels("L4", axis=axis) | ||||
|  | ||||
|  | ||||
| # Series | ||||
| # ------ | ||||
| def test_drop_labels_or_levels_series(df): | ||||
|     # Make series with L1 as index | ||||
|     s = df.set_index("L1").L2 | ||||
|     assert_levels_dropped(s, ["L1"], axis=0) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="not valid labels or levels"): | ||||
|         s._drop_labels_or_levels("L4", axis=0) | ||||
|  | ||||
|     # Make series with L1 and L2 as index | ||||
|     s = df.set_index(["L1", "L2"]).L3 | ||||
|     assert_levels_dropped(s, ["L1", "L2"], axis=0) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="not valid labels or levels"): | ||||
|         s._drop_labels_or_levels("L4", axis=0) | ||||
							
								
								
									
										159
									
								
								lib/python3.11/site-packages/pandas/tests/generic/test_series.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										159
									
								
								lib/python3.11/site-packages/pandas/tests/generic/test_series.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,159 @@ | ||||
| from operator import methodcaller | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| class TestSeries: | ||||
|     @pytest.mark.parametrize("func", ["rename_axis", "_set_axis_name"]) | ||||
|     def test_set_axis_name_mi(self, func): | ||||
|         ser = Series( | ||||
|             [11, 21, 31], | ||||
|             index=MultiIndex.from_tuples( | ||||
|                 [("A", x) for x in ["a", "B", "c"]], names=["l1", "l2"] | ||||
|             ), | ||||
|         ) | ||||
|  | ||||
|         result = methodcaller(func, ["L1", "L2"])(ser) | ||||
|         assert ser.index.name is None | ||||
|         assert ser.index.names == ["l1", "l2"] | ||||
|         assert result.index.name is None | ||||
|         assert result.index.names, ["L1", "L2"] | ||||
|  | ||||
|     def test_set_axis_name_raises(self): | ||||
|         ser = Series([1]) | ||||
|         msg = "No axis named 1 for object type Series" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             ser._set_axis_name(name="a", axis=1) | ||||
|  | ||||
|     def test_get_bool_data_preserve_dtype(self): | ||||
|         ser = Series([True, False, True]) | ||||
|         result = ser._get_bool_data() | ||||
|         tm.assert_series_equal(result, ser) | ||||
|  | ||||
|     def test_nonzero_single_element(self): | ||||
|         # allow single item via bool method | ||||
|         msg_warn = ( | ||||
|             "Series.bool is now deprecated and will be removed " | ||||
|             "in future version of pandas" | ||||
|         ) | ||||
|         ser = Series([True]) | ||||
|         ser1 = Series([False]) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg_warn): | ||||
|             assert ser.bool() | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg_warn): | ||||
|             assert not ser1.bool() | ||||
|  | ||||
|     @pytest.mark.parametrize("data", [np.nan, pd.NaT, True, False]) | ||||
|     def test_nonzero_single_element_raise_1(self, data): | ||||
|         # single item nan to raise | ||||
|         series = Series([data]) | ||||
|  | ||||
|         msg = "The truth value of a Series is ambiguous" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             bool(series) | ||||
|  | ||||
|     @pytest.mark.parametrize("data", [np.nan, pd.NaT]) | ||||
|     def test_nonzero_single_element_raise_2(self, data): | ||||
|         msg_warn = ( | ||||
|             "Series.bool is now deprecated and will be removed " | ||||
|             "in future version of pandas" | ||||
|         ) | ||||
|         msg_err = "bool cannot act on a non-boolean single element Series" | ||||
|         series = Series([data]) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg_warn): | ||||
|             with pytest.raises(ValueError, match=msg_err): | ||||
|                 series.bool() | ||||
|  | ||||
|     @pytest.mark.parametrize("data", [(True, True), (False, False)]) | ||||
|     def test_nonzero_multiple_element_raise(self, data): | ||||
|         # multiple bool are still an error | ||||
|         msg_warn = ( | ||||
|             "Series.bool is now deprecated and will be removed " | ||||
|             "in future version of pandas" | ||||
|         ) | ||||
|         msg_err = "The truth value of a Series is ambiguous" | ||||
|         series = Series([data]) | ||||
|         with pytest.raises(ValueError, match=msg_err): | ||||
|             bool(series) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg_warn): | ||||
|             with pytest.raises(ValueError, match=msg_err): | ||||
|                 series.bool() | ||||
|  | ||||
|     @pytest.mark.parametrize("data", [1, 0, "a", 0.0]) | ||||
|     def test_nonbool_single_element_raise(self, data): | ||||
|         # single non-bool are an error | ||||
|         msg_warn = ( | ||||
|             "Series.bool is now deprecated and will be removed " | ||||
|             "in future version of pandas" | ||||
|         ) | ||||
|         msg_err1 = "The truth value of a Series is ambiguous" | ||||
|         msg_err2 = "bool cannot act on a non-boolean single element Series" | ||||
|         series = Series([data]) | ||||
|         with pytest.raises(ValueError, match=msg_err1): | ||||
|             bool(series) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg_warn): | ||||
|             with pytest.raises(ValueError, match=msg_err2): | ||||
|                 series.bool() | ||||
|  | ||||
|     def test_metadata_propagation_indiv_resample(self): | ||||
|         # resample | ||||
|         ts = Series( | ||||
|             np.random.default_rng(2).random(1000), | ||||
|             index=date_range("20130101", periods=1000, freq="s"), | ||||
|             name="foo", | ||||
|         ) | ||||
|         result = ts.resample("1min").mean() | ||||
|         tm.assert_metadata_equivalent(ts, result) | ||||
|  | ||||
|         result = ts.resample("1min").min() | ||||
|         tm.assert_metadata_equivalent(ts, result) | ||||
|  | ||||
|         result = ts.resample("1min").apply(lambda x: x.sum()) | ||||
|         tm.assert_metadata_equivalent(ts, result) | ||||
|  | ||||
|     def test_metadata_propagation_indiv(self, monkeypatch): | ||||
|         # check that the metadata matches up on the resulting ops | ||||
|  | ||||
|         ser = Series(range(3), range(3)) | ||||
|         ser.name = "foo" | ||||
|         ser2 = Series(range(3), range(3)) | ||||
|         ser2.name = "bar" | ||||
|  | ||||
|         result = ser.T | ||||
|         tm.assert_metadata_equivalent(ser, result) | ||||
|  | ||||
|         def finalize(self, other, method=None, **kwargs): | ||||
|             for name in self._metadata: | ||||
|                 if method == "concat" and name == "filename": | ||||
|                     value = "+".join( | ||||
|                         [ | ||||
|                             getattr(obj, name) | ||||
|                             for obj in other.objs | ||||
|                             if getattr(obj, name, None) | ||||
|                         ] | ||||
|                     ) | ||||
|                     object.__setattr__(self, name, value) | ||||
|                 else: | ||||
|                     object.__setattr__(self, name, getattr(other, name, None)) | ||||
|  | ||||
|             return self | ||||
|  | ||||
|         with monkeypatch.context() as m: | ||||
|             m.setattr(Series, "_metadata", ["name", "filename"]) | ||||
|             m.setattr(Series, "__finalize__", finalize) | ||||
|  | ||||
|             ser.filename = "foo" | ||||
|             ser2.filename = "bar" | ||||
|  | ||||
|             result = pd.concat([ser, ser2]) | ||||
|             assert result.filename == "foo+bar" | ||||
|             assert result.name is None | ||||
| @ -0,0 +1,144 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     Categorical, | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     StringDtype, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.util.version import Version | ||||
|  | ||||
| xarray = pytest.importorskip("xarray") | ||||
|  | ||||
|  | ||||
| class TestDataFrameToXArray: | ||||
|     @pytest.fixture | ||||
|     def df(self): | ||||
|         return DataFrame( | ||||
|             { | ||||
|                 "a": list("abcd"), | ||||
|                 "b": list(range(1, 5)), | ||||
|                 "c": np.arange(3, 7).astype("u1"), | ||||
|                 "d": np.arange(4.0, 8.0, dtype="float64"), | ||||
|                 "e": [True, False, True, False], | ||||
|                 "f": Categorical(list("abcd")), | ||||
|                 "g": date_range("20130101", periods=4), | ||||
|                 "h": date_range("20130101", periods=4, tz="US/Eastern"), | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|     def test_to_xarray_index_types(self, index_flat, df, using_infer_string): | ||||
|         index = index_flat | ||||
|         # MultiIndex is tested in test_to_xarray_with_multiindex | ||||
|         if len(index) == 0: | ||||
|             pytest.skip("Test doesn't make sense for empty index") | ||||
|  | ||||
|         from xarray import Dataset | ||||
|  | ||||
|         df.index = index[:4] | ||||
|         df.index.name = "foo" | ||||
|         df.columns.name = "bar" | ||||
|         result = df.to_xarray() | ||||
|         assert result.sizes["foo"] == 4 | ||||
|         assert len(result.coords) == 1 | ||||
|         assert len(result.data_vars) == 8 | ||||
|         tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) | ||||
|         assert isinstance(result, Dataset) | ||||
|  | ||||
|         # idempotency | ||||
|         # datetimes w/tz are preserved | ||||
|         # column names are lost | ||||
|         expected = df.copy() | ||||
|         expected["f"] = expected["f"].astype( | ||||
|             object if not using_infer_string else "str" | ||||
|         ) | ||||
|         expected.columns.name = None | ||||
|         tm.assert_frame_equal(result.to_dataframe(), expected) | ||||
|  | ||||
|     def test_to_xarray_empty(self, df): | ||||
|         from xarray import Dataset | ||||
|  | ||||
|         df.index.name = "foo" | ||||
|         result = df[0:0].to_xarray() | ||||
|         assert result.sizes["foo"] == 0 | ||||
|         assert isinstance(result, Dataset) | ||||
|  | ||||
|     def test_to_xarray_with_multiindex(self, df, using_infer_string): | ||||
|         from xarray import Dataset | ||||
|  | ||||
|         # MultiIndex | ||||
|         df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"]) | ||||
|         result = df.to_xarray() | ||||
|         assert result.sizes["one"] == 1 | ||||
|         assert result.sizes["two"] == 4 | ||||
|         assert len(result.coords) == 2 | ||||
|         assert len(result.data_vars) == 8 | ||||
|         tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) | ||||
|         assert isinstance(result, Dataset) | ||||
|  | ||||
|         result = result.to_dataframe() | ||||
|         expected = df.copy() | ||||
|         expected["f"] = expected["f"].astype( | ||||
|             object if not using_infer_string else "str" | ||||
|         ) | ||||
|         expected.columns.name = None | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestSeriesToXArray: | ||||
|     def test_to_xarray_index_types(self, index_flat, request): | ||||
|         index = index_flat | ||||
|         if ( | ||||
|             isinstance(index.dtype, StringDtype) | ||||
|             and index.dtype.storage == "pyarrow" | ||||
|             and Version(xarray.__version__) > Version("2024.9.0") | ||||
|             and Version(xarray.__version__) < Version("2025.6.0") | ||||
|         ): | ||||
|             request.applymarker( | ||||
|                 pytest.mark.xfail( | ||||
|                     reason="xarray calling reshape of ArrowExtensionArray", | ||||
|                     raises=NotImplementedError, | ||||
|                 ) | ||||
|             ) | ||||
|         # MultiIndex is tested in test_to_xarray_with_multiindex | ||||
|  | ||||
|         from xarray import DataArray | ||||
|  | ||||
|         ser = Series(range(len(index)), index=index, dtype="int64") | ||||
|         ser.index.name = "foo" | ||||
|         result = ser.to_xarray() | ||||
|         repr(result) | ||||
|         assert len(result) == len(index) | ||||
|         assert len(result.coords) == 1 | ||||
|         tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) | ||||
|         assert isinstance(result, DataArray) | ||||
|  | ||||
|         # idempotency | ||||
|         tm.assert_series_equal(result.to_series(), ser) | ||||
|  | ||||
|     def test_to_xarray_empty(self): | ||||
|         from xarray import DataArray | ||||
|  | ||||
|         ser = Series([], dtype=object) | ||||
|         ser.index.name = "foo" | ||||
|         result = ser.to_xarray() | ||||
|         assert len(result) == 0 | ||||
|         assert len(result.coords) == 1 | ||||
|         tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) | ||||
|         assert isinstance(result, DataArray) | ||||
|  | ||||
|     def test_to_xarray_with_multiindex(self): | ||||
|         from xarray import DataArray | ||||
|  | ||||
|         mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"]) | ||||
|         ser = Series(range(6), dtype="int64", index=mi) | ||||
|         result = ser.to_xarray() | ||||
|         assert len(result) == 2 | ||||
|         tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) | ||||
|         assert isinstance(result, DataArray) | ||||
|         res = result.to_series() | ||||
|         tm.assert_series_equal(res, ser) | ||||
		Reference in New Issue
	
	Block a user