done
| @@ -0,0 +1,7 @@ | ||||
| from pandas.core.groupby.base import transformation_kernels | ||||
|  | ||||
| # There is no Series.cumcount or DataFrame.cumcount | ||||
| series_transform_kernels = [ | ||||
|     x for x in sorted(transformation_kernels) if x != "cumcount" | ||||
| ] | ||||
| frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"] | ||||
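| # Both lists are imported by the transform tests later in this commit to | ||||
| # parametrize over the transformation kernels. | ||||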
							
								
								
									
1739  lib/python3.11/site-packages/pandas/tests/apply/test_frame_apply.py (Normal file)
File diff suppressed because it is too large
| @@ -0,0 +1,113 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat.numpy import np_version_gte1p25 | ||||
|  | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_agg_relabel(): | ||||
|     # GH 26513 | ||||
|     df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) | ||||
|  | ||||
|     # simplest case with one column, one func | ||||
|     result = df.agg(foo=("B", "sum")) | ||||
|     expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"])) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # test on same column with different methods | ||||
|     result = df.agg(foo=("B", "sum"), bar=("B", "min")) | ||||
|     expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"])) | ||||
|  | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_agg_relabel_multi_columns_multi_methods(): | ||||
|     # GH 26513, test on multiple columns with multiple methods | ||||
|     df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) | ||||
|     result = df.agg( | ||||
|         foo=("A", "sum"), | ||||
|         bar=("B", "mean"), | ||||
|         cat=("A", "min"), | ||||
|         dat=("B", "max"), | ||||
|         f=("A", "max"), | ||||
|         g=("C", "min"), | ||||
|     ) | ||||
|     expected = pd.DataFrame( | ||||
|         { | ||||
|             "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan], | ||||
|             "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan], | ||||
|             "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0], | ||||
|         }, | ||||
|         index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.xfail(np_version_gte1p25, reason="name of min now equals name of np.min") | ||||
| def test_agg_relabel_partial_functions(): | ||||
|     # GH 26513, test on partial, functools or more complex cases | ||||
|     df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) | ||||
|     msg = "using Series.[mean|min]" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min)) | ||||
|     expected = pd.DataFrame( | ||||
|         {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"]) | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     msg = "using Series.[mean|min|max|sum]" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = df.agg( | ||||
|             foo=("A", min), | ||||
|             bar=("A", np.min), | ||||
|             cat=("B", max), | ||||
|             dat=("C", "min"), | ||||
|             f=("B", np.sum), | ||||
|             kk=("B", lambda x: min(x)), | ||||
|         ) | ||||
|     expected = pd.DataFrame( | ||||
|         { | ||||
|             "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan], | ||||
|             "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0], | ||||
|             "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan], | ||||
|         }, | ||||
|         index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_agg_namedtuple(): | ||||
|     # GH 26513 | ||||
|     df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) | ||||
|     result = df.agg( | ||||
|         foo=pd.NamedAgg("B", "sum"), | ||||
|         bar=pd.NamedAgg("B", "min"), | ||||
|         cat=pd.NamedAgg(column="B", aggfunc="count"), | ||||
|         fft=pd.NamedAgg("B", aggfunc="max"), | ||||
|     ) | ||||
|  | ||||
|     expected = pd.DataFrame( | ||||
|         {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"]) | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     result = df.agg( | ||||
|         foo=pd.NamedAgg("A", "min"), | ||||
|         bar=pd.NamedAgg(column="B", aggfunc="max"), | ||||
|         cat=pd.NamedAgg(column="A", aggfunc="max"), | ||||
|     ) | ||||
|     expected = pd.DataFrame( | ||||
|         {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]}, | ||||
|         index=pd.Index(["foo", "bar", "cat"]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_reconstruct_func(): | ||||
|     # GH 28472, test to ensure reconstruct_func isn't moved; | ||||
|     # This method is used by other libraries (e.g. dask) | ||||
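|     # The returned 4-tuple is (relabeling, func, columns, order). | ||||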
|     result = pd.core.apply.reconstruct_func("min") | ||||
|     expected = (False, "min", None, None) | ||||
|     tm.assert_equal(result, expected) | ||||
| @@ -0,0 +1,264 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.apply.common import frame_transform_kernels | ||||
| from pandas.tests.frame.common import zip_frames | ||||
|  | ||||
|  | ||||
| def unpack_obj(obj, klass, axis): | ||||
|     """ | ||||
|     Helper to ensure we have the right type of object for a test parametrized | ||||
|     over frame_or_series. | ||||
|     """ | ||||
|     if klass is not DataFrame: | ||||
|         obj = obj["A"] | ||||
|         if axis != 0: | ||||
|             pytest.skip(f"Test is only for DataFrame with axis={axis}") | ||||
|     return obj | ||||
|  | ||||
|  | ||||
| def test_transform_ufunc(axis, float_frame, frame_or_series): | ||||
|     # GH 35964 | ||||
|     obj = unpack_obj(float_frame, frame_or_series, axis) | ||||
|  | ||||
|     with np.errstate(all="ignore"): | ||||
|         f_sqrt = np.sqrt(obj) | ||||
|  | ||||
|     # ufunc | ||||
|     result = obj.transform(np.sqrt, axis=axis) | ||||
|     expected = f_sqrt | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops, names", | ||||
|     [ | ||||
|         ([np.sqrt], ["sqrt"]), | ||||
|         ([np.abs, np.sqrt], ["absolute", "sqrt"]), | ||||
|         (np.array([np.sqrt]), ["sqrt"]), | ||||
|         (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), | ||||
|     ], | ||||
| ) | ||||
| def test_transform_listlike(axis, float_frame, ops, names): | ||||
|     # GH 35964 | ||||
|     other_axis = 1 if axis in {0, "index"} else 0 | ||||
|     with np.errstate(all="ignore"): | ||||
|         expected = zip_frames([op(float_frame) for op in ops], axis=other_axis) | ||||
|     if axis in {0, "index"}: | ||||
|         expected.columns = MultiIndex.from_product([float_frame.columns, names]) | ||||
|     else: | ||||
|         expected.index = MultiIndex.from_product([float_frame.index, names]) | ||||
|     result = float_frame.transform(ops, axis=axis) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("ops", [[], np.array([])]) | ||||
| def test_transform_empty_listlike(float_frame, ops, frame_or_series): | ||||
|     obj = unpack_obj(float_frame, frame_or_series, 0) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="No transform functions were provided"): | ||||
|         obj.transform(ops) | ||||
|  | ||||
|  | ||||
| def test_transform_listlike_func_with_args(): | ||||
|     # GH 50624 | ||||
|     df = DataFrame({"x": [1, 2, 3]}) | ||||
|  | ||||
|     def foo1(x, a=1, c=0): | ||||
|         return x + a + c | ||||
|  | ||||
|     def foo2(x, b=2, c=0): | ||||
|         return x + b + c | ||||
|  | ||||
|     msg = r"foo1\(\) got an unexpected keyword argument 'b'" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         df.transform([foo1, foo2], 0, 3, b=3, c=4) | ||||
|  | ||||
|     result = df.transform([foo1, foo2], 0, 3, c=4) | ||||
|     expected = DataFrame( | ||||
|         [[8, 8], [9, 9], [10, 10]], | ||||
|         columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("box", [dict, Series]) | ||||
| def test_transform_dictlike(axis, float_frame, box): | ||||
|     # GH 35964 | ||||
|     if axis in (0, "index"): | ||||
|         e = float_frame.columns[0] | ||||
|         expected = float_frame[[e]].transform(np.abs) | ||||
|     else: | ||||
|         e = float_frame.index[0] | ||||
|         expected = float_frame.iloc[[0]].transform(np.abs) | ||||
|     result = float_frame.transform(box({e: np.abs}), axis=axis) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_transform_dictlike_mixed(): | ||||
|     # GH 40018 - mix of lists and non-lists in values of a dictionary | ||||
|     df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]}) | ||||
|     result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) | ||||
|     expected = DataFrame( | ||||
|         [[1.0, 1, 1.0], [2.0, 4, 2.0]], | ||||
|         columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), | ||||
|     ) | ||||
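|     # i.e. expected columns are ("b", "sqrt"), ("b", "abs"), ("c", "sqrt") | ||||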
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops", | ||||
|     [ | ||||
|         {}, | ||||
|         {"A": []}, | ||||
|         {"A": [], "B": "cumsum"}, | ||||
|         {"A": "cumsum", "B": []}, | ||||
|         {"A": [], "B": ["cumsum"]}, | ||||
|         {"A": ["cumsum"], "B": []}, | ||||
|     ], | ||||
| ) | ||||
| def test_transform_empty_dictlike(float_frame, ops, frame_or_series): | ||||
|     obj = unpack_obj(float_frame, frame_or_series, 0) | ||||
|  | ||||
|     with pytest.raises(ValueError, match="No transform functions were provided"): | ||||
|         obj.transform(ops) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("use_apply", [True, False]) | ||||
| def test_transform_udf(axis, float_frame, use_apply, frame_or_series): | ||||
|     # GH 35964 | ||||
|     obj = unpack_obj(float_frame, frame_or_series, axis) | ||||
|  | ||||
|     # transform uses UDF either via apply or passing the entire DataFrame | ||||
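|     # (raising inside func forces transform to fall back from the whole-object | ||||
|     # path to the apply path, so use_apply controls which path yields the result) | ||||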
|     def func(x): | ||||
|         # transform is using apply iff x is not a DataFrame | ||||
|         if use_apply == isinstance(x, frame_or_series): | ||||
|             # Force transform to fallback | ||||
|             raise ValueError | ||||
|         return x + 1 | ||||
|  | ||||
|     result = obj.transform(func, axis=axis) | ||||
|     expected = obj + 1 | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"] | ||||
| frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail] | ||||
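| # The kernels above only fill or shift values, so they succeed even on the | ||||
| # all-object frames used below; the remaining kernels are expected to raise. | ||||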
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1]) | ||||
| def test_transform_bad_dtype(op, frame_or_series, request): | ||||
|     # GH 35964 | ||||
|     if op == "ngroup": | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") | ||||
|         ) | ||||
|  | ||||
|     obj = DataFrame({"A": 3 * [object]})  # DataFrame that will fail on most transforms | ||||
|     obj = tm.get_obj(obj, frame_or_series) | ||||
|     error = TypeError | ||||
|     msg = "|".join( | ||||
|         [ | ||||
|             "not supported between instances of 'type' and 'type'", | ||||
|             "unsupported operand type", | ||||
|         ] | ||||
|     ) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         obj.transform(op) | ||||
|     with pytest.raises(error, match=msg): | ||||
|         obj.transform([op]) | ||||
|     with pytest.raises(error, match=msg): | ||||
|         obj.transform({"A": op}) | ||||
|     with pytest.raises(error, match=msg): | ||||
|         obj.transform({"A": [op]}) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", frame_kernels_raise) | ||||
| def test_transform_failure_typeerror(request, op): | ||||
|     # GH 35964 | ||||
|  | ||||
|     if op == "ngroup": | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") | ||||
|         ) | ||||
|  | ||||
|     # Using object makes most transform kernels fail | ||||
|     df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) | ||||
|     error = TypeError | ||||
|     msg = "|".join( | ||||
|         [ | ||||
|             "not supported between instances of 'type' and 'type'", | ||||
|             "unsupported operand type", | ||||
|         ] | ||||
|     ) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         df.transform([op]) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         df.transform({"A": op, "B": op}) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         df.transform({"A": [op], "B": [op]}) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         df.transform({"A": [op, "shift"], "B": [op]}) | ||||
|  | ||||
|  | ||||
| def test_transform_failure_valueerror(): | ||||
|     # GH 40211 | ||||
|     def op(x): | ||||
|         if np.sum(np.sum(x)) < 10: | ||||
|             raise ValueError | ||||
|         return x | ||||
|  | ||||
|     df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) | ||||
|     msg = "Transform function failed" | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.transform([op]) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.transform({"A": op, "B": op}) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.transform({"A": [op], "B": [op]}) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.transform({"A": [op, "shift"], "B": [op]}) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("use_apply", [True, False]) | ||||
| def test_transform_passes_args(use_apply, frame_or_series): | ||||
|     # GH 35964 | ||||
|     # transform uses UDF either via apply or passing the entire DataFrame | ||||
|     expected_args = [1, 2] | ||||
|     expected_kwargs = {"c": 3} | ||||
|  | ||||
|     def f(x, a, b, c): | ||||
|         # transform is using apply iff x is not a DataFrame | ||||
|         if use_apply == isinstance(x, frame_or_series): | ||||
|             # Force transform to fallback | ||||
|             raise ValueError | ||||
|         assert [a, b] == expected_args | ||||
|         assert c == expected_kwargs["c"] | ||||
|         return x | ||||
|  | ||||
|     frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs) | ||||
|  | ||||
|  | ||||
| def test_transform_empty_dataframe(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/39636 | ||||
|     df = DataFrame([], columns=["col1", "col2"]) | ||||
|     result = df.transform(lambda x: x + 10) | ||||
|     tm.assert_frame_equal(result, df) | ||||
|  | ||||
|     result = df["col1"].transform(lambda x: x + 10) | ||||
|     tm.assert_series_equal(result, df["col1"]) | ||||
| @@ -0,0 +1,363 @@ | ||||
| # Tests specifically aimed at detecting bad arguments. | ||||
| # This file is organized by reason for exception. | ||||
| #     1. always invalid argument values | ||||
| #     2. missing column(s) | ||||
| #     3. incompatible ops/dtype/args/kwargs | ||||
| #     4. invalid result shape/type | ||||
| # If your test does not fit into one of these categories, add to this list. | ||||
|  | ||||
| from itertools import chain | ||||
| import re | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.errors import SpecificationError | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
|     date_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("result_type", ["foo", 1]) | ||||
| def test_result_type_error(result_type): | ||||
|     # result_type must be one of the allowed values; anything else raises ValueError | ||||
|     df = DataFrame( | ||||
|         np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1, | ||||
|         columns=["A", "B", "C"], | ||||
|     ) | ||||
|  | ||||
|     msg = ( | ||||
|         "invalid value for result_type, must be one of " | ||||
|         "{None, 'reduce', 'broadcast', 'expand'}" | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type) | ||||
|  | ||||
|  | ||||
| def test_apply_invalid_axis_value(): | ||||
|     df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) | ||||
|     msg = "No axis named 2 for object type DataFrame" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.apply(lambda x: x, 2) | ||||
|  | ||||
|  | ||||
| def test_agg_raises(): | ||||
|     # GH 26513 | ||||
|     df = DataFrame({"A": [0, 1], "B": [1, 2]}) | ||||
|     msg = "Must provide" | ||||
|  | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         df.agg() | ||||
|  | ||||
|  | ||||
| def test_map_with_invalid_na_action_raises(): | ||||
|     # https://github.com/pandas-dev/pandas/issues/32815 | ||||
|     s = Series([1, 2, 3]) | ||||
|     msg = "na_action must either be 'ignore' or None" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         s.map(lambda x: x, na_action="____") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("input_na_action", ["____", True]) | ||||
| def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action): | ||||
|     # https://github.com/pandas-dev/pandas/issues/46588 | ||||
|     s = Series([1, 2, 3]) | ||||
|     msg = f"na_action must either be 'ignore' or None, {input_na_action} was passed" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         s.map({1: 2}, na_action=input_na_action) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) | ||||
| @pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}]) | ||||
| def test_nested_renamer(frame_or_series, method, func): | ||||
|     # GH 35964 | ||||
|     obj = frame_or_series({"A": [1]}) | ||||
|     match = "nested renamer is not supported" | ||||
|     with pytest.raises(SpecificationError, match=match): | ||||
|         getattr(obj, method)(func) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "renamer", | ||||
|     [{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}], | ||||
| ) | ||||
| def test_series_nested_renamer(renamer): | ||||
|     s = Series(range(6), dtype="int64", name="series") | ||||
|     msg = "nested renamer is not supported" | ||||
|     with pytest.raises(SpecificationError, match=msg): | ||||
|         s.agg(renamer) | ||||
|  | ||||
|  | ||||
| def test_apply_dict_depr(): | ||||
|     tsdf = DataFrame( | ||||
|         np.random.default_rng(2).standard_normal((10, 3)), | ||||
|         columns=["A", "B", "C"], | ||||
|         index=date_range("1/1/2000", periods=10), | ||||
|     ) | ||||
|     msg = "nested renamer is not supported" | ||||
|     with pytest.raises(SpecificationError, match=msg): | ||||
|         tsdf.A.agg({"foo": ["sum", "mean"]}) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("method", ["agg", "transform"]) | ||||
| def test_dict_nested_renaming_depr(method): | ||||
|     df = DataFrame({"A": range(5), "B": 5}) | ||||
|  | ||||
|     # nested renaming | ||||
|     msg = r"nested renamer is not supported" | ||||
|     with pytest.raises(SpecificationError, match=msg): | ||||
|         getattr(df, method)({"A": {"foo": "min"}, "B": {"bar": "max"}}) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) | ||||
| @pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}]) | ||||
| def test_missing_column(method, func): | ||||
|     # GH 40004 | ||||
|     obj = DataFrame({"A": [1]}) | ||||
|     match = re.escape("Column(s) ['B'] do not exist") | ||||
|     with pytest.raises(KeyError, match=match): | ||||
|         getattr(obj, method)(func) | ||||
|  | ||||
|  | ||||
| def test_transform_mixed_column_name_dtypes(): | ||||
|     # GH39025 | ||||
|     df = DataFrame({"a": ["1"]}) | ||||
|     msg = r"Column\(s\) \[1, 'b'\] do not exist" | ||||
|     with pytest.raises(KeyError, match=msg): | ||||
|         df.transform({"a": int, 1: str, "b": int}) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "how, args", [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", 1)] | ||||
| ) | ||||
| def test_apply_str_axis_1_raises(how, args): | ||||
|     # GH 39211 - some ops don't support axis=1 | ||||
|     df = DataFrame({"a": [1, 2], "b": [3, 4]}) | ||||
|     msg = f"Operation {how} does not support axis=1" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.apply(how, axis=1, args=args) | ||||
|  | ||||
|  | ||||
| def test_transform_axis_1_raises(): | ||||
|     # GH 35964 | ||||
|     msg = "No axis named 1 for object type Series" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         Series([1]).transform("sum", axis=1) | ||||
|  | ||||
|  | ||||
| def test_apply_modify_traceback(): | ||||
|     data = DataFrame( | ||||
|         { | ||||
|             "A": [ | ||||
|                 "foo", | ||||
|                 "foo", | ||||
|                 "foo", | ||||
|                 "foo", | ||||
|                 "bar", | ||||
|                 "bar", | ||||
|                 "bar", | ||||
|                 "bar", | ||||
|                 "foo", | ||||
|                 "foo", | ||||
|                 "foo", | ||||
|             ], | ||||
|             "B": [ | ||||
|                 "one", | ||||
|                 "one", | ||||
|                 "one", | ||||
|                 "two", | ||||
|                 "one", | ||||
|                 "one", | ||||
|                 "one", | ||||
|                 "two", | ||||
|                 "two", | ||||
|                 "two", | ||||
|                 "one", | ||||
|             ], | ||||
|             "C": [ | ||||
|                 "dull", | ||||
|                 "dull", | ||||
|                 "shiny", | ||||
|                 "dull", | ||||
|                 "dull", | ||||
|                 "shiny", | ||||
|                 "shiny", | ||||
|                 "dull", | ||||
|                 "shiny", | ||||
|                 "shiny", | ||||
|                 "shiny", | ||||
|             ], | ||||
|             "D": np.random.default_rng(2).standard_normal(11), | ||||
|             "E": np.random.default_rng(2).standard_normal(11), | ||||
|             "F": np.random.default_rng(2).standard_normal(11), | ||||
|         } | ||||
|     ) | ||||
|  | ||||
|     data.loc[4, "C"] = np.nan | ||||
|  | ||||
|     def transform(row): | ||||
|         if row["C"].startswith("shin") and row["A"] == "foo": | ||||
|             row["D"] = 7 | ||||
|         return row | ||||
|  | ||||
|     msg = "'float' object has no attribute 'startswith'" | ||||
|     with pytest.raises(AttributeError, match=msg): | ||||
|         data.apply(transform, axis=1) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "df, func, expected", | ||||
|     tm.get_cython_table_params( | ||||
|         DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]] | ||||
|     ), | ||||
| ) | ||||
| def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string): | ||||
|     # GH 21224 | ||||
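|     # get_cython_table_params pairs the frame with "cumprod" and its numpy | ||||
|     # equivalents, all of which should raise on string data | ||||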
|     if using_infer_string: | ||||
|         expected = (expected, NotImplementedError) | ||||
|  | ||||
|     msg = ( | ||||
|         "can't multiply sequence by non-int of type 'str'" | ||||
|         "|cannot perform cumprod with type str"  # NotImplementedError python backend | ||||
|         "|operation 'cumprod' not supported for dtype 'str'"  # TypeError pyarrow | ||||
|     ) | ||||
|     warn = None if isinstance(func, str) else FutureWarning | ||||
|     with pytest.raises(expected, match=msg): | ||||
|         with tm.assert_produces_warning(warn, match="using DataFrame.cumprod"): | ||||
|             df.agg(func, axis=axis) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "series, func, expected", | ||||
|     chain( | ||||
|         tm.get_cython_table_params( | ||||
|             Series("a b c".split()), | ||||
|             [ | ||||
|                 ("mean", TypeError),  # mean raises TypeError | ||||
|                 ("prod", TypeError), | ||||
|                 ("std", TypeError), | ||||
|                 ("var", TypeError), | ||||
|                 ("median", TypeError), | ||||
|                 ("cumprod", TypeError), | ||||
|             ], | ||||
|         ) | ||||
|     ), | ||||
| ) | ||||
| def test_agg_cython_table_raises_series(series, func, expected, using_infer_string): | ||||
|     # GH21224 | ||||
|     msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type" | ||||
|     if func == "median" or func is np.nanmedian or func is np.median: | ||||
|         msg = r"Cannot convert \['a' 'b' 'c'\] to numeric" | ||||
|  | ||||
|     if using_infer_string and func in ("cumprod", np.cumprod, np.nancumprod): | ||||
|         expected = (expected, NotImplementedError) | ||||
|  | ||||
|     msg = ( | ||||
|         msg + "|does not support|has no kernel|Cannot perform|cannot perform|operation" | ||||
|     ) | ||||
|     warn = None if isinstance(func, str) else FutureWarning | ||||
|  | ||||
|     with pytest.raises(expected, match=msg): | ||||
|         # e.g. Series('a b'.split()).cumprod() will raise | ||||
|         with tm.assert_produces_warning(warn, match="is currently using Series.*"): | ||||
|             series.agg(func) | ||||
|  | ||||
|  | ||||
| def test_agg_none_to_type(): | ||||
|     # GH 40543 | ||||
|     df = DataFrame({"a": [None]}) | ||||
|     msg = re.escape("int() argument must be a string") | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         df.agg({"a": lambda x: int(x.iloc[0])}) | ||||
|  | ||||
|  | ||||
| def test_transform_none_to_type(): | ||||
|     # GH#34377 | ||||
|     df = DataFrame({"a": [None]}) | ||||
|     msg = "argument must be a" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         df.transform({"a": lambda x: int(x.iloc[0])}) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "func", | ||||
|     [ | ||||
|         lambda x: np.array([1, 2]).reshape(-1, 2), | ||||
|         lambda x: [1, 2], | ||||
|         lambda x: Series([1, 2]), | ||||
|     ], | ||||
| ) | ||||
| def test_apply_broadcast_error(func): | ||||
|     df = DataFrame( | ||||
|         np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1, | ||||
|         columns=["A", "B", "C"], | ||||
|     ) | ||||
|  | ||||
|     # > 1 ndim | ||||
|     msg = "too many dims to broadcast|cannot broadcast result" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.apply(func, axis=1, result_type="broadcast") | ||||
|  | ||||
|  | ||||
| def test_transform_and_agg_err_agg(axis, float_frame): | ||||
|     # cannot both transform and agg | ||||
|     msg = "cannot combine transform and aggregation operations" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         with np.errstate(all="ignore"): | ||||
|             float_frame.agg(["max", "sqrt"], axis=axis) | ||||
|  | ||||
|  | ||||
| @pytest.mark.filterwarnings("ignore::FutureWarning")  # GH53325 | ||||
| @pytest.mark.parametrize( | ||||
|     "func, msg", | ||||
|     [ | ||||
|         (["sqrt", "max"], "cannot combine transform and aggregation"), | ||||
|         ( | ||||
|             {"foo": np.sqrt, "bar": "sum"}, | ||||
|             "cannot perform both aggregation and transformation", | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_transform_and_agg_err_series(string_series, func, msg): | ||||
|     # we are trying to transform with an aggregator | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         with np.errstate(all="ignore"): | ||||
|             string_series.agg(func) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]]) | ||||
| def test_transform_wont_agg_frame(axis, float_frame, func): | ||||
|     # GH 35964 | ||||
|     # cannot both transform and agg | ||||
|     msg = "Function did not transform" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         float_frame.transform(func, axis=axis) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]]) | ||||
| def test_transform_wont_agg_series(string_series, func): | ||||
|     # GH 35964 | ||||
|     # we are trying to transform with an aggregator | ||||
|     msg = "Function did not transform" | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         string_series.transform(func) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}] | ||||
| ) | ||||
| def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper): | ||||
|     # GH 35964 | ||||
|     op = op_wrapper(all_reductions) | ||||
|  | ||||
|     obj = DataFrame({"A": [1, 2, 3]}) | ||||
|     obj = tm.get_obj(obj, frame_or_series) | ||||
|  | ||||
|     msg = "Function did not transform" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         obj.transform(op) | ||||
							
								
								
									
129  lib/python3.11/site-packages/pandas/tests/apply/test_numba.py (Normal file)
| @@ -0,0 +1,129 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import is_platform_arm | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.util.version import Version | ||||
|  | ||||
| pytestmark = [td.skip_if_no("numba"), pytest.mark.single_cpu, pytest.mark.skipif()] | ||||
|  | ||||
| numba = pytest.importorskip("numba") | ||||
| pytestmark.append( | ||||
|     pytest.mark.skipif( | ||||
|         Version(numba.__version__) == Version("0.61") and is_platform_arm(), | ||||
|         reason=f"Segfaults on ARM platforms with numba {numba.__version__}", | ||||
|     ) | ||||
| ) | ||||
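| # numba is imported via importorskip so the version/platform check above only | ||||
| # runs when numba is actually installed. | ||||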
|  | ||||
|  | ||||
| @pytest.fixture(params=[0, 1]) | ||||
| def apply_axis(request): | ||||
|     return request.param | ||||
|  | ||||
|  | ||||
| def test_numba_vs_python_noop(float_frame, apply_axis): | ||||
|     func = lambda x: x | ||||
|     result = float_frame.apply(func, engine="numba", axis=apply_axis) | ||||
|     expected = float_frame.apply(func, engine="python", axis=apply_axis) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_numba_vs_python_string_index(): | ||||
|     # GH#56189 | ||||
|     df = DataFrame( | ||||
|         1, | ||||
|         index=Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)), | ||||
|         columns=Index(["x", "y"], dtype=pd.StringDtype(na_value=np.nan)), | ||||
|     ) | ||||
|     func = lambda x: x | ||||
|     result = df.apply(func, engine="numba", axis=0) | ||||
|     expected = df.apply(func, engine="python", axis=0) | ||||
|     tm.assert_frame_equal( | ||||
|         result, expected, check_column_type=False, check_index_type=False | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_numba_vs_python_indexing(): | ||||
|     frame = DataFrame( | ||||
|         {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]}, | ||||
|         index=Index(["A", "B", "C"]), | ||||
|     ) | ||||
|     row_func = lambda x: x["c"] | ||||
|     result = frame.apply(row_func, engine="numba", axis=1) | ||||
|     expected = frame.apply(row_func, engine="python", axis=1) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     col_func = lambda x: x["A"] | ||||
|     result = frame.apply(col_func, engine="numba", axis=0) | ||||
|     expected = frame.apply(col_func, engine="python", axis=0) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "reduction", | ||||
|     [lambda x: x.mean(), lambda x: x.min(), lambda x: x.max(), lambda x: x.sum()], | ||||
| ) | ||||
| def test_numba_vs_python_reductions(reduction, apply_axis): | ||||
|     df = DataFrame(np.ones((4, 4), dtype=np.float64)) | ||||
|     result = df.apply(reduction, engine="numba", axis=apply_axis) | ||||
|     expected = df.apply(reduction, engine="python", axis=apply_axis) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("colnames", [[1, 2, 3], [1.0, 2.0, 3.0]]) | ||||
| def test_numba_numeric_colnames(colnames): | ||||
|     # Check that numeric column names lower properly and can be indexed | ||||
|     df = DataFrame( | ||||
|         np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.int64), columns=colnames | ||||
|     ) | ||||
|     first_col = colnames[0] | ||||
|     f = lambda x: x[first_col]  # Get the first column | ||||
|     result = df.apply(f, engine="numba", axis=1) | ||||
|     expected = df.apply(f, engine="python", axis=1) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_numba_parallel_unsupported(float_frame): | ||||
|     f = lambda x: x | ||||
|     with pytest.raises( | ||||
|         NotImplementedError, | ||||
|         match="Parallel apply is not supported when raw=False and engine='numba'", | ||||
|     ): | ||||
|         float_frame.apply(f, engine="numba", engine_kwargs={"parallel": True}) | ||||
|  | ||||
|  | ||||
| def test_numba_nonunique_unsupported(apply_axis): | ||||
|     f = lambda x: x | ||||
|     df = DataFrame({"a": [1, 2]}, index=Index(["a", "a"])) | ||||
|     with pytest.raises( | ||||
|         NotImplementedError, | ||||
|         match="The index/columns must be unique when raw=False and engine='numba'", | ||||
|     ): | ||||
|         df.apply(f, engine="numba", axis=apply_axis) | ||||
|  | ||||
|  | ||||
| def test_numba_unsupported_dtypes(apply_axis): | ||||
|     pytest.importorskip("pyarrow") | ||||
|     f = lambda x: x | ||||
|     df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]}) | ||||
|     df["c"] = df["c"].astype("double[pyarrow]") | ||||
|  | ||||
|     with pytest.raises( | ||||
|         ValueError, | ||||
|         match="Column b must have a numeric dtype. Found 'object|str' instead", | ||||
|     ): | ||||
|         df.apply(f, engine="numba", axis=apply_axis) | ||||
|  | ||||
|     with pytest.raises( | ||||
|         ValueError, | ||||
|         match="Column c is backed by an extension array, " | ||||
|         "which is not supported by the numba engine.", | ||||
|     ): | ||||
|         df["c"].to_frame().apply(f, engine="numba", axis=apply_axis) | ||||
| @@ -0,0 +1,701 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     concat, | ||||
|     date_range, | ||||
|     timedelta_range, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.apply.common import series_transform_kernels | ||||
|  | ||||
|  | ||||
| @pytest.fixture(params=[False, "compat"]) | ||||
| def by_row(request): | ||||
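|     # "compat" applies the UDF element-wise; False passes the whole Series to | ||||
|     # the UDF in a single call. | ||||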
|     return request.param | ||||
|  | ||||
|  | ||||
| def test_series_map_box_timedelta(by_row): | ||||
|     # GH#11349 | ||||
|     ser = Series(timedelta_range("1 day 1 s", periods=3, freq="h")) | ||||
|  | ||||
|     def f(x): | ||||
|         return x.total_seconds() if by_row else x.dt.total_seconds() | ||||
|  | ||||
|     result = ser.apply(f, by_row=by_row) | ||||
|  | ||||
|     expected = ser.map(lambda x: x.total_seconds()) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     expected = Series([86401.0, 90001.0, 93601.0]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_apply(datetime_series, by_row): | ||||
|     result = datetime_series.apply(np.sqrt, by_row=by_row) | ||||
|     with np.errstate(all="ignore"): | ||||
|         expected = np.sqrt(datetime_series) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # element-wise apply (ufunc) | ||||
|     result = datetime_series.apply(np.exp, by_row=by_row) | ||||
|     expected = np.exp(datetime_series) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # empty series | ||||
|     s = Series(dtype=object, name="foo", index=Index([], name="bar")) | ||||
|     rs = s.apply(lambda x: x, by_row=by_row) | ||||
|     tm.assert_series_equal(s, rs) | ||||
|  | ||||
|     # check all metadata (GH 9322) | ||||
|     assert s is not rs | ||||
|     assert s.index is rs.index | ||||
|     assert s.dtype == rs.dtype | ||||
|     assert s.name == rs.name | ||||
|  | ||||
|     # index but no data | ||||
|     s = Series(index=[1, 2, 3], dtype=np.float64) | ||||
|     rs = s.apply(lambda x: x, by_row=by_row) | ||||
|     tm.assert_series_equal(s, rs) | ||||
|  | ||||
|  | ||||
| def test_apply_map_same_length_inference_bug(): | ||||
|     s = Series([1, 2]) | ||||
|  | ||||
|     def f(x): | ||||
|         return (x, x + 1) | ||||
|  | ||||
|     result = s.apply(f, by_row="compat") | ||||
|     expected = s.map(f) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("convert_dtype", [True, False]) | ||||
| def test_apply_convert_dtype_deprecated(convert_dtype): | ||||
|     ser = Series(np.random.default_rng(2).standard_normal(10)) | ||||
|  | ||||
|     def func(x): | ||||
|         return x if x > 0 else np.nan | ||||
|  | ||||
|     with tm.assert_produces_warning(FutureWarning): | ||||
|         ser.apply(func, convert_dtype=convert_dtype, by_row="compat") | ||||
|  | ||||
|  | ||||
| def test_apply_args(): | ||||
|     s = Series(["foo,bar"]) | ||||
|  | ||||
|     result = s.apply(str.split, args=(",",)) | ||||
|     assert result[0] == ["foo", "bar"] | ||||
|     assert isinstance(result[0], list) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "args, kwargs, increment", | ||||
|     [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], | ||||
| ) | ||||
| def test_agg_args(args, kwargs, increment): | ||||
|     # GH 43357 | ||||
|     def f(x, a=0, b=0, c=0): | ||||
|         return x + a + 10 * b + 100 * c | ||||
|  | ||||
|     s = Series([1, 2]) | ||||
|     msg = ( | ||||
|         "in Series.agg cannot aggregate and has been deprecated. " | ||||
|         "Use Series.transform to keep behavior unchanged." | ||||
|     ) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = s.agg(f, 0, *args, **kwargs) | ||||
|     expected = s + increment | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_agg_mapping_func_deprecated(): | ||||
|     # GH 53325 | ||||
|     s = Series([1, 2, 3]) | ||||
|  | ||||
|     def foo1(x, a=1, c=0): | ||||
|         return x + a + c | ||||
|  | ||||
|     def foo2(x, b=2, c=0): | ||||
|         return x + b + c | ||||
|  | ||||
|     msg = "using .+ in Series.agg cannot aggregate and" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         s.agg(foo1, 0, 3, c=4) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         s.agg([foo1, foo2], 0, 3, c=4) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         s.agg({"a": foo1, "b": foo2}, 0, 3, c=4) | ||||
|  | ||||
|  | ||||
| def test_series_apply_map_box_timestamps(by_row): | ||||
|     # GH#2689, GH#2627 | ||||
|     ser = Series(date_range("1/1/2000", periods=10)) | ||||
|  | ||||
|     def func(x): | ||||
|         return (x.hour, x.day, x.month) | ||||
|  | ||||
|     if not by_row: | ||||
|         msg = "Series' object has no attribute 'hour'" | ||||
|         with pytest.raises(AttributeError, match=msg): | ||||
|             ser.apply(func, by_row=by_row) | ||||
|         return | ||||
|  | ||||
|     result = ser.apply(func, by_row=by_row) | ||||
|     expected = ser.map(func) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_apply_box_dt64(): | ||||
|     # ufunc will not be boxed. Same test cases as the test_map_box | ||||
|     vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] | ||||
|     ser = Series(vals, dtype="M8[ns]") | ||||
|     assert ser.dtype == "datetime64[ns]" | ||||
|     # boxed value must be Timestamp instance | ||||
|     res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat") | ||||
|     exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) | ||||
|     tm.assert_series_equal(res, exp) | ||||
|  | ||||
|  | ||||
| def test_apply_box_dt64tz(): | ||||
|     vals = [ | ||||
|         pd.Timestamp("2011-01-01", tz="US/Eastern"), | ||||
|         pd.Timestamp("2011-01-02", tz="US/Eastern"), | ||||
|     ] | ||||
|     ser = Series(vals, dtype="M8[ns, US/Eastern]") | ||||
|     assert ser.dtype == "datetime64[ns, US/Eastern]" | ||||
|     res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat") | ||||
|     exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) | ||||
|     tm.assert_series_equal(res, exp) | ||||
|  | ||||
|  | ||||
| def test_apply_box_td64(): | ||||
|     # timedelta | ||||
|     vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] | ||||
|     ser = Series(vals) | ||||
|     assert ser.dtype == "timedelta64[ns]" | ||||
|     res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat") | ||||
|     exp = Series(["Timedelta_1", "Timedelta_2"]) | ||||
|     tm.assert_series_equal(res, exp) | ||||
|  | ||||
|  | ||||
| def test_apply_box_period(): | ||||
|     # period | ||||
|     vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] | ||||
|     ser = Series(vals) | ||||
|     assert ser.dtype == "Period[M]" | ||||
|     res = ser.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat") | ||||
|     exp = Series(["Period_M", "Period_M"]) | ||||
|     tm.assert_series_equal(res, exp) | ||||
|  | ||||
|  | ||||
| def test_apply_datetimetz(by_row): | ||||
|     values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize("Asia/Tokyo") | ||||
|     s = Series(values, name="XX") | ||||
|  | ||||
|     result = s.apply(lambda x: x + pd.offsets.Day(), by_row=by_row) | ||||
|     exp_values = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize( | ||||
|         "Asia/Tokyo" | ||||
|     ) | ||||
|     exp = Series(exp_values, name="XX") | ||||
|     tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     result = s.apply(lambda x: x.hour if by_row else x.dt.hour, by_row=by_row) | ||||
|     exp = Series(list(range(24)) + [0], name="XX", dtype="int64" if by_row else "int32") | ||||
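|     # .dt.hour returns int32, while element-wise x.hour gives int64 | ||||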
|     tm.assert_series_equal(result, exp) | ||||
|  | ||||
|     # not vectorized | ||||
|     def f(x): | ||||
|         return str(x.tz) if by_row else str(x.dt.tz) | ||||
|  | ||||
|     result = s.apply(f, by_row=by_row) | ||||
|     if by_row: | ||||
|         exp = Series(["Asia/Tokyo"] * 25, name="XX") | ||||
|         tm.assert_series_equal(result, exp) | ||||
|     else: | ||||
|         assert result == "Asia/Tokyo" | ||||
|  | ||||
|  | ||||
| def test_apply_categorical(by_row, using_infer_string): | ||||
|     values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) | ||||
|     ser = Series(values, name="XX", index=list("abcdefg")) | ||||
|  | ||||
|     if not by_row: | ||||
|         msg = "Series' object has no attribute 'lower" | ||||
|         with pytest.raises(AttributeError, match=msg): | ||||
|             ser.apply(lambda x: x.lower(), by_row=by_row) | ||||
|         assert ser.apply(lambda x: "A", by_row=by_row) == "A" | ||||
|         return | ||||
|  | ||||
|     result = ser.apply(lambda x: x.lower(), by_row=by_row) | ||||
|  | ||||
|     # result should be categorical dtype when the number of categories | ||||
|     # is the same | ||||
|     values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) | ||||
|     exp = Series(values, name="XX", index=list("abcdefg")) | ||||
|     tm.assert_series_equal(result, exp) | ||||
|     tm.assert_categorical_equal(result.values, exp.values) | ||||
|  | ||||
|     result = ser.apply(lambda x: "A") | ||||
|     exp = Series(["A"] * 7, name="XX", index=list("abcdefg")) | ||||
|     tm.assert_series_equal(result, exp) | ||||
|     assert result.dtype == object if not using_infer_string else "str" | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]]) | ||||
| def test_apply_categorical_with_nan_values(series, by_row): | ||||
|     # GH 20714 bug fixed in: GH 24275 | ||||
|     s = Series(series, dtype="category") | ||||
|     if not by_row: | ||||
|         msg = "'Series' object has no attribute 'split'" | ||||
|         with pytest.raises(AttributeError, match=msg): | ||||
|             s.apply(lambda x: x.split("-")[0], by_row=by_row) | ||||
|         return | ||||
|  | ||||
|     result = s.apply(lambda x: x.split("-")[0], by_row=by_row) | ||||
|     result = result.astype(object) | ||||
|     expected = Series(["1", "1", np.nan], dtype="category") | ||||
|     expected = expected.astype(object) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_apply_empty_integer_series_with_datetime_index(by_row): | ||||
|     # GH 21245 | ||||
|     s = Series([], index=date_range(start="2018-01-01", periods=0), dtype=int) | ||||
|     result = s.apply(lambda x: x, by_row=by_row) | ||||
|     tm.assert_series_equal(result, s) | ||||
|  | ||||
|  | ||||
| def test_apply_dataframe_iloc(): | ||||
|     uintDF = DataFrame(np.uint64([1, 2, 3, 4, 5]), columns=["Numbers"]) | ||||
|     indexDF = DataFrame([2, 3, 2, 1, 2], columns=["Indices"]) | ||||
|  | ||||
|     def retrieve(targetRow, targetDF): | ||||
|         val = targetDF["Numbers"].iloc[targetRow] | ||||
|         return val | ||||
|  | ||||
|     result = indexDF["Indices"].apply(retrieve, args=(uintDF,)) | ||||
|     expected = Series([3, 4, 3, 2, 3], name="Indices", dtype="uint64") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_transform(string_series, by_row): | ||||
|     # transforming functions | ||||
|  | ||||
|     with np.errstate(all="ignore"): | ||||
|         f_sqrt = np.sqrt(string_series) | ||||
|         f_abs = np.abs(string_series) | ||||
|  | ||||
|         # ufunc | ||||
|         result = string_series.apply(np.sqrt, by_row=by_row) | ||||
|         expected = f_sqrt.copy() | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         # list-like | ||||
|         result = string_series.apply([np.sqrt], by_row=by_row) | ||||
|         expected = f_sqrt.to_frame().copy() | ||||
|         expected.columns = ["sqrt"] | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = string_series.apply(["sqrt"], by_row=by_row) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # multiple items in list | ||||
|         # results appear in the order as if each function were applied to the | ||||
|         # series separately and the outputs then concatenated | ||||
|         expected = concat([f_sqrt, f_abs], axis=1) | ||||
|         expected.columns = ["sqrt", "absolute"] | ||||
|         result = string_series.apply([np.sqrt, np.abs], by_row=by_row) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # dict, provide renaming | ||||
|         expected = concat([f_sqrt, f_abs], axis=1) | ||||
|         expected.columns = ["foo", "bar"] | ||||
|         expected = expected.unstack().rename("series") | ||||
|  | ||||
|         result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row) | ||||
|         tm.assert_series_equal(result.reindex_like(expected), expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", series_transform_kernels) | ||||
| def test_transform_partial_failure(op, request): | ||||
|     # GH 35964 | ||||
|     if op in ("ffill", "bfill", "pad", "backfill", "shift"): | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail(reason=f"{op} is successful on any dtype") | ||||
|         ) | ||||
|  | ||||
|     # Using object makes most transform kernels fail | ||||
|     ser = Series(3 * [object]) | ||||
|  | ||||
|     if op in ("fillna", "ngroup"): | ||||
|         error = ValueError | ||||
|         msg = "Transform function failed" | ||||
|     else: | ||||
|         error = TypeError | ||||
|         msg = "|".join( | ||||
|             [ | ||||
|                 "not supported between instances of 'type' and 'type'", | ||||
|                 "unsupported operand type", | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         ser.transform([op, "shift"]) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         ser.transform({"A": op, "B": "shift"}) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         ser.transform({"A": [op], "B": ["shift"]}) | ||||
|  | ||||
|     with pytest.raises(error, match=msg): | ||||
|         ser.transform({"A": [op, "shift"], "B": [op]}) | ||||
|  | ||||
|  | ||||
| def test_transform_partial_failure_valueerror(): | ||||
|     # GH 40211 | ||||
|     def noop(x): | ||||
|         return x | ||||
|  | ||||
|     def raising_op(_): | ||||
|         raise ValueError | ||||
|  | ||||
|     ser = Series(3 * [object]) | ||||
|     msg = "Transform function failed" | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ser.transform([noop, raising_op]) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ser.transform({"A": raising_op, "B": noop}) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ser.transform({"A": [raising_op], "B": [noop]}) | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         ser.transform({"A": [noop, raising_op], "B": [noop]}) | ||||
|  | ||||
|  | ||||
| def test_demo(): | ||||
|     # demonstration tests | ||||
|     s = Series(range(6), dtype="int64", name="series") | ||||
|  | ||||
|     result = s.agg(["min", "max"]) | ||||
|     expected = Series([0, 5], index=["min", "max"], name="series") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = s.agg({"foo": "min"}) | ||||
|     expected = Series([0], index=["foo"], name="series") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", [str, lambda x: str(x)]) | ||||
| def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row): | ||||
|     # test that we are evaluating row-by-row first if by_row="compat" | ||||
|     # else vectorized evaluation | ||||
|     result = string_series.apply(func, by_row=by_row) | ||||
|  | ||||
|     if by_row: | ||||
|         expected = string_series.map(func) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|     else: | ||||
|         assert result == str(string_series) | ||||
|  | ||||
|  | ||||
| def test_agg_evaluate_lambdas(string_series): | ||||
|     # GH53325 | ||||
|     # in the future, the result will be a Series class. | ||||
|  | ||||
|     with tm.assert_produces_warning(FutureWarning): | ||||
|         result = string_series.agg(lambda x: type(x)) | ||||
|     assert isinstance(result, Series) and len(result) == len(string_series) | ||||
|  | ||||
|     with tm.assert_produces_warning(FutureWarning): | ||||
|         result = string_series.agg(type) | ||||
|     assert isinstance(result, Series) and len(result) == len(string_series) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op_name", ["agg", "apply"]) | ||||
| def test_with_nested_series(datetime_series, op_name): | ||||
|     # GH 2316 | ||||
|     # .agg with a reducer and a transform, what to do | ||||
|     msg = "cannot aggregate" | ||||
|     warning = FutureWarning if op_name == "agg" else None | ||||
|     with tm.assert_produces_warning(warning, match=msg): | ||||
|         # GH52123 | ||||
|         result = getattr(datetime_series, op_name)( | ||||
|             lambda x: Series([x, x**2], index=["x", "x^2"]) | ||||
|         ) | ||||
|     expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_replicate_describe(string_series): | ||||
|     # this also tests a result set that is all scalars | ||||
|     expected = string_series.describe() | ||||
|     result = string_series.apply( | ||||
|         { | ||||
|             "count": "count", | ||||
|             "mean": "mean", | ||||
|             "std": "std", | ||||
|             "min": "min", | ||||
|             "25%": lambda x: x.quantile(0.25), | ||||
|             "50%": "median", | ||||
|             "75%": lambda x: x.quantile(0.75), | ||||
|             "max": "max", | ||||
|         }, | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_reduce(string_series): | ||||
|     # reductions with named functions | ||||
|     result = string_series.agg(["sum", "mean"]) | ||||
|     expected = Series( | ||||
|         [string_series.sum(), string_series.mean()], | ||||
|         ["sum", "mean"], | ||||
|         name=string_series.name, | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "how, kwds", | ||||
|     [("agg", {}), ("apply", {"by_row": "compat"}), ("apply", {"by_row": False})], | ||||
| ) | ||||
| def test_non_callable_aggregates(how, kwds): | ||||
|     # test agg using non-callable series attributes | ||||
|     # GH 39116 - expand to apply | ||||
|     s = Series([1, 2, None]) | ||||
|  | ||||
|     # Calling agg with just a string arg is equivalent to accessing that attribute on s | ||||
|     result = getattr(s, how)("size", **kwds) | ||||
|     expected = s.size | ||||
|     assert result == expected | ||||
|  | ||||
|     # test when mixed w/ callable reducers | ||||
|     result = getattr(s, how)(["size", "count", "mean"], **kwds) | ||||
|     expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5}) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = getattr(s, how)({"size": "size", "count": "count", "mean": "mean"}, **kwds) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_series_apply_no_suffix_index(by_row): | ||||
|     # GH36189 | ||||
|     s = Series([4] * 3) | ||||
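|     # repeated lambdas keep the plain "<lambda>" label; no numeric suffixes are appended | ||||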
|     result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()], by_row=by_row) | ||||
|     expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"]) | ||||
|  | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dti,exp", | ||||
|     [ | ||||
|         ( | ||||
|             Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])), | ||||
|             DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"), | ||||
|         ), | ||||
|         ( | ||||
|             Series( | ||||
|                 np.arange(10, dtype=np.float64), | ||||
|                 index=date_range("2020-01-01", periods=10), | ||||
|                 name="ts", | ||||
|             ), | ||||
|             DataFrame(np.repeat([[1, 2]], 10, axis=0), dtype="int64"), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize("aware", [True, False]) | ||||
| def test_apply_series_on_date_time_index_aware_series(dti, exp, aware): | ||||
|     # GH 25959 | ||||
|     # Calling apply on a localized time series should not cause an error | ||||
|     if aware: | ||||
|         index = dti.tz_localize("UTC").index | ||||
|     else: | ||||
|         index = dti.index | ||||
|     result = Series(index).apply(lambda x: Series([1, 2])) | ||||
|     tm.assert_frame_equal(result, exp) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "by_row, expected", [("compat", Series(np.ones(10), dtype="int64")), (False, 1)] | ||||
| ) | ||||
| def test_apply_scalar_on_date_time_index_aware_series(by_row, expected): | ||||
|     # GH 25959 | ||||
|     # Calling apply on a localized time series should not cause an error | ||||
|     series = Series( | ||||
|         np.arange(10, dtype=np.float64), | ||||
|         index=date_range("2020-01-01", periods=10, tz="UTC"), | ||||
|     ) | ||||
|     result = Series(series.index).apply(lambda x: 1, by_row=by_row) | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_apply_to_timedelta(by_row): | ||||
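|     # applying pd.to_timedelta element-wise should match the vectorized pd.to_timedelta conversion | ||||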
|     list_of_valid_strings = ["00:00:01", "00:00:02"] | ||||
|     a = pd.to_timedelta(list_of_valid_strings) | ||||
|     b = Series(list_of_valid_strings).apply(pd.to_timedelta, by_row=by_row) | ||||
|     tm.assert_series_equal(Series(a), b) | ||||
|  | ||||
|     list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT] | ||||
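|     # missing values (np.nan, pd.NaT) become NaT in both the vectorized and element-wise results | ||||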
|  | ||||
|     a = pd.to_timedelta(list_of_strings) | ||||
|     ser = Series(list_of_strings) | ||||
|     b = ser.apply(pd.to_timedelta, by_row=by_row) | ||||
|     tm.assert_series_equal(Series(a), b) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops, names", | ||||
|     [ | ||||
|         ([np.sum], ["sum"]), | ||||
|         ([np.sum, np.mean], ["sum", "mean"]), | ||||
|         (np.array([np.sum]), ["sum"]), | ||||
|         (np.array([np.sum, np.mean]), ["sum", "mean"]), | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize( | ||||
|     "how, kwargs", | ||||
|     [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]], | ||||
| ) | ||||
| def test_apply_listlike_reducer(string_series, ops, names, how, kwargs): | ||||
|     # GH 39140 | ||||
|     expected = Series({name: op(string_series) for name, op in zip(names, ops)}) | ||||
|     expected.name = "series" | ||||
|     warn = FutureWarning if how == "agg" else None | ||||
|     msg = f"using Series.[{'|'.join(names)}]" | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         result = getattr(string_series, how)(ops, **kwargs) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops", | ||||
|     [ | ||||
|         {"A": np.sum}, | ||||
|         {"A": np.sum, "B": np.mean}, | ||||
|         Series({"A": np.sum}), | ||||
|         Series({"A": np.sum, "B": np.mean}), | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize( | ||||
|     "how, kwargs", | ||||
|     [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]], | ||||
| ) | ||||
| def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row): | ||||
|     # GH 39140 | ||||
|     expected = Series({name: op(string_series) for name, op in ops.items()}) | ||||
|     expected.name = string_series.name | ||||
|     warn = FutureWarning if how == "agg" else None | ||||
|     msg = "using Series.[sum|mean]" | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         result = getattr(string_series, how)(ops, **kwargs) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops, names", | ||||
|     [ | ||||
|         ([np.sqrt], ["sqrt"]), | ||||
|         ([np.abs, np.sqrt], ["absolute", "sqrt"]), | ||||
|         (np.array([np.sqrt]), ["sqrt"]), | ||||
|         (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), | ||||
|     ], | ||||
| ) | ||||
| def test_apply_listlike_transformer(string_series, ops, names, by_row): | ||||
|     # GH 39140 | ||||
|     with np.errstate(all="ignore"): | ||||
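|         # string_series may contain negative values; np.errstate silences the invalid-value errors np.sqrt would flag | ||||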
|         expected = concat([op(string_series) for op in ops], axis=1) | ||||
|         expected.columns = names | ||||
|         result = string_series.apply(ops, by_row=by_row) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops, expected", | ||||
|     [ | ||||
|         ([lambda x: x], DataFrame({"<lambda>": [1, 2, 3]})), | ||||
|         ([lambda x: x.sum()], Series([6], index=["<lambda>"])), | ||||
|     ], | ||||
| ) | ||||
| def test_apply_listlike_lambda(ops, expected, by_row): | ||||
|     # GH53400 | ||||
|     ser = Series([1, 2, 3]) | ||||
|     result = ser.apply(ops, by_row=by_row) | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops", | ||||
|     [ | ||||
|         {"A": np.sqrt}, | ||||
|         {"A": np.sqrt, "B": np.exp}, | ||||
|         Series({"A": np.sqrt}), | ||||
|         Series({"A": np.sqrt, "B": np.exp}), | ||||
|     ], | ||||
| ) | ||||
| def test_apply_dictlike_transformer(string_series, ops, by_row): | ||||
|     # GH 39140 | ||||
|     with np.errstate(all="ignore"): | ||||
|         expected = concat({name: op(string_series) for name, op in ops.items()}) | ||||
|         expected.name = string_series.name | ||||
|         result = string_series.apply(ops, by_row=by_row) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops, expected", | ||||
|     [ | ||||
|         ( | ||||
|             {"a": lambda x: x}, | ||||
|             Series([1, 2, 3], index=MultiIndex.from_arrays([["a"] * 3, range(3)])), | ||||
|         ), | ||||
|         ({"a": lambda x: x.sum()}, Series([6], index=["a"])), | ||||
|     ], | ||||
| ) | ||||
| def test_apply_dictlike_lambda(ops, by_row, expected): | ||||
|     # GH53400 | ||||
|     ser = Series([1, 2, 3]) | ||||
|     result = ser.apply(ops, by_row=by_row) | ||||
|     tm.assert_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_apply_retains_column_name(by_row): | ||||
|     # GH 16380 | ||||
|     df = DataFrame({"x": range(3)}, Index(range(3), name="x")) | ||||
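|     # the index name "y" of the nested Series becomes the name of the resulting columns | ||||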
|     result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y"))) | ||||
|     expected = DataFrame( | ||||
|         [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]], | ||||
|         columns=Index(range(3), name="y"), | ||||
|         index=Index(range(3), name="x"), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_apply_type(): | ||||
|     # GH 46719 | ||||
|     s = Series([3, "string", float], index=["a", "b", "c"]) | ||||
|     result = s.apply(type) | ||||
|     expected = Series([int, str, type], index=["a", "b", "c"]) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_series_apply_unpack_nested_data(): | ||||
|     # GH#55189 | ||||
|     ser = Series([[1, 2, 3], [4, 5, 6, 7]]) | ||||
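|     # shorter nested lists are padded with NaN when unpacked into DataFrame columns | ||||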
|     result = ser.apply(lambda x: Series(x)) | ||||
|     expected = DataFrame({0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,39 @@ | ||||
| import pandas as pd | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| def test_relabel_no_duplicated_method(): | ||||
|     # test relabeling when no method name is duplicated in agg | ||||
|     df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]}) | ||||
|  | ||||
|     result = df["A"].agg(foo="sum") | ||||
|     expected = df["A"].agg({"foo": "sum"}) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     result = df["B"].agg(foo="min", bar="max") | ||||
|     expected = df["B"].agg({"foo": "min", "bar": "max"}) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     msg = "using Series.[sum|min|max]" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = df["B"].agg(foo=sum, bar=min, cat="max") | ||||
|     msg = "using Series.[sum|min|max]" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"}) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_relabel_duplicated_method(): | ||||
|     # test that with relabeling the same method can be used more than once, | ||||
|     # as long as each use is assigned a different new name | ||||
|     df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]}) | ||||
|  | ||||
|     result = df["A"].agg(foo="sum", bar="sum") | ||||
|     expected = pd.Series([6, 6], index=["foo", "bar"], name="A") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     msg = "using Series.min" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = df["B"].agg(foo=min, bar="min") | ||||
|     expected = pd.Series([1, 1], index=["foo", "bar"], name="B") | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,84 @@ | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     MultiIndex, | ||||
|     Series, | ||||
|     concat, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "args, kwargs, increment", | ||||
|     [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], | ||||
| ) | ||||
| def test_agg_args(args, kwargs, increment): | ||||
|     # GH 43357 | ||||
|     def f(x, a=0, b=0, c=0): | ||||
|         return x + a + 10 * b + 100 * c | ||||
|  | ||||
|     s = Series([1, 2]) | ||||
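|     # the leading 0 is the axis argument to transform; the remaining args/kwargs are forwarded to f | ||||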
|     result = s.transform(f, 0, *args, **kwargs) | ||||
|     expected = s + increment | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "ops, names", | ||||
|     [ | ||||
|         ([np.sqrt], ["sqrt"]), | ||||
|         ([np.abs, np.sqrt], ["absolute", "sqrt"]), | ||||
|         (np.array([np.sqrt]), ["sqrt"]), | ||||
|         (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), | ||||
|     ], | ||||
| ) | ||||
| def test_transform_listlike(string_series, ops, names): | ||||
|     # GH 35964 | ||||
|     with np.errstate(all="ignore"): | ||||
|         expected = concat([op(string_series) for op in ops], axis=1) | ||||
|         expected.columns = names | ||||
|         result = string_series.transform(ops) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_transform_listlike_func_with_args(): | ||||
|     # GH 50624 | ||||
|  | ||||
|     s = Series([1, 2, 3]) | ||||
|  | ||||
|     def foo1(x, a=1, c=0): | ||||
|         return x + a + c | ||||
|  | ||||
|     def foo2(x, b=2, c=0): | ||||
|         return x + b + c | ||||
|  | ||||
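|     # b is only accepted by foo2, so passing b=3 with the list of funcs raises for foo1 | ||||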
|     msg = r"foo1\(\) got an unexpected keyword argument 'b'" | ||||
|     with pytest.raises(TypeError, match=msg): | ||||
|         s.transform([foo1, foo2], 0, 3, b=3, c=4) | ||||
|  | ||||
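|     # without the conflicting keyword, 3 binds to a for foo1 and to b for foo2, and c=4 reaches both | ||||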
|     result = s.transform([foo1, foo2], 0, 3, c=4) | ||||
|     expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("box", [dict, Series]) | ||||
| def test_transform_dictlike(string_series, box): | ||||
|     # GH 35964 | ||||
|     with np.errstate(all="ignore"): | ||||
|         expected = concat([np.sqrt(string_series), np.abs(string_series)], axis=1) | ||||
|     expected.columns = ["foo", "bar"] | ||||
|     result = string_series.transform(box({"foo": np.sqrt, "bar": np.abs})) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_transform_dictlike_mixed(): | ||||
|     # GH 40018 - mix of lists and non-lists in values of a dictionary | ||||
|     df = Series([1, 4]) | ||||
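|     # the resulting columns form a MultiIndex of (key, function name) pairs | ||||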
|     result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) | ||||
|     expected = DataFrame( | ||||
|         [[1.0, 1, 1.0], [2.0, 4, 2.0]], | ||||
|         columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
326  lib/python3.11/site-packages/pandas/tests/apply/test_str.py  Normal file
							| @ -0,0 +1,326 @@ | ||||
| from itertools import chain | ||||
| import operator | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.core.dtypes.common import is_number | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Series, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.tests.apply.common import ( | ||||
|     frame_transform_kernels, | ||||
|     series_transform_kernels, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"]) | ||||
| @pytest.mark.parametrize( | ||||
|     "args,kwds", | ||||
|     [ | ||||
|         pytest.param([], {}, id="no_args_or_kwds"), | ||||
|         pytest.param([1], {}, id="axis_from_args"), | ||||
|         pytest.param([], {"axis": 1}, id="axis_from_kwds"), | ||||
|         pytest.param([], {"numeric_only": True}, id="optional_kwds"), | ||||
|         pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"), | ||||
|     ], | ||||
| ) | ||||
| @pytest.mark.parametrize("how", ["agg", "apply"]) | ||||
| def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): | ||||
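|     # a string func should dispatch to the DataFrame method of the same name | ||||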
|     if len(args) > 1 and how == "agg": | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail( | ||||
|                 raises=TypeError, | ||||
|                 reason="agg/apply signature mismatch - agg passes 2nd " | ||||
|                 "argument to func", | ||||
|             ) | ||||
|         ) | ||||
|     result = getattr(float_frame, how)(func, *args, **kwds) | ||||
|     expected = getattr(float_frame, func)(*args, **kwds) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("arg", ["sum", "mean", "min", "max", "std"]) | ||||
| def test_with_string_args(datetime_series, arg): | ||||
|     result = datetime_series.apply(arg) | ||||
|     expected = getattr(datetime_series, arg)() | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", ["mean", "median", "std", "var"]) | ||||
| @pytest.mark.parametrize("how", ["agg", "apply"]) | ||||
| def test_apply_np_reducer(op, how): | ||||
|     # GH 39116 | ||||
|     float_frame = DataFrame({"a": [1, 2], "b": [3, 4]}) | ||||
|     result = getattr(float_frame, how)(op) | ||||
|     # pandas ddof defaults to 1, numpy to 0 | ||||
|     kwargs = {"ddof": 1} if op in ("std", "var") else {} | ||||
|     expected = Series( | ||||
|         getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns | ||||
|     ) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"] | ||||
| ) | ||||
| @pytest.mark.parametrize("how", ["transform", "apply"]) | ||||
| def test_apply_np_transformer(float_frame, op, how): | ||||
|     # GH 39116 | ||||
|  | ||||
|     # float_frame will _usually_ have negative values, which will | ||||
|     #  trigger the warning here, but let's put one in just to be sure | ||||
|     float_frame.iloc[0, 0] = -1.0 | ||||
|     warn = None | ||||
|     if op in ["log", "sqrt"]: | ||||
|         warn = RuntimeWarning | ||||
|  | ||||
|     with tm.assert_produces_warning(warn, check_stacklevel=False): | ||||
|         # float_frame fixture is defined in conftest.py, so we don't check the | ||||
|         # stacklevel as otherwise the test would fail. | ||||
|         result = getattr(float_frame, how)(op) | ||||
|         expected = getattr(np, op)(float_frame) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "series, func, expected", | ||||
|     chain( | ||||
|         tm.get_cython_table_params( | ||||
|             Series(dtype=np.float64), | ||||
|             [ | ||||
|                 ("sum", 0), | ||||
|                 ("max", np.nan), | ||||
|                 ("min", np.nan), | ||||
|                 ("all", True), | ||||
|                 ("any", False), | ||||
|                 ("mean", np.nan), | ||||
|                 ("prod", 1), | ||||
|                 ("std", np.nan), | ||||
|                 ("var", np.nan), | ||||
|                 ("median", np.nan), | ||||
|             ], | ||||
|         ), | ||||
|         tm.get_cython_table_params( | ||||
|             Series([np.nan, 1, 2, 3]), | ||||
|             [ | ||||
|                 ("sum", 6), | ||||
|                 ("max", 3), | ||||
|                 ("min", 1), | ||||
|                 ("all", True), | ||||
|                 ("any", True), | ||||
|                 ("mean", 2), | ||||
|                 ("prod", 6), | ||||
|                 ("std", 1), | ||||
|                 ("var", 1), | ||||
|                 ("median", 2), | ||||
|             ], | ||||
|         ), | ||||
|         tm.get_cython_table_params( | ||||
|             Series("a b c".split()), | ||||
|             [ | ||||
|                 ("sum", "abc"), | ||||
|                 ("max", "c"), | ||||
|                 ("min", "a"), | ||||
|                 ("all", True), | ||||
|                 ("any", True), | ||||
|             ], | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_agg_cython_table_series(series, func, expected): | ||||
|     # GH21224 | ||||
|     # test reducing functions in | ||||
|     # pandas.core.base.SelectionMixin._cython_table | ||||
|     warn = None if isinstance(func, str) else FutureWarning | ||||
|     with tm.assert_produces_warning(warn, match="is currently using Series.*"): | ||||
|         result = series.agg(func) | ||||
|     if is_number(expected): | ||||
|         assert np.isclose(result, expected, equal_nan=True) | ||||
|     else: | ||||
|         assert result == expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "series, func, expected", | ||||
|     chain( | ||||
|         tm.get_cython_table_params( | ||||
|             Series(dtype=np.float64), | ||||
|             [ | ||||
|                 ("cumprod", Series([], dtype=np.float64)), | ||||
|                 ("cumsum", Series([], dtype=np.float64)), | ||||
|             ], | ||||
|         ), | ||||
|         tm.get_cython_table_params( | ||||
|             Series([np.nan, 1, 2, 3]), | ||||
|             [ | ||||
|                 ("cumprod", Series([np.nan, 1, 2, 6])), | ||||
|                 ("cumsum", Series([np.nan, 1, 3, 6])), | ||||
|             ], | ||||
|         ), | ||||
|         tm.get_cython_table_params( | ||||
|             Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))] | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_agg_cython_table_transform_series(series, func, expected): | ||||
|     # GH21224 | ||||
|     # test transforming functions in | ||||
|     # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) | ||||
|     warn = None if isinstance(func, str) else FutureWarning | ||||
|     with tm.assert_produces_warning(warn, match="is currently using Series.*"): | ||||
|         result = series.agg(func) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "df, func, expected", | ||||
|     chain( | ||||
|         tm.get_cython_table_params( | ||||
|             DataFrame(), | ||||
|             [ | ||||
|                 ("sum", Series(dtype="float64")), | ||||
|                 ("max", Series(dtype="float64")), | ||||
|                 ("min", Series(dtype="float64")), | ||||
|                 ("all", Series(dtype=bool)), | ||||
|                 ("any", Series(dtype=bool)), | ||||
|                 ("mean", Series(dtype="float64")), | ||||
|                 ("prod", Series(dtype="float64")), | ||||
|                 ("std", Series(dtype="float64")), | ||||
|                 ("var", Series(dtype="float64")), | ||||
|                 ("median", Series(dtype="float64")), | ||||
|             ], | ||||
|         ), | ||||
|         tm.get_cython_table_params( | ||||
|             DataFrame([[np.nan, 1], [1, 2]]), | ||||
|             [ | ||||
|                 ("sum", Series([1.0, 3])), | ||||
|                 ("max", Series([1.0, 2])), | ||||
|                 ("min", Series([1.0, 1])), | ||||
|                 ("all", Series([True, True])), | ||||
|                 ("any", Series([True, True])), | ||||
|                 ("mean", Series([1, 1.5])), | ||||
|                 ("prod", Series([1.0, 2])), | ||||
|                 ("std", Series([np.nan, 0.707107])), | ||||
|                 ("var", Series([np.nan, 0.5])), | ||||
|                 ("median", Series([1, 1.5])), | ||||
|             ], | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_agg_cython_table_frame(df, func, expected, axis): | ||||
|     # GH 21224 | ||||
|     # test reducing functions in | ||||
|     # pandas.core.base.SelectionMixin._cython_table | ||||
|     warn = None if isinstance(func, str) else FutureWarning | ||||
|     with tm.assert_produces_warning(warn, match="is currently using DataFrame.*"): | ||||
|         # GH#53425 | ||||
|         result = df.agg(func, axis=axis) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "df, func, expected", | ||||
|     chain( | ||||
|         tm.get_cython_table_params( | ||||
|             DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())] | ||||
|         ), | ||||
|         tm.get_cython_table_params( | ||||
|             DataFrame([[np.nan, 1], [1, 2]]), | ||||
|             [ | ||||
|                 ("cumprod", DataFrame([[np.nan, 1], [1, 2]])), | ||||
|                 ("cumsum", DataFrame([[np.nan, 1], [1, 3]])), | ||||
|             ], | ||||
|         ), | ||||
|     ), | ||||
| ) | ||||
| def test_agg_cython_table_transform_frame(df, func, expected, axis): | ||||
|     # GH 21224 | ||||
|     # test transforming functions in | ||||
|     # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) | ||||
|     if axis in ("columns", 1): | ||||
|         # operating blockwise doesn't let us preserve dtypes | ||||
|         expected = expected.astype("float64") | ||||
|  | ||||
|     warn = None if isinstance(func, str) else FutureWarning | ||||
|     with tm.assert_produces_warning(warn, match="is currently using DataFrame.*"): | ||||
|         # GH#53425 | ||||
|         result = df.agg(func, axis=axis) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", series_transform_kernels) | ||||
| def test_transform_groupby_kernel_series(request, string_series, op): | ||||
|     # GH 35964 | ||||
|     if op == "ngroup": | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") | ||||
|         ) | ||||
|     args = [0.0] if op == "fillna" else [] | ||||
|     ones = np.ones(string_series.shape[0]) | ||||
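|     # grouping by a constant key puts all values in one group, so the groupby transform should match Series.transform | ||||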
|  | ||||
|     warn = FutureWarning if op == "fillna" else None | ||||
|     msg = "SeriesGroupBy.fillna is deprecated" | ||||
|     with tm.assert_produces_warning(warn, match=msg): | ||||
|         expected = string_series.groupby(ones).transform(op, *args) | ||||
|     result = string_series.transform(op, 0, *args) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("op", frame_transform_kernels) | ||||
| def test_transform_groupby_kernel_frame(request, axis, float_frame, op): | ||||
|     if op == "ngroup": | ||||
|         request.applymarker( | ||||
|             pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") | ||||
|         ) | ||||
|  | ||||
|     # GH 35964 | ||||
|  | ||||
|     args = [0.0] if op == "fillna" else [] | ||||
|     if axis in (0, "index"): | ||||
|         ones = np.ones(float_frame.shape[0]) | ||||
|         msg = "The 'axis' keyword in DataFrame.groupby is deprecated" | ||||
|     else: | ||||
|         ones = np.ones(float_frame.shape[1]) | ||||
|         msg = "DataFrame.groupby with axis=1 is deprecated" | ||||
|  | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         gb = float_frame.groupby(ones, axis=axis) | ||||
|  | ||||
|     warn = FutureWarning if op == "fillna" else None | ||||
|     op_msg = "DataFrameGroupBy.fillna is deprecated" | ||||
|     with tm.assert_produces_warning(warn, match=op_msg): | ||||
|         expected = gb.transform(op, *args) | ||||
|  | ||||
|     result = float_frame.transform(op, axis, *args) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # same thing, but ensuring we have multiple blocks | ||||
|     assert "E" not in float_frame.columns | ||||
|     float_frame["E"] = float_frame["A"].copy() | ||||
|     assert len(float_frame._mgr.arrays) > 1 | ||||
|  | ||||
|     if axis in (0, "index"): | ||||
|         ones = np.ones(float_frame.shape[0]) | ||||
|     else: | ||||
|         ones = np.ones(float_frame.shape[1]) | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         gb2 = float_frame.groupby(ones, axis=axis) | ||||
|     warn = FutureWarning if op == "fillna" else None | ||||
|     op_msg = "DataFrameGroupBy.fillna is deprecated" | ||||
|     with tm.assert_produces_warning(warn, match=op_msg): | ||||
|         expected2 = gb2.transform(op, *args) | ||||
|     result2 = float_frame.transform(op, axis, *args) | ||||
|     tm.assert_frame_equal(result2, expected2) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) | ||||
| def test_transform_method_name(method): | ||||
|     # GH 19760 | ||||
|     df = DataFrame({"A": [-1, 2]}) | ||||
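|     # a string method name should dispatch to the DataFrame method of the same name | ||||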
|     result = df.transform(method) | ||||
|     expected = operator.methodcaller(method)(df) | ||||
|     tm.assert_frame_equal(result, expected) | ||||