done
This commit is contained in:
		
							
								
								
									
										3911
									
								
								lib/python3.11/site-packages/pandas/tests/tools/test_to_datetime.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3911
									
								
								lib/python3.11/site-packages/pandas/tests/tools/test_to_datetime.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -0,0 +1,978 @@ | ||||
| import decimal | ||||
|  | ||||
| import numpy as np | ||||
| from numpy import iinfo | ||||
| import pytest | ||||
|  | ||||
| import pandas.util._test_decorators as td | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     ArrowDtype, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     option_context, | ||||
|     to_numeric, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
|  | ||||
@pytest.fixture(params=[None, "ignore", "raise", "coerce"])
def errors(request):
    """Supply each ``errors`` option exercised by the tests (``None`` included)."""
    option = request.param
    return option
|  | ||||
|  | ||||
@pytest.fixture(params=[True, False])
def signed(request):
    """Supply a boolean flag; tests negate their value when it is true."""
    flag = request.param
    return flag
|  | ||||
|  | ||||
@pytest.fixture(params=[lambda x: x, str], ids=["identity", "str"])
def transform(request):
    """Supply a callable that either leaves a value untouched or stringifies it."""
    func = request.param
    return func
|  | ||||
|  | ||||
@pytest.fixture(params=[47393996303418497800, 100000000000000000000])
def large_val(request):
    """Supply an integer larger than ``2**64 - 1`` (too big for any 64-bit int)."""
    value = request.param
    return value
|  | ||||
|  | ||||
@pytest.fixture(params=[True, False])
def multiple_elts(request):
    """Supply a boolean flag; tests add a second element when it is true."""
    flag = request.param
    return flag
|  | ||||
|  | ||||
@pytest.fixture(
    params=[
        (lambda x: Index(x, name="idx"), tm.assert_index_equal),
        (lambda x: Series(x, name="ser"), tm.assert_series_equal),
        (lambda x: np.array(Index(x).values), tm.assert_numpy_array_equal),
    ]
)
def transform_assert_equal(request):
    """Supply a ``(constructor, comparator)`` pair per container kind.

    The constructor wraps raw data as a named Index, a named Series or a
    plain ndarray; the comparator is the ``tm`` assertion for that container.
    """
    pair = request.param
    return pair
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "input_kwargs,result_kwargs",
    [
        ({}, {"dtype": np.int64}),
        ({"errors": "coerce", "downcast": "integer"}, {"dtype": np.int8}),
    ],
)
def test_empty(input_kwargs, result_kwargs):
    """An empty object Series converts to an empty Series of the expected dtype."""
    # see gh-16302
    empty = Series([], dtype=object)
    converted = to_numeric(empty, **input_kwargs)

    tm.assert_series_equal(converted, Series([], **result_kwargs))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
@pytest.mark.parametrize("last_val", ["7", 7])
def test_series(last_val, infer_string):
    """A Series of numeric strings (optionally ending in an int) becomes floats."""
    raw = ["1", "-3.14", last_val]

    # Both construction and conversion run under the string-inference option.
    with option_context("future.infer_string", infer_string):
        converted = to_numeric(Series(raw))

    tm.assert_series_equal(converted, Series([1, -3.14, 7]))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data",
    [
        [1, 3, 4, 5],
        [1.0, 3.0, 4.0, 5.0],
        # Bool is regarded as numeric.
        [True, False, True, True],
    ],
)
def test_series_numeric(data):
    """An already-numeric Series passes through ``to_numeric`` unchanged."""
    original = Series(data, index=list("ABCD"), name="EFG")

    tm.assert_series_equal(to_numeric(original), original)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data,msg",
    [
        ([1, -3.14, "apple"], 'Unable to parse string "apple" at position 2'),
        (
            ["orange", 1, -3.14, "apple"],
            'Unable to parse string "orange" at position 0',
        ),
    ],
)
def test_error(data, msg):
    """With ``errors="raise"`` the first unparseable string is reported."""
    unparseable = Series(data)

    with pytest.raises(ValueError, match=msg):
        to_numeric(unparseable, errors="raise")
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])]
)
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_ignore_error(errors, exp_data):
    """``ignore`` returns the input as-is; ``coerce`` replaces bad values with NaN."""
    mixed = Series([1, -3.14, "apple"])
    converted = to_numeric(mixed, errors=errors)

    tm.assert_series_equal(converted, Series(exp_data))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "errors,exp",
    [
        ("raise", 'Unable to parse string "apple" at position 2'),
        ("ignore", [True, False, "apple"]),
        # Coerces to float.
        ("coerce", [1.0, 0.0, np.nan]),
    ],
)
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_bool_handling(errors, exp):
    """Booleans mixed with a bad string follow the selected ``errors`` policy.

    A string ``exp`` is the expected error message; a list is the expected data.
    """
    mixed = Series([True, False, "apple"])

    if not isinstance(exp, str):
        converted = to_numeric(mixed, errors=errors)
        tm.assert_series_equal(converted, Series(exp))
        return

    with pytest.raises(ValueError, match=exp):
        to_numeric(mixed, errors=errors)
|  | ||||
|  | ||||
def test_list():
    """A plain list of numeric strings is converted to a float ndarray."""
    raw = ["1", "-3.14", "7"]
    converted = to_numeric(raw)

    tm.assert_numpy_array_equal(converted, np.array([1, -3.14, 7]))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data,arr_kwargs",
    [
        ([1, 3, 4, 5], {"dtype": np.int64}),
        ([1.0, 3.0, 4.0, 5.0], {}),
        # Boolean is regarded as numeric.
        ([True, False, True, True], {}),
    ],
)
def test_list_numeric(data, arr_kwargs):
    """A list that is already numeric converts to the equivalent ndarray."""
    converted = to_numeric(data)

    tm.assert_numpy_array_equal(converted, np.array(data, **arr_kwargs))
|  | ||||
|  | ||||
@pytest.mark.parametrize("kwargs", [{"dtype": "O"}, {}])
def test_numeric(kwargs):
    """Numeric data gives the same result whether or not it is stored as object."""
    values = [1, -3.14, 7]

    converted = to_numeric(Series(values, **kwargs))

    tm.assert_series_equal(converted, Series(values))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "columns",
    [
        # One column.
        "a",
        # Multiple columns.
        ["a", "b"],
    ],
)
def test_numeric_df_columns(columns):
    """Applying ``to_numeric`` to selected columns converts Decimal and str values."""
    # see gh-14827
    mixed_col = [1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), "0.1"]
    frame = DataFrame({"a": mixed_col, "b": [1.0, 2.0, 3.0, 4.0]})

    # Convert on a copy so the source frame is left untouched.
    converted = frame.copy()
    converted[columns] = converted[columns].apply(to_numeric)

    expected = DataFrame({"a": [1.2, 3.14, np.inf, 0.1], "b": [1.0, 2.0, 3.0, 4.0]})
    tm.assert_frame_equal(converted, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data,exp_data",
    [
        (
            [[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1],
            [[3.14, 1.0], 1.6, 0.1],
        ),
        ([np.array([decimal.Decimal(3.14), 1.0]), 0.1], [[3.14, 1.0], 0.1]),
    ],
)
def test_numeric_embedded_arr_likes(data, exp_data):
    """Element-wise ``to_numeric`` also converts lists/arrays nested in a column."""
    # Test to_numeric with embedded lists and arrays
    frame = DataFrame({"a": data})
    frame["a"] = frame["a"].apply(to_numeric)

    tm.assert_frame_equal(frame, DataFrame({"a": exp_data}))
|  | ||||
|  | ||||
def test_all_nan():
    """Coercing a Series of unparseable strings yields all NaN."""
    letters = Series(["a", "b", "c"])
    converted = to_numeric(letters, errors="coerce")

    tm.assert_series_equal(converted, Series([np.nan] * 3))
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_type_check(errors):
    """Passing a DataFrame raises ``TypeError`` for every ``errors`` setting."""
    # see gh-11776
    frame = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
    # ``None`` means "do not pass the keyword at all".
    call_kwargs = {} if errors is None else {"errors": errors}

    with pytest.raises(TypeError, match="1-d array"):
        to_numeric(frame, **call_kwargs)
|  | ||||
|  | ||||
@pytest.mark.parametrize("val", [1, 1.1, 20001])
def test_scalar(val, signed, transform):
    """A scalar (raw or stringified, optionally negated) converts to its value."""
    if signed:
        val = -val

    converted = to_numeric(transform(val))
    assert converted == float(val)
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_really_large_scalar(large_val, signed, transform, errors):
    """A beyond-64-bit scalar raises when given as a string in strict mode.

    Otherwise the value comes back unchanged, except that a string is turned
    into a float when coercing.
    """
    # see gh-24910
    call_kwargs = {} if errors is None else {"errors": errors}
    val = transform(-large_val if signed else large_val)
    val_is_string = isinstance(val, str)

    if val_is_string and errors in (None, "raise"):
        msg = "Integer out of range. at position 0"
        with pytest.raises(ValueError, match=msg):
            to_numeric(val, **call_kwargs)
        return

    expected = float(val) if (errors == "coerce" and val_is_string) else val
    tm.assert_almost_equal(to_numeric(val, **call_kwargs), expected)
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors):
    """A beyond-64-bit value in a list, optionally followed by a bad string.

    Strict modes raise when the value is a string or a bad string follows it;
    other cases compare against an array whose dtype depends on the mode.
    """
    # see gh-24910
    call_kwargs = {} if errors is None else {"errors": errors}
    val = transform(-large_val if signed else large_val)

    extra_elt = "string"
    arr = [val]
    if multiple_elts:
        arr.append(extra_elt)

    val_is_string = isinstance(val, str)
    coercing = errors == "coerce"
    strict = errors in (None, "raise")

    if strict and (val_is_string or multiple_elts):
        # The oversized string at position 0 is reported before the bad
        # string at position 1.
        msg = (
            "Integer out of range. at position 0"
            if val_is_string
            else 'Unable to parse string "string" at position 1'
        )
        with pytest.raises(ValueError, match=msg):
            to_numeric(arr, **call_kwargs)
        return

    result = to_numeric(arr, **call_kwargs)
    exp_val = float(val) if (coercing and val_is_string) else val

    if not multiple_elts:
        expected = [exp_val]
        exp_dtype = float if isinstance(exp_val, (int, float)) else object
    elif coercing:
        expected = [exp_val, np.nan]
        exp_dtype = float
    else:
        expected = [exp_val, extra_elt]
        exp_dtype = object

    tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
|  | ||||
|  | ||||
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors):
    """An oversized integer string is rejected even after an oversized int."""
    # see gh-24910
    #
    # Even if we discover that we have to hold float, does not mean
    # we should be lenient on subsequent elements that fail to be integer.
    call_kwargs = {} if errors is None else {"errors": errors}
    text_val = str(-large_val if signed else large_val)
    arr = [large_val, text_val] if multiple_elts else [text_val]

    if errors in (None, "raise"):
        # The string sits at index 1 when the raw integer is placed before it.
        msg = f"Integer out of range. at position {int(multiple_elts)}"
        with pytest.raises(ValueError, match=msg):
            to_numeric(arr, **call_kwargs)
        return

    result = to_numeric(arr, **call_kwargs)

    if errors == "coerce":
        expected = np.array([float(item) for item in arr], dtype=float)
    else:
        expected = np.array(arr, dtype=object)

    tm.assert_almost_equal(result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "errors,checker",
    [
        ("raise", 'Unable to parse string "fail" at position 0'),
        ("ignore", lambda x: x == "fail"),
        ("coerce", lambda x: np.isnan(x)),
    ],
)
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_scalar_fail(errors, checker):
    """An unparseable scalar follows the ``errors`` policy.

    A string ``checker`` is the expected error message; otherwise it is a
    predicate applied to the returned value.
    """
    scalar = "fail"

    if not isinstance(checker, str):
        assert checker(to_numeric(scalar, errors=errors))
        return

    with pytest.raises(ValueError, match=checker):
        to_numeric(scalar, errors=errors)
|  | ||||
|  | ||||
@pytest.mark.parametrize("data", [[1, 2, 3], [1.0, np.nan, 3, np.nan]])
def test_numeric_dtypes(data, transform_assert_equal):
    """Numeric Index/Series/ndarray inputs come back unchanged."""
    wrap, check_equal = transform_assert_equal
    container = wrap(data)

    check_equal(to_numeric(container), container)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data,exp",
    [
        (["1", "2", "3"], np.array([1, 2, 3], dtype="int64")),
        (["1.5", "2.7", "3.4"], np.array([1.5, 2.7, 3.4])),
    ],
)
def test_str(data, exp, transform_assert_equal):
    """Numeric strings convert to int64 or float inside every container kind."""
    wrap, check_equal = transform_assert_equal
    converted = to_numeric(wrap(data))

    check_equal(converted, wrap(exp))
|  | ||||
|  | ||||
def test_datetime_like(tz_naive_fixture, transform_assert_equal):
    """Datetime data converts to the index's ``asi8`` integer values."""
    wrap, check_equal = transform_assert_equal
    dates = pd.date_range("20130101", periods=3, tz=tz_naive_fixture)

    converted = to_numeric(wrap(dates))

    check_equal(converted, wrap(dates.asi8))
|  | ||||
|  | ||||
def test_timedelta(transform_assert_equal):
    """Timedelta data converts to the index's ``asi8`` integer values."""
    wrap, check_equal = transform_assert_equal
    deltas = pd.timedelta_range("1 days", periods=3, freq="D")

    converted = to_numeric(wrap(deltas))

    check_equal(converted, wrap(deltas.asi8))
|  | ||||
|  | ||||
def test_period(request, transform_assert_equal):
    """Period data converts to ``asi8`` integers; non-Index inputs are xfailed."""
    wrap, check_equal = transform_assert_equal

    periods = pd.period_range("2011-01", periods=3, freq="M", name="")
    inp = wrap(periods)

    if not isinstance(inp, Index):
        # Only the Index container is expected to pass for now.
        xfail_mark = pytest.mark.xfail(
            reason="Missing PeriodDtype support in to_numeric"
        )
        request.applymarker(xfail_mark)

    converted = to_numeric(inp)
    check_equal(converted, wrap(periods.asi8))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "errors,expected",
    [
        ("raise", "Invalid object type at position 0"),
        ("ignore", Series([[10.0, 2], 1.0, "apple"])),
        ("coerce", Series([np.nan, 1.0, np.nan])),
    ],
)
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_non_hashable(errors, expected):
    """A list element inside a Series is handled per the ``errors`` policy.

    A string ``expected`` is the ``TypeError`` message; a Series is the result.
    """
    # see gh-13324
    with_list = Series([[10.0, 2], 1.0, "apple"])

    if not isinstance(expected, str):
        converted = to_numeric(with_list, errors=errors)
        tm.assert_series_equal(converted, expected)
        return

    with pytest.raises(TypeError, match=expected):
        to_numeric(with_list, errors=errors)
|  | ||||
|  | ||||
def test_downcast_invalid_cast():
    """An unrecognised ``downcast`` value raises ``ValueError``."""
    # see gh-13352
    values = ["1", 2, 3]
    bad_downcast = "unsigned-integer"

    with pytest.raises(ValueError, match="invalid downcasting method provided"):
        to_numeric(values, downcast=bad_downcast)
|  | ||||
|  | ||||
def test_errors_invalid_value():
    """An unrecognised ``errors`` value raises ``ValueError``."""
    # see gh-26466
    values = ["1", 2, 3]
    bad_errors = "invalid"

    with pytest.raises(ValueError, match="invalid error value specified"):
        to_numeric(values, errors=bad_errors)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data",
    [
        ["1", 2, 3],
        [1, 2, 3],
        np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"),
    ],
)
@pytest.mark.parametrize(
    "kwargs,exp_dtype",
    [
        # Basic function tests.
        ({}, np.int64),
        ({"downcast": None}, np.int64),
        # Support below np.float32 is rare and far between.
        ({"downcast": "float"}, np.dtype(np.float32).char),
        # Basic dtype support.
        ({"downcast": "unsigned"}, np.dtype(np.typecodes["UnsignedInteger"][0])),
    ],
)
def test_downcast_basic(data, kwargs, exp_dtype):
    """Each ``downcast`` option yields the values 1, 2, 3 in the expected dtype."""
    # see gh-13352
    converted = to_numeric(data, **kwargs)

    tm.assert_numpy_array_equal(converted, np.array([1, 2, 3], dtype=exp_dtype))
|  | ||||
|  | ||||
@pytest.mark.parametrize("signed_downcast", ["integer", "signed"])
@pytest.mark.parametrize(
    "data",
    [
        ["1", 2, 3],
        [1, 2, 3],
        np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"),
    ],
)
def test_signed_downcast(data, signed_downcast):
    """Signed downcasts pick the first dtype in numpy's ``Integer`` typecodes."""
    # see gh-13352
    first_int_code = np.typecodes["Integer"][0]
    expected = np.array([1, 2, 3], dtype=np.dtype(first_int_code))

    converted = to_numeric(data, downcast=signed_downcast)
    tm.assert_numpy_array_equal(converted, expected)
|  | ||||
|  | ||||
def test_ignore_downcast_invalid_data():
    """``downcast`` is skipped when the data cannot be converted at all."""
    # If we can't successfully cast the given
    # data to a numeric dtype, do not bother
    # with the downcast parameter.
    values = ["foo", 2, 3]

    with tm.assert_produces_warning(
        FutureWarning, match="errors='ignore' is deprecated"
    ):
        converted = to_numeric(values, errors="ignore", downcast="unsigned")

    tm.assert_numpy_array_equal(converted, np.array(values, dtype=object))
|  | ||||
|  | ||||
def test_ignore_downcast_neg_to_unsigned():
    """An unsigned downcast is skipped when the data holds a negative number."""
    # Cannot cast to an unsigned integer
    # because we have a negative number.
    values = ["-1", 2, 3]

    converted = to_numeric(values, downcast="unsigned")

    tm.assert_numpy_array_equal(converted, np.array([-1, 2, 3], dtype=np.int64))
|  | ||||
|  | ||||
# Warning in 32 bit platforms
@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning")
@pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"])
@pytest.mark.parametrize(
    "data,expected",
    [
        (["1.1", 2, 3], np.array([1.1, 2, 3], dtype=np.float64)),
        (
            [10000.0, 20000, 3000, 40000.36, 50000, 50000.00],
            np.array(
                [10000.0, 20000, 3000, 40000.36, 50000, 50000.00], dtype=np.float64
            ),
        ),
    ],
)
def test_ignore_downcast_cannot_convert_float(data, expected, downcast):
    """Integer downcasts leave data with fractional values as float64."""
    # Cannot cast to an integer (signed or unsigned)
    # because we have a float number.
    converted = to_numeric(data, downcast=downcast)

    tm.assert_numpy_array_equal(converted, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "downcast,expected_dtype",
    [("integer", np.int16), ("signed", np.int16), ("unsigned", np.uint16)],
)
def test_downcast_not8bit(downcast, expected_dtype):
    """Values that do not fit in 8 bits are downcast to the 16-bit dtype."""
    # the smallest integer dtype need not be np.(u)int8
    values = ["256", 257, 258]
    expected = np.array([256, 257, 258], dtype=expected_dtype)

    converted = to_numeric(values, downcast=downcast)
    tm.assert_numpy_array_equal(converted, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "dtype,downcast,min_max",
    [
        ("int8", "integer", [iinfo(np.int8).min, iinfo(np.int8).max]),
        ("int16", "integer", [iinfo(np.int16).min, iinfo(np.int16).max]),
        ("int32", "integer", [iinfo(np.int32).min, iinfo(np.int32).max]),
        ("int64", "integer", [iinfo(np.int64).min, iinfo(np.int64).max]),
        ("uint8", "unsigned", [iinfo(np.uint8).min, iinfo(np.uint8).max]),
        ("uint16", "unsigned", [iinfo(np.uint16).min, iinfo(np.uint16).max]),
        ("uint32", "unsigned", [iinfo(np.uint32).min, iinfo(np.uint32).max]),
        ("uint64", "unsigned", [iinfo(np.uint64).min, iinfo(np.uint64).max]),
        ("int16", "integer", [iinfo(np.int8).min, iinfo(np.int8).max + 1]),
        ("int32", "integer", [iinfo(np.int16).min, iinfo(np.int16).max + 1]),
        ("int64", "integer", [iinfo(np.int32).min, iinfo(np.int32).max + 1]),
        ("int16", "integer", [iinfo(np.int8).min - 1, iinfo(np.int16).max]),
        ("int32", "integer", [iinfo(np.int16).min - 1, iinfo(np.int32).max]),
        ("int64", "integer", [iinfo(np.int32).min - 1, iinfo(np.int64).max]),
        ("uint16", "unsigned", [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]),
        ("uint32", "unsigned", [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]),
        ("uint64", "unsigned", [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]),
    ],
)
def test_downcast_limits(dtype, downcast, min_max):
    """Downcasting picks the expected dtype at and just past each dtype's limits."""
    # see gh-14404: test the limits of each downcast.
    bounds = Series(min_max)
    downcasted = to_numeric(bounds, downcast=downcast)

    assert downcasted.dtype == dtype
|  | ||||
|  | ||||
def test_downcast_float64_to_float32():
    """``downcast="float"`` keeps float64 for these values."""
    # GH-43693: Check float64 preservation when >= 16,777,217
    values = [16777217.0, np.finfo(np.float64).max, np.nan]
    original = Series(values, dtype=np.float64)

    downcasted = to_numeric(original, downcast="float")

    assert original.dtype == downcasted.dtype
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "ser,expected",
    [
        (
            Series([0, 9223372036854775808]),
            Series([0, 9223372036854775808], dtype=np.uint64),
        )
    ],
)
def test_downcast_uint64(ser, expected):
    """An unsigned downcast keeps a value one past the int64 maximum as uint64."""
    # see gh-14422:
    # BUG: to_numeric doesn't work uint64 numbers
    downcasted = to_numeric(ser, downcast="unsigned")

    tm.assert_series_equal(downcasted, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data,exp_data",
    [
        (
            [200, 300, "", "NaN", 30000000000000000000],
            [200, 300, np.nan, np.nan, 30000000000000000000],
        ),
        (
            ["12345678901234567890", "1234567890", "ITEM"],
            [12345678901234567890, 1234567890, np.nan],
        ),
    ],
)
def test_coerce_uint64_conflict(data, exp_data):
    """Coercing very large integers alongside missing values yields floats."""
    # see gh-17007 and gh-17125
    #
    # Still returns float despite the uint64-nan conflict,
    # which would normally force the casting to object.
    coerced = to_numeric(Series(data), errors="coerce")

    tm.assert_series_equal(coerced, Series(exp_data, dtype=float))
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "errors,exp",
    [
        ("ignore", Series(["12345678901234567890", "1234567890", "ITEM"])),
        ("raise", "Unable to parse string"),
    ],
)
@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
def test_non_coerce_uint64_conflict(errors, exp):
    """Without coercion the input is either returned unchanged or raises.

    A string ``exp`` is the expected error message.
    """
    # see gh-17007 and gh-17125
    #
    # For completeness.
    strings = Series(["12345678901234567890", "1234567890", "ITEM"])

    if not isinstance(exp, str):
        converted = to_numeric(strings, errors=errors)
        # The comparison is against the input itself.
        tm.assert_series_equal(converted, strings)
        return

    with pytest.raises(ValueError, match=exp):
        to_numeric(strings, errors=errors)
|  | ||||
|  | ||||
@pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"])
@pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"])
def test_downcast_empty(dc1, dc2):
    """Empty input gives equal arrays (dtype aside) for any pair of downcasts."""
    # GH32493
    first = to_numeric([], downcast=dc1)
    second = to_numeric([], downcast=dc2)

    tm.assert_numpy_array_equal(first, second, check_dtype=False)
|  | ||||
|  | ||||
def test_failure_to_convert_uint64_string_to_NaN():
    """The literal string ``"uint64"`` coerces to NaN, as a scalar and in a Series."""
    # GH 32394
    scalar_result = to_numeric("uint64", errors="coerce")
    assert np.isnan(scalar_result)

    expected = Series([32, 64, np.nan])
    series_result = to_numeric(Series(["32", "64", "uint64"]), errors="coerce")
    tm.assert_series_equal(series_result, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "strrep",
    [
        "243.164",
        "245.968",
        "249.585",
        "259.745",
        "265.742",
        "272.567",
        "279.196",
        "280.366",
        "275.034",
        "271.351",
        "272.889",
        "270.627",
        "280.828",
        "290.383",
        "308.153",
        "319.945",
        "336.0",
        "344.09",
        "351.385",
        "356.178",
        "359.82",
        "361.03",
        "367.701",
        "380.812",
        "387.98",
        "391.749",
        "391.171",
        "385.97",
        "385.345",
        "386.121",
        "390.996",
        "399.734",
        "413.073",
        "421.532",
        "430.221",
        "437.092",
        "439.746",
        "446.01",
        "451.191",
        "460.463",
        "469.779",
        "472.025",
        "479.49",
        "474.864",
        "467.54",
        "471.978",
    ],
)
def test_precision_float_conversion(strrep):
    """Parsing a decimal string matches Python's own ``float`` exactly."""
    # GH 31364
    parsed = to_numeric(strrep)
    reference = float(strrep)

    assert parsed == reference
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "values, expected",
    [
        (["1", "2", None], Series([1, 2, np.nan], dtype="Int64")),
        (["1", "2", "3"], Series([1, 2, 3], dtype="Int64")),
        (["1", "2", 3], Series([1, 2, 3], dtype="Int64")),
        (["1", "2", 3.5], Series([1, 2, 3.5], dtype="Float64")),
        (["1", None, 3.5], Series([1, np.nan, 3.5], dtype="Float64")),
        (["1", "2", "3.5"], Series([1, 2, 3.5], dtype="Float64")),
    ],
)
def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected):
    """Nullable string Series convert to the matching nullable numeric dtype."""
    # https://github.com/pandas-dev/pandas/issues/37262
    strings = Series(values, dtype=nullable_string_dtype)

    tm.assert_series_equal(to_numeric(strings), expected)
|  | ||||
|  | ||||
def test_to_numeric_from_nullable_string_coerce(nullable_string_dtype):
    """Coercing a nullable string Series turns a bad entry into ``pd.NA``."""
    # GH#52146
    strings = Series(["a", "1"], dtype=nullable_string_dtype)

    coerced = to_numeric(strings, errors="coerce")

    tm.assert_series_equal(coerced, Series([pd.NA, 1], dtype="Int64"))
|  | ||||
|  | ||||
def test_to_numeric_from_nullable_string_ignore(nullable_string_dtype):
    """``errors="ignore"`` warns and returns the nullable string Series unchanged."""
    # GH#52146
    strings = Series(["a", "1"], dtype=nullable_string_dtype)
    expected = strings.copy()

    with tm.assert_produces_warning(
        FutureWarning, match="errors='ignore' is deprecated"
    ):
        returned = to_numeric(strings, errors="ignore")

    tm.assert_series_equal(returned, expected)
|  | ||||
|  | ||||
@pytest.mark.parametrize(
    "data, input_dtype, downcast, expected_dtype",
    (
        ([1, 1], "Int64", "integer", "Int8"),
        ([1.0, pd.NA], "Float64", "integer", "Int8"),
        ([1.0, 1.1], "Float64", "integer", "Float64"),
        ([1, pd.NA], "Int64", "integer", "Int8"),
        ([450, 300], "Int64", "integer", "Int16"),
        ([1, 1], "Float64", "integer", "Int8"),
        ([np.iinfo(np.int64).max - 1, 1], "Int64", "integer", "Int64"),
        ([1, 1], "Int64", "signed", "Int8"),
        ([1.0, 1.0], "Float32", "signed", "Int8"),
        ([1.0, 1.1], "Float64", "signed", "Float64"),
        ([1, pd.NA], "Int64", "signed", "Int8"),
        ([450, -300], "Int64", "signed", "Int16"),
        ([np.iinfo(np.uint64).max - 1, 1], "UInt64", "signed", "UInt64"),
        ([1, 1], "Int64", "unsigned", "UInt8"),
        ([1.0, 1.0], "Float32", "unsigned", "UInt8"),
        ([1.0, 1.1], "Float64", "unsigned", "Float64"),
        ([1, pd.NA], "Int64", "unsigned", "UInt8"),
        ([450, -300], "Int64", "unsigned", "Int64"),
        ([-1, -1], "Int32", "unsigned", "Int32"),
        ([1, 1], "Float64", "float", "Float32"),
        ([1, 1.1], "Float64", "float", "Float32"),
        ([1, 1], "Float32", "float", "Float32"),
        ([1, 1.1], "Float32", "float", "Float32"),
    ),
)
def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype):
    """Downcasting a nullable array yields the expected nullable dtype."""
    source = pd.array(data, dtype=input_dtype)
    expected = pd.array(data, dtype=expected_dtype)

    downcasted = to_numeric(source, downcast=downcast)
    tm.assert_extension_array_equal(downcasted, expected)
|  | ||||
|  | ||||
| def test_downcast_nullable_mask_is_copied(): | ||||
|     # GH38974 | ||||
|     # The downcast result must be independent of the input array: setting an | ||||
|     # element of the input to NA afterwards must not show up in the result. | ||||
|  | ||||
|     arr = pd.array([1, 2, pd.NA], dtype="Int64") | ||||
|  | ||||
|     result = to_numeric(arr, downcast="integer") | ||||
|     expected = pd.array([1, 2, pd.NA], dtype="Int8") | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|     arr[1] = pd.NA  # should not modify result | ||||
|     # Same assertion as above, repeated after mutating the input. | ||||
|     tm.assert_extension_array_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_numeric_scientific_notation(): | ||||
|     # GH 15898 | ||||
|     # A scalar string written in scientific notation is expected to parse to | ||||
|     # the equal float64 value. | ||||
|     result = to_numeric("1.7e+308") | ||||
|     expected = np.float64(1.7e308) | ||||
|     assert result == expected | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("val", [9876543210.0, 2.0**128]) | ||||
| def test_to_numeric_large_float_not_downcast_to_float_32(val): | ||||
|     # GH 19729 | ||||
|     # With downcast="float" the Series is expected to come back unchanged | ||||
|     # for these values: the result is compared against the input itself. | ||||
|     expected = Series([val]) | ||||
|     result = to_numeric(expected, downcast="float") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "val, dtype", [(1, "Int64"), (1.5, "Float64"), (True, "boolean")] | ||||
| ) | ||||
| def test_to_numeric_dtype_backend(val, dtype): | ||||
|     # GH#50505 | ||||
|     # An object-dtype Series converted with dtype_backend="numpy_nullable" | ||||
|     # is expected to end up with the nullable dtype listed for the value. | ||||
|     ser = Series([val], dtype=object) | ||||
|     result = to_numeric(ser, dtype_backend="numpy_nullable") | ||||
|     expected = Series([val], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "val, dtype", | ||||
|     [ | ||||
|         (1, "Int64"), | ||||
|         (1.5, "Float64"), | ||||
|         (True, "boolean"), | ||||
|         (1, "int64[pyarrow]"), | ||||
|         (1.5, "float64[pyarrow]"), | ||||
|         (True, "bool[pyarrow]"), | ||||
|     ], | ||||
| ) | ||||
| def test_to_numeric_dtype_backend_na(val, dtype): | ||||
|     # GH#50505 | ||||
|     # Same as the dtype_backend test, but the input also holds None, which | ||||
|     # is expected to come out as pd.NA in the nullable result. | ||||
|     # The backend is derived from the expected dtype string; the pyarrow | ||||
|     # cases are skipped when pyarrow cannot be imported. | ||||
|     if "pyarrow" in dtype: | ||||
|         pytest.importorskip("pyarrow") | ||||
|         dtype_backend = "pyarrow" | ||||
|     else: | ||||
|         dtype_backend = "numpy_nullable" | ||||
|     ser = Series([val, None], dtype=object) | ||||
|     result = to_numeric(ser, dtype_backend=dtype_backend) | ||||
|     expected = Series([val, pd.NA], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "val, dtype, downcast", | ||||
|     [ | ||||
|         (1, "Int8", "integer"), | ||||
|         (1.5, "Float32", "float"), | ||||
|         (1, "Int8", "signed"), | ||||
|         (1, "int8[pyarrow]", "integer"), | ||||
|         (1.5, "float[pyarrow]", "float"), | ||||
|         (1, "int8[pyarrow]", "signed"), | ||||
|     ], | ||||
| ) | ||||
| def test_to_numeric_dtype_backend_downcasting(val, dtype, downcast): | ||||
|     # GH#50505 | ||||
|     # dtype_backend and downcast are passed together: the result is expected | ||||
|     # to have the smaller dtype of the requested backend, with None as pd.NA. | ||||
|     # The pyarrow cases are skipped when pyarrow cannot be imported. | ||||
|     if "pyarrow" in dtype: | ||||
|         pytest.importorskip("pyarrow") | ||||
|         dtype_backend = "pyarrow" | ||||
|     else: | ||||
|         dtype_backend = "numpy_nullable" | ||||
|     ser = Series([val, None], dtype=object) | ||||
|     result = to_numeric(ser, dtype_backend=dtype_backend, downcast=downcast) | ||||
|     expected = Series([val, pd.NA], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "smaller, dtype_backend", | ||||
|     [["UInt8", "numpy_nullable"], ["uint8[pyarrow]", "pyarrow"]], | ||||
| ) | ||||
| def test_to_numeric_dtype_backend_downcasting_uint(smaller, dtype_backend): | ||||
|     # GH#50505 | ||||
|     # A UInt64 Series with a missing value, downcast with "unsigned", is | ||||
|     # expected to become the 8-bit unsigned dtype of the requested backend. | ||||
|     if dtype_backend == "pyarrow": | ||||
|         pytest.importorskip("pyarrow") | ||||
|     ser = Series([1, pd.NA], dtype="UInt64") | ||||
|     result = to_numeric(ser, dtype_backend=dtype_backend, downcast="unsigned") | ||||
|     expected = Series([1, pd.NA], dtype=smaller) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
|     "dtype", | ||||
|     [ | ||||
|         "Int64", | ||||
|         "UInt64", | ||||
|         "Float64", | ||||
|         "boolean", | ||||
|         "int64[pyarrow]", | ||||
|         "uint64[pyarrow]", | ||||
|         "float64[pyarrow]", | ||||
|         "bool[pyarrow]", | ||||
|     ], | ||||
| ) | ||||
| def test_to_numeric_dtype_backend_already_nullable(dtype): | ||||
|     # GH#50505 | ||||
|     # Input that already has a nullable dtype is expected to keep that exact | ||||
|     # dtype. Note that dtype_backend="numpy_nullable" is passed for every | ||||
|     # case, including the pyarrow-backed inputs. | ||||
|     if "pyarrow" in dtype: | ||||
|         pytest.importorskip("pyarrow") | ||||
|     ser = Series([1, pd.NA], dtype=dtype) | ||||
|     result = to_numeric(ser, dtype_backend="numpy_nullable") | ||||
|     expected = Series([1, pd.NA], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_numeric_dtype_backend_error(dtype_backend): | ||||
|     # GH#50505 | ||||
|     # Walks the three error modes on unparseable strings with a dtype_backend | ||||
|     # set. `dtype_backend` is a fixture that is not defined in this part of | ||||
|     # the file. | ||||
|     ser = Series(["a", "b", ""]) | ||||
|     expected = ser.copy() | ||||
|     # Default error handling: the call raises. | ||||
|     with pytest.raises(ValueError, match="Unable to parse string"): | ||||
|         to_numeric(ser, dtype_backend=dtype_backend) | ||||
|  | ||||
|     # errors="ignore": the input comes back unchanged, with a FutureWarning. | ||||
|     msg = "errors='ignore' is deprecated" | ||||
|     with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|         result = to_numeric(ser, dtype_backend=dtype_backend, errors="ignore") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     # errors="coerce": every entry becomes missing, in the float dtype of the | ||||
|     # requested backend. | ||||
|     result = to_numeric(ser, dtype_backend=dtype_backend, errors="coerce") | ||||
|     if dtype_backend == "pyarrow": | ||||
|         dtype = "double[pyarrow]" | ||||
|     else: | ||||
|         dtype = "Float64" | ||||
|     expected = Series([np.nan, np.nan, np.nan], dtype=dtype) | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_invalid_dtype_backend(): | ||||
|     # A dtype_backend value other than the two named in the message is | ||||
|     # expected to raise ValueError. | ||||
|     ser = Series([1, 2, 3]) | ||||
|     msg = ( | ||||
|         "dtype_backend numpy is invalid, only 'numpy_nullable' and " | ||||
|         "'pyarrow' are allowed." | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         to_numeric(ser, dtype_backend="numpy") | ||||
|  | ||||
|  | ||||
| def test_coerce_pyarrow_backend(): | ||||
|     # GH 52588 | ||||
|     # A pyarrow string Series ("1", "2", "x") coerced with the pyarrow | ||||
|     # backend is expected to give pyarrow int64 with the bad entry missing. | ||||
|     # Skipped when pyarrow cannot be imported. | ||||
|     pa = pytest.importorskip("pyarrow") | ||||
|     ser = Series(list("12x"), dtype=ArrowDtype(pa.string())) | ||||
|     result = to_numeric(ser, errors="coerce", dtype_backend="pyarrow") | ||||
|     expected = Series([1, 2, None], dtype=ArrowDtype(pa.int64())) | ||||
|     tm.assert_series_equal(result, expected) | ||||
| @ -0,0 +1,72 @@ | ||||
| from datetime import time | ||||
| import locale | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import PY311 | ||||
|  | ||||
| from pandas import Series | ||||
| import pandas._testing as tm | ||||
| from pandas.core.tools.times import to_time | ||||
|  | ||||
| # The tests marked with this are locale-dependent. | ||||
| # They pass, except when the machine locale is zh_CN or it_IT. | ||||
| # The locale is read once, when this module is imported. strict=False means | ||||
| # a test that unexpectedly passes under those locales is not reported as a | ||||
| # failure. | ||||
| fails_on_non_english = pytest.mark.xfail( | ||||
|     locale.getlocale()[0] in ("zh_CN", "it_IT"), | ||||
|     reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", | ||||
|     strict=False, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestToTime: | ||||
|     # Tests for pandas.core.tools.times.to_time. | ||||
|     @pytest.mark.parametrize( | ||||
|         "time_string", | ||||
|         [ | ||||
|             "14:15", | ||||
|             "1415", | ||||
|             pytest.param("2:15pm", marks=fails_on_non_english), | ||||
|             pytest.param("0215pm", marks=fails_on_non_english), | ||||
|             "14:15:00", | ||||
|             "141500", | ||||
|             pytest.param("2:15:00pm", marks=fails_on_non_english), | ||||
|             pytest.param("021500pm", marks=fails_on_non_english), | ||||
|             time(14, 15), | ||||
|         ], | ||||
|     ) | ||||
|     def test_parsers_time(self, time_string): | ||||
|         # GH#11818 | ||||
|         # Every listed spelling (and a datetime.time object itself) is | ||||
|         # expected to convert to time(14, 15). The am/pm spellings carry the | ||||
|         # locale-dependent xfail marker. | ||||
|         assert to_time(time_string) == time(14, 15) | ||||
|  | ||||
|     def test_odd_format(self): | ||||
|         # "14.15" needs an explicit format to parse. The check that it raises | ||||
|         # without one is skipped when PY311 is true. | ||||
|         new_string = "14.15" | ||||
|         msg = r"Cannot convert arg \['14\.15'\] to a time" | ||||
|         if not PY311: | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 to_time(new_string) | ||||
|         assert to_time(new_string, format="%H.%M") == time(14, 15) | ||||
|  | ||||
|     def test_arraylike(self): | ||||
|         # List input: default parsing, explicit format and inferred format | ||||
|         # are all expected to give the same list of time objects. | ||||
|         arg = ["14:15", "20:20"] | ||||
|         expected_arr = [time(14, 15), time(20, 20)] | ||||
|         assert to_time(arg) == expected_arr | ||||
|         assert to_time(arg, format="%H:%M") == expected_arr | ||||
|         assert to_time(arg, infer_time_format=True) == expected_arr | ||||
|         # A format that does not match the data: coerce gives None entries, | ||||
|         # ignore returns the input as an object array, raise raises. | ||||
|         assert to_time(arg, format="%I:%M%p", errors="coerce") == [None, None] | ||||
|  | ||||
|         msg = "errors='ignore' is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             res = to_time(arg, format="%I:%M%p", errors="ignore") | ||||
|         tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) | ||||
|  | ||||
|         msg = "Cannot convert.+to a time with given format" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             to_time(arg, format="%I:%M%p", errors="raise") | ||||
|  | ||||
|         # Series input is expected to give a Series with the name preserved. | ||||
|         tm.assert_series_equal( | ||||
|             to_time(Series(arg, name="test")), Series(expected_arr, name="test") | ||||
|         ) | ||||
|  | ||||
|         # ndarray input is expected to give a plain list. | ||||
|         res = to_time(np.array(arg)) | ||||
|         assert isinstance(res, list) | ||||
|         assert res == expected_arr | ||||
| @ -0,0 +1,340 @@ | ||||
| from datetime import ( | ||||
|     time, | ||||
|     timedelta, | ||||
| ) | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas.compat import IS64 | ||||
| from pandas.errors import OutOfBoundsTimedelta | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     Series, | ||||
|     TimedeltaIndex, | ||||
|     isna, | ||||
|     to_timedelta, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays import TimedeltaArray | ||||
|  | ||||
|  | ||||
| class TestTimedeltas: | ||||
|     # Tests for pandas.to_timedelta. | ||||
|     def test_to_timedelta_dt64_raises(self): | ||||
|         # Passing datetime64-dtype data to TimedeltaIndex is no longer | ||||
|         #  supported GH#29794 | ||||
|         msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]" | ||||
|  | ||||
|         ser = Series([pd.NaT]) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             to_timedelta(ser) | ||||
|         # Same error when the function is applied column-wise to a frame. | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             ser.to_frame().apply(to_timedelta) | ||||
|  | ||||
|     @pytest.mark.parametrize("readonly", [True, False]) | ||||
|     def test_to_timedelta_readonly(self, readonly): | ||||
|         # GH#34857 | ||||
|         # An empty object array, writable or read-only, is expected to give | ||||
|         # the same result as an empty list. | ||||
|         arr = np.array([], dtype=object) | ||||
|         if readonly: | ||||
|             arr.setflags(write=False) | ||||
|         result = to_timedelta(arr) | ||||
|         expected = to_timedelta([]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_to_timedelta_null(self): | ||||
|         # Empty strings are expected to convert to missing values. | ||||
|         result = to_timedelta(["", ""]) | ||||
|         assert isna(result).all() | ||||
|  | ||||
|     def test_to_timedelta_same_np_timedelta64(self): | ||||
|         # pass thru | ||||
|         result = to_timedelta(np.array([np.timedelta64(1, "s")])) | ||||
|         expected = pd.Index(np.array([np.timedelta64(1, "s")])) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_to_timedelta_series(self): | ||||
|         # Series | ||||
|         expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) | ||||
|         result = to_timedelta(Series(["1d", "1days 00:00:01"])) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_to_timedelta_units(self): | ||||
|         # with units | ||||
|         result = TimedeltaIndex( | ||||
|             [np.timedelta64(0, "ns"), np.timedelta64(10, "s").astype("m8[ns]")] | ||||
|         ) | ||||
|         expected = to_timedelta([0, 10], unit="s") | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "dtype, unit", | ||||
|         [ | ||||
|             ["int64", "s"], | ||||
|             ["int64", "m"], | ||||
|             ["int64", "h"], | ||||
|             ["timedelta64[s]", "s"], | ||||
|             ["timedelta64[D]", "D"], | ||||
|         ], | ||||
|     ) | ||||
|     def test_to_timedelta_units_dtypes(self, dtype, unit): | ||||
|         # arrays of various dtypes | ||||
|         arr = np.array([1] * 5, dtype=dtype) | ||||
|         result = to_timedelta(arr, unit=unit) | ||||
|         # int64 input is expected at nanosecond resolution; both timedelta64 | ||||
|         # inputs (including the [D] case) are expected at second resolution. | ||||
|         exp_dtype = "m8[ns]" if dtype == "int64" else "m8[s]" | ||||
|         expected = TimedeltaIndex([np.timedelta64(1, unit)] * 5, dtype=exp_dtype) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_to_timedelta_oob_non_nano(self): | ||||
|         # One above pd.NaT._value, stored in minutes; per the message below | ||||
|         # the conversion target is timedelta64[s], which overflows. | ||||
|         arr = np.array([pd.NaT._value + 1], dtype="timedelta64[m]") | ||||
|  | ||||
|         msg = ( | ||||
|             "Cannot convert -9223372036854775807 minutes to " | ||||
|             r"timedelta64\[s\] without overflow" | ||||
|         ) | ||||
|         # The same error is expected from all three entry points. | ||||
|         with pytest.raises(OutOfBoundsTimedelta, match=msg): | ||||
|             to_timedelta(arr) | ||||
|  | ||||
|         with pytest.raises(OutOfBoundsTimedelta, match=msg): | ||||
|             TimedeltaIndex(arr) | ||||
|  | ||||
|         with pytest.raises(OutOfBoundsTimedelta, match=msg): | ||||
|             TimedeltaArray._from_sequence(arr, dtype="m8[s]") | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))] | ||||
|     ) | ||||
|     @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"]) | ||||
|     @pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") | ||||
|     def test_to_timedelta_dataframe(self, arg, errors): | ||||
|         # GH 11776 | ||||
|         # 2-D input (ndarray or DataFrame) is rejected for every errors mode. | ||||
|         with pytest.raises(TypeError, match="1-d array"): | ||||
|             to_timedelta(arg, errors=errors) | ||||
|  | ||||
|     def test_to_timedelta_invalid_errors(self): | ||||
|         # bad value for errors parameter | ||||
|         msg = "errors must be one of" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             to_timedelta(["foo"], errors="never") | ||||
|  | ||||
|     @pytest.mark.parametrize("arg", [[1, 2], 1]) | ||||
|     def test_to_timedelta_invalid_unit(self, arg): | ||||
|         # these will error | ||||
|         msg = "invalid unit abbreviation: foo" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             to_timedelta(arg, unit="foo") | ||||
|  | ||||
|     def test_to_timedelta_time(self): | ||||
|         # time not supported ATM | ||||
|         msg = ( | ||||
|             "Value must be Timedelta, string, integer, float, timedelta or convertible" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             to_timedelta(time(second=1)) | ||||
|         # With errors="coerce" the unsupported value becomes NaT instead. | ||||
|         assert to_timedelta(time(second=1), errors="coerce") is pd.NaT | ||||
|  | ||||
|     def test_to_timedelta_bad_value(self): | ||||
|         # Unparseable strings raise; the message names the first bad value. | ||||
|         msg = "Could not convert 'foo' to NumPy timedelta" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             to_timedelta(["foo", "bar"]) | ||||
|  | ||||
|     def test_to_timedelta_bad_value_coerce(self): | ||||
|         # errors="coerce" turns unparseable strings into NaT and leaves the | ||||
|         # parseable entries converted. | ||||
|         tm.assert_index_equal( | ||||
|             TimedeltaIndex([pd.NaT, pd.NaT]), | ||||
|             to_timedelta(["foo", "bar"], errors="coerce"), | ||||
|         ) | ||||
|  | ||||
|         tm.assert_index_equal( | ||||
|             TimedeltaIndex(["1 day", pd.NaT, "1 min"]), | ||||
|             to_timedelta(["1 day", "bar", "1 min"], errors="coerce"), | ||||
|         ) | ||||
|  | ||||
|     def test_to_timedelta_invalid_errors_ignore(self): | ||||
|         # gh-13613: these should not error because errors='ignore' | ||||
|         # Checked for scalar, list, Index and Series input; each call must | ||||
|         # also emit the deprecation FutureWarning. | ||||
|         msg = "errors='ignore' is deprecated" | ||||
|         invalid_data = "apple" | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = to_timedelta(invalid_data, errors="ignore") | ||||
|         assert invalid_data == result | ||||
|  | ||||
|         # List input is expected back as an object ndarray. | ||||
|         invalid_data = ["apple", "1 days"] | ||||
|         expected = np.array(invalid_data, dtype=object) | ||||
|  | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = to_timedelta(invalid_data, errors="ignore") | ||||
|         tm.assert_numpy_array_equal(expected, result) | ||||
|  | ||||
|         invalid_data = pd.Index(["apple", "1 days"]) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = to_timedelta(invalid_data, errors="ignore") | ||||
|         tm.assert_index_equal(invalid_data, result) | ||||
|  | ||||
|         invalid_data = Series(["apple", "1 days"]) | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = to_timedelta(invalid_data, errors="ignore") | ||||
|         tm.assert_series_equal(invalid_data, result) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "val, errors", | ||||
|         [ | ||||
|             ("1M", True), | ||||
|             ("1 M", True), | ||||
|             ("1Y", True), | ||||
|             ("1 Y", True), | ||||
|             ("1y", True), | ||||
|             ("1 y", True), | ||||
|             ("1m", False), | ||||
|             ("1 m", False), | ||||
|             ("1 day", False), | ||||
|             ("2day", False), | ||||
|         ], | ||||
|     ) | ||||
|     def test_unambiguous_timedelta_values(self, val, errors): | ||||
|         # GH36666 Deprecate use of strings denoting units with 'M', 'Y', 'm' or 'y' | ||||
|         # in pd.to_timedelta | ||||
|         # Here `errors` is a bool from the parametrization, not the | ||||
|         # to_timedelta keyword: True means to_timedelta(val) must raise. | ||||
|         msg = "Units 'M', 'Y' and 'y' do not represent unambiguous timedelta" | ||||
|         if errors: | ||||
|             with pytest.raises(ValueError, match=msg): | ||||
|                 to_timedelta(val) | ||||
|         else: | ||||
|             # check it doesn't raise | ||||
|             to_timedelta(val) | ||||
|  | ||||
|     def test_to_timedelta_via_apply(self): | ||||
|         # GH 5458 | ||||
|         # Applying to_timedelta element-wise and wrapping a converted scalar | ||||
|         # in a Series are both expected to equal the same one-second Series. | ||||
|         expected = Series([np.timedelta64(1, "s")]) | ||||
|         result = Series(["00:00:01"]).apply(to_timedelta) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|         result = Series([to_timedelta("00:00:01")]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_to_timedelta_inference_without_warning(self): | ||||
|         # GH#41731 inference produces a warning in the Series constructor, | ||||
|         #  but _not_ in to_timedelta | ||||
|         vals = ["00:00:01", pd.NaT] | ||||
|         with tm.assert_produces_warning(None): | ||||
|             result = to_timedelta(vals) | ||||
|  | ||||
|         expected = TimedeltaIndex([pd.Timedelta(seconds=1), pd.NaT]) | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_to_timedelta_on_missing_values(self): | ||||
|         # GH5438 | ||||
|         timedelta_NaT = np.timedelta64("NaT") | ||||
|  | ||||
|         # String input with NaN, then already-timedelta input with NaT: both | ||||
|         # are compared against the same expected Series. | ||||
|         actual = to_timedelta(Series(["00:00:01", np.nan])) | ||||
|         expected = Series( | ||||
|             [np.timedelta64(1000000000, "ns"), timedelta_NaT], | ||||
|             dtype=f"{tm.ENDIAN}m8[ns]", | ||||
|         ) | ||||
|         tm.assert_series_equal(actual, expected) | ||||
|  | ||||
|         ser = Series(["00:00:01", pd.NaT], dtype="m8[ns]") | ||||
|         actual = to_timedelta(ser) | ||||
|         tm.assert_series_equal(actual, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("val", [np.nan, pd.NaT, pd.NA]) | ||||
|     def test_to_timedelta_on_missing_values_scalar(self, val): | ||||
|         # A missing scalar is expected to carry the same integer value as | ||||
|         # numpy's timedelta64 NaT. | ||||
|         actual = to_timedelta(val) | ||||
|         assert actual._value == np.timedelta64("NaT").astype("int64") | ||||
|  | ||||
|     @pytest.mark.parametrize("val", [np.nan, pd.NaT, pd.NA]) | ||||
|     def test_to_timedelta_on_missing_values_list(self, val): | ||||
|         # Same check as the scalar case, on the first element of list input. | ||||
|         actual = to_timedelta([val]) | ||||
|         assert actual[0]._value == np.timedelta64("NaT").astype("int64") | ||||
|  | ||||
|     @pytest.mark.xfail(not IS64, reason="Floating point error") | ||||
|     def test_to_timedelta_float(self): | ||||
|         # https://github.com/pandas-dev/pandas/issues/25077 | ||||
|         # Last ten microsecond steps below one second, given in seconds; the | ||||
|         # expected integer values are spaced 1000 apart, i.e. nanoseconds. | ||||
|         arr = np.arange(0, 1, 1e-6)[-10:] | ||||
|         result = to_timedelta(arr, unit="s") | ||||
|         expected_asi8 = np.arange(999990000, 10**9, 1000, dtype="int64") | ||||
|         tm.assert_numpy_array_equal(result.asi8, expected_asi8) | ||||
|  | ||||
|     def test_to_timedelta_coerce_strings_unit(self): | ||||
|         # With a unit given, errors="coerce" is expected to turn the string | ||||
|         # entry into NaT and still convert the integers. | ||||
|         arr = np.array([1, 2, "error"], dtype=object) | ||||
|         result = to_timedelta(arr, unit="ns", errors="coerce") | ||||
|         expected = to_timedelta([1, 2, pd.NaT], unit="ns") | ||||
|         tm.assert_index_equal(result, expected) | ||||
|  | ||||
|     def test_to_timedelta_ignore_strings_unit(self): | ||||
|         # With a unit given, errors="ignore" is expected to return the input | ||||
|         # array unchanged, with the deprecation FutureWarning. | ||||
|         arr = np.array([1, 2, "error"], dtype=object) | ||||
|         msg = "errors='ignore' is deprecated" | ||||
|         with tm.assert_produces_warning(FutureWarning, match=msg): | ||||
|             result = to_timedelta(arr, unit="ns", errors="ignore") | ||||
|         tm.assert_numpy_array_equal(result, arr) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "expected_val, result_val", [[timedelta(days=2), 2], [None, None]] | ||||
|     ) | ||||
|     def test_to_timedelta_nullable_int64_dtype(self, expected_val, result_val): | ||||
|         # GH 35574 | ||||
|         # Int64 input, with and without a missing entry, read as days. | ||||
|         expected = Series([timedelta(days=1), expected_val]) | ||||
|         result = to_timedelta(Series([1, result_val], dtype="Int64"), unit="days") | ||||
|  | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         ("input", "expected"), | ||||
|         [ | ||||
|             ("8:53:08.71800000001", "8:53:08.718"), | ||||
|             ("8:53:08.718001", "8:53:08.718001"), | ||||
|             ("8:53:08.7180000001", "8:53:08.7180000001"), | ||||
|             ("-8:53:08.71800000001", "-8:53:08.718"), | ||||
|             ("8:53:08.7180000089", "8:53:08.718000008"), | ||||
|         ], | ||||
|     ) | ||||
|     @pytest.mark.parametrize("func", [pd.Timedelta, to_timedelta]) | ||||
|     def test_to_timedelta_precision_over_nanos(self, input, expected, func): | ||||
|         # GH: 36738 | ||||
|         # Fractional-second digits past the ninth are expected to be dropped | ||||
|         # rather than rounded (see the last row: ...0089 -> ...008). | ||||
|         expected = pd.Timedelta(expected) | ||||
|         result = func(input) | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_to_timedelta_zerodim(self, fixed_now_ts): | ||||
|         # ndarray.item() incorrectly returns int for dt64[ns] and td64[ns] | ||||
|         dt64 = fixed_now_ts.to_datetime64() | ||||
|         arg = np.array(dt64) | ||||
|  | ||||
|         msg = ( | ||||
|             "Value must be Timedelta, string, integer, float, timedelta " | ||||
|             "or convertible, not datetime64" | ||||
|         ) | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             to_timedelta(arg) | ||||
|  | ||||
|         # The same zero-dim array viewed as timedelta64[ns] is expected to | ||||
|         # convert to a Timedelta with the same underlying integer. | ||||
|         arg2 = arg.view("m8[ns]") | ||||
|         result = to_timedelta(arg2) | ||||
|         assert isinstance(result, pd.Timedelta) | ||||
|         assert result._value == dt64.view("i8") | ||||
|  | ||||
|     def test_to_timedelta_numeric_ea(self, any_numeric_ea_dtype): | ||||
|         # GH#48796 | ||||
|         # No unit is passed: the value 1 is expected as one nanosecond and | ||||
|         # pd.NA as NaT. | ||||
|         ser = Series([1, pd.NA], dtype=any_numeric_ea_dtype) | ||||
|         result = to_timedelta(ser) | ||||
|         expected = Series([pd.Timedelta(1, unit="ns"), pd.NaT]) | ||||
|         tm.assert_series_equal(result, expected) | ||||
|  | ||||
|     def test_to_timedelta_fraction(self): | ||||
|         # One third of an hour is expected just below 20 minutes, not | ||||
|         # rounded up to 00:20:00. | ||||
|         result = to_timedelta(1.0 / 3, unit="h") | ||||
|         expected = pd.Timedelta("0 days 00:19:59.999999998") | ||||
|         assert result == expected | ||||
|  | ||||
|  | ||||
| def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): | ||||
|     # GH 52425 | ||||
|     # The pyarrow dtype string is built by lower-casing the fixture value. | ||||
|     # The result is expected as timedelta64[ns]. Skipped without pyarrow. | ||||
|     pytest.importorskip("pyarrow") | ||||
|     ser = Series([1, 2], dtype=f"{any_numeric_ea_dtype.lower()}[pyarrow]") | ||||
|     result = to_timedelta(ser) | ||||
|     expected = Series([1, 2], dtype="timedelta64[ns]") | ||||
|     tm.assert_series_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("unit", ["ns", "ms"]) | ||||
| def test_from_timedelta_arrow_dtype(unit): | ||||
|     # GH 54298 | ||||
|     # A pyarrow duration Series is expected to come back equal to the | ||||
|     # input (same dtype and unit). Skipped without pyarrow. | ||||
|     pytest.importorskip("pyarrow") | ||||
|     expected = Series([timedelta(1)], dtype=f"duration[{unit}][pyarrow]") | ||||
|     result = to_timedelta(expected) | ||||
|     tm.assert_series_equal(result, expected) | ||||
		Reference in New Issue
	
	Block a user