This commit is contained in:
2025-09-07 22:09:54 +02:00
parent e1b817252c
commit 2fc0d000b6
7796 changed files with 2159515 additions and 933 deletions

View File

@ -0,0 +1,7 @@
"""
Test files dedicated to individual (stand-alone) Series methods
Ideally these files/tests should correspond 1-to-1 with tests.frame.methods
These may also present opportunities for sharing/de-duplicating test code.
"""

View File

@ -0,0 +1,41 @@
import pytest
from pandas import Index
import pandas._testing as tm
def test_add_prefix_suffix(string_series):
with_prefix = string_series.add_prefix("foo#")
expected = Index([f"foo#{c}" for c in string_series.index])
tm.assert_index_equal(with_prefix.index, expected)
with_suffix = string_series.add_suffix("#foo")
expected = Index([f"{c}#foo" for c in string_series.index])
tm.assert_index_equal(with_suffix.index, expected)
with_pct_prefix = string_series.add_prefix("%")
expected = Index([f"%{c}" for c in string_series.index])
tm.assert_index_equal(with_pct_prefix.index, expected)
with_pct_suffix = string_series.add_suffix("%")
expected = Index([f"{c}%" for c in string_series.index])
tm.assert_index_equal(with_pct_suffix.index, expected)
def test_add_prefix_suffix_axis(string_series):
# GH 47819
with_prefix = string_series.add_prefix("foo#", axis=0)
expected = Index([f"foo#{c}" for c in string_series.index])
tm.assert_index_equal(with_prefix.index, expected)
with_pct_suffix = string_series.add_suffix("#foo", axis=0)
expected = Index([f"{c}#foo" for c in string_series.index])
tm.assert_index_equal(with_pct_suffix.index, expected)
def test_add_prefix_suffix_invalid_axis(string_series):
with pytest.raises(ValueError, match="No axis named 1 for object type Series"):
string_series.add_prefix("foo#", axis=1)
with pytest.raises(ValueError, match="No axis named 1 for object type Series"):
string_series.add_suffix("foo#", axis=1)

View File

@ -0,0 +1,262 @@
from datetime import timezone
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
period_range,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"first_slice,second_slice",
[
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]],
],
)
@pytest.mark.parametrize("fill", [None, -1])
def test_align(datetime_series, first_slice, second_slice, join_type, fill):
a = datetime_series[slice(*first_slice)]
b = datetime_series[slice(*second_slice)]
aa, ab = a.align(b, join=join_type, fill_value=fill)
join_index = a.index.join(b.index, how=join_type)
if fill is not None:
diff_a = aa.index.difference(join_index)
diff_b = ab.index.difference(join_index)
if len(diff_a) > 0:
assert (aa.reindex(diff_a) == fill).all()
if len(diff_b) > 0:
assert (ab.reindex(diff_b) == fill).all()
ea = a.reindex(join_index)
eb = b.reindex(join_index)
if fill is not None:
ea = ea.fillna(fill)
eb = eb.fillna(fill)
tm.assert_series_equal(aa, ea)
tm.assert_series_equal(ab, eb)
assert aa.name == "ts"
assert ea.name == "ts"
assert ab.name == "ts"
assert eb.name == "ts"
@pytest.mark.parametrize(
"first_slice,second_slice",
[
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]],
],
)
@pytest.mark.parametrize("method", ["pad", "bfill"])
@pytest.mark.parametrize("limit", [None, 1])
def test_align_fill_method(
datetime_series, first_slice, second_slice, join_type, method, limit
):
a = datetime_series[slice(*first_slice)]
b = datetime_series[slice(*second_slice)]
msg = (
"The 'method', 'limit', and 'fill_axis' keywords in Series.align "
"are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
aa, ab = a.align(b, join=join_type, method=method, limit=limit)
join_index = a.index.join(b.index, how=join_type)
ea = a.reindex(join_index)
eb = b.reindex(join_index)
msg2 = "Series.fillna with 'method' is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg2):
ea = ea.fillna(method=method, limit=limit)
eb = eb.fillna(method=method, limit=limit)
tm.assert_series_equal(aa, ea)
tm.assert_series_equal(ab, eb)
def test_align_nocopy(datetime_series, using_copy_on_write):
b = datetime_series[:5].copy()
# do copy
a = datetime_series.copy()
ra, _ = a.align(b, join="left")
ra[:5] = 5
assert not (a[:5] == 5).any()
# do not copy
a = datetime_series.copy()
ra, _ = a.align(b, join="left", copy=False)
ra[:5] = 5
if using_copy_on_write:
assert not (a[:5] == 5).any()
else:
assert (a[:5] == 5).all()
# do copy
a = datetime_series.copy()
b = datetime_series[:5].copy()
_, rb = a.align(b, join="right")
rb[:3] = 5
assert not (b[:3] == 5).any()
# do not copy
a = datetime_series.copy()
b = datetime_series[:5].copy()
_, rb = a.align(b, join="right", copy=False)
rb[:2] = 5
if using_copy_on_write:
assert not (b[:2] == 5).any()
else:
assert (b[:2] == 5).all()
def test_align_same_index(datetime_series, using_copy_on_write):
a, b = datetime_series.align(datetime_series, copy=False)
if not using_copy_on_write:
assert a.index is datetime_series.index
assert b.index is datetime_series.index
else:
assert a.index.is_(datetime_series.index)
assert b.index.is_(datetime_series.index)
a, b = datetime_series.align(datetime_series, copy=True)
assert a.index is not datetime_series.index
assert b.index is not datetime_series.index
assert a.index.is_(datetime_series.index)
assert b.index.is_(datetime_series.index)
def test_align_multiindex():
# GH 10665
midx = pd.MultiIndex.from_product(
[range(2), range(3), range(2)], names=("a", "b", "c")
)
idx = pd.Index(range(2), name="b")
s1 = Series(np.arange(12, dtype="int64"), index=midx)
s2 = Series(np.arange(2, dtype="int64"), index=idx)
# these must be the same results (but flipped)
res1l, res1r = s1.align(s2, join="left")
res2l, res2r = s2.align(s1, join="right")
expl = s1
tm.assert_series_equal(expl, res1l)
tm.assert_series_equal(expl, res2r)
expr = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
tm.assert_series_equal(expr, res1r)
tm.assert_series_equal(expr, res2l)
res1l, res1r = s1.align(s2, join="right")
res2l, res2r = s2.align(s1, join="left")
exp_idx = pd.MultiIndex.from_product(
[range(2), range(2), range(2)], names=("a", "b", "c")
)
expl = Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
tm.assert_series_equal(expl, res1l)
tm.assert_series_equal(expl, res2r)
expr = Series([0, 0, 1, 1] * 2, index=exp_idx)
tm.assert_series_equal(expr, res1r)
tm.assert_series_equal(expr, res2l)
@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None])
def test_align_with_dataframe_method(method):
# GH31788
ser = Series(range(3), index=range(3))
df = pd.DataFrame(0.0, index=range(3), columns=range(3))
msg = (
"The 'method', 'limit', and 'fill_axis' keywords in Series.align "
"are deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result_ser, result_df = ser.align(df, method=method)
tm.assert_series_equal(result_ser, ser)
tm.assert_frame_equal(result_df, df)
def test_align_dt64tzindex_mismatched_tzs():
idx1 = date_range("2001", periods=5, freq="h", tz="US/Eastern")
ser = Series(np.random.default_rng(2).standard_normal(len(idx1)), index=idx1)
ser_central = ser.tz_convert("US/Central")
# different timezones convert to UTC
new1, new2 = ser.align(ser_central)
assert new1.index.tz is timezone.utc
assert new2.index.tz is timezone.utc
def test_align_periodindex(join_type):
rng = period_range("1/1/2000", "1/1/2010", freq="Y")
ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
# TODO: assert something?
ts.align(ts[::2], join=join_type)
def test_align_stringindex(any_string_dtype):
left = Series(range(3), index=pd.Index(["a", "b", "d"], dtype=any_string_dtype))
right = Series(range(3), index=pd.Index(["a", "b", "c"], dtype=any_string_dtype))
result_left, result_right = left.align(right)
expected_idx = pd.Index(["a", "b", "c", "d"], dtype=any_string_dtype)
expected_left = Series([0, 1, np.nan, 2], index=expected_idx)
expected_right = Series([0, 1, 2, np.nan], index=expected_idx)
tm.assert_series_equal(result_left, expected_left)
tm.assert_series_equal(result_right, expected_right)
def test_align_left_fewer_levels():
# GH#45224
left = Series([2], index=pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"]))
right = Series(
[1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
)
result_left, result_right = left.align(right)
expected_right = Series(
[1], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
)
expected_left = Series(
[2], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
)
tm.assert_series_equal(result_left, expected_left)
tm.assert_series_equal(result_right, expected_right)
def test_align_left_different_named_levels():
# GH#45224
left = Series(
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3)], names=["a", "d", "c"])
)
right = Series(
[1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
)
result_left, result_right = left.align(right)
expected_left = Series(
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
)
expected_right = Series(
[1], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
)
tm.assert_series_equal(result_left, expected_left)
tm.assert_series_equal(result_right, expected_right)

View File

@ -0,0 +1,84 @@
import numpy as np
import pytest
from pandas import (
Series,
Timestamp,
isna,
)
import pandas._testing as tm
class TestSeriesArgsort:
def test_argsort_axis(self):
# GH#54257
ser = Series(range(3))
msg = "No axis named 2 for object type Series"
with pytest.raises(ValueError, match=msg):
ser.argsort(axis=2)
def test_argsort_numpy(self, datetime_series):
ser = datetime_series
res = np.argsort(ser).values
expected = np.argsort(np.array(ser))
tm.assert_numpy_array_equal(res, expected)
# with missing values
ts = ser.copy()
ts[::2] = np.nan
msg = "The behavior of Series.argsort in the presence of NA values"
with tm.assert_produces_warning(
FutureWarning, match=msg, check_stacklevel=False
):
result = np.argsort(ts)[1::2]
expected = np.argsort(np.array(ts.dropna()))
tm.assert_numpy_array_equal(result.values, expected)
def test_argsort(self, datetime_series):
argsorted = datetime_series.argsort()
assert issubclass(argsorted.dtype.type, np.integer)
def test_argsort_dt64(self, unit):
# GH#2967 (introduced bug in 0.11-dev I think)
ser = Series(
[Timestamp(f"201301{i:02d}") for i in range(1, 6)], dtype=f"M8[{unit}]"
)
assert ser.dtype == f"datetime64[{unit}]"
shifted = ser.shift(-1)
assert shifted.dtype == f"datetime64[{unit}]"
assert isna(shifted[4])
result = ser.argsort()
expected = Series(range(5), dtype=np.intp)
tm.assert_series_equal(result, expected)
msg = "The behavior of Series.argsort in the presence of NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = shifted.argsort()
expected = Series(list(range(4)) + [-1], dtype=np.intp)
tm.assert_series_equal(result, expected)
def test_argsort_stable(self):
ser = Series(np.random.default_rng(2).integers(0, 100, size=10000))
mindexer = ser.argsort(kind="mergesort")
qindexer = ser.argsort()
mexpected = np.argsort(ser.values, kind="mergesort")
qexpected = np.argsort(ser.values, kind="quicksort")
tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected))
tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected))
msg = (
r"ndarray Expected type <class 'numpy\.ndarray'>, "
r"found <class 'pandas\.core\.series\.Series'> instead"
)
with pytest.raises(AssertionError, match=msg):
tm.assert_numpy_array_equal(qindexer, mindexer)
def test_argsort_preserve_name(self, datetime_series):
result = datetime_series.argsort()
assert result.name == datetime_series.name

View File

@ -0,0 +1,205 @@
import numpy as np
import pytest
from pandas._libs.tslibs import IncompatibleFrequency
from pandas import (
DatetimeIndex,
PeriodIndex,
Series,
Timestamp,
date_range,
isna,
notna,
offsets,
period_range,
)
import pandas._testing as tm
class TestSeriesAsof:
def test_asof_nanosecond_index_access(self):
ts = Timestamp("20130101").as_unit("ns")._value
dti = DatetimeIndex([ts + 50 + i for i in range(100)])
ser = Series(np.random.default_rng(2).standard_normal(100), index=dti)
first_value = ser.asof(ser.index[0])
# GH#46903 previously incorrectly was "day"
assert dti.resolution == "nanosecond"
# this used to not work bc parsing was done by dateutil that didn't
# handle nanoseconds
assert first_value == ser["2013-01-01 00:00:00.000000050"]
expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
assert first_value == ser[Timestamp(expected_ts)]
def test_basic(self):
# array or list or dates
N = 50
rng = date_range("1/1/1990", periods=N, freq="53s")
ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
ts.iloc[15:30] = np.nan
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
result = ts.asof(dates)
assert notna(result).all()
lb = ts.index[14]
ub = ts.index[30]
result = ts.asof(list(dates))
assert notna(result).all()
lb = ts.index[14]
ub = ts.index[30]
mask = (result.index >= lb) & (result.index < ub)
rs = result[mask]
assert (rs == ts[lb]).all()
val = result[result.index[result.index >= ub][0]]
assert ts[ub] == val
def test_scalar(self):
N = 30
rng = date_range("1/1/1990", periods=N, freq="53s")
# Explicit cast to float avoid implicit cast when setting nan
ts = Series(np.arange(N), index=rng, dtype="float")
ts.iloc[5:10] = np.nan
ts.iloc[15:20] = np.nan
val1 = ts.asof(ts.index[7])
val2 = ts.asof(ts.index[19])
assert val1 == ts.iloc[4]
assert val2 == ts.iloc[14]
# accepts strings
val1 = ts.asof(str(ts.index[7]))
assert val1 == ts.iloc[4]
# in there
result = ts.asof(ts.index[3])
assert result == ts.iloc[3]
# no as of value
d = ts.index[0] - offsets.BDay()
assert np.isnan(ts.asof(d))
def test_with_nan(self):
# basic asof test
rng = date_range("1/1/2000", "1/2/2000", freq="4h")
s = Series(np.arange(len(rng)), index=rng)
r = s.resample("2h").mean()
result = r.asof(r.index)
expected = Series(
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
)
tm.assert_series_equal(result, expected)
r.iloc[3:5] = np.nan
result = r.asof(r.index)
expected = Series(
[0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
)
tm.assert_series_equal(result, expected)
r.iloc[-3:] = np.nan
result = r.asof(r.index)
expected = Series(
[0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
index=date_range("1/1/2000", "1/2/2000", freq="2h"),
)
tm.assert_series_equal(result, expected)
def test_periodindex(self):
# array or list or dates
N = 50
rng = period_range("1/1/1990", periods=N, freq="h")
ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
ts.iloc[15:30] = np.nan
dates = date_range("1/1/1990", periods=N * 3, freq="37min")
result = ts.asof(dates)
assert notna(result).all()
lb = ts.index[14]
ub = ts.index[30]
result = ts.asof(list(dates))
assert notna(result).all()
lb = ts.index[14]
ub = ts.index[30]
pix = PeriodIndex(result.index.values, freq="h")
mask = (pix >= lb) & (pix < ub)
rs = result[mask]
assert (rs == ts[lb]).all()
ts.iloc[5:10] = np.nan
ts.iloc[15:20] = np.nan
val1 = ts.asof(ts.index[7])
val2 = ts.asof(ts.index[19])
assert val1 == ts.iloc[4]
assert val2 == ts.iloc[14]
# accepts strings
val1 = ts.asof(str(ts.index[7]))
assert val1 == ts.iloc[4]
# in there
assert ts.asof(ts.index[3]) == ts.iloc[3]
# no as of value
d = ts.index[0].to_timestamp() - offsets.BDay()
assert isna(ts.asof(d))
# Mismatched freq
msg = "Input has different freq"
with pytest.raises(IncompatibleFrequency, match=msg):
ts.asof(rng.asfreq("D"))
def test_errors(self):
s = Series(
[1, 2, 3],
index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
)
# non-monotonic
assert not s.index.is_monotonic_increasing
with pytest.raises(ValueError, match="requires a sorted index"):
s.asof(s.index[0])
# subset with Series
N = 10
rng = date_range("1/1/1990", periods=N, freq="53s")
s = Series(np.random.default_rng(2).standard_normal(N), index=rng)
with pytest.raises(ValueError, match="not valid for Series"):
s.asof(s.index[0], subset="foo")
def test_all_nans(self):
# GH 15713
# series is all nans
# testing non-default indexes
N = 50
rng = date_range("1/1/1990", periods=N, freq="53s")
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
result = Series(np.nan, index=rng).asof(dates)
expected = Series(np.nan, index=dates)
tm.assert_series_equal(result, expected)
# testing scalar input
date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
result = Series(np.nan, index=rng).asof(date)
assert isna(result)
# test name is propagated
result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
expected = Series(np.nan, index=[4, 5], name="test")
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,689 @@
from datetime import (
datetime,
timedelta,
)
from importlib import reload
import string
import sys
import numpy as np
import pytest
from pandas._libs.tslibs import iNaT
import pandas.util._test_decorators as td
from pandas import (
NA,
Categorical,
CategoricalDtype,
DatetimeTZDtype,
Index,
Interval,
NaT,
Series,
Timedelta,
Timestamp,
cut,
date_range,
to_datetime,
)
import pandas._testing as tm
def rand_str(nchars: int) -> str:
"""
Generate one random byte string.
"""
RANDS_CHARS = np.array(
list(string.ascii_letters + string.digits), dtype=(np.str_, 1)
)
return "".join(np.random.default_rng(2).choice(RANDS_CHARS, nchars))
class TestAstypeAPI:
def test_astype_unitless_dt64_raises(self):
# GH#47844
ser = Series(["1970-01-01", "1970-01-01", "1970-01-01"], dtype="datetime64[ns]")
df = ser.to_frame()
msg = "Casting to unit-less dtype 'datetime64' is not supported"
with pytest.raises(TypeError, match=msg):
ser.astype(np.datetime64)
with pytest.raises(TypeError, match=msg):
df.astype(np.datetime64)
with pytest.raises(TypeError, match=msg):
ser.astype("datetime64")
with pytest.raises(TypeError, match=msg):
df.astype("datetime64")
def test_arg_for_errors_in_astype(self):
# see GH#14878
ser = Series([1, 2, 3])
msg = (
r"Expected value of kwarg 'errors' to be one of \['raise', "
r"'ignore'\]\. Supplied value is 'False'"
)
with pytest.raises(ValueError, match=msg):
ser.astype(np.float64, errors=False)
ser.astype(np.int8, errors="raise")
@pytest.mark.parametrize("dtype_class", [dict, Series])
def test_astype_dict_like(self, dtype_class):
# see GH#7271
ser = Series(range(0, 10, 2), name="abc")
dt1 = dtype_class({"abc": str})
result = ser.astype(dt1)
expected = Series(["0", "2", "4", "6", "8"], name="abc", dtype="str")
tm.assert_series_equal(result, expected)
dt2 = dtype_class({"abc": "float64"})
result = ser.astype(dt2)
expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
tm.assert_series_equal(result, expected)
dt3 = dtype_class({"abc": str, "def": str})
msg = (
"Only the Series name can be used for the key in Series dtype "
r"mappings\."
)
with pytest.raises(KeyError, match=msg):
ser.astype(dt3)
dt4 = dtype_class({0: str})
with pytest.raises(KeyError, match=msg):
ser.astype(dt4)
# GH#16717
# if dtypes provided is empty, it should error
if dtype_class is Series:
dt5 = dtype_class({}, dtype=object)
else:
dt5 = dtype_class({})
with pytest.raises(KeyError, match=msg):
ser.astype(dt5)
class TestAstype:
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_astype_object_to_dt64_non_nano(self, tz):
# GH#55756, GH#54620
ts = Timestamp("2999-01-01")
dtype = "M8[us]"
if tz is not None:
dtype = f"M8[us, {tz}]"
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
ser = Series(vals, dtype=object)
result = ser.astype(dtype)
# The 2500 is interpreted as microseconds, consistent with what
# we would get if we created DatetimeIndexes from vals[:2] and vals[2:]
# and concated the results.
pointwise = [
vals[0].tz_localize(tz),
Timestamp(vals[1], tz=tz),
to_datetime(vals[2], unit="us", utc=True).tz_convert(tz),
]
exp_vals = [x.as_unit("us").asm8 for x in pointwise]
exp_arr = np.array(exp_vals, dtype="M8[us]")
expected = Series(exp_arr, dtype="M8[us]")
if tz is not None:
expected = expected.dt.tz_localize("UTC").dt.tz_convert(tz)
tm.assert_series_equal(result, expected)
def test_astype_mixed_object_to_dt64tz(self):
# pre-2.0 this raised ValueError bc of tz mismatch
# xref GH#32581
ts = Timestamp("2016-01-04 05:06:07", tz="US/Pacific")
ts2 = ts.tz_convert("Asia/Tokyo")
ser = Series([ts, ts2], dtype=object)
res = ser.astype("datetime64[ns, Europe/Brussels]")
expected = Series(
[ts.tz_convert("Europe/Brussels"), ts2.tz_convert("Europe/Brussels")],
dtype="datetime64[ns, Europe/Brussels]",
)
tm.assert_series_equal(res, expected)
@pytest.mark.parametrize("dtype", np.typecodes["All"])
def test_astype_empty_constructor_equality(self, dtype):
# see GH#15524
if dtype not in (
"S",
"V", # poor support (if any) currently
"M",
"m", # Generic timestamps raise a ValueError. Already tested.
):
init_empty = Series([], dtype=dtype)
as_type_empty = Series([]).astype(dtype)
tm.assert_series_equal(init_empty, as_type_empty)
@pytest.mark.parametrize("dtype", [str, np.str_])
@pytest.mark.parametrize(
"series",
[
Series([string.digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
Series([string.digits * 10, rand_str(63), rand_str(64), np.nan, 1.0]),
],
)
def test_astype_str_map(self, dtype, series, using_infer_string):
# see GH#4405
using_string_dtype = using_infer_string and dtype is str
result = series.astype(dtype)
if using_string_dtype:
expected = series.map(lambda val: str(val) if val is not np.nan else np.nan)
else:
expected = series.map(str)
if using_infer_string:
expected = expected.astype(object)
tm.assert_series_equal(result, expected)
def test_astype_float_to_period(self):
result = Series([np.nan]).astype("period[D]")
expected = Series([NaT], dtype="period[D]")
tm.assert_series_equal(result, expected)
def test_astype_no_pandas_dtype(self):
# https://github.com/pandas-dev/pandas/pull/24866
ser = Series([1, 2], dtype="int64")
# Don't have NumpyEADtype in the public API, so we use `.array.dtype`,
# which is a NumpyEADtype.
result = ser.astype(ser.array.dtype)
tm.assert_series_equal(result, ser)
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
# see GH#15524, GH#15987
data = [1]
ser = Series(data)
if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
request.applymarker(mark)
msg = (
rf"The '{dtype.__name__}' dtype has no unit\. "
rf"Please pass in '{dtype.__name__}\[ns\]' instead."
)
with pytest.raises(ValueError, match=msg):
ser.astype(dtype)
def test_astype_dt64_to_str(self):
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
dti = date_range("2012-01-01", periods=3)
result = Series(dti).astype(str)
expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype="str")
tm.assert_series_equal(result, expected)
def test_astype_dt64tz_to_str(self):
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern")
result = Series(dti_tz).astype(str)
expected = Series(
[
"2012-01-01 00:00:00-05:00",
"2012-01-02 00:00:00-05:00",
"2012-01-03 00:00:00-05:00",
],
dtype="str",
)
tm.assert_series_equal(result, expected)
def test_astype_datetime(self, unit):
ser = Series(iNaT, dtype=f"M8[{unit}]", index=range(5))
ser = ser.astype("O")
assert ser.dtype == np.object_
ser = Series([datetime(2001, 1, 2, 0, 0)])
ser = ser.astype("O")
assert ser.dtype == np.object_
ser = Series(
[datetime(2001, 1, 2, 0, 0) for i in range(3)], dtype=f"M8[{unit}]"
)
ser[1] = np.nan
assert ser.dtype == f"M8[{unit}]"
ser = ser.astype("O")
assert ser.dtype == np.object_
def test_astype_datetime64tz(self):
ser = Series(date_range("20130101", periods=3, tz="US/Eastern"))
# astype
result = ser.astype(object)
expected = Series(ser.astype(object), dtype=object)
tm.assert_series_equal(result, expected)
result = Series(ser.values).dt.tz_localize("UTC").dt.tz_convert(ser.dt.tz)
tm.assert_series_equal(result, ser)
# astype - object, preserves on construction
result = Series(ser.astype(object))
expected = ser.astype(object)
tm.assert_series_equal(result, expected)
# astype - datetime64[ns, tz]
msg = "Cannot use .astype to convert from timezone-naive"
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz astype deprecated
Series(ser.values).astype("datetime64[ns, US/Eastern]")
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz astype deprecated
Series(ser.values).astype(ser.dtype)
result = ser.astype("datetime64[ns, CET]")
expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
tm.assert_series_equal(result, expected)
def test_astype_str_cast_dt64(self):
# see GH#9757
ts = Series([Timestamp("2010-01-04 00:00:00")])
res = ts.astype(str)
expected = Series(["2010-01-04"], dtype="str")
tm.assert_series_equal(res, expected)
ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
res = ts.astype(str)
expected = Series(["2010-01-04 00:00:00-05:00"], dtype="str")
tm.assert_series_equal(res, expected)
def test_astype_str_cast_td64(self):
# see GH#9757
td = Series([Timedelta(1, unit="d")])
ser = td.astype(str)
expected = Series(["1 days"], dtype="str")
tm.assert_series_equal(ser, expected)
def test_dt64_series_astype_object(self):
dt64ser = Series(date_range("20130101", periods=3))
result = dt64ser.astype(object)
assert isinstance(result.iloc[0], datetime)
assert result.dtype == np.object_
def test_td64_series_astype_object(self):
tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
result = tdser.astype(object)
assert isinstance(result.iloc[0], timedelta)
assert result.dtype == np.object_
@pytest.mark.parametrize(
"data, dtype",
[
(["x", "y", "z"], "string[python]"),
pytest.param(
["x", "y", "z"],
"string[pyarrow]",
marks=td.skip_if_no("pyarrow"),
),
(["x", "y", "z"], "category"),
(3 * [Timestamp("2020-01-01", tz="UTC")], None),
(3 * [Interval(0, 1)], None),
],
)
@pytest.mark.parametrize("errors", ["raise", "ignore"])
def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
# https://github.com/pandas-dev/pandas/issues/35471
ser = Series(data, dtype=dtype)
if errors == "ignore":
expected = ser
result = ser.astype(float, errors="ignore")
tm.assert_series_equal(result, expected)
else:
msg = "(Cannot cast)|(could not convert)"
with pytest.raises((ValueError, TypeError), match=msg):
ser.astype(float, errors=errors)
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_astype_from_float_to_str(self, dtype):
# https://github.com/pandas-dev/pandas/issues/36451
ser = Series([0.1], dtype=dtype)
result = ser.astype(str)
expected = Series(["0.1"], dtype="str")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"value, string_value",
[
(None, "None"),
(np.nan, "nan"),
(NA, "<NA>"),
],
)
def test_astype_to_str_preserves_na(self, value, string_value, using_infer_string):
# https://github.com/pandas-dev/pandas/issues/36904
ser = Series(["a", "b", value], dtype=object)
result = ser.astype(str)
expected = Series(
["a", "b", None if using_infer_string else string_value], dtype="str"
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
def test_astype(self, dtype):
ser = Series(np.random.default_rng(2).standard_normal(5), name="foo")
as_typed = ser.astype(dtype)
assert as_typed.dtype == dtype
assert as_typed.name == ser.name
@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
# gh-14265: check NaN and inf raise error when converting to int
msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
ser = Series([value])
with pytest.raises(ValueError, match=msg):
ser.astype(dtype)
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
arr = Series(["car", "house", "tree", "1"])
msg = r"invalid literal for int\(\) with base 10: 'car'"
with pytest.raises(ValueError, match=msg):
arr.astype(dtype)
def test_astype_float_to_uint_negatives_raise(
self, float_numpy_dtype, any_unsigned_int_numpy_dtype
):
# GH#45151 We don't cast negative numbers to nonsense values
# TODO: same for EA float/uint dtypes, signed integers?
arr = np.arange(5).astype(float_numpy_dtype) - 3 # includes negatives
ser = Series(arr)
msg = "Cannot losslessly cast from .* to .*"
with pytest.raises(ValueError, match=msg):
ser.astype(any_unsigned_int_numpy_dtype)
with pytest.raises(ValueError, match=msg):
ser.to_frame().astype(any_unsigned_int_numpy_dtype)
with pytest.raises(ValueError, match=msg):
# We currently catch and re-raise in Index.astype
Index(ser).astype(any_unsigned_int_numpy_dtype)
with pytest.raises(ValueError, match=msg):
ser.array.astype(any_unsigned_int_numpy_dtype)
def test_astype_cast_object_int(self):
arr = Series(["1", "2", "3", "4"], dtype=object)
result = arr.astype(int)
tm.assert_series_equal(result, Series(np.arange(1, 5)))
def test_astype_unicode(self, using_infer_string):
# see GH#7758: A bit of magic is required to set
# default encoding to utf-8
digits = string.digits
test_series = [
Series([digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
Series(["データーサイエンス、お前はもう死んでいる"]),
]
former_encoding = None
if sys.getdefaultencoding() == "utf-8":
# GH#45326 as of 2.0 Series.astype matches Index.astype by handling
# bytes with obj.decode() instead of str(obj)
item = "野菜食べないとやばい"
ser = Series([item.encode()])
result = ser.astype(np.str_)
expected = Series([item], dtype=object)
tm.assert_series_equal(result, expected)
for ser in test_series:
res = ser.astype(np.str_)
expec = ser.map(str)
if using_infer_string:
expec = expec.astype(object)
tm.assert_series_equal(res, expec)
# Restore the former encoding
if former_encoding is not None and former_encoding != "utf-8":
reload(sys)
sys.setdefaultencoding(former_encoding)
def test_astype_bytes(self):
# GH#39474
result = Series(["foo", "bar", "baz"]).astype(bytes)
assert result.dtypes == np.dtype("S3")
def test_astype_nan_to_bool(self):
# GH#43018
ser = Series(np.nan, dtype="object")
result = ser.astype("bool")
expected = Series(True, dtype="bool")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"dtype",
tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES,
)
def test_astype_ea_to_datetimetzdtype(self, dtype):
# GH37553
ser = Series([4, 0, 9], dtype=dtype)
result = ser.astype(DatetimeTZDtype(tz="US/Pacific"))
expected = Series(
{
0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"),
1: Timestamp("1969-12-31 16:00:00.000000000-08:00", tz="US/Pacific"),
2: Timestamp("1969-12-31 16:00:00.000000009-08:00", tz="US/Pacific"),
}
)
tm.assert_series_equal(result, expected)
def test_astype_retain_attrs(self, any_numpy_dtype):
# GH#44414
ser = Series([0, 1, 2, 3])
ser.attrs["Location"] = "Michigan"
result = ser.astype(any_numpy_dtype).attrs
expected = ser.attrs
tm.assert_dict_equal(expected, result)
class TestAstypeString:
@pytest.mark.parametrize(
"data, dtype",
[
([True, NA], "boolean"),
(["A", NA], "category"),
(["2020-10-10", "2020-10-10"], "datetime64[ns]"),
(["2020-10-10", "2020-10-10", NaT], "datetime64[ns]"),
(
["2012-01-01 00:00:00-05:00", NaT],
"datetime64[ns, US/Eastern]",
),
([1, None], "UInt16"),
(["1/1/2021", "2/1/2021"], "period[M]"),
(["1/1/2021", "2/1/2021", NaT], "period[M]"),
(["1 Day", "59 Days", NaT], "timedelta64[ns]"),
# currently no way to parse IntervalArray from a list of strings
],
)
def test_astype_string_to_extension_dtype_roundtrip(
self, data, dtype, request, nullable_string_dtype
):
if dtype == "boolean":
mark = pytest.mark.xfail(
reason="TODO StringArray.astype() with missing values #GH40566"
)
request.applymarker(mark)
# GH-40351
ser = Series(data, dtype=dtype)
# Note: just passing .astype(dtype) fails for dtype="category"
# with bc ser.dtype.categories will be object dtype whereas
# result.dtype.categories will have string dtype
result = ser.astype(nullable_string_dtype).astype(ser.dtype)
tm.assert_series_equal(result, ser)
class TestAstypeCategorical:
def test_astype_categorical_to_other(self):
cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values()
ser = cut(ser, range(0, 10500, 500), right=False, labels=cat)
expected = ser
tm.assert_series_equal(ser.astype("category"), expected)
tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
msg = r"Cannot cast object|str dtype to float64"
with pytest.raises(ValueError, match=msg):
ser.astype("float64")
cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype="str")
tm.assert_series_equal(cat.astype("str"), exp)
s2 = Series(Categorical(["1", "2", "3", "4"]))
exp2 = Series([1, 2, 3, 4]).astype("int")
tm.assert_series_equal(s2.astype("int"), exp2)
# object don't sort correctly, so just compare that we have the same
# values
def cmp(a, b):
tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))
expected = Series(np.array(ser.values), name="value_group")
cmp(ser.astype("object"), expected)
cmp(ser.astype(np.object_), expected)
# array conversion
tm.assert_almost_equal(np.array(ser), np.array(ser.values))
tm.assert_series_equal(ser.astype("category"), ser)
tm.assert_series_equal(ser.astype(CategoricalDtype()), ser)
roundtrip_expected = ser.cat.set_categories(
ser.cat.categories.sort_values()
).cat.remove_unused_categories()
result = ser.astype("object").astype("category")
tm.assert_series_equal(result, roundtrip_expected)
result = ser.astype("object").astype(CategoricalDtype())
tm.assert_series_equal(result, roundtrip_expected)
def test_astype_categorical_invalid_conversions(self):
# invalid conversion (these are NOT a dtype)
cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values()
ser = cut(ser, range(0, 10500, 500), right=False, labels=cat)
msg = (
"dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
"not understood"
)
with pytest.raises(TypeError, match=msg):
ser.astype(Categorical)
with pytest.raises(TypeError, match=msg):
ser.astype("object").astype(Categorical)
def test_astype_categoricaldtype(self):
ser = Series(["a", "b", "a"])
result = ser.astype(CategoricalDtype(["a", "b"], ordered=True))
expected = Series(Categorical(["a", "b", "a"], ordered=True))
tm.assert_series_equal(result, expected)
result = ser.astype(CategoricalDtype(["a", "b"], ordered=False))
expected = Series(Categorical(["a", "b", "a"], ordered=False))
tm.assert_series_equal(result, expected)
result = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
expected = Series(
Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
)
tm.assert_series_equal(result, expected)
tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))
@pytest.mark.parametrize("name", [None, "foo"])
@pytest.mark.parametrize("dtype_ordered", [True, False])
@pytest.mark.parametrize("series_ordered", [True, False])
def test_astype_categorical_to_categorical(
self, name, dtype_ordered, series_ordered
):
# GH#10696, GH#18593
s_data = list("abcaacbab")
s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
ser = Series(s_data, dtype=s_dtype, name=name)
# unspecified categories
dtype = CategoricalDtype(ordered=dtype_ordered)
result = ser.astype(dtype)
exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
expected = Series(s_data, name=name, dtype=exp_dtype)
tm.assert_series_equal(result, expected)
# different categories
dtype = CategoricalDtype(list("adc"), dtype_ordered)
result = ser.astype(dtype)
expected = Series(s_data, name=name, dtype=dtype)
tm.assert_series_equal(result, expected)
if dtype_ordered is False:
# not specifying ordered, so only test once
expected = ser
result = ser.astype("category")
tm.assert_series_equal(result, expected)
def test_astype_bool_missing_to_categorical(self):
# GH-19182
ser = Series([True, False, np.nan])
assert ser.dtypes == np.object_
result = ser.astype(CategoricalDtype(categories=[True, False]))
expected = Series(Categorical([True, False, np.nan], categories=[True, False]))
tm.assert_series_equal(result, expected)
def test_astype_categories_raises(self):
# deprecated GH#17636, removed in GH#27141
ser = Series(["a", "b", "a"])
with pytest.raises(TypeError, match="got an unexpected"):
ser.astype("category", categories=["a", "b"], ordered=True)
@pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]])
def test_astype_from_categorical(self, items):
ser = Series(items)
exp = Series(Categorical(items))
res = ser.astype("category")
tm.assert_series_equal(res, exp)
def test_astype_from_categorical_with_keywords(self):
# with keywords
lst = ["a", "b", "c", "a"]
ser = Series(lst)
exp = Series(Categorical(lst, ordered=True))
res = ser.astype(CategoricalDtype(None, ordered=True))
tm.assert_series_equal(res, exp)
exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
res = ser.astype(CategoricalDtype(list("abcdef"), ordered=True))
tm.assert_series_equal(res, exp)
def test_astype_timedelta64_with_np_nan(self):
# GH45798
result = Series([Timedelta(1), np.nan], dtype="timedelta64[ns]")
expected = Series([Timedelta(1), NaT], dtype="timedelta64[ns]")
tm.assert_series_equal(result, expected)
@td.skip_if_no("pyarrow")
def test_astype_int_na_string(self):
# GH#57418
ser = Series([12, NA], dtype="Int64[pyarrow]")
result = ser.astype("string[pyarrow]")
expected = Series(["12", NA], dtype="string[pyarrow]")
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,30 @@
import numpy as np
class TestAutoCorr:
def test_autocorr(self, datetime_series):
# Just run the function
corr1 = datetime_series.autocorr()
# Now run it with the lag parameter
corr2 = datetime_series.autocorr(lag=1)
# corr() with lag needs Series of at least length 2
if len(datetime_series) <= 2:
assert np.isnan(corr1)
assert np.isnan(corr2)
else:
assert corr1 == corr2
# Choose a random lag between 1 and length of Series - 2
# and compare the result with the Series corr() function
n = 1 + np.random.default_rng(2).integers(max(1, len(datetime_series) - 2))
corr1 = datetime_series.corr(datetime_series.shift(n))
corr2 = datetime_series.autocorr(lag=n)
# corr() with lag needs Series of at least length 2
if len(datetime_series) <= 2:
assert np.isnan(corr1)
assert np.isnan(corr2)
else:
assert corr1 == corr2

View File

@ -0,0 +1,75 @@
import numpy as np
import pytest
from pandas import (
Series,
bdate_range,
date_range,
period_range,
)
import pandas._testing as tm
class TestBetween:
def test_between(self):
series = Series(date_range("1/1/2000", periods=10))
left, right = series[[2, 7]]
result = series.between(left, right)
expected = (series >= left) & (series <= right)
tm.assert_series_equal(result, expected)
def test_between_datetime_object_dtype(self):
ser = Series(bdate_range("1/1/2000", periods=20), dtype=object)
ser[::2] = np.nan
result = ser[ser.between(ser[3], ser[17])]
expected = ser[3:18].dropna()
tm.assert_series_equal(result, expected)
result = ser[ser.between(ser[3], ser[17], inclusive="neither")]
expected = ser[5:16].dropna()
tm.assert_series_equal(result, expected)
def test_between_period_values(self):
ser = Series(period_range("2000-01-01", periods=10, freq="D"))
left, right = ser[[2, 7]]
result = ser.between(left, right)
expected = (ser >= left) & (ser <= right)
tm.assert_series_equal(result, expected)
def test_between_inclusive_string(self):
# GH 40628
series = Series(date_range("1/1/2000", periods=10))
left, right = series[[2, 7]]
result = series.between(left, right, inclusive="both")
expected = (series >= left) & (series <= right)
tm.assert_series_equal(result, expected)
result = series.between(left, right, inclusive="left")
expected = (series >= left) & (series < right)
tm.assert_series_equal(result, expected)
result = series.between(left, right, inclusive="right")
expected = (series > left) & (series <= right)
tm.assert_series_equal(result, expected)
result = series.between(left, right, inclusive="neither")
expected = (series > left) & (series < right)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("inclusive", ["yes", True, False])
def test_between_error_args(self, inclusive):
# GH 40628
series = Series(date_range("1/1/2000", periods=10))
left, right = series[[2, 7]]
value_error_msg = (
"Inclusive has to be either string of 'both',"
"'left', 'right', or 'neither'."
)
with pytest.raises(ValueError, match=value_error_msg):
series = Series(date_range("1/1/2000", periods=10))
series.between(left, right, inclusive=inclusive)

View File

@ -0,0 +1,148 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
array as pd_array,
date_range,
)
import pandas._testing as tm
@pytest.fixture
def df():
"""
base dataframe for testing
"""
return DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
def test_case_when_caselist_is_not_a_list(df):
"""
Raise ValueError if caselist is not a list.
"""
msg = "The caselist argument should be a list; "
msg += "instead got.+"
with pytest.raises(TypeError, match=msg): # GH39154
df["a"].case_when(caselist=())
def test_case_when_no_caselist(df):
"""
Raise ValueError if no caselist is provided.
"""
msg = "provide at least one boolean condition, "
msg += "with a corresponding replacement."
with pytest.raises(ValueError, match=msg): # GH39154
df["a"].case_when([])
def test_case_when_odd_caselist(df):
"""
Raise ValueError if no of caselist is odd.
"""
msg = "Argument 0 must have length 2; "
msg += "a condition and replacement; instead got length 3."
with pytest.raises(ValueError, match=msg):
df["a"].case_when([(df["a"].eq(1), 1, df.a.gt(1))])
def test_case_when_raise_error_from_mask(df):
"""
Raise Error from within Series.mask
"""
msg = "Failed to apply condition0 and replacement0."
with pytest.raises(ValueError, match=msg):
df["a"].case_when([(df["a"].eq(1), [1, 2])])
def test_case_when_single_condition(df):
"""
Test output on a single condition.
"""
result = Series([np.nan, np.nan, np.nan]).case_when([(df.a.eq(1), 1)])
expected = Series([1, np.nan, np.nan])
tm.assert_series_equal(result, expected)
def test_case_when_multiple_conditions(df):
"""
Test output when booleans are derived from a computation
"""
result = Series([np.nan, np.nan, np.nan]).case_when(
[(df.a.eq(1), 1), (Series([False, True, False]), 2)]
)
expected = Series([1, 2, np.nan])
tm.assert_series_equal(result, expected)
def test_case_when_multiple_conditions_replacement_list(df):
"""
Test output when replacement is a list
"""
result = Series([np.nan, np.nan, np.nan]).case_when(
[([True, False, False], 1), (df["a"].gt(1) & df["b"].eq(5), [1, 2, 3])]
)
expected = Series([1, 2, np.nan])
tm.assert_series_equal(result, expected)
def test_case_when_multiple_conditions_replacement_extension_dtype(df):
"""
Test output when replacement has an extension dtype
"""
result = Series([np.nan, np.nan, np.nan]).case_when(
[
([True, False, False], 1),
(df["a"].gt(1) & df["b"].eq(5), pd_array([1, 2, 3], dtype="Int64")),
],
)
expected = Series([1, 2, np.nan], dtype="Float64")
tm.assert_series_equal(result, expected)
def test_case_when_multiple_conditions_replacement_series(df):
"""
Test output when replacement is a Series
"""
result = Series([np.nan, np.nan, np.nan]).case_when(
[
(np.array([True, False, False]), 1),
(df["a"].gt(1) & df["b"].eq(5), Series([1, 2, 3])),
],
)
expected = Series([1, 2, np.nan])
tm.assert_series_equal(result, expected)
def test_case_when_non_range_index():
"""
Test output if index is not RangeIndex
"""
rng = np.random.default_rng(seed=123)
dates = date_range("1/1/2000", periods=8)
df = DataFrame(
rng.standard_normal(size=(8, 4)), index=dates, columns=["A", "B", "C", "D"]
)
result = Series(5, index=df.index, name="A").case_when([(df.A.gt(0), df.B)])
expected = df.A.mask(df.A.gt(0), df.B).where(df.A.gt(0), 5)
tm.assert_series_equal(result, expected)
def test_case_when_callable():
"""
Test output on a callable
"""
# https://numpy.org/doc/stable/reference/generated/numpy.piecewise.html
x = np.linspace(-2.5, 2.5, 6)
ser = Series(x)
result = ser.case_when(
caselist=[
(lambda df: df < 0, lambda df: -df),
(lambda df: df >= 0, lambda df: df),
]
)
expected = np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x])
tm.assert_series_equal(result, Series(expected))

View File

@ -0,0 +1,146 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
Timestamp,
isna,
notna,
)
import pandas._testing as tm
class TestSeriesClip:
def test_clip(self, datetime_series):
val = datetime_series.median()
assert datetime_series.clip(lower=val).min() == val
assert datetime_series.clip(upper=val).max() == val
result = datetime_series.clip(-0.5, 0.5)
expected = np.clip(datetime_series, -0.5, 0.5)
tm.assert_series_equal(result, expected)
assert isinstance(expected, Series)
def test_clip_types_and_nulls(self):
sers = [
Series([np.nan, 1.0, 2.0, 3.0]),
Series([None, "a", "b", "c"]),
Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")),
]
for s in sers:
thresh = s[2]
lower = s.clip(lower=thresh)
upper = s.clip(upper=thresh)
assert lower[notna(lower)].min() == thresh
assert upper[notna(upper)].max() == thresh
assert list(isna(s)) == list(isna(lower))
assert list(isna(s)) == list(isna(upper))
def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture):
# Ensure that clipping method can handle NA values with out failing
# GH#40581
if nulls_fixture is pd.NaT:
# constructor will raise, see
# test_constructor_mismatched_null_nullable_dtype
pytest.skip("See test_constructor_mismatched_null_nullable_dtype")
ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
s_clipped_upper = ser.clip(upper=2.0)
s_clipped_lower = ser.clip(lower=2.0)
expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype)
expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype)
tm.assert_series_equal(s_clipped_upper, expected_upper)
tm.assert_series_equal(s_clipped_lower, expected_lower)
def test_clip_with_na_args(self):
"""Should process np.nan argument as None"""
# GH#17276
s = Series([1, 2, 3])
tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3]))
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))
# GH#19992
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
# TODO: avoid this warning here? seems like we should never be upcasting
# in the first place?
with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.clip(lower=[0, 4, np.nan])
tm.assert_series_equal(res, Series([1, 4, 3]))
with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.clip(upper=[1, np.nan, 1])
tm.assert_series_equal(res, Series([1, 2, 1]))
# GH#40420
s = Series([1, 2, 3])
result = s.clip(0, [np.nan, np.nan, np.nan])
tm.assert_series_equal(s, result)
def test_clip_against_series(self):
# GH#6966
s = Series([1.0, 1.0, 4.0])
lower = Series([1.0, 2.0, 3.0])
upper = Series([1.5, 2.5, 3.5])
tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
def test_clip_against_list_like(self, inplace, upper):
# GH#15390
original = Series([5, 6, 7])
result = original.clip(upper=upper, inplace=inplace)
expected = Series([1, 2, 3])
if inplace:
result = original
tm.assert_series_equal(result, expected, check_exact=True)
def test_clip_with_datetimes(self):
# GH#11838
# naive and tz-aware datetimes
t = Timestamp("2015-12-01 09:30:30")
s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")])
result = s.clip(upper=t)
expected = Series(
[Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")]
)
tm.assert_series_equal(result, expected)
t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern")
s = Series(
[
Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
Timestamp("2015-12-01 09:31:00", tz="US/Eastern"),
]
)
result = s.clip(upper=t)
expected = Series(
[
Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
Timestamp("2015-12-01 09:30:30", tz="US/Eastern"),
]
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", [object, "M8[us]"])
def test_clip_with_timestamps_and_oob_datetimes(self, dtype):
# GH-42794
ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype)
result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
expected = Series([Timestamp.min, Timestamp.max], dtype=dtype)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,17 @@
from pandas import Series
import pandas._testing as tm
class TestCombine:
def test_combine_scalar(self):
# GH#21248
# Note - combine() with another Series is tested elsewhere because
# it is used when testing operators
ser = Series([i * 10 for i in range(5)])
result = ser.combine(3, lambda x, y: x + y)
expected = Series([i * 10 + 3 for i in range(5)])
tm.assert_series_equal(result, expected)
result = ser.combine(22, lambda x, y: min(x, y))
expected = Series([min(i * 10, 22) for i in range(5)])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,150 @@
from datetime import datetime
import numpy as np
import pandas as pd
from pandas import (
Period,
Series,
date_range,
period_range,
to_datetime,
)
import pandas._testing as tm
class TestCombineFirst:
def test_combine_first_period_datetime(self):
# GH#3367
didx = date_range(start="1950-01-31", end="1950-07-31", freq="ME")
pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M")
# check to be consistent with DatetimeIndex
for idx in [didx, pidx]:
a = Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
b = Series([9, 9, 9, 9, 9, 9, 9], index=idx)
result = a.combine_first(b)
expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64)
tm.assert_series_equal(result, expected)
def test_combine_first_name(self, datetime_series):
result = datetime_series.combine_first(datetime_series[:5])
assert result.name == datetime_series.name
def test_combine_first(self, using_infer_string):
values = np.arange(20, dtype=np.float64)
series = Series(values, index=np.arange(20, dtype=np.int64))
series_copy = series * 2
series_copy[::2] = np.nan
# nothing used from the input
combined = series.combine_first(series_copy)
tm.assert_series_equal(combined, series)
# Holes filled from input
combined = series_copy.combine_first(series)
assert np.isfinite(combined).all()
tm.assert_series_equal(combined[::2], series[::2])
tm.assert_series_equal(combined[1::2], series_copy[1::2])
# mixed types
index = pd.Index([str(i) for i in range(20)])
floats = Series(np.random.default_rng(2).standard_normal(20), index=index)
strings = Series([str(i) for i in range(10)], index=index[::2], dtype=object)
combined = strings.combine_first(floats)
tm.assert_series_equal(strings, combined.loc[index[::2]])
tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])
# corner case
ser = Series([1.0, 2, 3], index=[0, 1, 2])
empty = Series([], index=[], dtype=object)
msg = "The behavior of array concatenation with empty entries is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.combine_first(empty)
if not using_infer_string:
ser.index = ser.index.astype("O")
tm.assert_series_equal(ser, result)
def test_combine_first_dt64(self, unit):
s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
s1 = to_datetime(Series([np.nan, "2011"])).dt.as_unit(unit)
rs = s0.combine_first(s1)
xp = to_datetime(Series(["2010", "2011"])).dt.as_unit(unit)
tm.assert_series_equal(rs, xp)
s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
s1 = Series([np.nan, "2011"])
rs = s0.combine_first(s1)
xp = Series([datetime(2010, 1, 1), "2011"], dtype="datetime64[ns]")
tm.assert_series_equal(rs, xp)
def test_combine_first_dt_tz_values(self, tz_naive_fixture):
ser1 = Series(
pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
name="ser1",
)
ser2 = Series(
pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture),
index=[2, 3, 4],
name="ser2",
)
result = ser1.combine_first(ser2)
exp_vals = pd.DatetimeIndex(
["20150101", "20150102", "20150103", "20160515", "20160516"],
tz=tz_naive_fixture,
)
exp = Series(exp_vals, name="ser1")
tm.assert_series_equal(exp, result)
def test_combine_first_timezone_series_with_empty_series(self):
# GH 41800
time_index = date_range(
datetime(2021, 1, 1, 1),
datetime(2021, 1, 1, 10),
freq="h",
tz="Europe/Rome",
)
s1 = Series(range(10), index=time_index)
s2 = Series(index=time_index)
msg = "The behavior of array concatenation with empty entries is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s1.combine_first(s2)
tm.assert_series_equal(result, s1)
def test_combine_first_preserves_dtype(self):
# GH51764
s1 = Series([1666880195890293744, 1666880195890293837])
s2 = Series([1, 2, 3])
result = s1.combine_first(s2)
expected = Series([1666880195890293744, 1666880195890293837, 3])
tm.assert_series_equal(result, expected)
def test_combine_mixed_timezone(self):
# GH 26283
uniform_tz = Series({pd.Timestamp("2019-05-01", tz="UTC"): 1.0})
multi_tz = Series(
{
pd.Timestamp("2019-05-01 01:00:00+0100", tz="Europe/London"): 2.0,
pd.Timestamp("2019-05-02", tz="UTC"): 3.0,
}
)
result = uniform_tz.combine_first(multi_tz)
expected = Series(
[1.0, 3.0],
index=pd.Index(
[
pd.Timestamp("2019-05-01 00:00:00+00:00", tz="UTC"),
pd.Timestamp("2019-05-02 00:00:00+00:00", tz="UTC"),
],
dtype="object",
),
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,141 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
def test_compare_axis(align_axis):
# GH#30429
s1 = pd.Series(["a", "b", "c"])
s2 = pd.Series(["x", "b", "z"])
result = s1.compare(s2, align_axis=align_axis)
if align_axis in (1, "columns"):
indices = pd.Index([0, 2])
columns = pd.Index(["self", "other"])
expected = pd.DataFrame(
[["a", "x"], ["c", "z"]], index=indices, columns=columns
)
tm.assert_frame_equal(result, expected)
else:
indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
expected = pd.Series(["a", "x", "c", "z"], index=indices)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"keep_shape, keep_equal",
[
(True, False),
(False, True),
(True, True),
# False, False case is already covered in test_compare_axis
],
)
def test_compare_various_formats(keep_shape, keep_equal):
s1 = pd.Series(["a", "b", "c"])
s2 = pd.Series(["x", "b", "z"])
result = s1.compare(s2, keep_shape=keep_shape, keep_equal=keep_equal)
if keep_shape:
indices = pd.Index([0, 1, 2])
columns = pd.Index(["self", "other"])
if keep_equal:
expected = pd.DataFrame(
[["a", "x"], ["b", "b"], ["c", "z"]], index=indices, columns=columns
)
else:
expected = pd.DataFrame(
[["a", "x"], [np.nan, np.nan], ["c", "z"]],
index=indices,
columns=columns,
)
else:
indices = pd.Index([0, 2])
columns = pd.Index(["self", "other"])
expected = pd.DataFrame(
[["a", "x"], ["c", "z"]], index=indices, columns=columns
)
tm.assert_frame_equal(result, expected)
def test_compare_with_equal_nulls():
# We want to make sure two NaNs are considered the same
# and dropped where applicable
s1 = pd.Series(["a", "b", np.nan])
s2 = pd.Series(["x", "b", np.nan])
result = s1.compare(s2)
expected = pd.DataFrame([["a", "x"]], columns=["self", "other"])
tm.assert_frame_equal(result, expected)
def test_compare_with_non_equal_nulls():
# We want to make sure the relevant NaNs do not get dropped
s1 = pd.Series(["a", "b", "c"])
s2 = pd.Series(["x", "b", np.nan])
result = s1.compare(s2, align_axis=0)
indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
expected = pd.Series(["a", "x", "c", np.nan], index=indices)
tm.assert_series_equal(result, expected)
def test_compare_multi_index():
index = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]])
s1 = pd.Series(["a", "b", "c"], index=index)
s2 = pd.Series(["x", "b", "z"], index=index)
result = s1.compare(s2, align_axis=0)
indices = pd.MultiIndex.from_arrays(
[[0, 0, 1, 1], [0, 0, 2, 2], ["self", "other", "self", "other"]]
)
expected = pd.Series(["a", "x", "c", "z"], index=indices)
tm.assert_series_equal(result, expected)
def test_compare_unaligned_objects():
# test Series with different indices
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
ser1.compare(ser2)
# test Series with different lengths
msg = "Can only compare identically-labeled Series objects"
with pytest.raises(ValueError, match=msg):
ser1 = pd.Series([1, 2, 3])
ser2 = pd.Series([1, 2, 3, 4])
ser1.compare(ser2)
def test_compare_datetime64_and_string():
# Issue https://github.com/pandas-dev/pandas/issues/45506
# Catch OverflowError when comparing datetime64 and string
data = [
{"a": "2015-07-01", "b": "08335394550"},
{"a": "2015-07-02", "b": "+49 (0) 0345 300033"},
{"a": "2015-07-03", "b": "+49(0)2598 04457"},
{"a": "2015-07-04", "b": "0741470003"},
{"a": "2015-07-05", "b": "04181 83668"},
]
dtypes = {"a": "datetime64[ns]", "b": "string"}
df = pd.DataFrame(data=data).astype(dtypes)
result_eq1 = df["a"].eq(df["b"])
result_eq2 = df["a"] == df["b"]
result_neq = df["a"] != df["b"]
expected_eq = pd.Series([False] * 5) # For .eq and ==
expected_neq = pd.Series([True] * 5) # For !=
tm.assert_series_equal(result_eq1, expected_eq)
tm.assert_series_equal(result_eq2, expected_eq)
tm.assert_series_equal(result_neq, expected_neq)

View File

@ -0,0 +1,309 @@
from itertools import product
import numpy as np
import pytest
from pandas._config import using_string_dtype
from pandas._libs import lib
import pandas as pd
import pandas._testing as tm
# Each test case consists of a tuple with the data and dtype to create the
# test Series, the default dtype for the expected result (which is valid
# for most cases), and the specific cases where the result deviates from
# this default. Those overrides are defined as a dict with (keyword, val) as
# dictionary key. In case of multiple items, the last override takes precedence.
@pytest.fixture(
params=[
(
# data
[1, 2, 3],
# original dtype
np.dtype("int32"),
# default expected dtype
"Int32",
# exceptions on expected dtype
{("convert_integer", False): np.dtype("int32")},
),
(
[1, 2, 3],
np.dtype("int64"),
"Int64",
{("convert_integer", False): np.dtype("int64")},
),
(
["x", "y", "z"],
np.dtype("O"),
pd.StringDtype(),
{("convert_string", False): np.dtype("O")},
),
(
[True, False, np.nan],
np.dtype("O"),
pd.BooleanDtype(),
{("convert_boolean", False): np.dtype("O")},
),
(
["h", "i", np.nan],
np.dtype("O"),
pd.StringDtype(),
{("convert_string", False): np.dtype("O")},
),
( # GH32117
["h", "i", 1],
np.dtype("O"),
np.dtype("O"),
{},
),
(
[10, np.nan, 20],
np.dtype("float"),
"Int64",
{
("convert_integer", False, "convert_floating", True): "Float64",
("convert_integer", False, "convert_floating", False): np.dtype(
"float"
),
},
),
(
[np.nan, 100.5, 200],
np.dtype("float"),
"Float64",
{("convert_floating", False): np.dtype("float")},
),
(
[3, 4, 5],
"Int8",
"Int8",
{},
),
(
[[1, 2], [3, 4], [5]],
None,
np.dtype("O"),
{},
),
(
[4, 5, 6],
np.dtype("uint32"),
"UInt32",
{("convert_integer", False): np.dtype("uint32")},
),
(
[-10, 12, 13],
np.dtype("i1"),
"Int8",
{("convert_integer", False): np.dtype("i1")},
),
(
[1.2, 1.3],
np.dtype("float32"),
"Float32",
{("convert_floating", False): np.dtype("float32")},
),
(
[1, 2.0],
object,
"Int64",
{
("convert_integer", False): "Float64",
("convert_integer", False, "convert_floating", False): np.dtype(
"float"
),
("infer_objects", False): np.dtype("object"),
},
),
(
[1, 2.5],
object,
"Float64",
{
("convert_floating", False): np.dtype("float"),
("infer_objects", False): np.dtype("object"),
},
),
(["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("s"),
pd.DatetimeTZDtype(tz="UTC"),
pd.DatetimeTZDtype(tz="UTC"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ms"),
pd.DatetimeTZDtype(tz="UTC"),
pd.DatetimeTZDtype(tz="UTC"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("us"),
pd.DatetimeTZDtype(tz="UTC"),
pd.DatetimeTZDtype(tz="UTC"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"),
pd.DatetimeTZDtype(tz="UTC"),
pd.DatetimeTZDtype(tz="UTC"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"),
"datetime64[ns]",
np.dtype("datetime64[ns]"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"),
object,
np.dtype("datetime64[ns]"),
{("infer_objects", False): np.dtype("object")},
),
(
pd.period_range("1/1/2011", freq="M", periods=3),
None,
pd.PeriodDtype("M"),
{},
),
(
pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
None,
pd.IntervalDtype("int64", "right"),
{},
),
]
)
def test_cases(request):
return request.param
class TestSeriesConvertDtypes:
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize("params", product(*[(True, False)] * 5))
def test_convert_dtypes(
self,
test_cases,
params,
using_infer_string,
):
data, maindtype, expected_default, expected_other = test_cases
if (
hasattr(data, "dtype")
and lib.is_np_dtype(data.dtype, "M")
and isinstance(maindtype, pd.DatetimeTZDtype)
):
# this astype is deprecated in favor of tz_localize
msg = "Cannot use .astype to convert from timezone-naive dtype"
with pytest.raises(TypeError, match=msg):
pd.Series(data, dtype=maindtype)
return
if maindtype is not None:
series = pd.Series(data, dtype=maindtype)
else:
series = pd.Series(data)
result = series.convert_dtypes(*params)
param_names = [
"infer_objects",
"convert_string",
"convert_integer",
"convert_boolean",
"convert_floating",
]
params_dict = dict(zip(param_names, params))
expected_dtype = expected_default
for spec, dtype in expected_other.items():
if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])):
expected_dtype = dtype
if (
using_infer_string
and expected_default == "string"
and expected_dtype == object
and params[0]
and not params[1]
):
# If convert_string=False and infer_objects=True, we end up with the
# default string dtype instead of preserving object for string data
expected_dtype = pd.StringDtype(na_value=np.nan)
expected = pd.Series(data, dtype=expected_dtype)
tm.assert_series_equal(result, expected)
# Test that it is a copy
copy = series.copy(deep=True)
if result.notna().sum() > 0 and result.dtype in ["interval[int64, right]"]:
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
result[result.notna()] = np.nan
else:
result[result.notna()] = np.nan
# Make sure original not changed
tm.assert_series_equal(series, copy)
def test_convert_string_dtype(self, nullable_string_dtype):
# https://github.com/pandas-dev/pandas/issues/31731 -> converting columns
# that are already string dtype
df = pd.DataFrame(
{"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype=nullable_string_dtype
)
result = df.convert_dtypes()
tm.assert_frame_equal(df, result)
def test_convert_bool_dtype(self):
# GH32287
df = pd.DataFrame({"A": pd.array([True])})
tm.assert_frame_equal(df, df.convert_dtypes())
def test_convert_byte_string_dtype(self):
# GH-43183
byte_str = b"binary-string"
df = pd.DataFrame(data={"A": byte_str}, index=[0])
result = df.convert_dtypes()
expected = df
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"infer_objects, dtype", [(True, "Int64"), (False, "object")]
)
def test_convert_dtype_object_with_na(self, infer_objects, dtype):
# GH#48791
ser = pd.Series([1, pd.NA])
result = ser.convert_dtypes(infer_objects=infer_objects)
expected = pd.Series([1, pd.NA], dtype=dtype)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"infer_objects, dtype", [(True, "Float64"), (False, "object")]
)
def test_convert_dtype_object_with_na_float(self, infer_objects, dtype):
# GH#48791
ser = pd.Series([1.5, pd.NA])
result = ser.convert_dtypes(infer_objects=infer_objects)
expected = pd.Series([1.5, pd.NA], dtype=dtype)
tm.assert_series_equal(result, expected)
def test_convert_dtypes_pyarrow_to_np_nullable(self):
# GH 53648
pytest.importorskip("pyarrow")
ser = pd.Series(range(2), dtype="int32[pyarrow]")
result = ser.convert_dtypes(dtype_backend="numpy_nullable")
expected = pd.Series(range(2), dtype="Int32")
tm.assert_series_equal(result, expected)
def test_convert_dtypes_pyarrow_null(self):
# GH#55346
pa = pytest.importorskip("pyarrow")
ser = pd.Series([None, None])
result = ser.convert_dtypes(dtype_backend="pyarrow")
expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,91 @@
import numpy as np
import pytest
from pandas import (
Series,
Timestamp,
)
import pandas._testing as tm
class TestCopy:
@pytest.mark.parametrize("deep", ["default", None, False, True])
def test_copy(self, deep, using_copy_on_write, warn_copy_on_write):
ser = Series(np.arange(10), dtype="float64")
# default deep is True
if deep == "default":
ser2 = ser.copy()
else:
ser2 = ser.copy(deep=deep)
if using_copy_on_write:
# INFO(CoW) a shallow copy doesn't yet copy the data
# but parent will not be modified (CoW)
if deep is None or deep is False:
assert np.may_share_memory(ser.values, ser2.values)
else:
assert not np.may_share_memory(ser.values, ser2.values)
with tm.assert_cow_warning(warn_copy_on_write and deep is False):
ser2[::2] = np.nan
if deep is not False or using_copy_on_write:
# Did not modify original Series
assert np.isnan(ser2[0])
assert not np.isnan(ser[0])
else:
# we DID modify the original Series
assert np.isnan(ser2[0])
assert np.isnan(ser[0])
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
@pytest.mark.parametrize("deep", ["default", None, False, True])
def test_copy_tzaware(self, deep, using_copy_on_write):
# GH#11794
# copy of tz-aware
expected = Series([Timestamp("2012/01/01", tz="UTC")])
expected2 = Series([Timestamp("1999/01/01", tz="UTC")])
ser = Series([Timestamp("2012/01/01", tz="UTC")])
if deep == "default":
ser2 = ser.copy()
else:
ser2 = ser.copy(deep=deep)
if using_copy_on_write:
# INFO(CoW) a shallow copy doesn't yet copy the data
# but parent will not be modified (CoW)
if deep is None or deep is False:
assert np.may_share_memory(ser.values, ser2.values)
else:
assert not np.may_share_memory(ser.values, ser2.values)
ser2[0] = Timestamp("1999/01/01", tz="UTC")
# default deep is True
if deep is not False or using_copy_on_write:
# Did not modify original Series
tm.assert_series_equal(ser2, expected2)
tm.assert_series_equal(ser, expected)
else:
# we DID modify the original Series
tm.assert_series_equal(ser2, expected2)
tm.assert_series_equal(ser, expected2)
def test_copy_name(self, datetime_series):
result = datetime_series.copy()
assert result.name == datetime_series.name
def test_copy_index_name_checking(self, datetime_series):
# don't want to be able to modify the index stored elsewhere after
# making a copy
datetime_series.index.name = None
assert datetime_series.index.name is None
assert datetime_series is datetime_series
cp = datetime_series.copy()
cp.index.name = "foo"
assert datetime_series.index.name is None

View File

@ -0,0 +1,34 @@
import numpy as np
import pandas as pd
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
class TestSeriesCount:
def test_count(self, datetime_series):
assert datetime_series.count() == len(datetime_series)
datetime_series[::2] = np.nan
assert datetime_series.count() == np.isfinite(datetime_series).sum()
def test_count_inf_as_na(self):
# GH#29478
ser = Series([pd.Timestamp("1990/1/1")])
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("use_inf_as_na", True):
assert ser.count() == 1
def test_count_categorical(self):
ser = Series(
Categorical(
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
)
)
result = ser.count()
assert result == 2

View File

@ -0,0 +1,185 @@
import math
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
isna,
)
import pandas._testing as tm
class TestSeriesCov:
def test_cov(self, datetime_series):
# full overlap
tm.assert_almost_equal(
datetime_series.cov(datetime_series), datetime_series.std() ** 2
)
# partial overlap
tm.assert_almost_equal(
datetime_series[:15].cov(datetime_series[5:]),
datetime_series[5:15].std() ** 2,
)
# No overlap
assert np.isnan(datetime_series[::2].cov(datetime_series[1::2]))
# all NA
cp = datetime_series[:10].copy()
cp[:] = np.nan
assert isna(cp.cov(cp))
# min_periods
assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12))
ts1 = datetime_series[:15].reindex(datetime_series.index)
ts2 = datetime_series[5:].reindex(datetime_series.index)
assert isna(ts1.cov(ts2, min_periods=12))
@pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3])
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
def test_cov_ddof(self, test_ddof, dtype):
# GH#34611
np_array1 = np.random.default_rng(2).random(10)
np_array2 = np.random.default_rng(2).random(10)
s1 = Series(np_array1, dtype=dtype)
s2 = Series(np_array2, dtype=dtype)
result = s1.cov(s2, ddof=test_ddof)
expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1]
assert math.isclose(expected, result)
class TestSeriesCorr:
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
def test_corr(self, datetime_series, dtype):
stats = pytest.importorskip("scipy.stats")
datetime_series = datetime_series.astype(dtype)
# full overlap
tm.assert_almost_equal(datetime_series.corr(datetime_series), 1)
# partial overlap
tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1)
assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12))
ts1 = datetime_series[:15].reindex(datetime_series.index)
ts2 = datetime_series[5:].reindex(datetime_series.index)
assert isna(ts1.corr(ts2, min_periods=12))
# No overlap
assert np.isnan(datetime_series[::2].corr(datetime_series[1::2]))
# all NA
cp = datetime_series[:10].copy()
cp[:] = np.nan
assert isna(cp.corr(cp))
A = Series(
np.arange(10, dtype=np.float64),
index=date_range("2020-01-01", periods=10),
name="ts",
)
B = A.copy()
result = A.corr(B)
expected, _ = stats.pearsonr(A, B)
tm.assert_almost_equal(result, expected)
def test_corr_rank(self):
stats = pytest.importorskip("scipy.stats")
# kendall and spearman
A = Series(
np.arange(10, dtype=np.float64),
index=date_range("2020-01-01", periods=10),
name="ts",
)
B = A.copy()
A[-5:] = A[:5].copy()
result = A.corr(B, method="kendall")
expected = stats.kendalltau(A, B)[0]
tm.assert_almost_equal(result, expected)
result = A.corr(B, method="spearman")
expected = stats.spearmanr(A, B)[0]
tm.assert_almost_equal(result, expected)
# results from R
A = Series(
[
-0.89926396,
0.94209606,
-1.03289164,
-0.95445587,
0.76910310,
-0.06430576,
-2.09704447,
0.40660407,
-0.89926396,
0.94209606,
]
)
B = Series(
[
-1.01270225,
-0.62210117,
-1.56895827,
0.59592943,
-0.01680292,
1.17258718,
-1.06009347,
-0.10222060,
-0.89076239,
0.89372375,
]
)
kexp = 0.4319297
sexp = 0.5853767
tm.assert_almost_equal(A.corr(B, method="kendall"), kexp)
tm.assert_almost_equal(A.corr(B, method="spearman"), sexp)
def test_corr_invalid_method(self):
# GH PR #22298
s1 = Series(np.random.default_rng(2).standard_normal(10))
s2 = Series(np.random.default_rng(2).standard_normal(10))
msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
with pytest.raises(ValueError, match=msg):
s1.corr(s2, method="____")
def test_corr_callable_method(self, datetime_series):
# simple correlation example
# returns 1 if exact equality, 0 otherwise
my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0
# simple example
s1 = Series([1, 2, 3, 4, 5])
s2 = Series([5, 4, 3, 2, 1])
expected = 0
tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected)
# full overlap
tm.assert_almost_equal(
datetime_series.corr(datetime_series, method=my_corr), 1.0
)
# partial overlap
tm.assert_almost_equal(
datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0
)
# No overlap
assert np.isnan(
datetime_series[::2].corr(datetime_series[1::2], method=my_corr)
)
# dataframe example
df = pd.DataFrame([s1, s2])
expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)

View File

@ -0,0 +1,203 @@
import numpy as np
import pytest
from pandas.compat.numpy import np_version_gte1p25
from pandas.core.dtypes.common import (
is_complex_dtype,
is_extension_array_dtype,
)
from pandas import (
NA,
Period,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestSeriesDescribe:
def test_describe_ints(self):
ser = Series([0, 1, 2, 3, 4], name="int_data")
result = ser.describe()
expected = Series(
[5, 2, ser.std(), 0, 1, 2, 3, 4],
name="int_data",
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)
def test_describe_bools(self):
ser = Series([True, True, False, False, False], name="bool_data")
result = ser.describe()
expected = Series(
[5, 2, False, 3], name="bool_data", index=["count", "unique", "top", "freq"]
)
tm.assert_series_equal(result, expected)
def test_describe_strs(self):
ser = Series(["a", "a", "b", "c", "d"], name="str_data")
result = ser.describe()
expected = Series(
[5, 4, "a", 2], name="str_data", index=["count", "unique", "top", "freq"]
)
tm.assert_series_equal(result, expected)
def test_describe_timedelta64(self):
ser = Series(
[
Timedelta("1 days"),
Timedelta("2 days"),
Timedelta("3 days"),
Timedelta("4 days"),
Timedelta("5 days"),
],
name="timedelta_data",
)
result = ser.describe()
expected = Series(
[5, ser[2], ser.std(), ser[0], ser[1], ser[2], ser[3], ser[4]],
name="timedelta_data",
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)
def test_describe_period(self):
ser = Series(
[Period("2020-01", "M"), Period("2020-01", "M"), Period("2019-12", "M")],
name="period_data",
)
result = ser.describe()
expected = Series(
[3, 2, ser[0], 2],
name="period_data",
index=["count", "unique", "top", "freq"],
)
tm.assert_series_equal(result, expected)
def test_describe_empty_object(self):
# https://github.com/pandas-dev/pandas/issues/27183
s = Series([None, None], dtype=object)
result = s.describe()
expected = Series(
[0, 0, np.nan, np.nan],
dtype=object,
index=["count", "unique", "top", "freq"],
)
tm.assert_series_equal(result, expected)
result = s[:0].describe()
tm.assert_series_equal(result, expected)
# ensure NaN, not None
assert np.isnan(result.iloc[2])
assert np.isnan(result.iloc[3])
def test_describe_with_tz(self, tz_naive_fixture):
# GH 21332
tz = tz_naive_fixture
name = str(tz_naive_fixture)
start = Timestamp(2018, 1, 1)
end = Timestamp(2018, 1, 5)
s = Series(date_range(start, end, tz=tz), name=name)
result = s.describe()
expected = Series(
[
5,
Timestamp(2018, 1, 3).tz_localize(tz),
start.tz_localize(tz),
s[1],
s[2],
s[3],
end.tz_localize(tz),
],
name=name,
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)
def test_describe_with_tz_numeric(self):
name = tz = "CET"
start = Timestamp(2018, 1, 1)
end = Timestamp(2018, 1, 5)
s = Series(date_range(start, end, tz=tz), name=name)
result = s.describe()
expected = Series(
[
5,
Timestamp("2018-01-03 00:00:00", tz=tz),
Timestamp("2018-01-01 00:00:00", tz=tz),
Timestamp("2018-01-02 00:00:00", tz=tz),
Timestamp("2018-01-03 00:00:00", tz=tz),
Timestamp("2018-01-04 00:00:00", tz=tz),
Timestamp("2018-01-05 00:00:00", tz=tz),
],
name=name,
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)
def test_datetime_is_numeric_includes_datetime(self):
s = Series(date_range("2012", periods=3))
result = s.describe()
expected = Series(
[
3,
Timestamp("2012-01-02"),
Timestamp("2012-01-01"),
Timestamp("2012-01-01T12:00:00"),
Timestamp("2012-01-02"),
Timestamp("2012-01-02T12:00:00"),
Timestamp("2012-01-03"),
],
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)
@pytest.mark.filterwarnings("ignore:Casting complex values to real discards")
def test_numeric_result_dtype(self, any_numeric_dtype):
# GH#48340 - describe should always return float on non-complex numeric input
if is_extension_array_dtype(any_numeric_dtype):
dtype = "Float64"
else:
dtype = "complex128" if is_complex_dtype(any_numeric_dtype) else None
ser = Series([0, 1], dtype=any_numeric_dtype)
if dtype == "complex128" and np_version_gte1p25:
with pytest.raises(
TypeError, match=r"^a must be an array of real numbers$"
):
ser.describe()
return
result = ser.describe()
expected = Series(
[
2.0,
0.5,
ser.std(),
0,
0.25,
0.5,
0.75,
1.0,
],
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
dtype=dtype,
)
tm.assert_series_equal(result, expected)
def test_describe_one_element_ea(self):
# GH#52515
ser = Series([0.0], dtype="Float64")
with tm.assert_produces_warning(None):
result = ser.describe()
expected = Series(
[1, 0, NA, 0, 0, 0, 0, 0],
dtype="Float64",
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,88 @@
import numpy as np
import pytest
from pandas import (
Series,
TimedeltaIndex,
date_range,
)
import pandas._testing as tm
class TestSeriesDiff:
def test_diff_np(self):
# TODO(__array_function__): could make np.diff return a Series
# matching ser.diff()
ser = Series(np.arange(5))
res = np.diff(ser)
expected = np.array([1, 1, 1, 1])
tm.assert_numpy_array_equal(res, expected)
def test_diff_int(self):
# int dtype
a = 10000000000000000
b = a + 1
ser = Series([a, b])
result = ser.diff()
assert result[1] == 1
def test_diff_tz(self):
# Combined datetime diff, normal diff and boolean diff test
ts = Series(
np.arange(10, dtype=np.float64),
index=date_range("2020-01-01", periods=10),
name="ts",
)
ts.diff()
# neg n
result = ts.diff(-1)
expected = ts - ts.shift(-1)
tm.assert_series_equal(result, expected)
# 0
result = ts.diff(0)
expected = ts - ts
tm.assert_series_equal(result, expected)
def test_diff_dt64(self):
# datetime diff (GH#3100)
ser = Series(date_range("20130102", periods=5))
result = ser.diff()
expected = ser - ser.shift(1)
tm.assert_series_equal(result, expected)
# timedelta diff
result = result - result.shift(1) # previous result
expected = expected.diff() # previously expected
tm.assert_series_equal(result, expected)
def test_diff_dt64tz(self):
# with tz
ser = Series(
date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo"
)
result = ser.diff()
expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"input,output,diff",
[([False, True, True, False, False], [np.nan, True, False, True, False], 1)],
)
def test_diff_bool(self, input, output, diff):
# boolean series (test for fixing #17294)
ser = Series(input)
result = ser.diff()
expected = Series(output)
tm.assert_series_equal(result, expected)
def test_diff_object_dtype(self):
# object series
ser = Series([False, True, 5.0, np.nan, True, False])
result = ser.diff()
expected = ser - ser.shift(1)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,99 @@
import pytest
from pandas import (
Index,
Series,
)
import pandas._testing as tm
from pandas.api.types import is_bool_dtype
@pytest.mark.parametrize(
"data, index, drop_labels, axis, expected_data, expected_index",
[
# Unique Index
([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]),
([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]),
([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]),
# GH 5248 Non-Unique Index
([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]),
([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]),
([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]),
],
)
def test_drop_unique_and_non_unique_index(
data, index, axis, drop_labels, expected_data, expected_index
):
ser = Series(data=data, index=index)
result = ser.drop(drop_labels, axis=axis)
expected = Series(data=expected_data, index=expected_index)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data, index, drop_labels, axis, error_type, error_desc",
[
# single string/tuple-like
(range(3), list("abc"), "bc", 0, KeyError, "not found in axis"),
# bad axis
(range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"),
(range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"),
],
)
def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc):
ser = Series(data, index=index)
with pytest.raises(error_type, match=error_desc):
ser.drop(drop_labels, axis=axis)
def test_drop_with_ignore_errors():
# errors='ignore'
ser = Series(range(3), index=list("abc"))
result = ser.drop("bc", errors="ignore")
tm.assert_series_equal(result, ser)
result = ser.drop(["a", "d"], errors="ignore")
expected = ser.iloc[1:]
tm.assert_series_equal(result, expected)
# GH 8522
ser = Series([2, 3], index=[True, False])
assert is_bool_dtype(ser.index)
assert ser.index.dtype == bool
result = ser.drop(True)
expected = Series([3], index=[False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize("drop_labels", [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
# GH 21494
expected_index = [i for i in index if i not in drop_labels]
series = Series(index=index, dtype=object).drop(drop_labels)
expected = Series(index=expected_index, dtype=object)
tm.assert_series_equal(series, expected)
@pytest.mark.parametrize(
"data, index, drop_labels",
[
(None, [1, 2, 3], [1, 4]),
(None, [1, 2, 2], [1, 4]),
([2, 3], [0, 1], [False, True]),
],
)
def test_drop_non_empty_list(data, index, drop_labels):
# GH 21494 and GH 16877
dtype = object if data is None else None
ser = Series(data=data, index=index, dtype=dtype)
with pytest.raises(KeyError, match="not found in axis"):
ser.drop(drop_labels)
def test_drop_index_ea_dtype(any_numeric_ea_dtype):
# GH#45860
df = Series(100, index=Index([1, 2, 2], dtype=any_numeric_ea_dtype))
idx = Index([df.index[1]])
result = df.drop(idx)
expected = Series(100, index=Index([1], dtype=any_numeric_ea_dtype))
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,267 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, False, False, True, True, False])),
("last", Series([False, True, True, False, False, False, False])),
(False, Series([False, True, True, False, True, True, False])),
],
)
def test_drop_duplicates(any_numpy_dtype, keep, expected):
tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))
if tc.dtype == "bool":
pytest.skip("tested separately in test_drop_duplicates_bool")
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(keep=keep, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, True, True])),
("last", Series([True, True, False, False])),
(False, Series([True, True, True, True])),
],
)
def test_drop_duplicates_bool(keep, expected):
tc = Series([True, False, True, False])
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(keep=keep, inplace=True)
tm.assert_series_equal(sc, tc[~expected])
assert return_value is None
@pytest.mark.parametrize("values", [[], list(range(5))])
def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values):
tc = Series(values, dtype=np.dtype(any_numpy_dtype))
expected = Series([False] * len(tc), dtype="bool")
if tc.dtype == "bool":
# 0 -> False and 1-> True
# any other value would be duplicated
tc = tc[:2]
expected = expected[:2]
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
result_dropped = tc.drop_duplicates(keep=keep)
tm.assert_series_equal(result_dropped, tc)
# validate shallow copy
assert result_dropped is not tc
class TestSeriesDropDuplicates:
@pytest.fixture(
params=["int_", "uint", "float64", "str_", "timedelta64[h]", "datetime64[D]"]
)
def dtype(self, request):
return request.param
@pytest.fixture
def cat_series_unused_category(self, dtype, ordered):
# Test case 1
cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype))
cat = Categorical(input1, categories=cat_array, ordered=ordered)
tc1 = Series(cat)
return tc1
def test_drop_duplicates_categorical_non_bool(self, cat_series_unused_category):
tc1 = cat_series_unused_category
expected = Series([False, False, False, True])
result = tc1.duplicated()
tm.assert_series_equal(result, expected)
result = tc1.drop_duplicates()
tm.assert_series_equal(result, tc1[~expected])
sc = tc1.copy()
return_value = sc.drop_duplicates(inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc1[~expected])
def test_drop_duplicates_categorical_non_bool_keeplast(
self, cat_series_unused_category
):
tc1 = cat_series_unused_category
expected = Series([False, False, True, False])
result = tc1.duplicated(keep="last")
tm.assert_series_equal(result, expected)
result = tc1.drop_duplicates(keep="last")
tm.assert_series_equal(result, tc1[~expected])
sc = tc1.copy()
return_value = sc.drop_duplicates(keep="last", inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc1[~expected])
def test_drop_duplicates_categorical_non_bool_keepfalse(
self, cat_series_unused_category
):
tc1 = cat_series_unused_category
expected = Series([False, False, True, True])
result = tc1.duplicated(keep=False)
tm.assert_series_equal(result, expected)
result = tc1.drop_duplicates(keep=False)
tm.assert_series_equal(result, tc1[~expected])
sc = tc1.copy()
return_value = sc.drop_duplicates(keep=False, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc1[~expected])
@pytest.fixture
def cat_series(self, dtype, ordered):
# no unused categories, unlike cat_series_unused_category
cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype))
cat = Categorical(input2, categories=cat_array, ordered=ordered)
tc2 = Series(cat)
return tc2
def test_drop_duplicates_categorical_non_bool2(self, cat_series):
tc2 = cat_series
expected = Series([False, False, False, False, True, True, False])
result = tc2.duplicated()
tm.assert_series_equal(result, expected)
result = tc2.drop_duplicates()
tm.assert_series_equal(result, tc2[~expected])
sc = tc2.copy()
return_value = sc.drop_duplicates(inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc2[~expected])
def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series):
tc2 = cat_series
expected = Series([False, True, True, False, False, False, False])
result = tc2.duplicated(keep="last")
tm.assert_series_equal(result, expected)
result = tc2.drop_duplicates(keep="last")
tm.assert_series_equal(result, tc2[~expected])
sc = tc2.copy()
return_value = sc.drop_duplicates(keep="last", inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc2[~expected])
def test_drop_duplicates_categorical_non_bool2_keepfalse(self, cat_series):
tc2 = cat_series
expected = Series([False, True, True, False, True, True, False])
result = tc2.duplicated(keep=False)
tm.assert_series_equal(result, expected)
result = tc2.drop_duplicates(keep=False)
tm.assert_series_equal(result, tc2[~expected])
sc = tc2.copy()
return_value = sc.drop_duplicates(keep=False, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc2[~expected])
def test_drop_duplicates_categorical_bool(self, ordered):
tc = Series(
Categorical(
[True, False, True, False], categories=[True, False], ordered=ordered
)
)
expected = Series([False, False, True, True])
tm.assert_series_equal(tc.duplicated(), expected)
tm.assert_series_equal(tc.drop_duplicates(), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])
expected = Series([True, True, False, False])
tm.assert_series_equal(tc.duplicated(keep="last"), expected)
tm.assert_series_equal(tc.drop_duplicates(keep="last"), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(keep="last", inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])
expected = Series([True, True, True, True])
tm.assert_series_equal(tc.duplicated(keep=False), expected)
tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected])
sc = tc.copy()
return_value = sc.drop_duplicates(keep=False, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])
def test_drop_duplicates_categorical_bool_na(self, nulls_fixture):
# GH#44351
ser = Series(
Categorical(
[True, False, True, False, nulls_fixture],
categories=[True, False],
ordered=True,
)
)
result = ser.drop_duplicates()
expected = Series(
Categorical([True, False, np.nan], categories=[True, False], ordered=True),
index=[0, 1, 4],
)
tm.assert_series_equal(result, expected)
def test_drop_duplicates_ignore_index(self):
# GH#48304
ser = Series([1, 2, 2, 3])
result = ser.drop_duplicates(ignore_index=True)
expected = Series([1, 2, 3])
tm.assert_series_equal(result, expected)
def test_duplicated_arrow_dtype(self):
pytest.importorskip("pyarrow")
ser = Series([True, False, None, False], dtype="bool[pyarrow]")
result = ser.drop_duplicates()
expected = Series([True, False, None], dtype="bool[pyarrow]")
tm.assert_series_equal(result, expected)
def test_drop_duplicates_arrow_strings(self):
# GH#54904
pa = pytest.importorskip("pyarrow")
ser = Series(["a", "a"], dtype=pd.ArrowDtype(pa.string()))
result = ser.drop_duplicates()
expecetd = Series(["a"], dtype=pd.ArrowDtype(pa.string()))
tm.assert_series_equal(result, expecetd)

View File

@ -0,0 +1,117 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
IntervalIndex,
NaT,
Period,
Series,
Timestamp,
)
import pandas._testing as tm
class TestDropna:
def test_dropna_empty(self):
ser = Series([], dtype=object)
assert len(ser.dropna()) == 0
return_value = ser.dropna(inplace=True)
assert return_value is None
assert len(ser) == 0
# invalid axis
msg = "No axis named 1 for object type Series"
with pytest.raises(ValueError, match=msg):
ser.dropna(axis=1)
def test_dropna_preserve_name(self, datetime_series):
datetime_series[:5] = np.nan
result = datetime_series.dropna()
assert result.name == datetime_series.name
name = datetime_series.name
ts = datetime_series.copy()
return_value = ts.dropna(inplace=True)
assert return_value is None
assert ts.name == name
def test_dropna_no_nan(self):
for ser in [
Series([1, 2, 3], name="x"),
Series([False, True, False], name="x"),
]:
result = ser.dropna()
tm.assert_series_equal(result, ser)
assert result is not ser
s2 = ser.copy()
return_value = s2.dropna(inplace=True)
assert return_value is None
tm.assert_series_equal(s2, ser)
def test_dropna_intervals(self):
ser = Series(
[np.nan, 1, 2, 3],
IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]),
)
result = ser.dropna()
expected = ser.iloc[1:]
tm.assert_series_equal(result, expected)
def test_dropna_period_dtype(self):
# GH#13737
ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
result = ser.dropna()
expected = Series([Period("2011-01", freq="M")])
tm.assert_series_equal(result, expected)
def test_datetime64_tz_dropna(self, unit):
# DatetimeLikeBlock
ser = Series(
[
Timestamp("2011-01-01 10:00"),
NaT,
Timestamp("2011-01-03 10:00"),
NaT,
],
dtype=f"M8[{unit}]",
)
result = ser.dropna()
expected = Series(
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")],
index=[0, 2],
dtype=f"M8[{unit}]",
)
tm.assert_series_equal(result, expected)
# DatetimeTZBlock
idx = DatetimeIndex(
["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz="Asia/Tokyo"
).as_unit(unit)
ser = Series(idx)
assert ser.dtype == f"datetime64[{unit}, Asia/Tokyo]"
result = ser.dropna()
expected = Series(
[
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"),
],
index=[0, 2],
dtype=f"datetime64[{unit}, Asia/Tokyo]",
)
assert result.dtype == f"datetime64[{unit}, Asia/Tokyo]"
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("val", [1, 1.5])
def test_dropna_ignore_index(self, val):
# GH#31725
ser = Series([1, 2, val], index=[3, 2, 1])
result = ser.dropna(ignore_index=True)
expected = Series([1, 2, val])
tm.assert_series_equal(result, expected)
ser.dropna(ignore_index=True, inplace=True)
tm.assert_series_equal(ser, expected)

View File

@ -0,0 +1,7 @@
import numpy as np
class TestSeriesDtypes:
def test_dtype(self, datetime_series):
assert datetime_series.dtype == np.dtype("float64")
assert datetime_series.dtypes == np.dtype("float64")

View File

@ -0,0 +1,77 @@
import numpy as np
import pytest
from pandas import (
NA,
Categorical,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, True, False, True], name="name")),
("last", Series([True, True, False, False, False], name="name")),
(False, Series([True, True, True, False, True], name="name")),
],
)
def test_duplicated_keep(keep, expected):
ser = Series(["a", "b", "b", "c", "a"], name="name")
result = ser.duplicated(keep=keep)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"keep, expected",
[
("first", Series([False, False, True, False, True])),
("last", Series([True, True, False, False, False])),
(False, Series([True, True, True, False, True])),
],
)
def test_duplicated_nan_none(keep, expected):
ser = Series([np.nan, 3, 3, None, np.nan], dtype=object)
result = ser.duplicated(keep=keep)
tm.assert_series_equal(result, expected)
def test_duplicated_categorical_bool_na(nulls_fixture):
# GH#44351
ser = Series(
Categorical(
[True, False, True, False, nulls_fixture],
categories=[True, False],
ordered=True,
)
)
result = ser.duplicated()
expected = Series([False, False, True, True, False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"keep, vals",
[
("last", [True, True, False]),
("first", [False, True, True]),
(False, [True, True, True]),
],
)
def test_duplicated_mask(keep, vals):
# GH#48150
ser = Series([1, 2, NA, NA, NA], dtype="Int64")
result = ser.duplicated(keep=keep)
expected = Series([False, False] + vals)
tm.assert_series_equal(result, expected)
def test_duplicated_mask_no_duplicated_na(keep):
# GH#48150
ser = Series([1, 2, NA], dtype="Int64")
result = ser.duplicated(keep=keep)
expected = Series([False, False, False])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,145 @@
from contextlib import nullcontext
import copy
import numpy as np
import pytest
from pandas._libs.missing import is_matching_na
from pandas.compat.numpy import np_version_gte1p25
from pandas.core.dtypes.common import is_float
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"arr, idx",
[
([1, 2, 3, 4], [0, 2, 1, 3]),
([1, np.nan, 3, np.nan], [0, 2, 1, 3]),
(
[1, np.nan, 3, np.nan],
MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c"), (3, "c")]),
),
],
)
def test_equals(arr, idx):
s1 = Series(arr, index=idx)
s2 = s1.copy()
assert s1.equals(s2)
s1[1] = 9
assert not s1.equals(s2)
@pytest.mark.parametrize(
"val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None]
)
def test_equals_list_array(val):
# GH20676 Verify equals operator for list of Numpy arrays
arr = np.array([1, 2])
s1 = Series([arr, arr])
s2 = s1.copy()
assert s1.equals(s2)
s1[1] = val
cm = (
tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
if isinstance(val, str) and not np_version_gte1p25
else nullcontext()
)
with cm:
assert not s1.equals(s2)
def test_equals_false_negative():
# GH8437 Verify false negative behavior of equals function for dtype object
arr = [False, np.nan]
s1 = Series(arr)
s2 = s1.copy()
s3 = Series(index=range(2), dtype=object)
s4 = s3.copy()
s5 = s3.copy()
s6 = s3.copy()
s3[:-1] = s4[:-1] = s5[0] = s6[0] = False
assert s1.equals(s1)
assert s1.equals(s2)
assert s1.equals(s3)
assert s1.equals(s4)
assert s1.equals(s5)
assert s5.equals(s6)
def test_equals_matching_nas():
# matching but not identical NAs
left = Series([np.datetime64("NaT")], dtype=object)
right = Series([np.datetime64("NaT")], dtype=object)
assert left.equals(right)
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
assert Index(left).equals(Index(right))
assert left.array.equals(right.array)
left = Series([np.timedelta64("NaT")], dtype=object)
right = Series([np.timedelta64("NaT")], dtype=object)
assert left.equals(right)
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
assert Index(left).equals(Index(right))
assert left.array.equals(right.array)
left = Series([np.float64("NaN")], dtype=object)
right = Series([np.float64("NaN")], dtype=object)
assert left.equals(right)
assert Index(left, dtype=left.dtype).equals(Index(right, dtype=right.dtype))
assert left.array.equals(right.array)
def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2):
# GH#39650
left = nulls_fixture
right = nulls_fixture2
if hasattr(right, "copy"):
right = right.copy()
else:
right = copy.copy(right)
ser = Series([left], dtype=object)
ser2 = Series([right], dtype=object)
if is_matching_na(left, right):
assert ser.equals(ser2)
elif (left is None and is_float(right)) or (right is None and is_float(left)):
assert ser.equals(ser2)
else:
assert not ser.equals(ser2)
def test_equals_none_vs_nan():
# GH#39650
ser = Series([1, None], dtype=object)
ser2 = Series([1, np.nan], dtype=object)
assert ser.equals(ser2)
assert Index(ser, dtype=ser.dtype).equals(Index(ser2, dtype=ser2.dtype))
assert ser.array.equals(ser2.array)
def test_equals_None_vs_float():
# GH#44190
left = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object)
right = Series([None] * len(left))
# these series were found to be equal due to a bug, check that they are correctly
# found to not equal
assert not left.equals(right)
assert not right.equals(left)
assert not left.to_frame().equals(right.to_frame())
assert not right.to_frame().equals(left.to_frame())
assert not Index(left, dtype="object").equals(Index(right, dtype="object"))
assert not Index(right, dtype="object").equals(Index(left, dtype="object"))

View File

@ -0,0 +1,183 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_basic():
s = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo")
result = s.explode()
expected = pd.Series(
[0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo"
)
tm.assert_series_equal(result, expected)
def test_mixed_type():
s = pd.Series(
[[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])], name="foo"
)
result = s.explode()
expected = pd.Series(
[0, 1, 2, np.nan, None, np.nan, "a", "b"],
index=[0, 0, 0, 1, 2, 3, 4, 4],
dtype=object,
name="foo",
)
tm.assert_series_equal(result, expected)
def test_empty():
s = pd.Series(dtype=object)
result = s.explode()
expected = s.copy()
tm.assert_series_equal(result, expected)
def test_nested_lists():
s = pd.Series([[[1, 2, 3]], [1, 2], 1])
result = s.explode()
expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2])
tm.assert_series_equal(result, expected)
def test_multi_index():
s = pd.Series(
[[0, 1, 2], np.nan, [], (3, 4)],
name="foo",
index=pd.MultiIndex.from_product([list("ab"), range(2)], names=["foo", "bar"]),
)
result = s.explode()
index = pd.MultiIndex.from_tuples(
[("a", 0), ("a", 0), ("a", 0), ("a", 1), ("b", 0), ("b", 1), ("b", 1)],
names=["foo", "bar"],
)
expected = pd.Series(
[0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=object, name="foo"
)
tm.assert_series_equal(result, expected)
def test_large():
s = pd.Series([range(256)]).explode()
result = s.explode()
tm.assert_series_equal(result, s)
def test_invert_array():
df = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")})
listify = df.apply(lambda x: x.array, axis=1)
result = listify.explode()
tm.assert_series_equal(result, df["a"].rename())
@pytest.mark.parametrize(
"s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))]
)
def test_non_object_dtype(s):
result = s.explode()
tm.assert_series_equal(result, s)
def test_typical_usecase():
df = pd.DataFrame(
[{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}],
columns=["var1", "var2"],
)
exploded = df.var1.str.split(",").explode()
result = df[["var2"]].join(exploded)
expected = pd.DataFrame(
{"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")},
columns=["var2", "var1"],
index=[0, 0, 0, 1, 1, 1],
)
tm.assert_frame_equal(result, expected)
def test_nested_EA():
# a nested EA array
s = pd.Series(
[
pd.date_range("20170101", periods=3, tz="UTC"),
pd.date_range("20170104", periods=3, tz="UTC"),
]
)
result = s.explode()
expected = pd.Series(
pd.date_range("20170101", periods=6, tz="UTC"), index=[0, 0, 0, 1, 1, 1]
)
tm.assert_series_equal(result, expected)
def test_duplicate_index():
# GH 28005
s = pd.Series([[1, 2], [3, 4]], index=[0, 0])
result = s.explode()
expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
tm.assert_series_equal(result, expected)
def test_ignore_index():
# GH 34932
s = pd.Series([[1, 2], [3, 4]])
result = s.explode(ignore_index=True)
expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
tm.assert_series_equal(result, expected)
def test_explode_sets():
# https://github.com/pandas-dev/pandas/issues/35614
s = pd.Series([{"a", "b", "c"}], index=[1])
result = s.explode().sort_values()
expected = pd.Series(["a", "b", "c"], index=[1, 1, 1])
tm.assert_series_equal(result, expected)
def test_explode_scalars_can_ignore_index():
# https://github.com/pandas-dev/pandas/issues/40487
s = pd.Series([1, 2, 3], index=["a", "b", "c"])
result = s.explode(ignore_index=True)
expected = pd.Series([1, 2, 3])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_list_type(ignore_index):
# GH 53602
pa = pytest.importorskip("pyarrow")
data = [
[None, None],
[1],
[],
[2, 3],
None,
]
ser = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
result = ser.explode(ignore_index=ignore_index)
expected = pd.Series(
data=[None, None, 1, None, 2, 3, None],
index=None if ignore_index else [0, 0, 1, 2, 3, 3, 4],
dtype=pd.ArrowDtype(pa.int64()),
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_non_list_type(ignore_index):
pa = pytest.importorskip("pyarrow")
data = [1, 2, 3]
ser = pd.Series(data, dtype=pd.ArrowDtype(pa.int64()))
result = ser.explode(ignore_index=ignore_index)
expected = pd.Series([1, 2, 3], dtype="int64[pyarrow]", index=[0, 1, 2])
tm.assert_series_equal(result, expected)
def test_str_dtype():
# https://github.com/pandas-dev/pandas/pull/61623
ser = pd.Series(["x", "y"], dtype="str")
result = ser.explode()
assert result is not ser
tm.assert_series_equal(result, ser)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,38 @@
from pandas import (
Index,
Series,
date_range,
)
import pandas._testing as tm
class TestGetNumericData:
def test_get_numeric_data_preserve_dtype(
self, using_copy_on_write, warn_copy_on_write
):
# get the numeric data
obj = Series([1, 2, 3])
result = obj._get_numeric_data()
tm.assert_series_equal(result, obj)
# returned object is a shallow copy
with tm.assert_cow_warning(warn_copy_on_write):
result.iloc[0] = 0
if using_copy_on_write:
assert obj.iloc[0] == 1
else:
assert obj.iloc[0] == 0
obj = Series([1, "2", 3.0])
result = obj._get_numeric_data()
expected = Series([], dtype=object, index=Index([], dtype=object))
tm.assert_series_equal(result, expected)
obj = Series([True, False, True])
result = obj._get_numeric_data()
tm.assert_series_equal(result, obj)
obj = Series(date_range("20130101", periods=3))
result = obj._get_numeric_data()
expected = Series([], dtype="M8[ns]", index=Index([], dtype=object))
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,8 @@
import pandas._testing as tm
def test_head_tail(string_series):
tm.assert_series_equal(string_series.head(), string_series[:5])
tm.assert_series_equal(string_series.head(0), string_series[0:0])
tm.assert_series_equal(string_series.tail(), string_series[-5:])
tm.assert_series_equal(string_series.tail(0), string_series[0:0])

View File

@ -0,0 +1,56 @@
import numpy as np
from pandas import (
Series,
interval_range,
)
import pandas._testing as tm
class TestInferObjects:
def test_copy(self, index_or_series):
# GH#50096
# case where we don't need to do inference because it is already non-object
obj = index_or_series(np.array([1, 2, 3], dtype="int64"))
result = obj.infer_objects(copy=False)
assert tm.shares_memory(result, obj)
# case where we try to do inference but can't do better than object
obj2 = index_or_series(np.array(["foo", 2], dtype=object))
result2 = obj2.infer_objects(copy=False)
assert tm.shares_memory(result2, obj2)
def test_infer_objects_series(self, index_or_series):
# GH#11221
actual = index_or_series(np.array([1, 2, 3], dtype="O")).infer_objects()
expected = index_or_series([1, 2, 3])
tm.assert_equal(actual, expected)
actual = index_or_series(np.array([1, 2, 3, None], dtype="O")).infer_objects()
expected = index_or_series([1.0, 2.0, 3.0, np.nan])
tm.assert_equal(actual, expected)
# only soft conversions, unconvertible pass thru unchanged
obj = index_or_series(np.array([1, 2, 3, None, "a"], dtype="O"))
actual = obj.infer_objects()
expected = index_or_series([1, 2, 3, None, "a"], dtype=object)
assert actual.dtype == "object"
tm.assert_equal(actual, expected)
def test_infer_objects_interval(self, index_or_series):
# GH#50090
ii = interval_range(1, 10)
obj = index_or_series(ii)
result = obj.astype(object).infer_objects()
tm.assert_equal(result, obj)
def test_infer_objects_bytes(self):
# GH#49650
ser = Series([b"a"], dtype="bytes")
expected = ser.copy()
result = ser.infer_objects()
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,192 @@
from io import StringIO
from string import ascii_uppercase
import textwrap
import numpy as np
import pytest
from pandas._config import using_string_dtype
from pandas.compat import (
HAS_PYARROW,
PYPY,
)
from pandas import (
CategoricalIndex,
Index,
MultiIndex,
Series,
date_range,
)
def test_info_categorical_column_just_works():
n = 2500
data = np.array(list("abcdefghij")).take(
np.random.default_rng(2).integers(0, 10, size=n, dtype=int)
)
s = Series(data).astype("category")
s.isna()
buf = StringIO()
s.info(buf=buf)
s2 = s[s == "d"]
buf = StringIO()
s2.info(buf=buf)
def test_info_categorical():
# GH14298
idx = CategoricalIndex(["a", "b"])
s = Series(np.zeros(2), index=idx)
buf = StringIO()
s.info(buf=buf)
@pytest.mark.parametrize("verbose", [True, False])
def test_info_series(
lexsorted_two_level_string_multiindex, verbose, using_infer_string
):
index = lexsorted_two_level_string_multiindex
ser = Series(range(len(index)), index=index, name="sth")
buf = StringIO()
ser.info(verbose=verbose, buf=buf)
result = buf.getvalue()
expected = textwrap.dedent(
"""\
<class 'pandas.core.series.Series'>
MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
"""
)
if verbose:
expected += textwrap.dedent(
"""\
Series name: sth
Non-Null Count Dtype
-------------- -----
10 non-null int64
"""
)
qualifier = "" if using_infer_string and HAS_PYARROW else "+"
expected += textwrap.dedent(
f"""\
dtypes: int64(1)
memory usage: {ser.memory_usage()}.0{qualifier} bytes
"""
)
assert result == expected
def test_info_memory():
s = Series([1, 2], dtype="i8")
buf = StringIO()
s.info(buf=buf)
result = buf.getvalue()
memory_bytes = float(s.memory_usage())
expected = textwrap.dedent(
f"""\
<class 'pandas.core.series.Series'>
RangeIndex: 2 entries, 0 to 1
Series name: None
Non-Null Count Dtype
-------------- -----
2 non-null int64
dtypes: int64(1)
memory usage: {memory_bytes} bytes
"""
)
assert result == expected
def test_info_wide():
s = Series(np.random.default_rng(2).standard_normal(101))
msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
with pytest.raises(ValueError, match=msg):
s.info(max_cols=1)
def test_info_shows_dtypes():
dtypes = [
"int64",
"float64",
"datetime64[ns]",
"timedelta64[ns]",
"complex128",
"object",
"bool",
]
n = 10
for dtype in dtypes:
s = Series(np.random.default_rng(2).integers(2, size=n).astype(dtype))
buf = StringIO()
s.info(buf=buf)
res = buf.getvalue()
name = f"{n:d} non-null {dtype}"
assert name in res
@pytest.mark.xfail(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
s_with_object_index = Series({"a": [1]}, index=["foo"])
assert s_with_object_index.memory_usage(
index=True, deep=True
) > s_with_object_index.memory_usage(index=True)
s_object = Series({"a": ["a"]})
assert s_object.memory_usage(deep=True) > s_object.memory_usage()
@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
s_with_object_index = Series({"a": [1]}, index=["foo"])
assert s_with_object_index.memory_usage(
index=True, deep=True
) == s_with_object_index.memory_usage(index=True)
s_object = Series({"a": ["a"]})
assert s_object.memory_usage(deep=True) == s_object.memory_usage()
@pytest.mark.parametrize(
"index, plus",
[
([1, 2, 3], False),
(Index(list("ABC"), dtype="str"), not (using_string_dtype() and HAS_PYARROW)),
(Index(list("ABC"), dtype=object), True),
(MultiIndex.from_product([range(3), range(3)]), False),
(
MultiIndex.from_product([range(3), ["foo", "bar"]]),
not (using_string_dtype() and HAS_PYARROW),
),
],
)
def test_info_memory_usage_qualified(index, plus):
series = Series(1, index=index)
buf = StringIO()
series.info(buf=buf)
if plus:
assert "+" in buf.getvalue()
else:
assert "+" not in buf.getvalue()
def test_info_memory_usage_bug_on_multiindex():
# GH 14308
# memory usage introspection should not materialize .values
N = 100
M = len(ascii_uppercase)
index = MultiIndex.from_product(
[list(ascii_uppercase), date_range("20160101", periods=N)],
names=["id", "date"],
)
s = Series(np.random.default_rng(2).standard_normal(N * M), index=index)
unstacked = s.unstack("id")
assert s.values.nbytes == unstacked.values.nbytes
assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()
# high upper bound
diff = unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True)
assert diff < 2000

View File

@ -0,0 +1,868 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
date_range,
isna,
)
import pandas._testing as tm
@pytest.fixture(
params=[
"linear",
"index",
"values",
"nearest",
"slinear",
"zero",
"quadratic",
"cubic",
"barycentric",
"krogh",
"polynomial",
"spline",
"piecewise_polynomial",
"from_derivatives",
"pchip",
"akima",
"cubicspline",
]
)
def nontemporal_method(request):
"""Fixture that returns an (method name, required kwargs) pair.
This fixture does not include method 'time' as a parameterization; that
method requires a Series with a DatetimeIndex, and is generally tested
separately from these non-temporal methods.
"""
method = request.param
kwargs = {"order": 1} if method in ("spline", "polynomial") else {}
return method, kwargs
@pytest.fixture(
params=[
"linear",
"slinear",
"zero",
"quadratic",
"cubic",
"barycentric",
"krogh",
"polynomial",
"spline",
"piecewise_polynomial",
"from_derivatives",
"pchip",
"akima",
"cubicspline",
]
)
def interp_methods_ind(request):
"""Fixture that returns a (method name, required kwargs) pair to
be tested for various Index types.
This fixture does not include methods - 'time', 'index', 'nearest',
'values' as a parameterization
"""
method = request.param
kwargs = {"order": 1} if method in ("spline", "polynomial") else {}
return method, kwargs
class TestSeriesInterpolateData:
@pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method")
def test_interpolate_period_values(self):
orig = Series(date_range("2012-01-01", periods=5))
ser = orig.copy()
ser[2] = pd.NaT
# period cast
ser_per = ser.dt.to_period("D")
res_per = ser_per.interpolate()
expected_per = orig.dt.to_period("D")
tm.assert_series_equal(res_per, expected_per)
def test_interpolate(self, datetime_series):
ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index)
ts_copy = ts.copy()
ts_copy[5:10] = np.nan
linear_interp = ts_copy.interpolate(method="linear")
tm.assert_series_equal(linear_interp, ts)
ord_ts = Series(
[d.toordinal() for d in datetime_series.index], index=datetime_series.index
).astype(float)
ord_ts_copy = ord_ts.copy()
ord_ts_copy[5:10] = np.nan
time_interp = ord_ts_copy.interpolate(method="time")
tm.assert_series_equal(time_interp, ord_ts)
def test_interpolate_time_raises_for_non_timeseries(self):
# When method='time' is used on a non-TimeSeries that contains a null
# value, a ValueError should be raised.
non_ts = Series([0, 1, 2, np.nan])
msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex"
with pytest.raises(ValueError, match=msg):
non_ts.interpolate(method="time")
def test_interpolate_cubicspline(self):
pytest.importorskip("scipy")
ser = Series([10, 11, 12, 13])
expected = Series(
[11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
result = ser.reindex(new_index).interpolate(method="cubicspline").loc[1:3]
tm.assert_series_equal(result, expected)
def test_interpolate_pchip(self):
pytest.importorskip("scipy")
ser = Series(np.sort(np.random.default_rng(2).uniform(size=100)))
# interpolate at new_index
new_index = ser.index.union(
Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
).astype(float)
interp_s = ser.reindex(new_index).interpolate(method="pchip")
# does not blow up, GH5977
interp_s.loc[49:51]
def test_interpolate_akima(self):
pytest.importorskip("scipy")
ser = Series([10, 11, 12, 13])
# interpolate at new_index where `der` is zero
expected = Series(
[11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
interp_s = ser.reindex(new_index).interpolate(method="akima")
tm.assert_series_equal(interp_s.loc[1:3], expected)
# interpolate at new_index where `der` is a non-zero int
expected = Series(
[11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
interp_s = ser.reindex(new_index).interpolate(method="akima", der=1)
tm.assert_series_equal(interp_s.loc[1:3], expected)
def test_interpolate_piecewise_polynomial(self):
pytest.importorskip("scipy")
ser = Series([10, 11, 12, 13])
expected = Series(
[11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial")
tm.assert_series_equal(interp_s.loc[1:3], expected)
def test_interpolate_from_derivatives(self):
pytest.importorskip("scipy")
ser = Series([10, 11, 12, 13])
expected = Series(
[11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
)
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
interp_s = ser.reindex(new_index).interpolate(method="from_derivatives")
tm.assert_series_equal(interp_s.loc[1:3], expected)
@pytest.mark.parametrize(
"kwargs",
[
{},
pytest.param(
{"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy")
),
],
)
def test_interpolate_corners(self, kwargs):
s = Series([np.nan, np.nan])
tm.assert_series_equal(s.interpolate(**kwargs), s)
s = Series([], dtype=object).interpolate()
tm.assert_series_equal(s.interpolate(**kwargs), s)
def test_interpolate_index_values(self):
s = Series(np.nan, index=np.sort(np.random.default_rng(2).random(30)))
s.loc[::3] = np.random.default_rng(2).standard_normal(10)
vals = s.index.values.astype(float)
result = s.interpolate(method="index")
expected = s.copy()
bad = isna(expected.values)
good = ~bad
expected = Series(
np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad]
)
tm.assert_series_equal(result[bad], expected)
# 'values' is synonymous with 'index' for the method kwarg
other_result = s.interpolate(method="values")
tm.assert_series_equal(other_result, result)
tm.assert_series_equal(other_result[bad], expected)
def test_interpolate_non_ts(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
msg = (
"time-weighted interpolation only works on Series or DataFrames "
"with a DatetimeIndex"
)
with pytest.raises(ValueError, match=msg):
s.interpolate(method="time")
@pytest.mark.parametrize(
"kwargs",
[
{},
pytest.param(
{"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy")
),
],
)
def test_nan_interpolate(self, kwargs):
s = Series([0, 1, np.nan, 3])
result = s.interpolate(**kwargs)
expected = Series([0.0, 1.0, 2.0, 3.0])
tm.assert_series_equal(result, expected)
def test_nan_irregular_index(self):
s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
result = s.interpolate()
expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9])
tm.assert_series_equal(result, expected)
def test_nan_str_index(self):
s = Series([0, 1, 2, np.nan], index=list("abcd"))
result = s.interpolate()
expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd"))
tm.assert_series_equal(result, expected)
def test_interp_quad(self):
pytest.importorskip("scipy")
sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
result = sq.interpolate(method="quadratic")
expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4])
tm.assert_series_equal(result, expected)
def test_interp_scipy_basic(self):
pytest.importorskip("scipy")
s = Series([1, 3, np.nan, 12, np.nan, 25])
# slinear
expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0])
result = s.interpolate(method="slinear")
tm.assert_series_equal(result, expected)
msg = "The 'downcast' keyword in Series.interpolate is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(method="slinear", downcast="infer")
tm.assert_series_equal(result, expected)
# nearest
expected = Series([1, 3, 3, 12, 12, 25])
result = s.interpolate(method="nearest")
tm.assert_series_equal(result, expected.astype("float"))
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(method="nearest", downcast="infer")
tm.assert_series_equal(result, expected)
# zero
expected = Series([1, 3, 3, 12, 12, 25])
result = s.interpolate(method="zero")
tm.assert_series_equal(result, expected.astype("float"))
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(method="zero", downcast="infer")
tm.assert_series_equal(result, expected)
# quadratic
# GH #15662.
expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0])
result = s.interpolate(method="quadratic")
tm.assert_series_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(method="quadratic", downcast="infer")
tm.assert_series_equal(result, expected)
# cubic
expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0])
result = s.interpolate(method="cubic")
tm.assert_series_equal(result, expected)
def test_interp_limit(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
result = s.interpolate(method="linear", limit=2)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("limit", [-1, 0])
def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit):
# GH 9217: make sure limit is greater than zero.
s = Series([1, 2, np.nan, 4])
method, kwargs = nontemporal_method
with pytest.raises(ValueError, match="Limit must be greater than 0"):
s.interpolate(limit=limit, method=method, **kwargs)
def test_interpolate_invalid_float_limit(self, nontemporal_method):
# GH 9217: make sure limit is an integer.
s = Series([1, 2, np.nan, 4])
method, kwargs = nontemporal_method
limit = 2.0
with pytest.raises(ValueError, match="Limit must be an integer"):
s.interpolate(limit=limit, method=method, **kwargs)
@pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"])
def test_interp_invalid_method(self, invalid_method):
s = Series([1, 3, np.nan, 12, np.nan, 25])
msg = f"method must be one of.* Got '{invalid_method}' instead"
if invalid_method is None:
msg = "'method' should be a string, not None"
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method)
# When an invalid method and invalid limit (such as -1) are
# provided, the error message reflects the invalid method.
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method, limit=-1)
def test_interp_invalid_method_and_value(self):
# GH#36624
ser = Series([1, 3, np.nan, 12, np.nan, 25])
msg = "'fill_value' is not a valid keyword for Series.interpolate"
msg2 = "Series.interpolate with method=pad"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg2):
ser.interpolate(fill_value=3, method="pad")
def test_interp_limit_forward(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
# Provide 'forward' (the default) explicitly here.
expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
result = s.interpolate(method="linear", limit=2, limit_direction="forward")
tm.assert_series_equal(result, expected)
result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD")
tm.assert_series_equal(result, expected)
def test_interp_unlimited(self):
# these test are for issue #16282 default Limit=None is unlimited
s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan])
expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
result = s.interpolate(method="linear", limit_direction="both")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
result = s.interpolate(method="linear", limit_direction="forward")
tm.assert_series_equal(result, expected)
expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan])
result = s.interpolate(method="linear", limit_direction="backward")
tm.assert_series_equal(result, expected)
def test_interp_limit_bad_direction(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
msg = (
r"Invalid limit_direction: expecting one of \['forward', "
r"'backward', 'both'\], got 'abc'"
)
with pytest.raises(ValueError, match=msg):
s.interpolate(method="linear", limit=2, limit_direction="abc")
# raises an error even if no limit is specified.
with pytest.raises(ValueError, match=msg):
s.interpolate(method="linear", limit_direction="abc")
# limit_area introduced GH #16284
def test_interp_limit_area(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan])
expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan])
result = s.interpolate(method="linear", limit_area="inside")
tm.assert_series_equal(result, expected)
expected = Series(
[np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan]
)
result = s.interpolate(method="linear", limit_area="inside", limit=1)
tm.assert_series_equal(result, expected)
expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan])
result = s.interpolate(
method="linear", limit_area="inside", limit_direction="both", limit=1
)
tm.assert_series_equal(result, expected)
expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0])
result = s.interpolate(method="linear", limit_area="outside")
tm.assert_series_equal(result, expected)
expected = Series(
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]
)
result = s.interpolate(method="linear", limit_area="outside", limit=1)
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan])
result = s.interpolate(
method="linear", limit_area="outside", limit_direction="both", limit=1
)
tm.assert_series_equal(result, expected)
expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan])
result = s.interpolate(
method="linear", limit_area="outside", limit_direction="backward"
)
tm.assert_series_equal(result, expected)
# raises an error even if limit type is wrong.
msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc"
with pytest.raises(ValueError, match=msg):
s.interpolate(method="linear", limit_area="abc")
@pytest.mark.parametrize(
"method, limit_direction, expected",
[
("pad", "backward", "forward"),
("ffill", "backward", "forward"),
("backfill", "forward", "backward"),
("bfill", "forward", "backward"),
("pad", "both", "forward"),
("ffill", "both", "forward"),
("backfill", "both", "backward"),
("bfill", "both", "backward"),
],
)
def test_interp_limit_direction_raises(self, method, limit_direction, expected):
# https://github.com/pandas-dev/pandas/pull/34746
s = Series([1, 2, 3])
msg = f"`limit_direction` must be '{expected}' for method `{method}`"
msg2 = "Series.interpolate with method="
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg2):
s.interpolate(method=method, limit_direction=limit_direction)
@pytest.mark.parametrize(
"data, expected_data, kwargs",
(
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
{"method": "pad", "limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "pad", "limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
{"method": "pad", "limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
{"method": "pad", "limit_area": "outside", "limit": 1},
),
(
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
{"method": "pad", "limit_area": "outside", "limit": 1},
),
(
range(5),
range(5),
{"method": "pad", "limit_area": "outside", "limit": 1},
),
),
)
def test_interp_limit_area_with_pad(self, data, expected_data, kwargs):
# GH26796
s = Series(data)
expected = Series(expected_data)
msg = "Series.interpolate with method=pad"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(**kwargs)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data, expected_data, kwargs",
(
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "inside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "inside", "limit": 1},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "outside"},
),
(
[np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
[np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
{"method": "bfill", "limit_area": "outside", "limit": 1},
),
),
)
def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs):
# GH26796
s = Series(data)
expected = Series(expected_data)
msg = "Series.interpolate with method=bfill"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(**kwargs)
tm.assert_series_equal(result, expected)
def test_interp_limit_direction(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0])
result = s.interpolate(method="linear", limit=2, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0])
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
# Check that this works on a longer series of nans.
s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan])
expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0])
result = s.interpolate(method="linear", limit=2, limit_direction="both")
tm.assert_series_equal(result, expected)
expected = Series(
[1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]
)
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
def test_interp_limit_to_ends(self):
# These test are for issue #10420 -- flow back to beginning.
s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan])
result = s.interpolate(method="linear", limit=2, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0])
result = s.interpolate(method="linear", limit=2, limit_direction="both")
tm.assert_series_equal(result, expected)
def test_interp_limit_before_ends(self):
# These test are for issue #11115 -- limit ends properly.
s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="forward")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
def test_interp_all_good(self):
pytest.importorskip("scipy")
s = Series([1, 2, 3])
result = s.interpolate(method="polynomial", order=1)
tm.assert_series_equal(result, s)
# non-scipy
result = s.interpolate()
tm.assert_series_equal(result, s)
@pytest.mark.parametrize(
"check_scipy", [False, pytest.param(True, marks=td.skip_if_no("scipy"))]
)
def test_interp_multiIndex(self, check_scipy):
idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
s = Series([1, 2, np.nan], index=idx)
expected = s.copy()
expected.loc[2] = 2
result = s.interpolate()
tm.assert_series_equal(result, expected)
msg = "Only `method=linear` interpolation is supported on MultiIndexes"
if check_scipy:
with pytest.raises(ValueError, match=msg):
s.interpolate(method="polynomial", order=1)
def test_interp_nonmono_raise(self):
pytest.importorskip("scipy")
s = Series([1, np.nan, 3], index=[0, 2, 1])
msg = "krogh interpolation requires that the index be monotonic"
with pytest.raises(ValueError, match=msg):
s.interpolate(method="krogh")
@pytest.mark.parametrize("method", ["nearest", "pad"])
def test_interp_datetime64(self, method, tz_naive_fixture):
pytest.importorskip("scipy")
df = Series(
[1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
)
warn = None if method == "nearest" else FutureWarning
msg = "Series.interpolate with method=pad is deprecated"
with tm.assert_produces_warning(warn, match=msg):
result = df.interpolate(method=method)
if warn is not None:
# check the "use ffill instead" is equivalent
alt = df.ffill()
tm.assert_series_equal(result, alt)
expected = Series(
[1.0, 1.0, 3.0],
index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
)
tm.assert_series_equal(result, expected)
def test_interp_pad_datetime64tz_values(self):
# GH#27628 missing.interpolate_2d should handle datetimetz values
dti = date_range("2015-04-05", periods=3, tz="US/Central")
ser = Series(dti)
ser[1] = pd.NaT
msg = "Series.interpolate with method=pad is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.interpolate(method="pad")
# check the "use ffill instead" is equivalent
alt = ser.ffill()
tm.assert_series_equal(result, alt)
expected = Series(dti)
expected[1] = expected[0]
tm.assert_series_equal(result, expected)
def test_interp_limit_no_nans(self):
# GH 7173
s = Series([1.0, 2.0, 3.0])
result = s.interpolate(limit=1)
expected = s
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("method", ["polynomial", "spline"])
def test_no_order(self, method):
# see GH-10633, GH-24014
pytest.importorskip("scipy")
s = Series([0, 1, np.nan, 3])
msg = "You must specify the order of the spline or polynomial"
with pytest.raises(ValueError, match=msg):
s.interpolate(method=method)
@pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan])
def test_interpolate_spline_invalid_order(self, order):
pytest.importorskip("scipy")
s = Series([0, 1, np.nan, 3])
msg = "order needs to be specified and greater than 0"
with pytest.raises(ValueError, match=msg):
s.interpolate(method="spline", order=order)
def test_spline(self):
pytest.importorskip("scipy")
s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
result = s.interpolate(method="spline", order=1)
expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
tm.assert_series_equal(result, expected)
def test_spline_extrapolate(self):
pytest.importorskip("scipy")
s = Series([1, 2, 3, 4, np.nan, 6, np.nan])
result3 = s.interpolate(method="spline", order=1, ext=3)
expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0])
tm.assert_series_equal(result3, expected3)
result1 = s.interpolate(method="spline", order=1, ext=0)
expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
tm.assert_series_equal(result1, expected1)
def test_spline_smooth(self):
pytest.importorskip("scipy")
s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
assert (
s.interpolate(method="spline", order=3, s=0)[5]
!= s.interpolate(method="spline", order=3)[5]
)
def test_spline_interpolation(self):
# Explicit cast to float to avoid implicit cast when setting np.nan
pytest.importorskip("scipy")
s = Series(np.arange(10) ** 2, dtype="float")
s[np.random.default_rng(2).integers(0, 9, 3)] = np.nan
result1 = s.interpolate(method="spline", order=1)
expected1 = s.interpolate(method="spline", order=1)
tm.assert_series_equal(result1, expected1)
def test_interp_timedelta64(self):
# GH 6424
df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3]))
result = df.interpolate(method="time")
expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3]))
tm.assert_series_equal(result, expected)
# test for non uniform spacing
df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4]))
result = df.interpolate(method="time")
expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4]))
tm.assert_series_equal(result, expected)
def test_series_interpolate_method_values(self):
# GH#1646
rng = date_range("1/1/2000", "1/20/2000", freq="D")
ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
ts[::2] = np.nan
result = ts.interpolate(method="values")
exp = ts.interpolate()
tm.assert_series_equal(result, exp)
def test_series_interpolate_intraday(self):
# #1698
index = date_range("1/1/2012", periods=4, freq="12D")
ts = Series([0, 12, 24, 36], index)
new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
exp = ts.reindex(new_index).interpolate(method="time")
index = date_range("1/1/2012", periods=4, freq="12h")
ts = Series([0, 12, 24, 36], index)
new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
result = ts.reindex(new_index).interpolate(method="time")
tm.assert_numpy_array_equal(result.values, exp.values)
@pytest.mark.parametrize(
"ind",
[
["a", "b", "c", "d"],
pd.period_range(start="2019-01-01", periods=4),
pd.interval_range(start=0, end=4),
],
)
def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
# gh 21662
df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
method, kwargs = interp_methods_ind
if method == "pchip":
pytest.importorskip("scipy")
if method == "linear":
result = df[0].interpolate(**kwargs)
expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
tm.assert_series_equal(result, expected)
else:
expected_error = (
"Index column must be numeric or datetime type when "
f"using {method} method other than linear. "
"Try setting a numeric or datetime index column before "
"interpolating."
)
with pytest.raises(ValueError, match=expected_error):
df[0].interpolate(method=method, **kwargs)
def test_interpolate_timedelta_index(self, request, interp_methods_ind):
"""
Tests for non numerical index types - object, period, timedelta
Note that all methods except time, index, nearest and values
are tested here.
"""
# gh 21662
pytest.importorskip("scipy")
ind = pd.timedelta_range(start=1, periods=4)
df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
method, kwargs = interp_methods_ind
if method in {"cubic", "zero"}:
request.applymarker(
pytest.mark.xfail(
reason=f"{method} interpolation is not supported for TimedeltaIndex"
)
)
result = df[0].interpolate(method=method, **kwargs)
expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ascending, expected_values",
[(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])],
)
def test_interpolate_unsorted_index(self, ascending, expected_values):
# GH 21037
ts = Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1])
result = ts.sort_index(ascending=ascending).interpolate(method="index")
expected = Series(data=expected_values, index=expected_values, dtype=float)
tm.assert_series_equal(result, expected)
def test_interpolate_asfreq_raises(self):
ser = Series(["a", None, "b"], dtype=object)
msg2 = "Series.interpolate with object dtype"
msg = "Invalid fill method"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg2):
ser.interpolate(method="asfreq")
def test_interpolate_fill_value(self):
# GH#54920
pytest.importorskip("scipy")
ser = Series([np.nan, 0, 1, np.nan, 3, np.nan])
result = ser.interpolate(method="nearest", fill_value=0)
expected = Series([np.nan, 0, 1, 1, 3, 0])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,26 @@
import numpy as np
from pandas import (
Series,
date_range,
)
class TestIsMonotonic:
def test_is_monotonic_numeric(self):
ser = Series(np.random.default_rng(2).integers(0, 10, size=1000))
assert not ser.is_monotonic_increasing
ser = Series(np.arange(1000))
assert ser.is_monotonic_increasing is True
assert ser.is_monotonic_increasing is True
ser = Series(np.arange(1000, 0, -1))
assert ser.is_monotonic_decreasing is True
def test_is_monotonic_dt64(self):
ser = Series(date_range("20130101", periods=10))
assert ser.is_monotonic_increasing is True
assert ser.is_monotonic_increasing is True
ser = Series(list(reversed(ser)))
assert ser.is_monotonic_increasing is False
assert ser.is_monotonic_decreasing is True

View File

@ -0,0 +1,40 @@
import numpy as np
import pytest
from pandas import Series
@pytest.mark.parametrize(
"data, expected",
[
(np.random.default_rng(2).integers(0, 10, size=1000), False),
(np.arange(1000), True),
([], True),
([np.nan], True),
(["foo", "bar", np.nan], True),
(["foo", "foo", np.nan], False),
(["foo", "bar", np.nan, np.nan], False),
],
)
def test_is_unique(data, expected):
# GH#11946 / GH#25180
ser = Series(data)
assert ser.is_unique is expected
def test_is_unique_class_ne(capsys):
# GH#20661
class Foo:
def __init__(self, val) -> None:
self._value = val
def __ne__(self, other):
raise Exception("NEQ not supported")
with capsys.disabled():
li = [Foo(i) for i in range(5)]
ser = Series(li, index=list(range(5)))
ser.is_unique
captured = capsys.readouterr()
assert len(captured.err) == 0

View File

@ -0,0 +1,252 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
from pandas.core import algorithms
from pandas.core.arrays import PeriodArray
class TestSeriesIsIn:
def test_isin(self):
s = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
result = s.isin(["A", "C"])
expected = Series([True, False, True, False, False, False, True, True])
tm.assert_series_equal(result, expected)
# GH#16012
# This specific issue has to have a series over 1e6 in len, but the
# comparison array (in_list) must be large enough so that numpy doesn't
# do a manual masking trick that will avoid this issue altogether
s = Series(list("abcdefghijk" * 10**5))
# If numpy doesn't do the manual comparison/mask, these
# unorderable mixed types are what cause the exception in numpy
in_list = [-1, "a", "b", "G", "Y", "Z", "E", "K", "E", "S", "I", "R", "R"] * 6
assert s.isin(in_list).sum() == 200000
def test_isin_with_string_scalar(self):
# GH#4763
s = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
msg = (
r"only list-like objects are allowed to be passed to isin\(\), "
r"you passed a `str`"
)
with pytest.raises(TypeError, match=msg):
s.isin("a")
s = Series(["aaa", "b", "c"])
with pytest.raises(TypeError, match=msg):
s.isin("aaa")
def test_isin_datetimelike_mismatched_reso(self):
expected = Series([True, True, False, False, False])
ser = Series(date_range("jan-01-2013", "jan-05-2013"))
# fails on dtype conversion in the first place
day_values = np.asarray(ser[0:2].values).astype("datetime64[D]")
result = ser.isin(day_values)
tm.assert_series_equal(result, expected)
dta = ser[:2]._values.astype("M8[s]")
result = ser.isin(dta)
tm.assert_series_equal(result, expected)
def test_isin_datetimelike_mismatched_reso_list(self):
expected = Series([True, True, False, False, False])
ser = Series(date_range("jan-01-2013", "jan-05-2013"))
dta = ser[:2]._values.astype("M8[s]")
result = ser.isin(list(dta))
tm.assert_series_equal(result, expected)
def test_isin_with_i8(self):
# GH#5021
expected = Series([True, True, False, False, False])
expected2 = Series([False, True, False, False, False])
# datetime64[ns]
s = Series(date_range("jan-01-2013", "jan-05-2013"))
result = s.isin(s[0:2])
tm.assert_series_equal(result, expected)
result = s.isin(s[0:2].values)
tm.assert_series_equal(result, expected)
result = s.isin([s[1]])
tm.assert_series_equal(result, expected2)
result = s.isin([np.datetime64(s[1])])
tm.assert_series_equal(result, expected2)
result = s.isin(set(s[0:2]))
tm.assert_series_equal(result, expected)
# timedelta64[ns]
s = Series(pd.to_timedelta(range(5), unit="d"))
result = s.isin(s[0:2])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
def test_isin_empty(self, empty):
# see GH#16991
s = Series(["a", "b"])
expected = Series([False, False])
result = s.isin(empty)
tm.assert_series_equal(expected, result)
def test_isin_read_only(self):
# https://github.com/pandas-dev/pandas/issues/37174
arr = np.array([1, 2, 3])
arr.setflags(write=False)
s = Series([1, 2, 3])
result = s.isin(arr)
expected = Series([True, True, True])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", [object, None])
def test_isin_dt64_values_vs_ints(self, dtype):
# GH#36621 dont cast integers to datetimes for isin
dti = date_range("2013-01-01", "2013-01-05")
ser = Series(dti)
comps = np.asarray([1356998400000000000], dtype=dtype)
res = dti.isin(comps)
expected = np.array([False] * len(dti), dtype=bool)
tm.assert_numpy_array_equal(res, expected)
res = ser.isin(comps)
tm.assert_series_equal(res, Series(expected))
res = pd.core.algorithms.isin(ser, comps)
tm.assert_numpy_array_equal(res, expected)
def test_isin_tzawareness_mismatch(self):
dti = date_range("2013-01-01", "2013-01-05")
ser = Series(dti)
other = dti.tz_localize("UTC")
res = dti.isin(other)
expected = np.array([False] * len(dti), dtype=bool)
tm.assert_numpy_array_equal(res, expected)
res = ser.isin(other)
tm.assert_series_equal(res, Series(expected))
res = pd.core.algorithms.isin(ser, other)
tm.assert_numpy_array_equal(res, expected)
def test_isin_period_freq_mismatch(self):
dti = date_range("2013-01-01", "2013-01-05")
pi = dti.to_period("M")
ser = Series(pi)
# We construct another PeriodIndex with the same i8 values
# but different dtype
dtype = dti.to_period("Y").dtype
other = PeriodArray._simple_new(pi.asi8, dtype=dtype)
res = pi.isin(other)
expected = np.array([False] * len(pi), dtype=bool)
tm.assert_numpy_array_equal(res, expected)
res = ser.isin(other)
tm.assert_series_equal(res, Series(expected))
res = pd.core.algorithms.isin(ser, other)
tm.assert_numpy_array_equal(res, expected)
@pytest.mark.parametrize("values", [[-9.0, 0.0], [-9, 0]])
def test_isin_float_in_int_series(self, values):
# GH#19356 GH#21804
ser = Series(values)
result = ser.isin([-9, -0.5])
expected = Series([True, False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"])
@pytest.mark.parametrize(
"data,values,expected",
[
([0, 1, 0], [1], [False, True, False]),
([0, 1, 0], [1, pd.NA], [False, True, False]),
([0, pd.NA, 0], [1, 0], [True, False, True]),
([0, 1, pd.NA], [1, pd.NA], [False, True, True]),
([0, 1, pd.NA], [1, np.nan], [False, True, False]),
([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]),
],
)
def test_isin_masked_types(self, dtype, data, values, expected):
# GH#42405
ser = Series(data, dtype=dtype)
result = ser.isin(values)
expected = Series(expected, dtype="boolean")
tm.assert_series_equal(result, expected)
def test_isin_large_series_mixed_dtypes_and_nan(monkeypatch):
# https://github.com/pandas-dev/pandas/issues/37094
# combination of object dtype for the values
# and > _MINIMUM_COMP_ARR_LEN elements
min_isin_comp = 5
ser = Series([1, 2, np.nan] * min_isin_comp)
with monkeypatch.context() as m:
m.setattr(algorithms, "_MINIMUM_COMP_ARR_LEN", min_isin_comp)
result = ser.isin({"foo", "bar"})
expected = Series([False] * 3 * min_isin_comp)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"array,expected",
[
(
[0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
Series([False, True, True, False, True, True, True], dtype=bool),
)
],
)
def test_isin_complex_numbers(array, expected):
# GH 17927
result = Series(array).isin([1j, 1 + 1j, 1 + 2j])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data,is_in",
[([1, [2]], [1]), (["simple str", [{"values": 3}]], ["simple str"])],
)
def test_isin_filtering_with_mixed_object_types(data, is_in):
# GH 20883
ser = Series(data)
result = ser.isin(is_in)
expected = Series([True, False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("data", [[1, 2, 3], [1.0, 2.0, 3.0]])
@pytest.mark.parametrize("isin", [[1, 2], [1.0, 2.0]])
def test_isin_filtering_on_iterable(data, isin):
# GH 50234
ser = Series(data)
result = ser.isin(i for i in isin)
expected_result = Series([True, True, False])
tm.assert_series_equal(result, expected_result)

View File

@ -0,0 +1,35 @@
"""
We also test Series.notna in this file.
"""
import numpy as np
from pandas import (
Period,
Series,
)
import pandas._testing as tm
class TestIsna:
def test_isna_period_dtype(self):
# GH#13737
ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
expected = Series([False, True])
result = ser.isna()
tm.assert_series_equal(result, expected)
result = ser.notna()
tm.assert_series_equal(result, ~expected)
def test_isna(self):
ser = Series([0, 5.4, 3, np.nan, -0.001])
expected = Series([False, False, False, True, False])
tm.assert_series_equal(ser.isna(), expected)
tm.assert_series_equal(ser.notna(), ~expected)
ser = Series(["hi", "", np.nan])
expected = Series([False, False, True])
tm.assert_series_equal(ser.isna(), expected)
tm.assert_series_equal(ser.notna(), ~expected)

View File

@ -0,0 +1,59 @@
"""
Series.item method, mainly testing that we get python scalars as opposed to
numpy scalars.
"""
import pytest
from pandas import (
Series,
Timedelta,
Timestamp,
date_range,
)
class TestItem:
def test_item(self):
# We are testing that we get python scalars as opposed to numpy scalars
ser = Series([1])
result = ser.item()
assert result == 1
assert result == ser.iloc[0]
assert isinstance(result, int) # i.e. not np.int64
ser = Series([0.5], index=[3])
result = ser.item()
assert isinstance(result, float)
assert result == 0.5
ser = Series([1, 2])
msg = "can only convert an array of size 1"
with pytest.raises(ValueError, match=msg):
ser.item()
dti = date_range("2016-01-01", periods=2)
with pytest.raises(ValueError, match=msg):
dti.item()
with pytest.raises(ValueError, match=msg):
Series(dti).item()
val = dti[:1].item()
assert isinstance(val, Timestamp)
val = Series(dti)[:1].item()
assert isinstance(val, Timestamp)
tdi = dti - dti
with pytest.raises(ValueError, match=msg):
tdi.item()
with pytest.raises(ValueError, match=msg):
Series(tdi).item()
val = tdi[:1].item()
assert isinstance(val, Timedelta)
val = Series(tdi)[:1].item()
assert isinstance(val, Timedelta)
# Case where ser[0] would not work
ser = Series(dti, index=[5, 6])
val = ser.iloc[:1].item()
assert val == dti[0]

View File

@ -0,0 +1,604 @@
from collections import (
Counter,
defaultdict,
)
from decimal import Decimal
import math
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
bdate_range,
date_range,
isna,
timedelta_range,
)
import pandas._testing as tm
def test_series_map_box_timedelta():
# GH#11349
ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))
def f(x):
return x.total_seconds()
ser.map(f)
def test_map_callable(datetime_series):
with np.errstate(all="ignore"):
tm.assert_series_equal(datetime_series.map(np.sqrt), np.sqrt(datetime_series))
# map function element-wise
tm.assert_series_equal(datetime_series.map(math.exp), np.exp(datetime_series))
# empty series
s = Series(dtype=object, name="foo", index=Index([], name="bar"))
rs = s.map(lambda x: x)
tm.assert_series_equal(s, rs)
# check all metadata (GH 9322)
assert s is not rs
assert s.index is rs.index
assert s.dtype == rs.dtype
assert s.name == rs.name
# index but no data
s = Series(index=[1, 2, 3], dtype=np.float64)
rs = s.map(lambda x: x)
tm.assert_series_equal(s, rs)
def test_map_same_length_inference_bug():
s = Series([1, 2])
def f(x):
return (x, x + 1)
s = Series([1, 2, 3])
result = s.map(f)
expected = Series([(1, 2), (2, 3), (3, 4)])
tm.assert_series_equal(result, expected)
s = Series(["foo,bar"])
result = s.map(lambda x: x.split(","))
expected = Series([("foo", "bar")])
tm.assert_series_equal(result, expected)
def test_series_map_box_timestamps():
# GH#2689, GH#2627
ser = Series(date_range("1/1/2000", periods=3))
def func(x):
return (x.hour, x.day, x.month)
result = ser.map(func)
expected = Series([(0, 1, 1), (0, 2, 1), (0, 3, 1)])
tm.assert_series_equal(result, expected)
def test_map_series_stringdtype(any_string_dtype, using_infer_string):
# map test on StringDType, GH#40823
ser1 = Series(
data=["cat", "dog", "rabbit"],
index=["id1", "id2", "id3"],
dtype=any_string_dtype,
)
ser2 = Series(["id3", "id2", "id1", "id7000"], dtype=any_string_dtype)
result = ser2.map(ser1)
item = pd.NA
if ser2.dtype == object:
item = np.nan
expected = Series(data=["rabbit", "dog", "cat", item], dtype=any_string_dtype)
if using_infer_string and any_string_dtype == "object":
expected = expected.astype("str")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data, expected_dtype",
[(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], "str")],
)
def test_map_categorical_with_nan_values(data, expected_dtype):
# GH 20714 bug fixed in: GH 24275
def func(val):
return val.split("-")[0]
s = Series(data, dtype="category")
result = s.map(func, na_action="ignore")
expected = Series(["1", "1", np.nan], dtype=expected_dtype)
tm.assert_series_equal(result, expected)
def test_map_empty_integer_series():
# GH52384
s = Series([], dtype=int)
result = s.map(lambda x: x)
tm.assert_series_equal(result, s)
def test_map_empty_integer_series_with_datetime_index():
# GH 21245
s = Series([], index=date_range(start="2018-01-01", periods=0), dtype=int)
result = s.map(lambda x: x)
tm.assert_series_equal(result, s)
@pytest.mark.parametrize("func", [str, lambda x: str(x)])
def test_map_simple_str_callables_same_as_astype(
string_series, func, using_infer_string
):
# test that we are evaluating row-by-row first
# before vectorized evaluation
result = string_series.map(func)
expected = string_series.astype(str if not using_infer_string else "str")
tm.assert_series_equal(result, expected)
def test_list_raises(string_series):
with pytest.raises(TypeError, match="'list' object is not callable"):
string_series.map([lambda x: x])
def test_map():
data = {
"A": [0.0, 1.0, 2.0, 3.0, 4.0],
"B": [0.0, 1.0, 0.0, 1.0, 0.0],
"C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
"D": bdate_range("1/1/2009", periods=5),
}
source = Series(data["B"], index=data["C"])
target = Series(data["C"][:4], index=data["D"][:4])
merged = target.map(source)
for k, v in merged.items():
assert v == source[target[k]]
# input could be a dict
merged = target.map(source.to_dict())
for k, v in merged.items():
assert v == source[target[k]]
def test_map_datetime(datetime_series):
# function
result = datetime_series.map(lambda x: x * 2)
tm.assert_series_equal(result, datetime_series * 2)
def test_map_category():
# GH 10324
a = Series([1, 2, 3, 4])
b = Series(["even", "odd", "even", "odd"], dtype="category")
c = Series(["even", "odd", "even", "odd"])
exp = Series(["odd", "even", "odd", np.nan], dtype="category")
tm.assert_series_equal(a.map(b), exp)
exp = Series(["odd", "even", "odd", np.nan])
tm.assert_series_equal(a.map(c), exp)
def test_map_category_numeric():
a = Series(["a", "b", "c", "d"])
b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))
exp = Series([np.nan, 1, 2, 3])
tm.assert_series_equal(a.map(b), exp)
exp = Series([np.nan, 1, 2, 3])
tm.assert_series_equal(a.map(c), exp)
def test_map_category_string():
a = Series(["a", "b", "c", "d"])
b = Series(
["B", "C", "D", "E"],
dtype="category",
index=pd.CategoricalIndex(["b", "c", "d", "e"]),
)
c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))
exp = Series(
pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
)
tm.assert_series_equal(a.map(b), exp)
exp = Series([np.nan, "B", "C", "D"])
tm.assert_series_equal(a.map(c), exp)
@pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning")
def test_map_empty(request, index):
if isinstance(index, MultiIndex):
request.applymarker(
pytest.mark.xfail(
reason="Initializing a Series from a MultiIndex is not supported"
)
)
s = Series(index)
result = s.map({})
expected = Series(np.nan, index=s.index)
tm.assert_series_equal(result, expected)
def test_map_compat():
# related GH 8024
s = Series([True, True, False], index=[1, 2, 3])
result = s.map({True: "foo", False: "bar"})
expected = Series(["foo", "foo", "bar"], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
def test_map_int():
left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
right = Series({1: 11, 2: 22, 3: 33})
assert left.dtype == np.float64
assert issubclass(right.dtype.type, np.integer)
merged = left.map(right)
assert merged.dtype == np.float64
assert isna(merged["d"])
assert not isna(merged["c"])
def test_map_type_inference():
s = Series(range(3))
s2 = s.map(lambda x: np.where(x == 0, 0, 1))
assert issubclass(s2.dtype.type, np.integer)
def test_map_decimal(string_series):
result = string_series.map(lambda x: Decimal(str(x)))
assert result.dtype == np.object_
assert isinstance(result.iloc[0], Decimal)
def test_map_na_exclusion():
s = Series([1.5, np.nan, 3, np.nan, 5])
result = s.map(lambda x: x * 2, na_action="ignore")
exp = s * 2
tm.assert_series_equal(result, exp)
def test_map_dict_with_tuple_keys():
"""
Due to new MultiIndex-ing behaviour in v0.14.0,
dicts with tuple keys passed to map were being
converted to a multi-index, preventing tuple values
from being mapped properly.
"""
# GH 18496
df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]})
label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}
df["labels"] = df["a"].map(label_mappings)
df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index)
# All labels should be filled now
tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)
def test_map_counter():
s = Series(["a", "b", "c"], index=[1, 2, 3])
counter = Counter()
counter["b"] = 5
counter["c"] += 1
result = s.map(counter)
expected = Series([0, 5, 1], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
def test_map_defaultdict():
s = Series([1, 2, 3], index=["a", "b", "c"])
default_dict = defaultdict(lambda: "blank")
default_dict[1] = "stuff"
result = s.map(default_dict)
expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
tm.assert_series_equal(result, expected)
def test_map_dict_na_key():
# https://github.com/pandas-dev/pandas/issues/17648
# Checks that np.nan key is appropriately mapped
s = Series([1, 2, np.nan])
expected = Series(["a", "b", "c"])
result = s.map({1: "a", 2: "b", np.nan: "c"})
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_na_key(na_action):
# GH 48813
s = Series([1, 2, np.nan])
default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"})
result = s.map(default_map, na_action=na_action)
expected = Series({0: "a", 1: "b", 2: "c" if na_action is None else np.nan})
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_missing_key(na_action):
# GH 48813
s = Series([1, 2, np.nan])
default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", 3: "c"})
result = s.map(default_map, na_action=na_action)
expected = Series({0: "a", 1: "b", 2: "missing" if na_action is None else np.nan})
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_unmutated(na_action):
# GH 48813
s = Series([1, 2, np.nan])
default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"})
expected_default_map = default_map.copy()
s.map(default_map, na_action=na_action)
assert default_map == expected_default_map
@pytest.mark.parametrize("arg_func", [dict, Series])
def test_map_dict_ignore_na(arg_func):
# GH#47527
mapping = arg_func({1: 10, np.nan: 42})
ser = Series([1, np.nan, 2])
result = ser.map(mapping, na_action="ignore")
expected = Series([10, np.nan, np.nan])
tm.assert_series_equal(result, expected)
def test_map_defaultdict_ignore_na():
# GH#47527
mapping = defaultdict(int, {1: 10, np.nan: 42})
ser = Series([1, np.nan, 2])
result = ser.map(mapping)
expected = Series([10, 42, 0])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"na_action, expected",
[(None, Series([10.0, 42.0, np.nan])), ("ignore", Series([10, np.nan, np.nan]))],
)
def test_map_categorical_na_ignore(na_action, expected):
# GH#47527
values = pd.Categorical([1, np.nan, 2], categories=[10, 1, 2])
ser = Series(values)
result = ser.map({1: 10, np.nan: 42}, na_action=na_action)
tm.assert_series_equal(result, expected)
def test_map_dict_subclass_with_missing():
"""
Test Series.map with a dictionary subclass that defines __missing__,
i.e. sets a default value (GH #15999).
"""
class DictWithMissing(dict):
def __missing__(self, key):
return "missing"
s = Series([1, 2, 3])
dictionary = DictWithMissing({3: "three"})
result = s.map(dictionary)
expected = Series(["missing", "missing", "three"])
tm.assert_series_equal(result, expected)
def test_map_dict_subclass_without_missing():
class DictWithoutMissing(dict):
pass
s = Series([1, 2, 3])
dictionary = DictWithoutMissing({3: "three"})
result = s.map(dictionary)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)
def test_map_abc_mapping(non_dict_mapping_subclass):
# https://github.com/pandas-dev/pandas/issues/29733
# Check collections.abc.Mapping support as mapper for Series.map
s = Series([1, 2, 3])
not_a_dictionary = non_dict_mapping_subclass({3: "three"})
result = s.map(not_a_dictionary)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)
def test_map_abc_mapping_with_missing(non_dict_mapping_subclass):
# https://github.com/pandas-dev/pandas/issues/29733
# Check collections.abc.Mapping support as mapper for Series.map
class NonDictMappingWithMissing(non_dict_mapping_subclass):
def __missing__(self, key):
return "missing"
s = Series([1, 2, 3])
not_a_dictionary = NonDictMappingWithMissing({3: "three"})
result = s.map(not_a_dictionary)
# __missing__ is a dict concept, not a Mapping concept,
# so it should not change the result!
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)
def test_map_box_dt64(unit):
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
ser = Series(vals).dt.as_unit(unit)
assert ser.dtype == f"datetime64[{unit}]"
# boxed value must be Timestamp instance
res = ser.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
tm.assert_series_equal(res, exp)
def test_map_box_dt64tz(unit):
vals = [
pd.Timestamp("2011-01-01", tz="US/Eastern"),
pd.Timestamp("2011-01-02", tz="US/Eastern"),
]
ser = Series(vals).dt.as_unit(unit)
assert ser.dtype == f"datetime64[{unit}, US/Eastern]"
res = ser.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
tm.assert_series_equal(res, exp)
def test_map_box_td64(unit):
# timedelta
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
ser = Series(vals).dt.as_unit(unit)
assert ser.dtype == f"timedelta64[{unit}]"
res = ser.map(lambda x: f"{type(x).__name__}_{x.days}")
exp = Series(["Timedelta_1", "Timedelta_2"])
tm.assert_series_equal(res, exp)
def test_map_box_period():
# period
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
ser = Series(vals)
assert ser.dtype == "Period[M]"
res = ser.map(lambda x: f"{type(x).__name__}_{x.freqstr}")
exp = Series(["Period_M", "Period_M"])
tm.assert_series_equal(res, exp)
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_categorical(na_action, using_infer_string):
values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
s = Series(values, name="XX", index=list("abcdefg"))
result = s.map(lambda x: x.lower(), na_action=na_action)
exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
exp = Series(exp_values, name="XX", index=list("abcdefg"))
tm.assert_series_equal(result, exp)
tm.assert_categorical_equal(result.values, exp_values)
result = s.map(lambda x: "A", na_action=na_action)
exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
tm.assert_series_equal(result, exp)
assert result.dtype == object if not using_infer_string else "str"
@pytest.mark.parametrize(
"na_action, expected",
(
[None, Series(["A", "B", "nan"], name="XX")],
[
"ignore",
Series(
["A", "B", np.nan],
name="XX",
dtype=pd.CategoricalDtype(list("DCBA"), True),
),
],
),
)
def test_map_categorical_na_action(na_action, expected):
dtype = pd.CategoricalDtype(list("DCBA"), ordered=True)
values = pd.Categorical(list("AB") + [np.nan], dtype=dtype)
s = Series(values, name="XX")
result = s.map(str, na_action=na_action)
tm.assert_series_equal(result, expected)
def test_map_datetimetz():
values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize("Asia/Tokyo")
s = Series(values, name="XX")
# keep tz
result = s.map(lambda x: x + pd.offsets.Day())
exp_values = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize(
"Asia/Tokyo"
)
exp = Series(exp_values, name="XX")
tm.assert_series_equal(result, exp)
result = s.map(lambda x: x.hour)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
tm.assert_series_equal(result, exp)
# not vectorized
def f(x):
if not isinstance(x, pd.Timestamp):
raise ValueError
return str(x.tz)
result = s.map(f)
exp = Series(["Asia/Tokyo"] * 25, name="XX")
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize(
"vals,mapping,exp",
[
(list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
(list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
(list(range(3)), {0: 42}, [42] + [np.nan] * 3),
],
)
def test_map_missing_mixed(vals, mapping, exp):
# GH20495
s = Series(vals + [np.nan])
result = s.map(mapping)
exp = Series(exp)
tm.assert_series_equal(result, exp)
def test_map_scalar_on_date_time_index_aware_series():
# GH 25959
# Calling map on a localized time series should not cause an error
series = Series(
np.arange(10, dtype=np.float64),
index=date_range("2020-01-01", periods=10, tz="UTC"),
name="ts",
)
result = Series(series.index).map(lambda x: 1)
tm.assert_series_equal(result, Series(np.ones(len(series)), dtype="int64"))
def test_map_float_to_string_precision():
# GH 13228
ser = Series(1 / 3)
result = ser.map(lambda val: str(val)).to_dict()
expected = {0: "0.3333333333333333"}
assert result == expected
def test_map_to_timedelta():
list_of_valid_strings = ["00:00:01", "00:00:02"]
a = pd.to_timedelta(list_of_valid_strings)
b = Series(list_of_valid_strings).map(pd.to_timedelta)
tm.assert_series_equal(Series(a), b)
list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]
a = pd.to_timedelta(list_of_strings)
ser = Series(list_of_strings)
b = ser.map(pd.to_timedelta)
tm.assert_series_equal(Series(a), b)
def test_map_type():
# GH 46719
s = Series([3, "string", float], index=["a", "b", "c"])
result = s.map(type)
expected = Series([int, str, type], index=["a", "b", "c"])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,82 @@
import operator
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
class TestMatmul:
def test_matmul(self):
# matmul test is for GH#10259
a = Series(
np.random.default_rng(2).standard_normal(4), index=["p", "q", "r", "s"]
)
b = DataFrame(
np.random.default_rng(2).standard_normal((3, 4)),
index=["1", "2", "3"],
columns=["p", "q", "r", "s"],
).T
# Series @ DataFrame -> Series
result = operator.matmul(a, b)
expected = Series(np.dot(a.values, b.values), index=["1", "2", "3"])
tm.assert_series_equal(result, expected)
# DataFrame @ Series -> Series
result = operator.matmul(b.T, a)
expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
tm.assert_series_equal(result, expected)
# Series @ Series -> scalar
result = operator.matmul(a, a)
expected = np.dot(a.values, a.values)
tm.assert_almost_equal(result, expected)
# GH#21530
# vector (1D np.array) @ Series (__rmatmul__)
result = operator.matmul(a.values, a)
expected = np.dot(a.values, a.values)
tm.assert_almost_equal(result, expected)
# GH#21530
# vector (1D list) @ Series (__rmatmul__)
result = operator.matmul(a.values.tolist(), a)
expected = np.dot(a.values, a.values)
tm.assert_almost_equal(result, expected)
# GH#21530
# matrix (2D np.array) @ Series (__rmatmul__)
result = operator.matmul(b.T.values, a)
expected = np.dot(b.T.values, a.values)
tm.assert_almost_equal(result, expected)
# GH#21530
# matrix (2D nested lists) @ Series (__rmatmul__)
result = operator.matmul(b.T.values.tolist(), a)
expected = np.dot(b.T.values, a.values)
tm.assert_almost_equal(result, expected)
# mixed dtype DataFrame @ Series
a["p"] = int(a.p)
result = operator.matmul(b.T, a)
expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
tm.assert_series_equal(result, expected)
# different dtypes DataFrame @ Series
a = a.astype(int)
result = operator.matmul(b.T, a)
expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
tm.assert_series_equal(result, expected)
msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)"
# exception raised is of type Exception
with pytest.raises(Exception, match=msg):
a.dot(a.values[:3])
msg = "matrices are not aligned"
with pytest.raises(ValueError, match=msg):
a.dot(b.T)

View File

@ -0,0 +1,248 @@
"""
Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo"
but are implicitly also testing nsmallest_foo.
"""
from itertools import product
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
main_dtypes = [
"datetime",
"datetimetz",
"timedelta",
"int8",
"int16",
"int32",
"int64",
"float32",
"float64",
"uint8",
"uint16",
"uint32",
"uint64",
]
@pytest.fixture
def s_main_dtypes():
"""
A DataFrame with many dtypes
* datetime
* datetimetz
* timedelta
* [u]int{8,16,32,64}
* float{32,64}
The columns are the name of the dtype.
"""
df = pd.DataFrame(
{
"datetime": pd.to_datetime(["2003", "2002", "2001", "2002", "2005"]),
"datetimetz": pd.to_datetime(
["2003", "2002", "2001", "2002", "2005"]
).tz_localize("US/Eastern"),
"timedelta": pd.to_timedelta(["3d", "2d", "1d", "2d", "5d"]),
}
)
for dtype in [
"int8",
"int16",
"int32",
"int64",
"float32",
"float64",
"uint8",
"uint16",
"uint32",
"uint64",
]:
df[dtype] = Series([3, 2, 1, 2, 5], dtype=dtype)
return df
@pytest.fixture(params=main_dtypes)
def s_main_dtypes_split(request, s_main_dtypes):
"""Each series in s_main_dtypes."""
return s_main_dtypes[request.param]
def assert_check_nselect_boundary(vals, dtype, method):
# helper function for 'test_boundary_{dtype}' tests
ser = Series(vals, dtype=dtype)
result = getattr(ser, method)(3)
expected_idxr = [0, 1, 2] if method == "nsmallest" else [3, 2, 1]
expected = ser.loc[expected_idxr]
tm.assert_series_equal(result, expected)
class TestSeriesNLargestNSmallest:
@pytest.mark.parametrize(
"r",
[
Series([3.0, 2, 1, 2, "5"], dtype="object"),
Series([3.0, 2, 1, 2, 5], dtype="object"),
# not supported on some archs
# Series([3., 2, 1, 2, 5], dtype='complex256'),
Series([3.0, 2, 1, 2, 5], dtype="complex128"),
Series(list("abcde")),
Series(list("abcde"), dtype="category"),
],
)
def test_nlargest_error(self, r):
dt = r.dtype
msg = f"Cannot use method 'n(largest|smallest)' with dtype {dt}"
args = 2, len(r), 0, -1
methods = r.nlargest, r.nsmallest
for method, arg in product(methods, args):
with pytest.raises(TypeError, match=msg):
method(arg)
def test_nsmallest_nlargest(self, s_main_dtypes_split):
# float, int, datetime64 (use i8), timedelts64 (same),
# object that are numbers, object that are strings
ser = s_main_dtypes_split
tm.assert_series_equal(ser.nsmallest(2), ser.iloc[[2, 1]])
tm.assert_series_equal(ser.nsmallest(2, keep="last"), ser.iloc[[2, 3]])
empty = ser.iloc[0:0]
tm.assert_series_equal(ser.nsmallest(0), empty)
tm.assert_series_equal(ser.nsmallest(-1), empty)
tm.assert_series_equal(ser.nlargest(0), empty)
tm.assert_series_equal(ser.nlargest(-1), empty)
tm.assert_series_equal(ser.nsmallest(len(ser)), ser.sort_values())
tm.assert_series_equal(ser.nsmallest(len(ser) + 1), ser.sort_values())
tm.assert_series_equal(ser.nlargest(len(ser)), ser.iloc[[4, 0, 1, 3, 2]])
tm.assert_series_equal(ser.nlargest(len(ser) + 1), ser.iloc[[4, 0, 1, 3, 2]])
def test_nlargest_misc(self):
ser = Series([3.0, np.nan, 1, 2, 5])
result = ser.nlargest()
expected = ser.iloc[[4, 0, 3, 2, 1]]
tm.assert_series_equal(result, expected)
result = ser.nsmallest()
expected = ser.iloc[[2, 3, 0, 4, 1]]
tm.assert_series_equal(result, expected)
msg = 'keep must be either "first", "last"'
with pytest.raises(ValueError, match=msg):
ser.nsmallest(keep="invalid")
with pytest.raises(ValueError, match=msg):
ser.nlargest(keep="invalid")
# GH#15297
ser = Series([1] * 5, index=[1, 2, 3, 4, 5])
expected_first = Series([1] * 3, index=[1, 2, 3])
expected_last = Series([1] * 3, index=[5, 4, 3])
result = ser.nsmallest(3)
tm.assert_series_equal(result, expected_first)
result = ser.nsmallest(3, keep="last")
tm.assert_series_equal(result, expected_last)
result = ser.nlargest(3)
tm.assert_series_equal(result, expected_first)
result = ser.nlargest(3, keep="last")
tm.assert_series_equal(result, expected_last)
@pytest.mark.parametrize("n", range(1, 5))
def test_nlargest_n(self, n):
# GH 13412
ser = Series([1, 4, 3, 2], index=[0, 0, 1, 1])
result = ser.nlargest(n)
expected = ser.sort_values(ascending=False).head(n)
tm.assert_series_equal(result, expected)
result = ser.nsmallest(n)
expected = ser.sort_values().head(n)
tm.assert_series_equal(result, expected)
def test_nlargest_boundary_integer(self, nselect_method, any_int_numpy_dtype):
# GH#21426
dtype_info = np.iinfo(any_int_numpy_dtype)
min_val, max_val = dtype_info.min, dtype_info.max
vals = [min_val, min_val + 1, max_val - 1, max_val]
assert_check_nselect_boundary(vals, any_int_numpy_dtype, nselect_method)
def test_nlargest_boundary_float(self, nselect_method, float_numpy_dtype):
# GH#21426
dtype_info = np.finfo(float_numpy_dtype)
min_val, max_val = dtype_info.min, dtype_info.max
min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=float_numpy_dtype)
vals = [min_val, min_2nd, max_2nd, max_val]
assert_check_nselect_boundary(vals, float_numpy_dtype, nselect_method)
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
def test_nlargest_boundary_datetimelike(self, nselect_method, dtype):
# GH#21426
# use int64 bounds and +1 to min_val since true minimum is NaT
# (include min_val/NaT at end to maintain same expected_idxr)
dtype_info = np.iinfo("int64")
min_val, max_val = dtype_info.min, dtype_info.max
vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
assert_check_nselect_boundary(vals, dtype, nselect_method)
def test_nlargest_duplicate_keep_all_ties(self):
# see GH#16818
ser = Series([10, 9, 8, 7, 7, 7, 7, 6])
result = ser.nlargest(4, keep="all")
expected = Series([10, 9, 8, 7, 7, 7, 7])
tm.assert_series_equal(result, expected)
result = ser.nsmallest(2, keep="all")
expected = Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data,expected", [([True, False], [True]), ([True, False, True, True], [True])]
)
def test_nlargest_boolean(self, data, expected):
# GH#26154 : ensure True > False
ser = Series(data)
result = ser.nlargest(1)
expected = Series(expected)
tm.assert_series_equal(result, expected)
def test_nlargest_nullable(self, any_numeric_ea_dtype):
# GH#42816
dtype = any_numeric_ea_dtype
if dtype.startswith("UInt"):
# Can't cast from negative float to uint on some platforms
arr = np.random.default_rng(2).integers(1, 10, 10)
else:
arr = np.random.default_rng(2).standard_normal(10)
arr = arr.astype(dtype.lower(), copy=False)
ser = Series(arr.copy(), dtype=dtype)
ser[1] = pd.NA
result = ser.nlargest(5)
expected = (
Series(np.delete(arr, 1), index=ser.index.delete(1))
.nlargest(5)
.astype(dtype)
)
tm.assert_series_equal(result, expected)
def test_nsmallest_nan_when_keep_is_all(self):
# GH#46589
s = Series([1, 2, 3, 3, 3, None])
result = s.nsmallest(3, keep="all")
expected = Series([1.0, 2.0, 3.0, 3.0, 3.0])
tm.assert_series_equal(result, expected)
s = Series([1, 2, None, None, None])
result = s.nsmallest(3, keep="all")
expected = Series([1, 2, None, None, None])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,24 @@
import numpy as np
from pandas import (
Categorical,
Series,
)
def test_nunique():
# basics.rst doc example
series = Series(np.random.default_rng(2).standard_normal(500))
series[20:500] = np.nan
series[10:20] = 5000
result = series.nunique()
assert result == 11
def test_nunique_categorical():
# GH#18051
ser = Series(Categorical([]))
assert ser.nunique() == 0
ser = Series(Categorical([np.nan]))
assert ser.nunique() == 0

View File

@ -0,0 +1,128 @@
import numpy as np
import pytest
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
class TestSeriesPctChange:
def test_pct_change(self, datetime_series):
msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)
rs = datetime_series.pct_change(fill_method=None)
tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1)
rs = datetime_series.pct_change(2)
filled = datetime_series.ffill()
tm.assert_series_equal(rs, filled / filled.shift(2) - 1)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = datetime_series.pct_change(fill_method="bfill", limit=1)
filled = datetime_series.bfill(limit=1)
tm.assert_series_equal(rs, filled / filled.shift(1) - 1)
rs = datetime_series.pct_change(freq="5D")
filled = datetime_series.ffill()
tm.assert_series_equal(
rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled)
)
def test_pct_change_with_duplicate_axis(self):
# GH#28664
common_idx = date_range("2019-11-14", periods=5, freq="D")
result = Series(range(5), common_idx).pct_change(freq="B")
# the reason that the expected should be like this is documented at PR 28681
expected = Series([np.nan, np.inf, np.nan, np.nan, 3.0], common_idx)
tm.assert_series_equal(result, expected)
def test_pct_change_shift_over_nas(self):
s = Series([1.0, 1.5, np.nan, 2.5, 3.0])
msg = "The default fill_method='pad' in Series.pct_change is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
chg = s.pct_change()
expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
tm.assert_series_equal(chg, expected)
@pytest.mark.parametrize(
"freq, periods, fill_method, limit",
[
("5B", 5, None, None),
("3B", 3, None, None),
("3B", 3, "bfill", None),
("7B", 7, "pad", 1),
("7B", 7, "bfill", 3),
("14B", 14, None, None),
],
)
def test_pct_change_periods_freq(
self, freq, periods, fill_method, limit, datetime_series
):
msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)
# GH#7292
with tm.assert_produces_warning(FutureWarning, match=msg):
rs_freq = datetime_series.pct_change(
freq=freq, fill_method=fill_method, limit=limit
)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs_periods = datetime_series.pct_change(
periods, fill_method=fill_method, limit=limit
)
tm.assert_series_equal(rs_freq, rs_periods)
empty_ts = Series(index=datetime_series.index, dtype=object)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs_freq = empty_ts.pct_change(
freq=freq, fill_method=fill_method, limit=limit
)
with tm.assert_produces_warning(FutureWarning, match=msg):
rs_periods = empty_ts.pct_change(
periods, fill_method=fill_method, limit=limit
)
tm.assert_series_equal(rs_freq, rs_periods)
@pytest.mark.parametrize("fill_method", ["pad", "ffill", None])
def test_pct_change_with_duplicated_indices(fill_method):
# GH30463
s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3)
warn = None if fill_method is None else FutureWarning
msg = (
"The 'fill_method' keyword being not None and the 'limit' keyword in "
"Series.pct_change are deprecated"
)
with tm.assert_produces_warning(warn, match=msg):
result = s.pct_change(fill_method=fill_method)
expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3)
tm.assert_series_equal(result, expected)
def test_pct_change_no_warning_na_beginning():
# GH#54981
ser = Series([None, None, 1, 2, 3])
result = ser.pct_change()
expected = Series([np.nan, np.nan, np.nan, 1, 0.5])
tm.assert_series_equal(result, expected)
def test_pct_change_empty():
# GH 57056
ser = Series([], dtype="float64")
expected = ser.copy()
result = ser.pct_change(periods=0)
tm.assert_series_equal(expected, result)

View File

@ -0,0 +1,13 @@
from pandas import Series
import pandas._testing as tm
def test_pop():
# GH#6600
ser = Series([0, 4, 0], index=["A", "B", "C"], name=4)
result = ser.pop("B")
assert result == 4
expected = Series([0, 0], index=["A", "C"], name=4)
tm.assert_series_equal(ser, expected)

View File

@ -0,0 +1,247 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer
import pandas as pd
from pandas import (
Index,
Series,
)
import pandas._testing as tm
from pandas.core.indexes.datetimes import Timestamp
class TestSeriesQuantile:
def test_quantile(self, datetime_series):
q = datetime_series.quantile(0.1)
assert q == np.percentile(datetime_series.dropna(), 10)
q = datetime_series.quantile(0.9)
assert q == np.percentile(datetime_series.dropna(), 90)
# object dtype
q = Series(datetime_series, dtype=object).quantile(0.9)
assert q == np.percentile(datetime_series.dropna(), 90)
# datetime64[ns] dtype
dts = datetime_series.index.to_series()
q = dts.quantile(0.2)
assert q == Timestamp("2000-01-10 19:12:00")
# timedelta64[ns] dtype
tds = dts.diff()
q = tds.quantile(0.25)
assert q == pd.to_timedelta("24:00:00")
# GH7661
result = Series([np.timedelta64("NaT")]).sum()
assert result == pd.Timedelta(0)
msg = "percentiles should all be in the interval \\[0, 1\\]"
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
with pytest.raises(ValueError, match=msg):
datetime_series.quantile(invalid)
s = Series(np.random.default_rng(2).standard_normal(100))
percentile_array = [-0.5, 0.25, 1.5]
with pytest.raises(ValueError, match=msg):
s.quantile(percentile_array)
def test_quantile_multi(self, datetime_series, unit):
datetime_series.index = datetime_series.index.as_unit(unit)
qs = [0.1, 0.9]
result = datetime_series.quantile(qs)
expected = Series(
[
np.percentile(datetime_series.dropna(), 10),
np.percentile(datetime_series.dropna(), 90),
],
index=qs,
name=datetime_series.name,
)
tm.assert_series_equal(result, expected)
dts = datetime_series.index.to_series()
dts.name = "xxx"
result = dts.quantile((0.2, 0.2))
expected = Series(
[Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")],
index=[0.2, 0.2],
name="xxx",
dtype=f"M8[{unit}]",
)
tm.assert_series_equal(result, expected)
result = datetime_series.quantile([])
expected = Series(
[], name=datetime_series.name, index=Index([], dtype=float), dtype="float64"
)
tm.assert_series_equal(result, expected)
def test_quantile_interpolation(self, datetime_series):
# see gh-10174
# interpolation = linear (default case)
q = datetime_series.quantile(0.1, interpolation="linear")
assert q == np.percentile(datetime_series.dropna(), 10)
q1 = datetime_series.quantile(0.1)
assert q1 == np.percentile(datetime_series.dropna(), 10)
# test with and without interpolation keyword
assert q == q1
def test_quantile_interpolation_dtype(self):
# GH #10174
# interpolation = linear (default case)
q = Series([1, 3, 4]).quantile(0.5, interpolation="lower")
assert q == np.percentile(np.array([1, 3, 4]), 50)
assert is_integer(q)
q = Series([1, 3, 4]).quantile(0.5, interpolation="higher")
assert q == np.percentile(np.array([1, 3, 4]), 50)
assert is_integer(q)
def test_quantile_nan(self):
# GH 13098
ser = Series([1, 2, 3, 4, np.nan])
result = ser.quantile(0.5)
expected = 2.5
assert result == expected
# all nan/empty
s1 = Series([], dtype=object)
cases = [s1, Series([np.nan, np.nan])]
for ser in cases:
res = ser.quantile(0.5)
assert np.isnan(res)
res = ser.quantile([0.5])
tm.assert_series_equal(res, Series([np.nan], index=[0.5]))
res = ser.quantile([0.2, 0.3])
tm.assert_series_equal(res, Series([np.nan, np.nan], index=[0.2, 0.3]))
@pytest.mark.parametrize(
"case",
[
[
Timestamp("2011-01-01"),
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
],
[
Timestamp("2011-01-01", tz="US/Eastern"),
Timestamp("2011-01-02", tz="US/Eastern"),
Timestamp("2011-01-03", tz="US/Eastern"),
],
[pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
# NaT
[
Timestamp("2011-01-01"),
Timestamp("2011-01-02"),
Timestamp("2011-01-03"),
pd.NaT,
],
[
Timestamp("2011-01-01", tz="US/Eastern"),
Timestamp("2011-01-02", tz="US/Eastern"),
Timestamp("2011-01-03", tz="US/Eastern"),
pd.NaT,
],
[
pd.Timedelta("1 days"),
pd.Timedelta("2 days"),
pd.Timedelta("3 days"),
pd.NaT,
],
],
)
def test_quantile_box(self, case):
ser = Series(case, name="XXX")
res = ser.quantile(0.5)
assert res == case[1]
res = ser.quantile([0.5])
exp = Series([case[1]], index=[0.5], name="XXX")
tm.assert_series_equal(res, exp)
def test_datetime_timedelta_quantiles(self):
# covers #9694
assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5))
assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5))
def test_quantile_nat(self):
res = Series([pd.NaT, pd.NaT]).quantile(0.5)
assert res is pd.NaT
res = Series([pd.NaT, pd.NaT]).quantile([0.5])
tm.assert_series_equal(res, Series([pd.NaT], index=[0.5]))
@pytest.mark.parametrize(
"values, dtype",
[([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
)
def test_quantile_sparse(self, values, dtype):
ser = Series(values, dtype=dtype)
result = ser.quantile([0.5])
expected = Series(np.asarray(ser)).quantile([0.5]).astype("Sparse[float]")
tm.assert_series_equal(result, expected)
def test_quantile_empty_float64(self):
# floats
ser = Series([], dtype="float64")
res = ser.quantile(0.5)
assert np.isnan(res)
res = ser.quantile([0.5])
exp = Series([np.nan], index=[0.5])
tm.assert_series_equal(res, exp)
def test_quantile_empty_int64(self):
# int
ser = Series([], dtype="int64")
res = ser.quantile(0.5)
assert np.isnan(res)
res = ser.quantile([0.5])
exp = Series([np.nan], index=[0.5])
tm.assert_series_equal(res, exp)
def test_quantile_empty_dt64(self):
# datetime
ser = Series([], dtype="datetime64[ns]")
res = ser.quantile(0.5)
assert res is pd.NaT
res = ser.quantile([0.5])
exp = Series([pd.NaT], index=[0.5], dtype=ser.dtype)
tm.assert_series_equal(res, exp)
@pytest.mark.parametrize("dtype", [int, float, "Int64"])
def test_quantile_dtypes(self, dtype):
result = Series([1, 2, 3], dtype=dtype).quantile(np.arange(0, 1, 0.25))
expected = Series(np.arange(1, 3, 0.5), index=np.arange(0, 1, 0.25))
if dtype == "Int64":
expected = expected.astype("Float64")
tm.assert_series_equal(result, expected)
def test_quantile_all_na(self, any_int_ea_dtype):
# GH#50681
ser = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
with tm.assert_produces_warning(None):
result = ser.quantile([0.1, 0.5])
expected = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype, index=[0.1, 0.5])
tm.assert_series_equal(result, expected)
def test_quantile_dtype_size(self, any_int_ea_dtype):
# GH#50681
ser = Series([pd.NA, pd.NA, 1], dtype=any_int_ea_dtype)
result = ser.quantile([0.1, 0.5])
expected = Series([1, 1], dtype=any_int_ea_dtype, index=[0.1, 0.5])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,563 @@
from itertools import chain
import operator
import numpy as np
import pytest
from pandas._libs.algos import (
Infinity,
NegInfinity,
)
import pandas.util._test_decorators as td
from pandas import (
NA,
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.api.types import CategoricalDtype
@pytest.fixture
def ser():
return Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])
@pytest.fixture(
params=[
["average", np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5])],
["min", np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5])],
["max", np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6])],
["first", np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6])],
["dense", np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])],
],
ids=lambda x: x[0],
)
def results(request):
return request.param
@pytest.fixture(
params=[
"object",
"float64",
"int64",
"Float64",
"Int64",
pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
"string[python]",
"str",
]
)
def dtype(request):
return request.param
def expected_dtype(dtype, method, pct=False):
exp_dtype = "float64"
# elif dtype in ["Int64", "Float64", "string[pyarrow]", "string[python]"]:
if dtype in ["string[pyarrow]"]:
exp_dtype = "Float64"
elif dtype in ["float64[pyarrow]", "int64[pyarrow]"]:
if method == "average" or pct:
exp_dtype = "double[pyarrow]"
else:
exp_dtype = "uint64[pyarrow]"
return exp_dtype
class TestSeriesRank:
def test_rank(self, datetime_series):
sp_stats = pytest.importorskip("scipy.stats")
datetime_series[::2] = np.nan
datetime_series[:10:3] = 4.0
ranks = datetime_series.rank()
oranks = datetime_series.astype("O").rank()
tm.assert_series_equal(ranks, oranks)
mask = np.isnan(datetime_series)
filled = datetime_series.fillna(np.inf)
# rankdata returns a ndarray
exp = Series(sp_stats.rankdata(filled), index=filled.index, name="ts")
exp[mask] = np.nan
tm.assert_series_equal(ranks, exp)
iseries = Series(np.arange(5).repeat(2))
iranks = iseries.rank()
exp = iseries.astype(float).rank()
tm.assert_series_equal(iranks, exp)
iseries = Series(np.arange(5)) + 1.0
exp = iseries / 5.0
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series(np.repeat(1, 100))
exp = Series(np.repeat(0.505, 100))
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
# Explicit cast to float to avoid implicit cast when setting nan
iseries = iseries.astype("float")
iseries[1] = np.nan
exp = Series(np.repeat(50.0 / 99.0, 100))
exp[1] = np.nan
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series(np.arange(5)) + 1.0
iseries[4] = np.nan
exp = iseries / 4.0
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series(np.repeat(np.nan, 100))
exp = iseries.copy()
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
# Explicit cast to float to avoid implicit cast when setting nan
iseries = Series(np.arange(5), dtype="float") + 1
iseries[4] = np.nan
exp = iseries / 4.0
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
rng = date_range("1/1/1990", periods=5)
# Explicit cast to float to avoid implicit cast when setting nan
iseries = Series(np.arange(5), rng, dtype="float") + 1
iseries.iloc[4] = np.nan
exp = iseries / 4.0
iranks = iseries.rank(pct=True)
tm.assert_series_equal(iranks, exp)
iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
exp = Series([2, 1, 3, 5, 4, 6.0])
iranks = iseries.rank()
tm.assert_series_equal(iranks, exp)
# GH 5968
iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]")
exp = Series([3, 2, 1, np.nan])
iranks = iseries.rank()
tm.assert_series_equal(iranks, exp)
values = np.array(
[-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40],
dtype="float64",
)
random_order = np.random.default_rng(2).permutation(len(values))
iseries = Series(values[random_order])
exp = Series(random_order + 1.0, dtype="float64")
iranks = iseries.rank()
tm.assert_series_equal(iranks, exp)
def test_rank_categorical(self):
# GH issue #15420 rank incorrectly orders ordered categories
# Test ascending/descending ranking for ordered categoricals
exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
ordered = Series(
["first", "second", "third", "fourth", "fifth", "sixth"]
).astype(
CategoricalDtype(
categories=["first", "second", "third", "fourth", "fifth", "sixth"],
ordered=True,
)
)
tm.assert_series_equal(ordered.rank(), exp)
tm.assert_series_equal(ordered.rank(ascending=False), exp_desc)
# Unordered categoricals should be ranked as objects
unordered = Series(
["first", "second", "third", "fourth", "fifth", "sixth"]
).astype(
CategoricalDtype(
categories=["first", "second", "third", "fourth", "fifth", "sixth"],
ordered=False,
)
)
exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0])
res = unordered.rank()
tm.assert_series_equal(res, exp_unordered)
unordered1 = Series([1, 2, 3, 4, 5, 6]).astype(
CategoricalDtype([1, 2, 3, 4, 5, 6], False)
)
exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
res1 = unordered1.rank()
tm.assert_series_equal(res1, exp_unordered1)
# Test na_option for rank data
na_ser = Series(
["first", "second", "third", "fourth", "fifth", "sixth", np.nan]
).astype(
CategoricalDtype(
["first", "second", "third", "fourth", "fifth", "sixth", "seventh"],
True,
)
)
exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0])
exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan])
tm.assert_series_equal(na_ser.rank(na_option="top"), exp_top)
tm.assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot)
tm.assert_series_equal(na_ser.rank(na_option="keep"), exp_keep)
# Test na_option for rank data with ascending False
exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0])
exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.nan])
tm.assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top)
tm.assert_series_equal(
na_ser.rank(na_option="bottom", ascending=False), exp_bot
)
tm.assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep)
# Test invalid values for na_option
msg = "na_option must be one of 'keep', 'top', or 'bottom'"
with pytest.raises(ValueError, match=msg):
na_ser.rank(na_option="bad", ascending=False)
# invalid type
with pytest.raises(ValueError, match=msg):
na_ser.rank(na_option=True, ascending=False)
# Test with pct=True
na_ser = Series(["first", "second", "third", "fourth", np.nan]).astype(
CategoricalDtype(["first", "second", "third", "fourth"], True)
)
exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2])
exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0])
exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.nan])
tm.assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top)
tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot)
tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep)
def test_rank_signature(self):
s = Series([0, 1])
s.rank(method="average")
msg = "No axis named average for object type Series"
with pytest.raises(ValueError, match=msg):
s.rank("average")
def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
method, exp = results
if (
dtype == "int64"
or dtype == "Int64"
or (not using_infer_string and dtype == "str")
):
pytest.skip("int64/str does not support NaN")
ser = ser if dtype is None else ser.astype(dtype)
result = ser.rank(method=method)
tm.assert_series_equal(result, Series(exp, dtype=expected_dtype(dtype, method)))
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
@pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
@pytest.mark.parametrize(
"dtype, na_value, pos_inf, neg_inf",
[
("object", None, Infinity(), NegInfinity()),
("float64", np.nan, np.inf, -np.inf),
("Float64", NA, np.inf, -np.inf),
pytest.param(
"float64[pyarrow]",
NA,
np.inf,
-np.inf,
marks=td.skip_if_no("pyarrow"),
),
],
)
def test_rank_tie_methods_on_infs_nans(
self, method, na_option, ascending, dtype, na_value, pos_inf, neg_inf
):
pytest.importorskip("scipy")
if dtype == "float64[pyarrow]":
if method == "average":
exp_dtype = "float64[pyarrow]"
else:
exp_dtype = "uint64[pyarrow]"
else:
exp_dtype = "float64"
chunk = 3
in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
iseries = Series(in_arr, dtype=dtype)
exp_ranks = {
"average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
"min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
"max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
"first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
"dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]),
}
ranks = exp_ranks[method]
if na_option == "top":
order = [ranks[1], ranks[0], ranks[2]]
elif na_option == "bottom":
order = [ranks[0], ranks[2], ranks[1]]
else:
order = [ranks[0], [np.nan] * chunk, ranks[1]]
expected = order if ascending else order[::-1]
expected = list(chain.from_iterable(expected))
result = iseries.rank(method=method, na_option=na_option, ascending=ascending)
tm.assert_series_equal(result, Series(expected, dtype=exp_dtype))
def test_rank_desc_mix_nans_infs(self):
# GH 19538
# check descending ranking when mix nans and infs
iseries = Series([1, np.nan, np.inf, -np.inf, 25])
result = iseries.rank(ascending=False)
exp = Series([3, np.nan, 1, 4, 2], dtype="float64")
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
@pytest.mark.parametrize(
"op, value",
[
[operator.add, 0],
[operator.add, 1e6],
[operator.mul, 1e-6],
],
)
def test_rank_methods_series(self, method, op, value):
sp_stats = pytest.importorskip("scipy.stats")
xs = np.random.default_rng(2).standard_normal(9)
xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates
np.random.default_rng(2).shuffle(xs)
index = [chr(ord("a") + i) for i in range(len(xs))]
vals = op(xs, value)
ts = Series(vals, index=index)
result = ts.rank(method=method)
sprank = sp_stats.rankdata(vals, method if method != "first" else "ordinal")
expected = Series(sprank, index=index).astype("float64")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1]),
([2], [1]),
([0], [1]),
([2, 2], [1, 1]),
([1, 2, 3], [1, 2, 3]),
([4, 2, 1], [3, 2, 1]),
([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]),
],
)
def test_rank_dense_method(self, dtype, ser, exp):
if ser[0] < 0 and dtype.startswith("str"):
exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="dense")
expected = Series(exp).astype(expected_dtype(dtype, "dense"))
tm.assert_series_equal(result, expected)
def test_rank_descending(self, ser, results, dtype, using_infer_string):
method, _ = results
if dtype == "int64" or (not using_infer_string and dtype == "str"):
s = ser.dropna()
else:
s = ser.astype(dtype)
res = s.rank(ascending=False)
if dtype.startswith("str"):
expected = (s.astype("float64").max() - s.astype("float64")).rank()
else:
expected = (s.max() - s).rank()
tm.assert_series_equal(res, expected.astype(expected_dtype(dtype, "average")))
if dtype.startswith("str"):
expected = (s.astype("float64").max() - s.astype("float64")).rank(
method=method
)
else:
expected = (s.max() - s).rank(method=method)
res2 = s.rank(method=method, ascending=False)
tm.assert_series_equal(res2, expected.astype(expected_dtype(dtype, method)))
def test_rank_int(self, ser, results):
method, exp = results
s = ser.dropna().astype("i8")
result = s.rank(method=method)
expected = Series(exp).dropna()
expected.index = result.index
tm.assert_series_equal(result, expected)
def test_rank_object_bug(self):
# GH 13445
# smoke tests
Series([np.nan] * 32).astype(object).rank(ascending=True)
Series([np.nan] * 32).astype(object).rank(ascending=False)
def test_rank_modify_inplace(self):
# GH 18521
# Check rank does not mutate series
s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT])
expected = s.copy()
s.rank()
result = s
tm.assert_series_equal(result, expected)
def test_rank_ea_small_values(self):
# GH#52471
ser = Series(
[5.4954145e29, -9.791984e-21, 9.3715776e-26, NA, 1.8790257e-28],
dtype="Float64",
)
result = ser.rank(method="min")
expected = Series([4, 1, 3, np.nan, 2])
tm.assert_series_equal(result, expected)
# GH15630, pct should be on 100% basis when method='dense'
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0, 1.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]),
([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_dense_pct(dtype, ser, exp):
if ser[0] < 0 and dtype.startswith("str"):
exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="dense", pct=True)
expected = Series(exp).astype(expected_dtype(dtype, "dense", pct=True))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0 / 2, 1.0 / 2]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_min_pct(dtype, ser, exp):
if ser[0] < 0 and dtype.startswith("str"):
exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="min", pct=True)
expected = Series(exp).astype(expected_dtype(dtype, "min", pct=True))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0, 1.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_max_pct(dtype, ser, exp):
if ser[0] < 0 and dtype.startswith("str"):
exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="max", pct=True)
expected = Series(exp).astype(expected_dtype(dtype, "max", pct=True))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.5 / 2, 1.5 / 2]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_average_pct(dtype, ser, exp):
if ser[0] < 0 and dtype.startswith("str"):
exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="average", pct=True)
expected = Series(exp).astype(expected_dtype(dtype, "average", pct=True))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, exp",
[
([1], [1.0]),
([1, 2], [1.0 / 2, 2.0 / 2]),
([2, 2], [1.0 / 2, 2.0 / 2.0]),
([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]),
([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]),
([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
],
)
def test_rank_first_pct(dtype, ser, exp):
if ser[0] < 0 and dtype.startswith("str"):
exp = exp[::-1]
s = Series(ser).astype(dtype)
result = s.rank(method="first", pct=True)
expected = Series(exp).astype(expected_dtype(dtype, "first", pct=True))
tm.assert_series_equal(result, expected)
@pytest.mark.single_cpu
def test_pct_max_many_rows():
# GH 18271
s = Series(np.arange(2**24 + 1))
result = s.rank(pct=True).max()
assert result == 1

View File

@ -0,0 +1,443 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
NA,
Categorical,
Float64Dtype,
Index,
MultiIndex,
NaT,
Period,
PeriodIndex,
RangeIndex,
Series,
Timedelta,
Timestamp,
date_range,
isna,
)
import pandas._testing as tm
def test_reindex(datetime_series, string_series):
identity = string_series.reindex(string_series.index)
assert tm.shares_memory(string_series.index, identity.index)
assert identity.index.is_(string_series.index)
assert identity.index.identical(string_series.index)
subIndex = string_series.index[10:20]
subSeries = string_series.reindex(subIndex)
for idx, val in subSeries.items():
assert val == string_series[idx]
subIndex2 = datetime_series.index[10:20]
subTS = datetime_series.reindex(subIndex2)
for idx, val in subTS.items():
assert val == datetime_series[idx]
stuffSeries = datetime_series.reindex(subIndex)
assert np.isnan(stuffSeries).all()
# This is extremely important for the Cython code to not screw up
nonContigIndex = datetime_series.index[::2]
subNonContig = datetime_series.reindex(nonContigIndex)
for idx, val in subNonContig.items():
assert val == datetime_series[idx]
# return a copy the same index here
result = datetime_series.reindex()
assert result is not datetime_series
def test_reindex_nan():
ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8])
i, j = [np.nan, 1, np.nan, 8, 4, np.nan], [2, 0, 2, 3, 1, 2]
tm.assert_series_equal(ts.reindex(i), ts.iloc[j])
ts.index = ts.index.astype("object")
# reindex coerces index.dtype to float, loc/iloc doesn't
tm.assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
def test_reindex_series_add_nat():
rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
series = Series(rng)
result = series.reindex(range(15))
assert np.issubdtype(result.dtype, np.dtype("M8[ns]"))
mask = result.isna()
assert mask[-5:].all()
assert not mask[:-5].any()
def test_reindex_with_datetimes():
rng = date_range("1/1/2000", periods=20)
ts = Series(np.random.default_rng(2).standard_normal(20), index=rng)
result = ts.reindex(list(ts.index[5:10]))
expected = ts[5:10]
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
result = ts[list(ts.index[5:10])]
tm.assert_series_equal(result, expected)
def test_reindex_corner(datetime_series):
# (don't forget to fix this) I think it's fixed
empty = Series(index=[])
empty.reindex(datetime_series.index, method="pad") # it works
# corner case: pad empty series
reindexed = empty.reindex(datetime_series.index, method="pad")
# pass non-Index
reindexed = datetime_series.reindex(list(datetime_series.index))
datetime_series.index = datetime_series.index._with_freq(None)
tm.assert_series_equal(datetime_series, reindexed)
# bad fill method
ts = datetime_series[::2]
msg = (
r"Invalid fill method\. Expecting pad \(ffill\), backfill "
r"\(bfill\) or nearest\. Got foo"
)
with pytest.raises(ValueError, match=msg):
ts.reindex(datetime_series.index, method="foo")
def test_reindex_pad():
s = Series(np.arange(10), dtype="int64")
s2 = s[::2]
reindexed = s2.reindex(s.index, method="pad")
reindexed2 = s2.reindex(s.index, method="ffill")
tm.assert_series_equal(reindexed, reindexed2)
expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8])
tm.assert_series_equal(reindexed, expected)
def test_reindex_pad2():
# GH4604
s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
new_index = ["a", "g", "c", "f"]
expected = Series([1, 1, 3, 3], index=new_index)
# this changes dtype because the ffill happens after
result = s.reindex(new_index).ffill()
tm.assert_series_equal(result, expected.astype("float64"))
msg = "The 'downcast' keyword in ffill is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.reindex(new_index).ffill(downcast="infer")
tm.assert_series_equal(result, expected)
expected = Series([1, 5, 3, 5], index=new_index)
result = s.reindex(new_index, method="ffill")
tm.assert_series_equal(result, expected)
def test_reindex_inference():
# inference of new dtype
s = Series([True, False, False, True], index=list("abcd"))
new_index = "agc"
msg = "Downcasting object dtype arrays on"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.reindex(list(new_index)).ffill()
expected = Series([True, True, False], index=list(new_index))
tm.assert_series_equal(result, expected)
def test_reindex_downcasting():
# GH4618 shifted series downcasting
s = Series(False, index=range(5))
msg = "Downcasting object dtype arrays on"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.shift(1).bfill()
expected = Series(False, index=range(5))
tm.assert_series_equal(result, expected)
def test_reindex_nearest():
s = Series(np.arange(10, dtype="int64"))
target = [0.1, 0.9, 1.5, 2.0]
result = s.reindex(target, method="nearest")
expected = Series(np.around(target).astype("int64"), target)
tm.assert_series_equal(expected, result)
result = s.reindex(target, method="nearest", tolerance=0.2)
expected = Series([0, 1, np.nan, 2], target)
tm.assert_series_equal(expected, result)
result = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3])
expected = Series([0, np.nan, np.nan, 2], target)
tm.assert_series_equal(expected, result)
def test_reindex_int(datetime_series):
ts = datetime_series[::2]
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
# this should work fine
reindexed_int = int_ts.reindex(datetime_series.index)
# if NaNs introduced
assert reindexed_int.dtype == np.float64
# NO NaNs introduced
reindexed_int = int_ts.reindex(int_ts.index[::2])
assert reindexed_int.dtype == np.dtype(int)
def test_reindex_bool(datetime_series):
# A series other than float, int, string, or object
ts = datetime_series[::2]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
# this should work fine
reindexed_bool = bool_ts.reindex(datetime_series.index)
# if NaNs introduced
assert reindexed_bool.dtype == np.object_
# NO NaNs introduced
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
assert reindexed_bool.dtype == np.bool_
def test_reindex_bool_pad(datetime_series):
# fail
ts = datetime_series[5:]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
filled_bool = bool_ts.reindex(datetime_series.index, method="pad")
assert isna(filled_bool[:5]).all()
def test_reindex_categorical():
index = date_range("20000101", periods=3)
# reindexing to an invalid Categorical
s = Series(["a", "b", "c"], dtype="category")
result = s.reindex(index)
expected = Series(
Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
)
expected.index = index
tm.assert_series_equal(result, expected)
# partial reindexing
expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"]))
expected.index = [1, 2]
result = s.reindex([1, 2])
tm.assert_series_equal(result, expected)
expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"]))
expected.index = [2, 3]
result = s.reindex([2, 3])
tm.assert_series_equal(result, expected)
def test_reindex_astype_order_consistency():
# GH#17444
ser = Series([1, 2, 3], index=[2, 0, 1])
new_index = [0, 1, 2]
temp_dtype = "category"
new_dtype = str
result = ser.reindex(new_index).astype(temp_dtype).astype(new_dtype)
expected = ser.astype(temp_dtype).reindex(new_index).astype(new_dtype)
tm.assert_series_equal(result, expected)
def test_reindex_fill_value():
# -----------------------------------------------------------
# floats
floats = Series([1.0, 2.0, 3.0])
result = floats.reindex([1, 2, 3])
expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
result = floats.reindex([1, 2, 3], fill_value=0)
expected = Series([2.0, 3.0, 0], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
# -----------------------------------------------------------
# ints
ints = Series([1, 2, 3])
result = ints.reindex([1, 2, 3])
expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
# don't upcast
result = ints.reindex([1, 2, 3], fill_value=0)
expected = Series([2, 3, 0], index=[1, 2, 3])
assert issubclass(result.dtype.type, np.integer)
tm.assert_series_equal(result, expected)
# -----------------------------------------------------------
# objects
objects = Series([1, 2, 3], dtype=object)
result = objects.reindex([1, 2, 3])
expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
tm.assert_series_equal(result, expected)
result = objects.reindex([1, 2, 3], fill_value="foo")
expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object)
tm.assert_series_equal(result, expected)
# ------------------------------------------------------------
# bools
bools = Series([True, False, True])
result = bools.reindex([1, 2, 3])
expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
tm.assert_series_equal(result, expected)
result = bools.reindex([1, 2, 3], fill_value=False)
expected = Series([False, True, False], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
@td.skip_array_manager_not_yet_implemented
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
@pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)])
def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager):
# https://github.com/pandas-dev/pandas/issues/42921
if dtype == "timedelta64[ns]" and fill_value == Timedelta(0):
# use the scalar that is not compatible with the dtype for this test
fill_value = Timestamp(0)
ser = Series([NaT], dtype=dtype)
result = ser.reindex([0, 1], fill_value=fill_value)
expected = Series([NaT, fill_value], index=[0, 1], dtype=object)
tm.assert_series_equal(result, expected)
def test_reindex_datetimeindexes_tz_naive_and_aware():
# GH 8306
idx = date_range("20131101", tz="America/Chicago", periods=7)
newidx = date_range("20131103", periods=10, freq="h")
s = Series(range(7), index=idx)
msg = (
r"Cannot compare dtypes datetime64\[ns, America/Chicago\] "
r"and datetime64\[ns\]"
)
with pytest.raises(TypeError, match=msg):
s.reindex(newidx, method="ffill")
def test_reindex_empty_series_tz_dtype():
# GH 20869
result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1])
expected = Series([NaT] * 2, dtype="datetime64[ns, UTC]")
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"p_values, o_values, values, expected_values",
[
(
[Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
[Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"],
[1.0, 1.0],
[1.0, 1.0, np.nan],
),
(
[Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
[Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
[1.0, 1.0],
[1.0, 1.0],
),
],
)
def test_reindex_periodindex_with_object(p_values, o_values, values, expected_values):
# GH#28337
period_index = PeriodIndex(p_values)
object_index = Index(o_values)
ser = Series(values, index=period_index)
result = ser.reindex(object_index)
expected = Series(expected_values, index=object_index)
tm.assert_series_equal(result, expected)
def test_reindex_too_many_args():
# GH 40980
ser = Series([1, 2])
msg = r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given"
with pytest.raises(TypeError, match=msg):
ser.reindex([2, 3], False)
def test_reindex_double_index():
# GH 40980
ser = Series([1, 2])
msg = r"reindex\(\) got multiple values for argument 'index'"
with pytest.raises(TypeError, match=msg):
ser.reindex([2, 3], index=[3, 4])
def test_reindex_no_posargs():
# GH 40980
ser = Series([1, 2])
result = ser.reindex(index=[1, 0])
expected = Series([2, 1], index=[1, 0])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
# GH41170
ser = Series(
range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object"
)
result = ser.reindex(np.array(["b"]), level=0)
expected = Series(
index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object"
)
tm.assert_series_equal(result, expected)
def test_reindex_missing_category():
# GH#18185
ser = Series([1, 2, 3, 1], dtype="category")
msg = r"Cannot setitem on a Categorical with a new category \(-1\)"
with pytest.raises(TypeError, match=msg):
ser.reindex([1, 2, 3, 4, 5], fill_value=-1)
def test_reindexing_with_float64_NA_log():
# GH 47055
s = Series([1.0, NA], dtype=Float64Dtype())
s_reindex = s.reindex(range(3))
result = s_reindex.values._data
expected = np.array([1, np.nan, np.nan])
tm.assert_numpy_array_equal(result, expected)
with tm.assert_produces_warning(None):
result_log = np.log(s_reindex)
expected_log = Series([0, np.nan, np.nan], dtype=Float64Dtype())
tm.assert_series_equal(result_log, expected_log)
@pytest.mark.parametrize("dtype", ["timedelta64", "datetime64"])
def test_reindex_expand_nonnano_nat(dtype):
# GH 53497
ser = Series(np.array([1], dtype=f"{dtype}[s]"))
result = ser.reindex(RangeIndex(2))
expected = Series(
np.array([1, getattr(np, dtype)("nat", "s")], dtype=f"{dtype}[s]")
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,41 @@
from datetime import datetime
import numpy as np
from pandas import Series
import pandas._testing as tm
def test_reindex_like(datetime_series):
other = datetime_series[::2]
tm.assert_series_equal(
datetime_series.reindex(other.index), datetime_series.reindex_like(other)
)
# GH#7179
day1 = datetime(2013, 3, 5)
day2 = datetime(2013, 5, 5)
day3 = datetime(2014, 3, 5)
series1 = Series([5, None, None], [day1, day2, day3])
series2 = Series([None, None], [day1, day3])
result = series1.reindex_like(series2, method="pad")
expected = Series([5, np.nan], index=[day1, day3])
tm.assert_series_equal(result, expected)
def test_reindex_like_nearest():
ser = Series(np.arange(10, dtype="int64"))
target = [0.1, 0.9, 1.5, 2.0]
other = ser.reindex(target, method="nearest")
expected = Series(np.around(target).astype("int64"), target)
result = ser.reindex_like(other, method="nearest")
tm.assert_series_equal(expected, result)
result = ser.reindex_like(other, method="nearest", tolerance=1)
tm.assert_series_equal(expected, result)
result = ser.reindex_like(other, method="nearest", tolerance=[1, 2, 3, 4])
tm.assert_series_equal(expected, result)

View File

@ -0,0 +1,184 @@
from datetime import datetime
import re
import numpy as np
import pytest
from pandas import (
Index,
MultiIndex,
Series,
array,
)
import pandas._testing as tm
class TestRename:
def test_rename(self, datetime_series):
ts = datetime_series
renamer = lambda x: x.strftime("%Y%m%d")
renamed = ts.rename(renamer)
assert renamed.index[0] == renamer(ts.index[0])
# dict
rename_dict = dict(zip(ts.index, renamed.index))
renamed2 = ts.rename(rename_dict)
tm.assert_series_equal(renamed, renamed2)
def test_rename_partial_dict(self):
# partial dict
ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
renamed = ser.rename({"b": "foo", "d": "bar"})
tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))
def test_rename_retain_index_name(self):
# index with name
renamer = Series(
np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
)
renamed = renamer.rename({})
assert renamed.index.name == renamer.index.name
def test_rename_by_series(self):
ser = Series(range(5), name="foo")
renamer = Series({1: 10, 2: 20})
result = ser.rename(renamer)
expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
tm.assert_series_equal(result, expected)
def test_rename_set_name(self, using_infer_string):
ser = Series(range(4), index=list("abcd"))
for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
result = ser.rename(name)
assert result.name == name
if using_infer_string:
tm.assert_extension_array_equal(result.index.values, ser.index.values)
else:
tm.assert_numpy_array_equal(result.index.values, ser.index.values)
assert ser.name is None
def test_rename_set_name_inplace(self, using_infer_string):
ser = Series(range(3), index=list("abc"))
for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
ser.rename(name, inplace=True)
assert ser.name == name
exp = np.array(["a", "b", "c"], dtype=np.object_)
if using_infer_string:
exp = array(exp, dtype="str")
tm.assert_extension_array_equal(ser.index.values, exp)
else:
tm.assert_numpy_array_equal(ser.index.values, exp)
def test_rename_axis_supported(self):
# Supporting axis for compatibility, detailed in GH-18589
ser = Series(range(5))
ser.rename({}, axis=0)
ser.rename({}, axis="index")
with pytest.raises(ValueError, match="No axis named 5"):
ser.rename({}, axis=5)
def test_rename_inplace(self, datetime_series):
renamer = lambda x: x.strftime("%Y%m%d")
expected = renamer(datetime_series.index[0])
datetime_series.rename(renamer, inplace=True)
assert datetime_series.index[0] == expected
def test_rename_with_custom_indexer(self):
# GH 27814
class MyIndexer:
pass
ix = MyIndexer()
ser = Series([1, 2, 3]).rename(ix)
assert ser.name is ix
def test_rename_with_custom_indexer_inplace(self):
# GH 27814
class MyIndexer:
pass
ix = MyIndexer()
ser = Series([1, 2, 3])
ser.rename(ix, inplace=True)
assert ser.name is ix
def test_rename_callable(self):
# GH 17407
ser = Series(range(1, 6), index=Index(range(2, 7), name="IntIndex"))
result = ser.rename(str)
expected = ser.rename(lambda i: str(i))
tm.assert_series_equal(result, expected)
assert result.name == expected.name
def test_rename_none(self):
# GH 40977
ser = Series([1, 2], name="foo")
result = ser.rename(None)
expected = Series([1, 2])
tm.assert_series_equal(result, expected)
def test_rename_series_with_multiindex(self):
# issue #43659
arrays = [
["bar", "baz", "baz", "foo", "qux"],
["one", "one", "two", "two", "one"],
]
index = MultiIndex.from_arrays(arrays, names=["first", "second"])
ser = Series(np.ones(5), index=index)
result = ser.rename(index={"one": "yes"}, level="second", errors="raise")
arrays_expected = [
["bar", "baz", "baz", "foo", "qux"],
["yes", "yes", "two", "two", "yes"],
]
index_expected = MultiIndex.from_arrays(
arrays_expected, names=["first", "second"]
)
series_expected = Series(np.ones(5), index=index_expected)
tm.assert_series_equal(result, series_expected)
def test_rename_series_with_multiindex_keeps_ea_dtypes(self):
# GH21055
arrays = [
Index([1, 2, 3], dtype="Int64").astype("category"),
Index([1, 2, 3], dtype="Int64"),
]
mi = MultiIndex.from_arrays(arrays, names=["A", "B"])
ser = Series(1, index=mi)
result = ser.rename({1: 4}, level=1)
arrays_expected = [
Index([1, 2, 3], dtype="Int64").astype("category"),
Index([4, 2, 3], dtype="Int64"),
]
mi_expected = MultiIndex.from_arrays(arrays_expected, names=["A", "B"])
expected = Series(1, index=mi_expected)
tm.assert_series_equal(result, expected)
def test_rename_error_arg(self):
# GH 46889
ser = Series(["foo", "bar"])
match = re.escape("[2] not found in axis")
with pytest.raises(KeyError, match=match):
ser.rename({2: 9}, errors="raise")
def test_rename_copy_false(self, using_copy_on_write, warn_copy_on_write):
# GH 46889
ser = Series(["foo", "bar"])
ser_orig = ser.copy()
shallow_copy = ser.rename({1: 9}, copy=False)
with tm.assert_cow_warning(warn_copy_on_write):
ser[0] = "foobar"
if using_copy_on_write:
assert ser_orig[0] == shallow_copy[0]
assert ser_orig[1] == shallow_copy[9]
else:
assert ser[0] == shallow_copy[0]
assert ser[1] == shallow_copy[9]

View File

@ -0,0 +1,47 @@
import pytest
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
class TestSeriesRenameAxis:
def test_rename_axis_mapper(self):
# GH 19978
mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"])
ser = Series(list(range(len(mi))), index=mi)
result = ser.rename_axis(index={"ll": "foo"})
assert result.index.names == ["foo", "nn"]
result = ser.rename_axis(index=str.upper, axis=0)
assert result.index.names == ["LL", "NN"]
result = ser.rename_axis(index=["foo", "goo"])
assert result.index.names == ["foo", "goo"]
with pytest.raises(TypeError, match="unexpected"):
ser.rename_axis(columns="wrong")
def test_rename_axis_inplace(self, datetime_series):
# GH 15704
expected = datetime_series.rename_axis("foo")
result = datetime_series
no_return = result.rename_axis("foo", inplace=True)
assert no_return is None
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}])
def test_rename_axis_none(self, kwargs):
# GH 25034
index = Index(list("abc"), name="foo")
ser = Series([1, 2, 3], index=index)
result = ser.rename_axis(**kwargs)
expected_index = index.rename(None) if kwargs else index
expected = Series([1, 2, 3], index=expected_index)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,40 @@
import numpy as np
import pytest
from pandas import (
MultiIndex,
Series,
)
import pandas._testing as tm
class TestRepeat:
def test_repeat(self):
ser = Series(np.random.default_rng(2).standard_normal(3), index=["a", "b", "c"])
reps = ser.repeat(5)
exp = Series(ser.values.repeat(5), index=ser.index.values.repeat(5))
tm.assert_series_equal(reps, exp)
to_rep = [2, 3, 4]
reps = ser.repeat(to_rep)
exp = Series(ser.values.repeat(to_rep), index=ser.index.values.repeat(to_rep))
tm.assert_series_equal(reps, exp)
def test_numpy_repeat(self):
ser = Series(np.arange(3), name="x")
expected = Series(
ser.values.repeat(2), name="x", index=ser.index.values.repeat(2)
)
tm.assert_series_equal(np.repeat(ser, 2), expected)
msg = "the 'axis' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.repeat(ser, 2, axis=0)
def test_repeat_with_multiindex(self):
# GH#9361, fixed by GH#7891
m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)])
data = ["a", "b", "c", "d"]
m_df = Series(data, index=m_idx)
assert m_df.repeat(3).shape == (3 * len(data),)

View File

@ -0,0 +1,819 @@
import re
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import IntervalArray
class TestSeriesReplace:
def test_replace_explicit_none(self):
# GH#36984 if the user explicitly passes value=None, give it to them
ser = pd.Series([0, 0, ""], dtype=object)
result = ser.replace("", None)
expected = pd.Series([0, 0, None], dtype=object)
tm.assert_series_equal(result, expected)
# Cast column 2 to object to avoid implicit cast when setting entry to ""
df = pd.DataFrame(np.zeros((3, 3))).astype({2: object})
df.iloc[2, 2] = ""
result = df.replace("", None)
expected = pd.DataFrame(
{
0: np.zeros(3),
1: np.zeros(3),
2: np.array([0.0, 0.0, None], dtype=object),
}
)
assert expected.iloc[2, 2] is None
tm.assert_frame_equal(result, expected)
# GH#19998 same thing with object dtype
ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
result = ser.replace("a", None)
expected = pd.Series([10, 20, 30, None, None, "b", None])
assert expected.iloc[-1] is None
tm.assert_series_equal(result, expected)
def test_replace_noop_doesnt_downcast(self):
# GH#44498
ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
res = ser.replace({np.nan: None}) # should be a no-op
tm.assert_series_equal(res, ser)
assert res.dtype == object
# same thing but different calling convention
res = ser.replace(np.nan, None)
tm.assert_series_equal(res, ser)
assert res.dtype == object
def test_replace(self):
N = 50
ser = pd.Series(np.random.default_rng(2).standard_normal(N))
ser[0:4] = np.nan
ser[6:10] = 0
# replace list with a single value
return_value = ser.replace([np.nan], -1, inplace=True)
assert return_value is None
exp = ser.fillna(-1)
tm.assert_series_equal(ser, exp)
rs = ser.replace(0.0, np.nan)
ser[ser == 0.0] = np.nan
tm.assert_series_equal(rs, ser)
ser = pd.Series(
np.fabs(np.random.default_rng(2).standard_normal(N)),
pd.date_range("2020-01-01", periods=N),
dtype=object,
)
ser[:5] = np.nan
ser[6:10] = "foo"
ser[20:30] = "bar"
# replace list with a single value
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = ser.replace([np.nan, "foo", "bar"], -1)
assert (rs[:5] == -1).all()
assert (rs[6:10] == -1).all()
assert (rs[20:30] == -1).all()
assert (pd.isna(ser[:5])).all()
# replace with different values
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
assert (rs[:5] == -1).all()
assert (rs[6:10] == -2).all()
assert (rs[20:30] == -3).all()
assert (pd.isna(ser[:5])).all()
# replace with different values with 2 lists
with tm.assert_produces_warning(FutureWarning, match=msg):
rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
tm.assert_series_equal(rs, rs2)
# replace inplace
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
assert return_value is None
assert (ser[:5] == -1).all()
assert (ser[6:10] == -1).all()
assert (ser[20:30] == -1).all()
def test_replace_nan_with_inf(self):
ser = pd.Series([np.nan, 0, np.inf])
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
filled = ser.copy()
filled[4] = 0
tm.assert_series_equal(ser.replace(np.inf, 0), filled)
def test_replace_listlike_value_listlike_target(self, datetime_series):
ser = pd.Series(datetime_series.index)
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
# malformed
msg = r"Replacement lists must match in length\. Expecting 3 got 2"
with pytest.raises(ValueError, match=msg):
ser.replace([1, 2, 3], [np.nan, 0])
# ser is dt64 so can't hold 1 or 2, so this replace is a no-op
result = ser.replace([1, 2], [np.nan, 0])
tm.assert_series_equal(result, ser)
ser = pd.Series([0, 1, 2, 3, 4])
result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))
def test_replace_gh5319(self):
# API change from 0.12?
# GH 5319
ser = pd.Series([0, np.nan, 2, 3, 4])
expected = ser.ffill()
msg = (
"Series.replace without 'value' and with non-dict-like "
"'to_replace' is deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace([np.nan])
tm.assert_series_equal(result, expected)
ser = pd.Series([0, np.nan, 2, 3, 4])
expected = ser.ffill()
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace(np.nan)
tm.assert_series_equal(result, expected)
def test_replace_datetime64(self):
# GH 5797
ser = pd.Series(pd.date_range("20130101", periods=5))
expected = ser.copy()
expected.loc[2] = pd.Timestamp("20120101")
result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")})
tm.assert_series_equal(result, expected)
result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101"))
tm.assert_series_equal(result, expected)
def test_replace_nat_with_tz(self):
# GH 11792: Test with replacing NaT in a list with tz data
ts = pd.Timestamp("2015/01/01", tz="UTC")
s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
tm.assert_series_equal(expected, result)
def test_replace_timedelta_td64(self):
tdi = pd.timedelta_range(0, periods=5)
ser = pd.Series(tdi)
# Using a single dict argument means we go through replace_list
result = ser.replace({ser[1]: ser[3]})
expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]])
tm.assert_series_equal(result, expected)
def test_replace_with_single_list(self):
ser = pd.Series([0, 1, 2, 3, 4])
msg2 = (
"Series.replace without 'value' and with non-dict-like "
"'to_replace' is deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = ser.replace([1, 2, 3])
tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))
s = ser.copy()
with tm.assert_produces_warning(FutureWarning, match=msg2):
return_value = s.replace([1, 2, 3], inplace=True)
assert return_value is None
tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))
# make sure things don't get corrupted when fillna call fails
s = ser.copy()
msg = (
r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
r"\(bfill\)\. Got crash_cymbal"
)
msg3 = "The 'method' keyword in Series.replace is deprecated"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg3):
return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
assert return_value is None
tm.assert_series_equal(s, ser)
def test_replace_mixed_types(self):
ser = pd.Series(np.arange(5), dtype="int64")
def check_replace(to_rep, val, expected):
sc = ser.copy()
result = ser.replace(to_rep, val)
return_value = sc.replace(to_rep, val, inplace=True)
assert return_value is None
tm.assert_series_equal(expected, result)
tm.assert_series_equal(expected, sc)
# 3.0 can still be held in our int64 series, so we do not upcast GH#44940
tr, v = [3], [3.0]
check_replace(tr, v, ser)
# Note this matches what we get with the scalars 3 and 3.0
check_replace(tr[0], v[0], ser)
# MUST upcast to float
e = pd.Series([0, 1, 2, 3.5, 4])
tr, v = [3], [3.5]
check_replace(tr, v, e)
# casts to object
e = pd.Series([0, 1, 2, 3.5, "a"])
tr, v = [3, 4], [3.5, "a"]
check_replace(tr, v, e)
# again casts to object
e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
check_replace(tr, v, e)
# casts to object
e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
tr, v = [3, 4], [3.5, True]
check_replace(tr, v, e)
# test an object with dates + floats + integers + strings
dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D"))
result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
tm.assert_series_equal(result, expected)
def test_replace_bool_with_string_no_op(self):
s = pd.Series([True, False, True])
result = s.replace("fun", "in-the-sun")
tm.assert_series_equal(s, result)
def test_replace_bool_with_string(self):
# nonexistent elements
s = pd.Series([True, False, True])
result = s.replace(True, "2u")
expected = pd.Series(["2u", False, "2u"])
tm.assert_series_equal(expected, result)
def test_replace_bool_with_bool(self):
s = pd.Series([True, False, True])
result = s.replace(True, False)
expected = pd.Series([False] * len(s))
tm.assert_series_equal(expected, result)
def test_replace_with_dict_with_bool_keys(self):
s = pd.Series([True, False, True])
result = s.replace({"asdf": "asdb", True: "yes"})
expected = pd.Series(["yes", False, "yes"])
tm.assert_series_equal(result, expected)
def test_replace_Int_with_na(self, any_int_ea_dtype):
# GH 38267
result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA)
expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
tm.assert_series_equal(result, expected)
result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA)
result.replace(1, pd.NA, inplace=True)
tm.assert_series_equal(result, expected)
def test_replace2(self):
N = 50
ser = pd.Series(
np.fabs(np.random.default_rng(2).standard_normal(N)),
pd.date_range("2020-01-01", periods=N),
dtype=object,
)
ser[:5] = np.nan
ser[6:10] = "foo"
ser[20:30] = "bar"
# replace list with a single value
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = ser.replace([np.nan, "foo", "bar"], -1)
assert (rs[:5] == -1).all()
assert (rs[6:10] == -1).all()
assert (rs[20:30] == -1).all()
assert (pd.isna(ser[:5])).all()
# replace with different values
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
assert (rs[:5] == -1).all()
assert (rs[6:10] == -2).all()
assert (rs[20:30] == -3).all()
assert (pd.isna(ser[:5])).all()
# replace with different values with 2 lists
with tm.assert_produces_warning(FutureWarning, match=msg):
rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
tm.assert_series_equal(rs, rs2)
# replace inplace
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
assert return_value is None
assert (ser[:5] == -1).all()
assert (ser[6:10] == -1).all()
assert (ser[20:30] == -1).all()
@pytest.mark.parametrize("inplace", [True, False])
def test_replace_cascade(self, inplace):
# Test that replaced values are not replaced again
# GH #50778
ser = pd.Series([1, 2, 3])
expected = pd.Series([2, 3, 4])
res = ser.replace([1, 2, 3], [2, 3, 4], inplace=inplace)
if inplace:
tm.assert_series_equal(ser, expected)
else:
tm.assert_series_equal(res, expected)
def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
# GH 32621, GH#44940
ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
result = ser.replace({"one": "1", "two": "2"})
tm.assert_series_equal(expected, result)
def test_replace_with_empty_dictlike(self):
# GH 15289
s = pd.Series(list("abcd"))
tm.assert_series_equal(s, s.replace({}))
empty_series = pd.Series([])
tm.assert_series_equal(s, s.replace(empty_series))
def test_replace_string_with_number(self):
# GH 15743
s = pd.Series([1, 2, 3])
result = s.replace("2", np.nan)
expected = pd.Series([1, 2, 3])
tm.assert_series_equal(expected, result)
def test_replace_replacer_equals_replacement(self):
# GH 20656
# make sure all replacers are matching against original values
s = pd.Series(["a", "b"])
expected = pd.Series(["b", "a"])
result = s.replace({"a": "b", "b": "a"})
tm.assert_series_equal(expected, result)
def test_replace_unicode_with_number(self):
# GH 15743
s = pd.Series([1, 2, 3])
result = s.replace("2", np.nan)
expected = pd.Series([1, 2, 3])
tm.assert_series_equal(expected, result)
def test_replace_mixed_types_with_string(self):
# Testing mixed
s = pd.Series([1, 2, 3, "4", 4, 5])
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.replace([2, "4"], np.nan)
expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
tm.assert_series_equal(expected, result)
@pytest.mark.parametrize(
"categorical, numeric",
[
(pd.Categorical(["A"], categories=["A", "B"]), [1]),
(pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
],
)
def test_replace_categorical(self, categorical, numeric, using_infer_string):
# GH 24971, GH#23305
ser = pd.Series(categorical)
msg = "Downcasting behavior in `replace`"
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace({"A": 1, "B": 2})
expected = pd.Series(numeric).astype("category")
if 2 not in expected.cat.categories:
# i.e. categories should be [1, 2] even if there are no "B"s present
# GH#44940
expected = expected.cat.add_categories(2)
tm.assert_series_equal(expected, result)
@pytest.mark.parametrize(
"data, data_exp", [(["a", "b", "c"], ["b", "b", "c"]), (["a"], ["b"])]
)
def test_replace_categorical_inplace(self, data, data_exp):
# GH 53358
result = pd.Series(data, dtype="category")
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result.replace(to_replace="a", value="b", inplace=True)
expected = pd.Series(data_exp, dtype="category")
tm.assert_series_equal(result, expected)
def test_replace_categorical_single(self):
# GH 26988
dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
s = pd.Series(dti)
c = s.astype("category")
expected = c.copy()
expected = expected.cat.add_categories("foo")
expected[2] = "foo"
expected = expected.cat.remove_unused_categories()
assert c[2] != "foo"
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = c.replace(c[2], "foo")
tm.assert_series_equal(expected, result)
assert c[2] != "foo" # ensure non-inplace call does not alter original
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = c.replace(c[2], "foo", inplace=True)
assert return_value is None
tm.assert_series_equal(expected, c)
first_value = c[0]
msg = "with CategoricalDtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = c.replace(c[1], c[0], inplace=True)
assert return_value is None
assert c[0] == c[1] == first_value # test replacing with existing value
def test_replace_with_no_overflowerror(self):
# GH 25616
# casts to object without Exception from OverflowError
s = pd.Series([0, 1, 2, 3, 4])
result = s.replace([3], ["100000000000000000000"])
expected = pd.Series([0, 1, 2, "100000000000000000000", 4])
tm.assert_series_equal(result, expected)
s = pd.Series([0, "100000000000000000000", "100000000000000000001"])
result = s.replace(["100000000000000000000"], [1])
expected = pd.Series([0, 1, "100000000000000000001"])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, to_replace, exp",
[
([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]),
(["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]),
],
)
def test_replace_commutative(self, ser, to_replace, exp):
# GH 16051
# DataFrame.replace() overwrites when values are non-numeric
series = pd.Series(ser)
expected = pd.Series(exp)
result = series.replace(to_replace)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])]
)
def test_replace_no_cast(self, ser, exp):
# GH 9113
# BUG: replace int64 dtype with bool coerces to int64
series = pd.Series(ser)
result = series.replace(2, True)
expected = pd.Series(exp)
tm.assert_series_equal(result, expected)
def test_replace_invalid_to_replace(self):
# GH 18634
# API: replace() should raise an exception if invalid argument is given
series = pd.Series(["a", "b", "c "])
msg = (
r"Expecting 'to_replace' to be either a scalar, array-like, "
r"dict or None, got invalid type.*"
)
msg2 = (
"Series.replace without 'value' and with non-dict-like "
"'to_replace' is deprecated"
)
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg2):
series.replace(lambda x: x.strip())
@pytest.mark.parametrize("frame", [False, True])
def test_replace_nonbool_regex(self, frame):
obj = pd.Series(["a", "b", "c "])
if frame:
obj = obj.to_frame()
msg = "'to_replace' must be 'None' if 'regex' is not a bool"
with pytest.raises(ValueError, match=msg):
obj.replace(to_replace=["a"], regex="foo")
@pytest.mark.parametrize("frame", [False, True])
def test_replace_empty_copy(self, frame):
obj = pd.Series([], dtype=np.float64)
if frame:
obj = obj.to_frame()
res = obj.replace(4, 5, inplace=True)
assert res is None
res = obj.replace(4, 5, inplace=False)
tm.assert_equal(res, obj)
assert res is not obj
def test_replace_only_one_dictlike_arg(self, fixed_now_ts):
# GH#33340
ser = pd.Series([1, 2, "A", fixed_now_ts, True])
to_replace = {0: 1, 2: "A"}
value = "foo"
msg = "Series.replace cannot use dict-like to_replace and non-None value"
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)
to_replace = 1
value = {0: "foo", 2: "bar"}
msg = "Series.replace cannot use dict-value and non-None to_replace"
with pytest.raises(ValueError, match=msg):
ser.replace(to_replace, value)
def test_replace_extension_other(self, frame_or_series):
# https://github.com/pandas-dev/pandas/issues/34530
obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
result = obj.replace("", "") # no exception
# should not have changed dtype
tm.assert_equal(obj, result)
def _check_replace_with_method(self, ser: pd.Series):
df = ser.to_frame()
msg1 = "The 'method' keyword in Series.replace is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg1):
res = ser.replace(ser[1], method="pad")
expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
tm.assert_series_equal(res, expected)
msg2 = "The 'method' keyword in DataFrame.replace is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg2):
res_df = df.replace(ser[1], method="pad")
tm.assert_frame_equal(res_df, expected.to_frame())
ser2 = ser.copy()
with tm.assert_produces_warning(FutureWarning, match=msg1):
res2 = ser2.replace(ser[1], method="pad", inplace=True)
assert res2 is None
tm.assert_series_equal(ser2, expected)
with tm.assert_produces_warning(FutureWarning, match=msg2):
res_df2 = df.replace(ser[1], method="pad", inplace=True)
assert res_df2 is None
tm.assert_frame_equal(df, expected.to_frame())
def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype):
arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype)
ser = pd.Series(arr)
self._check_replace_with_method(ser)
@pytest.mark.parametrize("as_categorical", [True, False])
def test_replace_interval_with_method(self, as_categorical):
# in particular interval that can't hold NA
idx = pd.IntervalIndex.from_breaks(range(4))
ser = pd.Series(idx)
if as_categorical:
ser = ser.astype("category")
self._check_replace_with_method(ser)
@pytest.mark.parametrize("as_period", [True, False])
@pytest.mark.parametrize("as_categorical", [True, False])
def test_replace_datetimelike_with_method(self, as_period, as_categorical):
idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific")
if as_period:
idx = idx.tz_localize(None).to_period("D")
ser = pd.Series(idx)
ser.iloc[-2] = pd.NaT
if as_categorical:
ser = ser.astype("category")
self._check_replace_with_method(ser)
def test_replace_with_compiled_regex(self):
# https://github.com/pandas-dev/pandas/issues/35680
s = pd.Series(["a", "b", "c"])
regex = re.compile("^a$")
result = s.replace({regex: "z"}, regex=True)
expected = pd.Series(["z", "b", "c"])
tm.assert_series_equal(result, expected)
def test_pandas_replace_na(self):
# GH#43344
# GH#56599
ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA, "AA"], dtype="string")
regex_mapping = {
"AA": "CC",
"BB": "CC",
"EE": "CC",
"CC": "CC-REPL",
}
result = ser.replace(regex_mapping, regex=True)
exp = pd.Series(
["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA, "CC"], dtype="string"
)
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize(
"dtype, input_data, to_replace, expected_data",
[
("bool", [True, False], {True: False}, [False, False]),
("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
(
pd.IntervalDtype("int64"),
IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
{pd.Interval(1, 2): pd.Interval(10, 20)},
IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
),
(
pd.IntervalDtype("float64"),
IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
{pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
),
(
pd.PeriodDtype("M"),
[pd.Period("2020-05", freq="M")],
{pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
[pd.Period("2020-06", freq="M")],
),
],
)
def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
# GH#33484
ser = pd.Series(input_data, dtype=dtype)
result = ser.replace(to_replace)
expected = pd.Series(expected_data, dtype=dtype)
tm.assert_series_equal(result, expected)
def test_replace_string_dtype(self):
# GH#40732, GH#44940
ser = pd.Series(["one", "two", np.nan], dtype="string")
res = ser.replace({"one": "1", "two": "2"})
expected = pd.Series(["1", "2", np.nan], dtype="string")
tm.assert_series_equal(res, expected)
# GH#31644
ser2 = pd.Series(["A", np.nan], dtype="string")
res2 = ser2.replace("A", "B")
expected2 = pd.Series(["B", np.nan], dtype="string")
tm.assert_series_equal(res2, expected2)
ser3 = pd.Series(["A", "B"], dtype="string")
res3 = ser3.replace("A", pd.NA)
expected3 = pd.Series([pd.NA, "B"], dtype="string")
tm.assert_series_equal(res3, expected3)
def test_replace_string_dtype_list_to_replace(self):
# GH#41215, GH#44940
ser = pd.Series(["abc", "def"], dtype="string")
res = ser.replace(["abc", "any other string"], "xyz")
expected = pd.Series(["xyz", "def"], dtype="string")
tm.assert_series_equal(res, expected)
def test_replace_string_dtype_regex(self):
# GH#31644
ser = pd.Series(["A", "B"], dtype="string")
res = ser.replace(r".", "C", regex=True)
expected = pd.Series(["C", "C"], dtype="string")
tm.assert_series_equal(res, expected)
def test_replace_nullable_numeric(self):
# GH#40732, GH#44940
floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype())
assert floats.replace({1.0: 9}).dtype == floats.dtype
assert floats.replace(1.0, 9).dtype == floats.dtype
assert floats.replace({1.0: 9.0}).dtype == floats.dtype
assert floats.replace(1.0, 9.0).dtype == floats.dtype
res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0])
assert res.dtype == floats.dtype
ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype())
assert ints.replace({1: 9}).dtype == ints.dtype
assert ints.replace(1, 9).dtype == ints.dtype
assert ints.replace({1: 9.0}).dtype == ints.dtype
assert ints.replace(1, 9.0).dtype == ints.dtype
# nullable (for now) raises instead of casting
with pytest.raises(TypeError, match="Invalid value"):
ints.replace({1: 9.5})
with pytest.raises(TypeError, match="Invalid value"):
ints.replace(1, 9.5)
@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series(self, regex):
# GH-48644
series = pd.Series(["0"], dtype=object)
expected = pd.Series([1])
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = series.replace(to_replace="0", value=1, regex=regex)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series_string(self, regex):
series = pd.Series(["0"], dtype="str")
expected = pd.Series([1], dtype="int64")
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = series.replace(to_replace="0", value=1, regex=regex)
tm.assert_series_equal(result, expected)
def test_replace_different_int_types(self, any_int_numpy_dtype):
# GH#45311
labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
maps = pd.Series([0, 2, 1], dtype=any_int_numpy_dtype)
map_dict = dict(zip(maps.values, maps.index))
result = labs.replace(map_dict)
expected = labs.replace({0: 0, 2: 1, 1: 2})
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("val", [2, np.nan, 2.0])
def test_replace_value_none_dtype_numeric(self, val):
# GH#48231
ser = pd.Series([1, val])
result = ser.replace(val, None)
expected = pd.Series([1, None], dtype=object)
tm.assert_series_equal(result, expected)
def test_replace_change_dtype_series(self):
# GH#25797
df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object)
df["Test"] = df["Test"].replace([True], [np.nan])
expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
tm.assert_frame_equal(df, expected)
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
df["Test"] = df["Test"].replace([None], [np.nan])
tm.assert_frame_equal(df, expected)
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
df["Test"] = df["Test"].fillna(np.nan)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("dtype", ["object", "Int64"])
def test_replace_na_in_obj_column(self, dtype):
# GH#47480
ser = pd.Series([0, 1, pd.NA], dtype=dtype)
expected = pd.Series([0, 2, pd.NA], dtype=dtype)
result = ser.replace(to_replace=1, value=2)
tm.assert_series_equal(result, expected)
ser.replace(to_replace=1, value=2, inplace=True)
tm.assert_series_equal(ser, expected)
@pytest.mark.parametrize("val", [0, 0.5])
def test_replace_numeric_column_with_na(self, val):
# GH#50758
ser = pd.Series([val, 1])
expected = pd.Series([val, pd.NA])
result = ser.replace(to_replace=1, value=pd.NA)
tm.assert_series_equal(result, expected)
ser.replace(to_replace=1, value=pd.NA, inplace=True)
tm.assert_series_equal(ser, expected)
def test_replace_ea_float_with_bool(self):
# GH#55398
ser = pd.Series([0.0], dtype="Float64")
expected = ser.copy()
result = ser.replace(False, 1.0)
tm.assert_series_equal(result, expected)
ser = pd.Series([False], dtype="boolean")
expected = ser.copy()
result = ser.replace(0.0, True)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,225 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
RangeIndex,
Series,
date_range,
option_context,
)
import pandas._testing as tm
class TestResetIndex:
def test_reset_index_dti_round_trip(self):
dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None)
d1 = DataFrame({"v": np.random.default_rng(2).random(len(dti))}, index=dti)
d2 = d1.reset_index()
assert d2.dtypes.iloc[0] == np.dtype("M8[ns]")
d3 = d2.set_index("index")
tm.assert_frame_equal(d1, d3, check_names=False)
# GH#2329
stamp = datetime(2012, 11, 22)
df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"])
df = df.set_index("Date")
assert df.index[0] == stamp
assert df.reset_index()["Date"].iloc[0] == stamp
def test_reset_index(self):
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)[:5]
ser = df.stack(future_stack=True)
ser.index.names = ["hash", "category"]
ser.name = "value"
df = ser.reset_index()
assert "value" in df
df = ser.reset_index(name="value2")
assert "value2" in df
# check inplace
s = ser.reset_index(drop=True)
s2 = ser
return_value = s2.reset_index(drop=True, inplace=True)
assert return_value is None
tm.assert_series_equal(s, s2)
# level
index = MultiIndex(
levels=[["bar"], ["one", "two", "three"], [0, 1]],
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
)
s = Series(np.random.default_rng(2).standard_normal(6), index=index)
rs = s.reset_index(level=1)
assert len(rs.columns) == 2
rs = s.reset_index(level=[0, 2], drop=True)
tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
assert isinstance(rs, Series)
def test_reset_index_name(self):
s = Series([1, 2, 3], index=Index(range(3), name="x"))
assert s.reset_index().index.name is None
assert s.reset_index(drop=True).index.name is None
def test_reset_index_level(self):
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
for levels in ["A", "B"], [0, 1]:
# With MultiIndex
s = df.set_index(["A", "B"])["C"]
result = s.reset_index(level=levels[0])
tm.assert_frame_equal(result, df.set_index("B"))
result = s.reset_index(level=levels[:1])
tm.assert_frame_equal(result, df.set_index("B"))
result = s.reset_index(level=levels)
tm.assert_frame_equal(result, df)
result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
tm.assert_frame_equal(result, df[["C"]])
with pytest.raises(KeyError, match="Level E "):
s.reset_index(level=["A", "E"])
# With single-level Index
s = df.set_index("A")["B"]
result = s.reset_index(level=levels[0])
tm.assert_frame_equal(result, df[["A", "B"]])
result = s.reset_index(level=levels[:1])
tm.assert_frame_equal(result, df[["A", "B"]])
result = s.reset_index(level=levels[0], drop=True)
tm.assert_series_equal(result, df["B"])
with pytest.raises(IndexError, match="Too many levels"):
s.reset_index(level=[0, 1, 2])
# Check that .reset_index([],drop=True) doesn't fail
result = Series(range(4)).reset_index([], drop=True)
expected = Series(range(4))
tm.assert_series_equal(result, expected)
def test_reset_index_range(self):
# GH 12071
s = Series(range(2), name="A", dtype="int64")
series_result = s.reset_index()
assert isinstance(series_result.index, RangeIndex)
series_expected = DataFrame(
[[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
)
tm.assert_frame_equal(series_result, series_expected)
def test_reset_index_drop_errors(self):
# GH 20925
# KeyError raised for series index when passed level name is missing
s = Series(range(4))
with pytest.raises(KeyError, match="does not match index name"):
s.reset_index("wrong", drop=True)
with pytest.raises(KeyError, match="does not match index name"):
s.reset_index("wrong")
# KeyError raised for series when level to be dropped is missing
s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
with pytest.raises(KeyError, match="not found"):
s.reset_index("wrong", drop=True)
def test_reset_index_with_drop(self):
arrays = [
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = zip(*arrays)
index = MultiIndex.from_tuples(tuples)
data = np.random.default_rng(2).standard_normal(8)
ser = Series(data, index=index)
ser.iloc[3] = np.nan
deleveled = ser.reset_index()
assert isinstance(deleveled, DataFrame)
assert len(deleveled.columns) == len(ser.index.levels) + 1
assert deleveled.index.name == ser.index.name
deleveled = ser.reset_index(drop=True)
assert isinstance(deleveled, Series)
assert deleveled.index.name == ser.index.name
def test_reset_index_inplace_and_drop_ignore_name(self):
# GH#44575
ser = Series(range(2), name="old")
ser.reset_index(name="new", drop=True, inplace=True)
expected = Series(range(2), name="old")
tm.assert_series_equal(ser, expected)
def test_reset_index_drop_infer_string(self):
# GH#56160
pytest.importorskip("pyarrow")
ser = Series(["a", "b", "c"], dtype=object)
with option_context("future.infer_string", True):
result = ser.reset_index(drop=True)
tm.assert_series_equal(result, ser)
@pytest.mark.parametrize(
"array, dtype",
[
(["a", "b"], object),
(
pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
pd.PeriodDtype(freq="Q-DEC"),
),
],
)
def test_reset_index_dtypes_on_empty_series_with_multiindex(
array, dtype, using_infer_string
):
# GH 19602 - Preserve dtype on empty Series with MultiIndex
idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
result = Series(dtype=object, index=idx)[:0].reset_index().dtypes
exp = "str" if using_infer_string else object
expected = Series(
{
"level_0": np.int64,
"level_1": np.float64,
"level_2": exp if dtype == object else dtype,
0: object,
}
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"names, expected_names",
[
(["A", "A"], ["A", "A"]),
(["level_1", None], ["level_1", "level_1"]),
],
)
@pytest.mark.parametrize("allow_duplicates", [False, True])
def test_column_name_duplicates(names, expected_names, allow_duplicates):
# GH#44755 reset_index with duplicate column labels
s = Series([1], index=MultiIndex.from_arrays([[1], [1]], names=names))
if allow_duplicates:
result = s.reset_index(allow_duplicates=True)
expected = DataFrame([[1, 1, 1]], columns=expected_names + [0])
tm.assert_frame_equal(result, expected)
else:
with pytest.raises(ValueError, match="cannot insert"):
s.reset_index()

View File

@ -0,0 +1,81 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
class TestSeriesRound:
def test_round(self, datetime_series):
datetime_series.index.name = "index_name"
result = datetime_series.round(2)
expected = Series(
np.round(datetime_series.values, 2), index=datetime_series.index, name="ts"
)
tm.assert_series_equal(result, expected)
assert result.name == datetime_series.name
def test_round_numpy(self, any_float_dtype):
# See GH#12600
ser = Series([1.53, 1.36, 0.06], dtype=any_float_dtype)
out = np.round(ser, decimals=0)
expected = Series([2.0, 1.0, 0.0], dtype=any_float_dtype)
tm.assert_series_equal(out, expected)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
np.round(ser, decimals=0, out=ser)
def test_round_numpy_with_nan(self, any_float_dtype):
# See GH#14197
ser = Series([1.53, np.nan, 0.06], dtype=any_float_dtype)
with tm.assert_produces_warning(None):
result = ser.round()
expected = Series([2.0, np.nan, 0.0], dtype=any_float_dtype)
tm.assert_series_equal(result, expected)
def test_round_builtin(self, any_float_dtype):
ser = Series(
[1.123, 2.123, 3.123],
index=range(3),
dtype=any_float_dtype,
)
result = round(ser)
expected_rounded0 = Series(
[1.0, 2.0, 3.0], index=range(3), dtype=any_float_dtype
)
tm.assert_series_equal(result, expected_rounded0)
decimals = 2
expected_rounded = Series(
[1.12, 2.12, 3.12], index=range(3), dtype=any_float_dtype
)
result = round(ser, decimals)
tm.assert_series_equal(result, expected_rounded)
@pytest.mark.parametrize("method", ["round", "floor", "ceil"])
@pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"])
def test_round_nat(self, method, freq, unit):
# GH14940, GH#56158
ser = Series([pd.NaT], dtype=f"M8[{unit}]")
expected = Series(pd.NaT, dtype=f"M8[{unit}]")
round_method = getattr(ser.dt, method)
result = round_method(freq)
tm.assert_series_equal(result, expected)
def test_round_ea_boolean(self):
# GH#55936
ser = Series([True, False], dtype="boolean")
expected = ser.copy()
result = ser.round(2)
tm.assert_series_equal(result, expected)
result.iloc[0] = False
tm.assert_series_equal(ser, expected)
def test_round_dtype_object(self):
# GH#61206
ser = Series([0.2], dtype="object")
msg = "Expected numeric dtype, got object instead."
with pytest.raises(TypeError, match=msg):
ser.round()

View File

@ -0,0 +1,77 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.api.types import is_scalar
class TestSeriesSearchSorted:
def test_searchsorted(self):
ser = Series([1, 2, 3])
result = ser.searchsorted(1, side="left")
assert is_scalar(result)
assert result == 0
result = ser.searchsorted(1, side="right")
assert is_scalar(result)
assert result == 1
def test_searchsorted_numeric_dtypes_scalar(self):
ser = Series([1, 2, 90, 1000, 3e9])
res = ser.searchsorted(30)
assert is_scalar(res)
assert res == 2
res = ser.searchsorted([30])
exp = np.array([2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_numeric_dtypes_vector(self):
ser = Series([1, 2, 90, 1000, 3e9])
res = ser.searchsorted([91, 2e6])
exp = np.array([3, 4], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_datetime64_scalar(self):
ser = Series(date_range("20120101", periods=10, freq="2D"))
val = Timestamp("20120102")
res = ser.searchsorted(val)
assert is_scalar(res)
assert res == 1
def test_searchsorted_datetime64_scalar_mixed_timezones(self):
# GH 30086
ser = Series(date_range("20120101", periods=10, freq="2D", tz="UTC"))
val = Timestamp("20120102", tz="America/New_York")
res = ser.searchsorted(val)
assert is_scalar(res)
assert res == 1
def test_searchsorted_datetime64_list(self):
ser = Series(date_range("20120101", periods=10, freq="2D"))
vals = [Timestamp("20120102"), Timestamp("20120104")]
res = ser.searchsorted(vals)
exp = np.array([1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_sorter(self):
# GH8490
ser = Series([3, 1, 2])
res = ser.searchsorted([0, 3], sorter=np.argsort(ser))
exp = np.array([0, 2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_dataframe_fail(self):
# GH#49620
ser = Series([1, 2, 3, 4, 5])
vals = pd.DataFrame([[1, 2], [3, 4]])
msg = "Value must be 1-D array-like or scalar, DataFrame is not supported"
with pytest.raises(ValueError, match=msg):
ser.searchsorted(vals)

View File

@ -0,0 +1,21 @@
from datetime import datetime
from pandas import Series
class TestSetName:
def test_set_name(self):
ser = Series([1, 2, 3])
ser2 = ser._set_name("foo")
assert ser2.name == "foo"
assert ser.name is None
assert ser is not ser2
def test_set_name_attribute(self):
ser = Series([1, 2, 3])
ser2 = Series([1, 2, 3], name="bar")
for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]:
ser.name = name
assert ser.name == name
ser2.name = name
assert ser2.name == name

View File

@ -0,0 +1,22 @@
import pytest
from pandas import Series
@pytest.mark.parametrize(
"data, index, expected",
[
([1, 2, 3], None, 3),
({"a": 1, "b": 2, "c": 3}, None, 3),
([1, 2, 3], ["x", "y", "z"], 3),
([1, 2, 3, 4, 5], ["x", "y", "z", "w", "n"], 5),
([1, 2, 3], None, 3),
([1, 2, 3], ["x", "y", "z"], 3),
([1, 2, 3, 4], ["x", "y", "z", "w"], 4),
],
)
def test_series(data, index, expected):
# GH#52897
ser = Series(data, index=index)
assert ser.size == expected
assert isinstance(ser.size, int)

View File

@ -0,0 +1,337 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
IntervalIndex,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture(params=["quicksort", "mergesort", "heapsort", "stable"])
def sort_kind(request):
return request.param
class TestSeriesSortIndex:
def test_sort_index_name(self, datetime_series):
result = datetime_series.sort_index(ascending=False)
assert result.name == datetime_series.name
def test_sort_index(self, datetime_series):
datetime_series.index = datetime_series.index._with_freq(None)
rindex = list(datetime_series.index)
np.random.default_rng(2).shuffle(rindex)
random_order = datetime_series.reindex(rindex)
sorted_series = random_order.sort_index()
tm.assert_series_equal(sorted_series, datetime_series)
# descending
sorted_series = random_order.sort_index(ascending=False)
tm.assert_series_equal(
sorted_series, datetime_series.reindex(datetime_series.index[::-1])
)
# compat on level
sorted_series = random_order.sort_index(level=0)
tm.assert_series_equal(sorted_series, datetime_series)
# compat on axis
sorted_series = random_order.sort_index(axis=0)
tm.assert_series_equal(sorted_series, datetime_series)
msg = "No axis named 1 for object type Series"
with pytest.raises(ValueError, match=msg):
random_order.sort_values(axis=1)
sorted_series = random_order.sort_index(level=0, axis=0)
tm.assert_series_equal(sorted_series, datetime_series)
with pytest.raises(ValueError, match=msg):
random_order.sort_index(level=0, axis=1)
def test_sort_index_inplace(self, datetime_series):
datetime_series.index = datetime_series.index._with_freq(None)
# For GH#11402
rindex = list(datetime_series.index)
np.random.default_rng(2).shuffle(rindex)
# descending
random_order = datetime_series.reindex(rindex)
result = random_order.sort_index(ascending=False, inplace=True)
assert result is None
expected = datetime_series.reindex(datetime_series.index[::-1])
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(random_order, expected)
# ascending
random_order = datetime_series.reindex(rindex)
result = random_order.sort_index(ascending=True, inplace=True)
assert result is None
expected = datetime_series.copy()
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(random_order, expected)
def test_sort_index_level(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]
res = s.sort_index(level="A")
tm.assert_series_equal(backwards, res)
res = s.sort_index(level=["A", "B"])
tm.assert_series_equal(backwards, res)
res = s.sort_index(level="A", sort_remaining=False)
tm.assert_series_equal(s, res)
res = s.sort_index(level=["A", "B"], sort_remaining=False)
tm.assert_series_equal(s, res)
@pytest.mark.parametrize("level", ["A", 0]) # GH#21052
def test_sort_index_multiindex(self, level):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]
# implicit sort_remaining=True
res = s.sort_index(level=level)
tm.assert_series_equal(backwards, res)
# GH#13496
# sort has no effect without remaining lvls
res = s.sort_index(level=level, sort_remaining=False)
tm.assert_series_equal(s, res)
def test_sort_index_kind(self, sort_kind):
# GH#14444 & GH#13589: Add support for sort algo choosing
series = Series(index=[3, 2, 1, 4, 3], dtype=object)
expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object)
index_sorted_series = series.sort_index(kind=sort_kind)
tm.assert_series_equal(expected_series, index_sorted_series)
def test_sort_index_na_position(self):
series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object)
expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object)
index_sorted_series = series.sort_index(na_position="first")
tm.assert_series_equal(expected_series_first, index_sorted_series)
expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object)
index_sorted_series = series.sort_index(na_position="last")
tm.assert_series_equal(expected_series_last, index_sorted_series)
def test_sort_index_intervals(self):
s = Series(
[np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4])
)
result = s.sort_index()
expected = s
tm.assert_series_equal(result, expected)
result = s.sort_index(ascending=False)
expected = Series(
[3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1])
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize(
"original_list, sorted_list, ascending, ignore_index, output_index",
[
([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]),
([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]),
([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]),
([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]),
],
)
def test_sort_index_ignore_index(
self, inplace, original_list, sorted_list, ascending, ignore_index, output_index
):
# GH 30114
ser = Series(original_list)
expected = Series(sorted_list, index=output_index)
kwargs = {
"ascending": ascending,
"ignore_index": ignore_index,
"inplace": inplace,
}
if inplace:
result_ser = ser.copy()
result_ser.sort_index(**kwargs)
else:
result_ser = ser.sort_index(**kwargs)
tm.assert_series_equal(result_ser, expected)
tm.assert_series_equal(ser, Series(original_list))
def test_sort_index_ascending_list(self):
# GH#16934
# Set up a Series with a three level MultiIndex
arrays = [
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
[4, 3, 2, 1, 4, 3, 2, 1],
]
tuples = zip(*arrays)
mi = MultiIndex.from_tuples(tuples, names=["first", "second", "third"])
ser = Series(range(8), index=mi)
# Sort with boolean ascending
result = ser.sort_index(level=["third", "first"], ascending=False)
expected = ser.iloc[[4, 0, 5, 1, 6, 2, 7, 3]]
tm.assert_series_equal(result, expected)
# Sort with list of boolean ascending
result = ser.sort_index(level=["third", "first"], ascending=[False, True])
expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ascending",
[
None,
(True, None),
(False, "True"),
],
)
def test_sort_index_ascending_bad_value_raises(self, ascending):
ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9])
match = 'For argument "ascending" expected type bool'
with pytest.raises(ValueError, match=match):
ser.sort_index(ascending=ascending)
class TestSeriesSortIndexKey:
def test_sort_index_multiindex_key(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]
result = s.sort_index(level="C", key=lambda x: -x)
tm.assert_series_equal(s, result)
result = s.sort_index(level="C", key=lambda x: x) # nothing happens
tm.assert_series_equal(backwards, result)
def test_sort_index_multiindex_key_multi_level(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
s = Series([1, 2], mi)
backwards = s.iloc[[1, 0]]
result = s.sort_index(level=["A", "C"], key=lambda x: -x)
tm.assert_series_equal(s, result)
result = s.sort_index(level=["A", "C"], key=lambda x: x) # nothing happens
tm.assert_series_equal(backwards, result)
def test_sort_index_key(self):
series = Series(np.arange(6, dtype="int64"), index=list("aaBBca"))
result = series.sort_index()
expected = series.iloc[[2, 3, 0, 1, 5, 4]]
tm.assert_series_equal(result, expected)
result = series.sort_index(key=lambda x: x.str.lower())
expected = series.iloc[[0, 1, 5, 2, 3, 4]]
tm.assert_series_equal(result, expected)
result = series.sort_index(key=lambda x: x.str.lower(), ascending=False)
expected = series.iloc[[4, 2, 3, 0, 1, 5]]
tm.assert_series_equal(result, expected)
def test_sort_index_key_int(self):
series = Series(np.arange(6, dtype="int64"), index=np.arange(6, dtype="int64"))
result = series.sort_index()
tm.assert_series_equal(result, series)
result = series.sort_index(key=lambda x: -x)
expected = series.sort_index(ascending=False)
tm.assert_series_equal(result, expected)
result = series.sort_index(key=lambda x: 2 * x)
tm.assert_series_equal(result, series)
def test_sort_index_kind_key(self, sort_kind, sort_by_key):
# GH #14444 & #13589: Add support for sort algo choosing
series = Series(index=[3, 2, 1, 4, 3], dtype=object)
expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object)
index_sorted_series = series.sort_index(kind=sort_kind, key=sort_by_key)
tm.assert_series_equal(expected_series, index_sorted_series)
def test_sort_index_kind_neg_key(self, sort_kind):
# GH #14444 & #13589: Add support for sort algo choosing
series = Series(index=[3, 2, 1, 4, 3], dtype=object)
expected_series = Series(index=[4, 3, 3, 2, 1], dtype=object)
index_sorted_series = series.sort_index(kind=sort_kind, key=lambda x: -x)
tm.assert_series_equal(expected_series, index_sorted_series)
def test_sort_index_na_position_key(self, sort_by_key):
series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object)
expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object)
index_sorted_series = series.sort_index(na_position="first", key=sort_by_key)
tm.assert_series_equal(expected_series_first, index_sorted_series)
expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object)
index_sorted_series = series.sort_index(na_position="last", key=sort_by_key)
tm.assert_series_equal(expected_series_last, index_sorted_series)
def test_changes_length_raises(self):
s = Series([1, 2, 3])
with pytest.raises(ValueError, match="change the shape"):
s.sort_index(key=lambda x: x[:1])
def test_sort_values_key_type(self):
s = Series([1, 2, 3], DatetimeIndex(["2008-10-24", "2008-11-23", "2007-12-22"]))
result = s.sort_index(key=lambda x: x.month)
expected = s.iloc[[0, 1, 2]]
tm.assert_series_equal(result, expected)
result = s.sort_index(key=lambda x: x.day)
expected = s.iloc[[2, 1, 0]]
tm.assert_series_equal(result, expected)
result = s.sort_index(key=lambda x: x.year)
expected = s.iloc[[2, 0, 1]]
tm.assert_series_equal(result, expected)
result = s.sort_index(key=lambda x: x.month_name())
expected = s.iloc[[2, 1, 0]]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"ascending",
[
[True, False],
[False, True],
],
)
def test_sort_index_multi_already_monotonic(self, ascending):
# GH 56049
mi = MultiIndex.from_product([[1, 2], [3, 4]])
ser = Series(range(len(mi)), index=mi)
result = ser.sort_index(ascending=ascending)
if ascending == [True, False]:
expected = ser.take([1, 0, 3, 2])
elif ascending == [False, True]:
expected = ser.take([2, 3, 0, 1])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,246 @@
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Series,
)
import pandas._testing as tm
class TestSeriesSortValues:
def test_sort_values(self, datetime_series, using_copy_on_write):
# check indexes are reordered corresponding with the values
ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"])
expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"])
result = ser.sort_values()
tm.assert_series_equal(expected, result)
ts = datetime_series.copy()
ts[:5] = np.nan
vals = ts.values
result = ts.sort_values()
assert np.isnan(result[-5:]).all()
tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:]))
# na_position
result = ts.sort_values(na_position="first")
assert np.isnan(result[:5]).all()
tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:]))
# something object-type
ser = Series(["A", "B"], [1, 2])
# no failure
ser.sort_values()
# ascending=False
ordered = ts.sort_values(ascending=False)
expected = np.sort(ts.dropna().values)[::-1]
tm.assert_almost_equal(expected, ordered.dropna().values)
ordered = ts.sort_values(ascending=False, na_position="first")
tm.assert_almost_equal(expected, ordered.dropna().values)
# ascending=[False] should behave the same as ascending=False
ordered = ts.sort_values(ascending=[False])
expected = ts.sort_values(ascending=False)
tm.assert_series_equal(expected, ordered)
ordered = ts.sort_values(ascending=[False], na_position="first")
expected = ts.sort_values(ascending=False, na_position="first")
tm.assert_series_equal(expected, ordered)
msg = 'For argument "ascending" expected type bool, received type NoneType.'
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=None)
msg = r"Length of ascending \(0\) must be 1 for Series"
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=[])
msg = r"Length of ascending \(3\) must be 1 for Series"
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=[1, 2, 3])
msg = r"Length of ascending \(2\) must be 1 for Series"
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending=[False, False])
msg = 'For argument "ascending" expected type bool, received type str.'
with pytest.raises(ValueError, match=msg):
ts.sort_values(ascending="foobar")
# inplace=True
ts = datetime_series.copy()
return_value = ts.sort_values(ascending=False, inplace=True)
assert return_value is None
tm.assert_series_equal(ts, datetime_series.sort_values(ascending=False))
tm.assert_index_equal(
ts.index, datetime_series.sort_values(ascending=False).index
)
# GH#5856/5853
# Series.sort_values operating on a view
df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
s = df.iloc[:, 0]
msg = (
"This Series is a view of some other array, to sort in-place "
"you must create a copy"
)
if using_copy_on_write:
s.sort_values(inplace=True)
tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
else:
with pytest.raises(ValueError, match=msg):
s.sort_values(inplace=True)
def test_sort_values_categorical(self):
c = Categorical(["a", "b", "b", "a"], ordered=False)
cat = Series(c.copy())
# sort in the categories order
expected = Series(
Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2]
)
result = cat.sort_values()
tm.assert_series_equal(result, expected)
cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
res = cat.sort_values()
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
cat = Series(
Categorical(
["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
)
)
res = cat.sort_values()
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
res = cat.sort_values(ascending=False)
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
raw_cat1 = Categorical(
["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False
)
raw_cat2 = Categorical(
["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
)
s = ["a", "b", "c", "d"]
df = DataFrame(
{"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]}
)
# Cats must be sorted in a dataframe
res = df.sort_values(by=["string"], ascending=False)
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
assert res["sort"].dtype == "category"
res = df.sort_values(by=["sort"], ascending=False)
exp = df.sort_values(by=["string"], ascending=True)
tm.assert_series_equal(res["values"], exp["values"])
assert res["sort"].dtype == "category"
assert res["unsort"].dtype == "category"
# unordered cat, but we allow this
df.sort_values(by=["unsort"], ascending=False)
# multi-columns sort
# GH#7848
df = DataFrame(
{"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
)
df["grade"] = Categorical(df["raw_grade"], ordered=True)
df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"])
# sorts 'grade' according to the order of the categories
result = df.sort_values(by=["grade"])
expected = df.iloc[[1, 2, 5, 0, 3, 4]]
tm.assert_frame_equal(result, expected)
# multi
result = df.sort_values(by=["grade", "id"])
expected = df.iloc[[2, 1, 5, 4, 3, 0]]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize(
"original_list, sorted_list, ignore_index, output_index",
[
([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]),
([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]),
],
)
def test_sort_values_ignore_index(
self, inplace, original_list, sorted_list, ignore_index, output_index
):
# GH 30114
ser = Series(original_list)
expected = Series(sorted_list, index=output_index)
kwargs = {"ignore_index": ignore_index, "inplace": inplace}
if inplace:
result_ser = ser.copy()
result_ser.sort_values(ascending=False, **kwargs)
else:
result_ser = ser.sort_values(ascending=False, **kwargs)
tm.assert_series_equal(result_ser, expected)
tm.assert_series_equal(ser, Series(original_list))
def test_mergesort_descending_stability(self):
# GH 28697
s = Series([1, 2, 1, 3], ["first", "b", "second", "c"])
result = s.sort_values(ascending=False, kind="mergesort")
expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"])
tm.assert_series_equal(result, expected)
def test_sort_values_validate_ascending_for_value_error(self):
# GH41634
ser = Series([23, 7, 21])
msg = 'For argument "ascending" expected type bool, received type str.'
with pytest.raises(ValueError, match=msg):
ser.sort_values(ascending="False")
@pytest.mark.parametrize("ascending", [False, 0, 1, True])
def test_sort_values_validate_ascending_functional(self, ascending):
# GH41634
ser = Series([23, 7, 21])
expected = np.sort(ser.values)
sorted_ser = ser.sort_values(ascending=ascending)
if not ascending:
expected = expected[::-1]
result = sorted_ser.values
tm.assert_numpy_array_equal(result, expected)
class TestSeriesSortingKey:
def test_sort_values_key(self):
series = Series(np.array(["Hello", "goodbye"]))
result = series.sort_values(axis=0)
expected = series
tm.assert_series_equal(result, expected)
result = series.sort_values(axis=0, key=lambda x: x.str.lower())
expected = series[::-1]
tm.assert_series_equal(result, expected)
def test_sort_values_key_nan(self):
series = Series(np.array([0, 5, np.nan, 3, 2, np.nan]))
result = series.sort_values(axis=0)
expected = series.iloc[[0, 4, 3, 1, 2, 5]]
tm.assert_series_equal(result, expected)
result = series.sort_values(axis=0, key=lambda x: x + 5)
expected = series.iloc[[0, 4, 3, 1, 2, 5]]
tm.assert_series_equal(result, expected)
result = series.sort_values(axis=0, key=lambda x: -x, ascending=False)
expected = series.iloc[[0, 4, 3, 1, 2, 5]]
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,179 @@
from datetime import datetime
from io import StringIO
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
from pandas.io.common import get_handle
class TestSeriesToCSV:
def read_csv(self, path, **kwargs):
params = {"index_col": 0, "header": None}
params.update(**kwargs)
header = params.get("header")
out = pd.read_csv(path, **params).squeeze("columns")
if header is None:
out.name = out.index.name = None
return out
def test_from_csv(self, datetime_series, string_series):
# freq doesn't round-trip
datetime_series.index = datetime_series.index._with_freq(None)
with tm.ensure_clean() as path:
datetime_series.to_csv(path, header=False)
ts = self.read_csv(path, parse_dates=True)
tm.assert_series_equal(datetime_series, ts, check_names=False)
assert ts.name is None
assert ts.index.name is None
# see gh-10483
datetime_series.to_csv(path, header=True)
ts_h = self.read_csv(path, header=0)
assert ts_h.name == "ts"
string_series.to_csv(path, header=False)
series = self.read_csv(path)
tm.assert_series_equal(string_series, series, check_names=False)
assert series.name is None
assert series.index.name is None
string_series.to_csv(path, header=True)
series_h = self.read_csv(path, header=0)
assert series_h.name == "series"
with open(path, "w", encoding="utf-8") as outfile:
outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
series = self.read_csv(path, sep="|", parse_dates=True)
check_series = Series(
{datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
)
tm.assert_series_equal(check_series, series)
series = self.read_csv(path, sep="|", parse_dates=False)
check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
tm.assert_series_equal(check_series, series)
def test_to_csv(self, datetime_series):
with tm.ensure_clean() as path:
datetime_series.to_csv(path, header=False)
with open(path, newline=None, encoding="utf-8") as f:
lines = f.readlines()
assert lines[1] != "\n"
datetime_series.to_csv(path, index=False, header=False)
arr = np.loadtxt(path)
tm.assert_almost_equal(arr, datetime_series.values)
def test_to_csv_unicode_index(self):
buf = StringIO()
s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"])
s.to_csv(buf, encoding="UTF-8", header=False)
buf.seek(0)
s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
tm.assert_series_equal(s, s2)
def test_to_csv_float_format(self):
with tm.ensure_clean() as filename:
ser = Series([0.123456, 0.234567, 0.567567])
ser.to_csv(filename, float_format="%.2f", header=False)
rs = self.read_csv(filename)
xp = Series([0.12, 0.23, 0.57])
tm.assert_series_equal(rs, xp)
def test_to_csv_list_entries(self):
s = Series(["jack and jill", "jesse and frank"])
split = s.str.split(r"\s+and\s+")
buf = StringIO()
split.to_csv(buf, header=False)
def test_to_csv_path_is_none(self):
# GH 8215
# Series.to_csv() was returning None, inconsistent with
# DataFrame.to_csv() which returned string
s = Series([1, 2, 3])
csv_str = s.to_csv(path_or_buf=None, header=False)
assert isinstance(csv_str, str)
@pytest.mark.parametrize(
"s,encoding",
[
(
Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"),
None,
),
# GH 21241, 21118
(Series(["abc", "def", "ghi"], name="X"), "ascii"),
(Series(["123", "你好", "世界"], name="中文"), "gb2312"),
(
Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), # noqa: RUF001
"cp737",
),
],
)
def test_to_csv_compression(self, s, encoding, compression):
with tm.ensure_clean() as filename:
s.to_csv(filename, compression=compression, encoding=encoding, header=True)
# test the round trip - to_csv -> read_csv
result = pd.read_csv(
filename,
compression=compression,
encoding=encoding,
index_col=0,
).squeeze("columns")
tm.assert_series_equal(s, result)
# test the round trip using file handle - to_csv -> read_csv
with get_handle(
filename, "w", compression=compression, encoding=encoding
) as handles:
s.to_csv(handles.handle, encoding=encoding, header=True)
result = pd.read_csv(
filename,
compression=compression,
encoding=encoding,
index_col=0,
).squeeze("columns")
tm.assert_series_equal(s, result)
# explicitly ensure file was compressed
with tm.decompress_file(filename, compression) as fh:
text = fh.read().decode(encoding or "utf8")
assert s.name in text
with tm.decompress_file(filename, compression) as fh:
tm.assert_series_equal(
s,
pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"),
)
def test_to_csv_interval_index(self, using_infer_string):
# GH 28210
s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3))
with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
s.to_csv(path, header=False)
result = self.read_csv(path, index_col=0)
# can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
expected = s
expected.index = expected.index.astype("str")
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,38 @@
import collections
import numpy as np
import pytest
from pandas import Series
import pandas._testing as tm
class TestSeriesToDict:
@pytest.mark.parametrize(
"mapping", (dict, collections.defaultdict(list), collections.OrderedDict)
)
def test_to_dict(self, mapping, datetime_series):
# GH#16122
result = Series(datetime_series.to_dict(into=mapping), name="ts")
expected = datetime_series.copy()
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
from_method = Series(datetime_series.to_dict(into=collections.Counter))
from_constructor = Series(collections.Counter(datetime_series.items()))
tm.assert_series_equal(from_method, from_constructor)
@pytest.mark.parametrize(
"input",
(
{"a": np.int64(64), "b": 10},
{"a": np.int64(64), "b": 10, "c": "ABC"},
{"a": np.uint64(64), "b": 10, "c": "ABC"},
),
)
def test_to_dict_return_types(self, input):
# GH25969
d = Series(input).to_dict()
assert isinstance(d["a"], int)
assert isinstance(d["b"], int)

View File

@ -0,0 +1,63 @@
import pytest
from pandas import (
DataFrame,
Index,
Series,
)
import pandas._testing as tm
class TestToFrame:
def test_to_frame_respects_name_none(self):
# GH#44212 if we explicitly pass name=None, then that should be respected,
# not changed to 0
# GH-45448 this is first deprecated & enforced in 2.0
ser = Series(range(3))
result = ser.to_frame(None)
exp_index = Index([None], dtype=object)
tm.assert_index_equal(result.columns, exp_index)
result = ser.rename("foo").to_frame(None)
exp_index = Index([None], dtype=object)
tm.assert_index_equal(result.columns, exp_index)
def test_to_frame(self, datetime_series):
datetime_series.name = None
rs = datetime_series.to_frame()
xp = DataFrame(datetime_series.values, index=datetime_series.index)
tm.assert_frame_equal(rs, xp)
datetime_series.name = "testname"
rs = datetime_series.to_frame()
xp = DataFrame(
{"testname": datetime_series.values}, index=datetime_series.index
)
tm.assert_frame_equal(rs, xp)
rs = datetime_series.to_frame(name="testdifferent")
xp = DataFrame(
{"testdifferent": datetime_series.values}, index=datetime_series.index
)
tm.assert_frame_equal(rs, xp)
@pytest.mark.filterwarnings(
"ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning"
)
def test_to_frame_expanddim(self):
# GH#9762
class SubclassedSeries(Series):
@property
def _constructor_expanddim(self):
return SubclassedFrame
class SubclassedFrame(DataFrame):
pass
ser = SubclassedSeries([1, 2, 3], name="X")
result = ser.to_frame()
assert isinstance(result, SubclassedFrame)
expected = SubclassedFrame({"X": [1, 2, 3]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,49 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
NA,
Series,
Timedelta,
)
import pandas._testing as tm
@pytest.mark.parametrize("dtype", ["int64", "float64"])
def test_to_numpy_na_value(dtype):
# GH#48951
ser = Series([1, 2, NA, 4])
result = ser.to_numpy(dtype=dtype, na_value=0)
expected = np.array([1, 2, 0, 4], dtype=dtype)
tm.assert_numpy_array_equal(result, expected)
def test_to_numpy_cast_before_setting_na():
# GH#50600
ser = Series([1])
result = ser.to_numpy(dtype=np.float64, na_value=np.nan)
expected = np.array([1.0])
tm.assert_numpy_array_equal(result, expected)
@td.skip_if_no("pyarrow")
def test_to_numpy_arrow_dtype_given():
# GH#57121
ser = Series([1, NA], dtype="int64[pyarrow]")
result = ser.to_numpy(dtype="float64")
expected = np.array([1.0, np.nan])
tm.assert_numpy_array_equal(result, expected)
def test_astype_ea_int_to_td_ts():
# GH#57093
ser = Series([1, None], dtype="Int64")
result = ser.astype("m8[ns]")
expected = Series([1, Timedelta("nat")], dtype="m8[ns]")
tm.assert_series_equal(result, expected)
result = ser.astype("M8[ns]")
expected = Series([1, Timedelta("nat")], dtype="M8[ns]")
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,36 @@
import pytest
import pandas.util._test_decorators as td
from pandas import (
Interval,
Period,
Series,
Timedelta,
Timestamp,
)
@pytest.mark.parametrize(
"values, dtype, expected_dtype",
(
([1], "int64", int),
([1], "Int64", int),
([1.0], "float64", float),
([1.0], "Float64", float),
(["abc"], "object", str),
(["abc"], "string", str),
([Interval(1, 3)], "interval", Interval),
([Period("2000-01-01", "D")], "period[D]", Period),
([Timedelta(days=1)], "timedelta64[ns]", Timedelta),
([Timestamp("2000-01-01")], "datetime64[ns]", Timestamp),
pytest.param([1], "int64[pyarrow]", int, marks=td.skip_if_no("pyarrow")),
pytest.param([1.0], "float64[pyarrow]", float, marks=td.skip_if_no("pyarrow")),
pytest.param(["abc"], "string[pyarrow]", str, marks=td.skip_if_no("pyarrow")),
),
)
def test_tolist_scalar_dtype(values, dtype, expected_dtype):
# GH49890
ser = Series(values, dtype=dtype)
result_dtype = type(ser.tolist()[0])
assert result_dtype == expected_dtype

View File

@ -0,0 +1,67 @@
from datetime import datetime
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
class TestTruncate:
def test_truncate_datetimeindex_tz(self):
# GH 9243
idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
s = Series(range(len(idx)), index=idx)
with pytest.raises(TypeError, match="Cannot compare tz-naive"):
# GH#36148 as of 2.0 we require tzawareness compat
s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))
lb = idx[1]
ub = idx[3]
result = s.truncate(lb.to_pydatetime(), ub.to_pydatetime())
expected = Series([1, 2, 3], index=idx[1:4])
tm.assert_series_equal(result, expected)
def test_truncate_periodindex(self):
# GH 17717
idx1 = pd.PeriodIndex(
[pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
)
series1 = Series([1, 2, 3], index=idx1)
result1 = series1.truncate(after="2017-09-02")
expected_idx1 = pd.PeriodIndex(
[pd.Period("2017-09-02"), pd.Period("2017-09-02")]
)
tm.assert_series_equal(result1, Series([1, 2], index=expected_idx1))
idx2 = pd.PeriodIndex(
[pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
)
series2 = Series([1, 2, 3], index=idx2)
result2 = series2.sort_index().truncate(after="2017-09-02")
expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")])
tm.assert_series_equal(result2, Series([2], index=expected_idx2))
def test_truncate_one_element_series(self):
# GH 35544
series = Series([0.1], index=pd.DatetimeIndex(["2020-08-04"]))
before = pd.Timestamp("2020-08-02")
after = pd.Timestamp("2020-08-04")
result = series.truncate(before=before, after=after)
# the input Series and the expected Series are the same
tm.assert_series_equal(result, series)
def test_truncate_index_only_one_unique_value(self):
# GH 42365
obj = Series(0, index=date_range("2021-06-30", "2021-06-30")).repeat(5)
truncated = obj.truncate("2021-06-28", "2021-07-01")
tm.assert_series_equal(truncated, obj)

View File

@ -0,0 +1,123 @@
from datetime import timezone
import pytest
import pytz
from pandas._libs.tslibs import timezones
from pandas import (
DatetimeIndex,
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestTZLocalize:
def test_series_tz_localize_ambiguous_bool(self):
# make sure that we are correctly accepting bool values as ambiguous
# GH#14402
ts = Timestamp("2015-11-01 01:00:03")
expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")
expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")
ser = Series([ts])
expected0 = Series([expected0])
expected1 = Series([expected1])
with tm.external_error_raised(pytz.AmbiguousTimeError):
ser.dt.tz_localize("US/Central")
result = ser.dt.tz_localize("US/Central", ambiguous=True)
tm.assert_series_equal(result, expected0)
result = ser.dt.tz_localize("US/Central", ambiguous=[True])
tm.assert_series_equal(result, expected0)
result = ser.dt.tz_localize("US/Central", ambiguous=False)
tm.assert_series_equal(result, expected1)
result = ser.dt.tz_localize("US/Central", ambiguous=[False])
tm.assert_series_equal(result, expected1)
def test_series_tz_localize_matching_index(self):
# Matching the index of the result with that of the original series
# GH 43080
dt_series = Series(
date_range(start="2021-01-01T02:00:00", periods=5, freq="1D"),
index=[2, 6, 7, 8, 11],
dtype="category",
)
result = dt_series.dt.tz_localize("Europe/Berlin")
expected = Series(
date_range(
start="2021-01-01T02:00:00", periods=5, freq="1D", tz="Europe/Berlin"
),
index=[2, 6, 7, 8, 11],
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"method, exp",
[
["shift_forward", "2015-03-29 03:00:00"],
["shift_backward", "2015-03-29 01:59:59.999999999"],
["NaT", NaT],
["raise", None],
["foo", "invalid"],
],
)
def test_tz_localize_nonexistent(self, warsaw, method, exp, unit):
# GH 8917
tz = warsaw
n = 60
dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min", unit=unit)
ser = Series(1, index=dti)
df = ser.to_frame()
if method == "raise":
with tm.external_error_raised(pytz.NonExistentTimeError):
dti.tz_localize(tz, nonexistent=method)
with tm.external_error_raised(pytz.NonExistentTimeError):
ser.tz_localize(tz, nonexistent=method)
with tm.external_error_raised(pytz.NonExistentTimeError):
df.tz_localize(tz, nonexistent=method)
elif exp == "invalid":
msg = (
"The nonexistent argument must be one of "
"'raise', 'NaT', 'shift_forward', 'shift_backward' "
"or a timedelta object"
)
with pytest.raises(ValueError, match=msg):
dti.tz_localize(tz, nonexistent=method)
with pytest.raises(ValueError, match=msg):
ser.tz_localize(tz, nonexistent=method)
with pytest.raises(ValueError, match=msg):
df.tz_localize(tz, nonexistent=method)
else:
result = ser.tz_localize(tz, nonexistent=method)
expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz).as_unit(unit))
tm.assert_series_equal(result, expected)
result = df.tz_localize(tz, nonexistent=method)
expected = expected.to_frame()
tm.assert_frame_equal(result, expected)
res_index = dti.tz_localize(tz, nonexistent=method)
tm.assert_index_equal(res_index, expected.index)
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_series_tz_localize_empty(self, tzstr):
# GH#2248
ser = Series(dtype=object)
ser2 = ser.tz_localize("utc")
assert ser2.index.tz == timezone.utc
ser2 = ser.tz_localize(tzstr)
timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))

View File

@ -0,0 +1,76 @@
import numpy as np
from pandas import (
Categorical,
IntervalIndex,
Series,
date_range,
)
import pandas._testing as tm
class TestUnique:
def test_unique_uint64(self):
ser = Series([1, 2, 2**63, 2**63], dtype=np.uint64)
res = ser.unique()
exp = np.array([1, 2, 2**63], dtype=np.uint64)
tm.assert_numpy_array_equal(res, exp)
def test_unique_data_ownership(self):
# it works! GH#1807
Series(Series(["a", "c", "b"]).unique()).sort_values()
def test_unique(self):
# GH#714 also, dtype=float
ser = Series([1.2345] * 100)
ser[::2] = np.nan
result = ser.unique()
assert len(result) == 2
# explicit f4 dtype
ser = Series([1.2345] * 100, dtype="f4")
ser[::2] = np.nan
result = ser.unique()
assert len(result) == 2
def test_unique_nan_object_dtype(self):
# NAs in object arrays GH#714
ser = Series(["foo"] * 100, dtype="O")
ser[::2] = np.nan
result = ser.unique()
assert len(result) == 2
def test_unique_none(self):
# decision about None
ser = Series([1, 2, 3, None, None, None], dtype=object)
result = ser.unique()
expected = np.array([1, 2, 3, None], dtype=object)
tm.assert_numpy_array_equal(result, expected)
def test_unique_categorical(self):
# GH#18051
cat = Categorical([])
ser = Series(cat)
result = ser.unique()
tm.assert_categorical_equal(result, cat)
cat = Categorical([np.nan])
ser = Series(cat)
result = ser.unique()
tm.assert_categorical_equal(result, cat)
def test_tz_unique(self):
# GH 46128
dti1 = date_range("2016-01-01", periods=3)
ii1 = IntervalIndex.from_breaks(dti1)
ser1 = Series(ii1)
uni1 = ser1.unique()
tm.assert_interval_array_equal(ser1.array, uni1)
dti2 = date_range("2016-01-01", periods=3, tz="US/Eastern")
ii2 = IntervalIndex.from_breaks(dti2)
ser2 = Series(ii2)
uni2 = ser2.unique()
tm.assert_interval_array_equal(ser2.array, uni2)
assert uni1.dtype != uni2.dtype

View File

@ -0,0 +1,169 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
date_range,
)
import pandas._testing as tm
def test_unstack_preserves_object():
mi = MultiIndex.from_product([["bar", "foo"], ["one", "two"]])
ser = Series(np.arange(4.0), index=mi, dtype=object)
res1 = ser.unstack()
assert (res1.dtypes == object).all()
res2 = ser.unstack(level=0)
assert (res2.dtypes == object).all()
def test_unstack():
index = MultiIndex(
levels=[["bar", "foo"], ["one", "three", "two"]],
codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
)
s = Series(np.arange(4.0), index=index)
unstacked = s.unstack()
expected = DataFrame(
[[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
index=["bar", "foo"],
columns=["one", "three", "two"],
)
tm.assert_frame_equal(unstacked, expected)
unstacked = s.unstack(level=0)
tm.assert_frame_equal(unstacked, expected.T)
index = MultiIndex(
levels=[["bar"], ["one", "two", "three"], [0, 1]],
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
)
s = Series(np.random.default_rng(2).standard_normal(6), index=index)
exp_index = MultiIndex(
levels=[["one", "two", "three"], [0, 1]],
codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
)
expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
unstacked = s.unstack(0).sort_index()
tm.assert_frame_equal(unstacked, expected)
# GH5873
idx = MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
ts = Series([1, 2], index=idx)
left = ts.unstack()
right = DataFrame(
[[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
)
tm.assert_frame_equal(left, right)
idx = MultiIndex.from_arrays(
[
["cat", "cat", "cat", "dog", "dog"],
["a", "a", "b", "a", "b"],
[1, 2, 1, 1, np.nan],
]
)
ts = Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
right = DataFrame(
[[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
columns=["cat", "dog"],
)
tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
right.index = MultiIndex.from_tuples(tpls)
tm.assert_frame_equal(ts.unstack(level=0), right)
def test_unstack_tuplename_in_multiindex():
# GH 19966
idx = MultiIndex.from_product(
[["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
)
ser = Series(1, index=idx)
result = ser.unstack(("A", "a"))
expected = DataFrame(
[[1, 1, 1], [1, 1, 1], [1, 1, 1]],
columns=MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]),
index=Index([1, 2, 3], name=("B", "b")),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"unstack_idx, expected_values, expected_index, expected_columns",
[
(
("A", "a"),
[[1, 1], [1, 1], [1, 1], [1, 1]],
MultiIndex.from_tuples([(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]),
MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
),
(
(("A", "a"), "B"),
[[1, 1, 1, 1], [1, 1, 1, 1]],
Index([3, 4], name="C"),
MultiIndex.from_tuples(
[("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
),
),
],
)
def test_unstack_mixed_type_name_in_multiindex(
unstack_idx, expected_values, expected_index, expected_columns
):
# GH 19966
idx = MultiIndex.from_product(
[["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
)
ser = Series(1, index=idx)
result = ser.unstack(unstack_idx)
expected = DataFrame(
expected_values, columns=expected_columns, index=expected_index
)
tm.assert_frame_equal(result, expected)
def test_unstack_multi_index_categorical_values():
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
mi = df.stack(future_stack=True).index.rename(["major", "minor"])
ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")
result = ser.unstack()
dti = ser.index.levels[0]
c = pd.Categorical(["foo"] * len(dti))
expected = DataFrame(
{"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
columns=Index(list("ABCD"), name="minor"),
index=dti.rename("major"),
)
tm.assert_frame_equal(result, expected)
def test_unstack_mixed_level_names():
# GH#48763
arrays = [["a", "a"], [1, 2], ["red", "blue"]]
idx = MultiIndex.from_arrays(arrays, names=("x", 0, "y"))
ser = Series([1, 2], index=idx)
result = ser.unstack("x")
expected = DataFrame(
[[1], [2]],
columns=Index(["a"], name="x"),
index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]),
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,139 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
CategoricalDtype,
DataFrame,
NaT,
Series,
Timestamp,
)
import pandas._testing as tm
class TestUpdate:
def test_update(self, using_copy_on_write):
s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
s2 = Series([np.nan, 3.5, np.nan, 5.0])
s.update(s2)
expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
tm.assert_series_equal(s, expected)
# GH 3217
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
df["c"] = np.nan
# Cast to object to avoid upcast when setting "foo"
df["c"] = df["c"].astype(object)
df_orig = df.copy()
if using_copy_on_write:
with tm.raises_chained_assignment_error():
df["c"].update(Series(["foo"], index=[0]))
expected = df_orig
else:
with tm.assert_produces_warning(FutureWarning, match="inplace method"):
df["c"].update(Series(["foo"], index=[0]))
expected = DataFrame(
[[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
)
expected["c"] = expected["c"].astype(object)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"other, dtype, expected, warn",
[
# other is int
([61, 63], "int32", Series([10, 61, 12], dtype="int32"), None),
([61, 63], "int64", Series([10, 61, 12]), None),
([61, 63], float, Series([10.0, 61.0, 12.0]), None),
([61, 63], object, Series([10, 61, 12], dtype=object), None),
# other is float, but can be cast to int
([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32"), None),
([61.0, 63.0], "int64", Series([10, 61, 12]), None),
([61.0, 63.0], float, Series([10.0, 61.0, 12.0]), None),
([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object), None),
# others is float, cannot be cast to int
([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0]), FutureWarning),
([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0]), FutureWarning),
([61.1, 63.1], float, Series([10.0, 61.1, 12.0]), None),
([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object), None),
# other is object, cannot be cast
([(61,), (63,)], "int32", Series([10, (61,), 12]), FutureWarning),
([(61,), (63,)], "int64", Series([10, (61,), 12]), FutureWarning),
([(61,), (63,)], float, Series([10.0, (61,), 12.0]), FutureWarning),
([(61,), (63,)], object, Series([10, (61,), 12]), None),
],
)
def test_update_dtypes(self, other, dtype, expected, warn):
ser = Series([10, 11, 12], dtype=dtype)
other = Series(other, index=[1, 3])
with tm.assert_produces_warning(warn, match="item of incompatible dtype"):
ser.update(other)
tm.assert_series_equal(ser, expected)
@pytest.mark.parametrize(
"series, other, expected",
[
# update by key
(
Series({"a": 1, "b": 2, "c": 3, "d": 4}),
{"b": 5, "c": np.nan},
Series({"a": 1, "b": 5, "c": 3, "d": 4}),
),
# update by position
(Series([1, 2, 3, 4]), [np.nan, 5, 1], Series([1, 5, 1, 4])),
],
)
def test_update_from_non_series(self, series, other, expected):
# GH 33215
series.update(other)
tm.assert_series_equal(series, expected)
@pytest.mark.parametrize(
"data, other, expected, dtype",
[
(["a", None], [None, "b"], ["a", "b"], "string[python]"),
pytest.param(
["a", None],
[None, "b"],
["a", "b"],
"string[pyarrow]",
marks=td.skip_if_no("pyarrow"),
),
([1, None], [None, 2], [1, 2], "Int64"),
([True, None], [None, False], [True, False], "boolean"),
(
["a", None],
[None, "b"],
["a", "b"],
CategoricalDtype(categories=["a", "b"]),
),
(
[Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT],
[NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")],
[Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2,
"datetime64[ns, Europe/London]",
),
],
)
def test_update_extension_array_series(self, data, other, expected, dtype):
result = Series(data, dtype=dtype)
other = Series(other, dtype=dtype)
expected = Series(expected, dtype=dtype)
result.update(other)
tm.assert_series_equal(result, expected)
def test_update_with_categorical_type(self):
# GH 25744
dtype = CategoricalDtype(["a", "b", "c", "d"])
s1 = Series(["a", "b", "c"], index=[1, 2, 3], dtype=dtype)
s2 = Series(["b", "a"], index=[1, 2], dtype=dtype)
s1.update(s2)
result = s1
expected = Series(["b", "a", "c"], index=[1, 2, 3], dtype=dtype)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,271 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
CategoricalIndex,
Index,
Series,
)
import pandas._testing as tm
class TestSeriesValueCounts:
def test_value_counts_datetime(self, unit):
# most dtypes are tested in tests/base
values = [
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 10:00"),
pd.Timestamp("2011-01-01 11:00"),
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 11:00"),
]
exp_idx = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
name="xxx",
).as_unit(unit)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx").dt.as_unit(unit)
tm.assert_series_equal(ser.value_counts(), exp)
# check DatetimeIndex outputs the same result
idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit)
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_datetime_tz(self, unit):
values = [
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 10:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
]
exp_idx = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
tz="US/Eastern",
name="xxx",
).as_unit(unit)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx").dt.as_unit(unit)
tm.assert_series_equal(ser.value_counts(), exp)
idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit)
tm.assert_series_equal(idx.value_counts(), exp)
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_period(self):
values = [
pd.Period("2011-01", freq="M"),
pd.Period("2011-02", freq="M"),
pd.Period("2011-03", freq="M"),
pd.Period("2011-01", freq="M"),
pd.Period("2011-01", freq="M"),
pd.Period("2011-03", freq="M"),
]
exp_idx = pd.PeriodIndex(
["2011-01", "2011-03", "2011-02"], freq="M", name="xxx"
)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check DatetimeIndex outputs the same result
idx = pd.PeriodIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical_ordered(self):
# most dtypes are tested in tests/base
values = Categorical([1, 2, 3, 1, 1, 3], ordered=True)
exp_idx = CategoricalIndex(
[1, 3, 2], categories=[1, 2, 3], ordered=True, name="xxx"
)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check CategoricalIndex outputs the same result
idx = CategoricalIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical_not_ordered(self):
values = Categorical([1, 2, 3, 1, 1, 3], ordered=False)
exp_idx = CategoricalIndex(
[1, 3, 2], categories=[1, 2, 3], ordered=False, name="xxx"
)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check CategoricalIndex outputs the same result
idx = CategoricalIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical(self):
# GH#12835
cats = Categorical(list("abcccb"), categories=list("cabd"))
ser = Series(cats, name="xxx")
res = ser.value_counts(sort=False)
exp_index = CategoricalIndex(
list("cabd"), categories=cats.categories, name="xxx"
)
exp = Series([3, 1, 2, 0], name="count", index=exp_index)
tm.assert_series_equal(res, exp)
res = ser.value_counts(sort=True)
exp_index = CategoricalIndex(
list("cbad"), categories=cats.categories, name="xxx"
)
exp = Series([3, 2, 1, 0], name="count", index=exp_index)
tm.assert_series_equal(res, exp)
# check object dtype handles the Series.name as the same
# (tested in tests/base)
ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx")
res = ser.value_counts()
exp = Series([3, 2, 1], name="count", index=Index(["c", "b", "a"], name="xxx"))
tm.assert_series_equal(res, exp)
def test_value_counts_categorical_with_nan(self):
# see GH#9443
# sanity check
ser = Series(["a", "b", "a"], dtype="category")
exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count")
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
# same Series via two different constructions --> same behaviour
series = [
Series(["a", "b", None, "a", None, None], dtype="category"),
Series(
Categorical(["a", "b", None, "a", None, None], categories=["a", "b"])
),
]
for ser in series:
# None is a NaN value, so we exclude its count here
exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count")
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
# we don't exclude the count of None and sort by counts
exp = Series(
[3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"]), name="count"
)
res = ser.value_counts(dropna=False)
tm.assert_series_equal(res, exp)
# When we aren't sorting by counts, and np.nan isn't a
# category, it should be last.
exp = Series(
[2, 1, 3], index=CategoricalIndex(["a", "b", np.nan]), name="count"
)
res = ser.value_counts(dropna=False, sort=False)
tm.assert_series_equal(res, exp)
@pytest.mark.parametrize(
"ser, dropna, exp",
[
(
Series([False, True, True, pd.NA]),
False,
Series([2, 1, 1], index=[True, False, pd.NA], name="count"),
),
(
Series([False, True, True, pd.NA]),
True,
Series([2, 1], index=Index([True, False], dtype=object), name="count"),
),
(
Series(range(3), index=[True, False, np.nan]).index,
False,
Series([1, 1, 1], index=[True, False, np.nan], name="count"),
),
],
)
def test_value_counts_bool_with_nan(self, ser, dropna, exp):
# GH32146
out = ser.value_counts(dropna=dropna)
tm.assert_series_equal(out, exp)
@pytest.mark.parametrize(
"input_array,expected",
[
(
[1 + 1j, 1 + 1j, 1, 3j, 3j, 3j],
Series(
[3, 2, 1],
index=Index([3j, 1 + 1j, 1], dtype=np.complex128),
name="count",
),
),
(
np.array([1 + 1j, 1 + 1j, 1, 3j, 3j, 3j], dtype=np.complex64),
Series(
[3, 2, 1],
index=Index([3j, 1 + 1j, 1], dtype=np.complex64),
name="count",
),
),
],
)
def test_value_counts_complex_numbers(self, input_array, expected):
# GH 17927
result = Series(input_array).value_counts()
tm.assert_series_equal(result, expected)
def test_value_counts_masked(self):
# GH#54984
dtype = "Int64"
ser = Series([1, 2, None, 2, None, 3], dtype=dtype)
result = ser.value_counts(dropna=False)
expected = Series(
[2, 2, 1, 1],
index=Index([2, None, 1, 3], dtype=dtype),
dtype=dtype,
name="count",
)
tm.assert_series_equal(result, expected)
result = ser.value_counts(dropna=True)
expected = Series(
[2, 1, 1], index=Index([2, 1, 3], dtype=dtype), dtype=dtype, name="count"
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,29 @@
import numpy as np
import pytest
from pandas import (
IntervalIndex,
Series,
period_range,
)
import pandas._testing as tm
class TestValues:
@pytest.mark.parametrize(
"data",
[
period_range("2000", periods=4),
IntervalIndex.from_breaks([1, 2, 3, 4]),
],
)
def test_values_object_extension_dtypes(self, data):
# https://github.com/pandas-dev/pandas/issues/23995
result = Series(data).values
expected = np.array(data.astype(object))
tm.assert_numpy_array_equal(result, expected)
def test_values(self, datetime_series):
tm.assert_almost_equal(
datetime_series.values, list(datetime_series), check_dtype=False
)

View File

@ -0,0 +1,61 @@
import numpy as np
import pytest
from pandas import (
Index,
Series,
array,
date_range,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Series.view is deprecated and will be removed in a future version.:FutureWarning" # noqa: E501
)
class TestView:
def test_view_i8_to_datetimelike(self):
dti = date_range("2000", periods=4, tz="US/Central")
ser = Series(dti.asi8)
result = ser.view(dti.dtype)
tm.assert_datetime_array_equal(result._values, dti._data._with_freq(None))
pi = dti.tz_localize(None).to_period("D")
ser = Series(pi.asi8)
result = ser.view(pi.dtype)
tm.assert_period_array_equal(result._values, pi._data)
def test_view_tz(self):
# GH#24024
ser = Series(date_range("2000", periods=4, tz="US/Central"))
result = ser.view("i8")
expected = Series(
[
946706400000000000,
946792800000000000,
946879200000000000,
946965600000000000,
]
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"first", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"]
)
@pytest.mark.parametrize(
"second", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"]
)
@pytest.mark.parametrize("box", [Series, Index, array])
def test_view_between_datetimelike(self, first, second, box):
dti = date_range("2016-01-01", periods=3)
orig = box(dti)
obj = orig.view(first)
assert obj.dtype == first
tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8)
res = obj.view(second)
assert res.dtype == second
tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8)