done

2025-09-07 22:09:54 +02:00
parent e1b817252c
commit 2fc0d000b6
7796 changed files with 2159515 additions and 933 deletions
--- a/lib/python3.11/site-packages/pandas/tests/strings/init.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/init.py
@ -0,0 +1,23 @@
+import numpy as np
+
+import pandas as pd
+
+
+def is_object_or_nan_string_dtype(dtype):
+    """
+    Check if string-like dtype is following NaN semantics, i.e. is object
+    dtype or a NaN-variant of the StringDtype.
+
+    Parameters
+    ----------
+    dtype : np.dtype or dtype-like object
+        The dtype to inspect.
+
+    Returns
+    -------
+    bool
+        True for NumPy object dtype and for any dtype whose ``na_value``
+        is ``np.nan``. False otherwise, including for dtypes that have no
+        ``na_value`` attribute at all.
+    """
+    if isinstance(dtype, np.dtype) and dtype == "object":
+        return True
+    # getattr guards dtypes without ``na_value`` (e.g. non-object NumPy
+    # dtypes), which previously raised AttributeError instead of giving False.
+    return getattr(dtype, "na_value", None) is np.nan
+
+
+def _convert_na_value(ser, expected):
+    """
+    Align the missing-value marker of ``expected`` with the dtype of ``ser``.
+
+    ``expected`` is returned untouched when ``ser`` has object dtype.
+    Otherwise its missing entries are filled with ``np.nan`` when
+    ``ser.dtype.na_value`` is ``np.nan`` and with ``pd.NA`` when it is not.
+    """
+    dtype = ser.dtype
+    if dtype != object:
+        # GH#18463
+        fill_value = np.nan if dtype.na_value is np.nan else pd.NA
+        expected = expected.fillna(fill_value)
+    return expected
--- a/lib/python3.11/site-packages/pandas/tests/strings/conftest.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/conftest.py
@ -0,0 +1,132 @@
+import pytest
+
+from pandas import Series
+from pandas.core.strings.accessor import StringMethods
+
+# Each entry is ``(method_name, args, kwargs)``: the name of a public
+# ``StringMethods`` method plus sample arguments with which to call it.
+_any_string_method = [
+    ("cat", (), {"sep": ","}),
+    ("cat", (Series(list("zyx")),), {"sep": ",", "join": "left"}),
+    ("center", (10,), {}),
+    ("contains", ("a",), {}),
+    ("count", ("a",), {}),
+    ("decode", ("UTF-8",), {}),
+    ("encode", ("UTF-8",), {}),
+    ("endswith", ("a",), {}),
+    ("endswith", ((),), {}),
+    ("endswith", (("a",),), {}),
+    ("endswith", (("a", "b"),), {}),
+    ("endswith", (("a", "MISSING"),), {}),
+    ("endswith", ("a",), {"na": True}),
+    ("endswith", ("a",), {"na": False}),
+    ("extract", ("([a-z]*)",), {"expand": False}),
+    ("extract", ("([a-z]*)",), {"expand": True}),
+    ("extractall", ("([a-z]*)",), {}),
+    ("find", ("a",), {}),
+    ("findall", ("a",), {}),
+    ("get", (0,), {}),
+    # because "index" (and "rindex") fail intentionally
+    # if the string is not found, search only for empty string
+    ("index", ("",), {}),
+    ("join", (",",), {}),
+    ("ljust", (10,), {}),
+    ("match", ("a",), {}),
+    ("fullmatch", ("a",), {}),
+    ("normalize", ("NFC",), {}),
+    ("pad", (10,), {}),
+    ("partition", (" ",), {"expand": False}),
+    ("partition", (" ",), {"expand": True}),
+    ("repeat", (3,), {}),
+    ("replace", ("a", "z"), {}),
+    ("rfind", ("a",), {}),
+    ("rindex", ("",), {}),
+    ("rjust", (10,), {}),
+    ("rpartition", (" ",), {"expand": False}),
+    ("rpartition", (" ",), {"expand": True}),
+    ("slice", (0, 1), {}),
+    ("slice_replace", (0, 1, "z"), {}),
+    ("split", (" ",), {"expand": False}),
+    ("split", (" ",), {"expand": True}),
+    ("startswith", ("a",), {}),
+    ("startswith", (("a",),), {}),
+    ("startswith", (("a", "b"),), {}),
+    ("startswith", (("a", "MISSING"),), {}),
+    ("startswith", ((),), {}),
+    ("startswith", ("a",), {"na": True}),
+    ("startswith", ("a",), {"na": False}),
+    ("removeprefix", ("a",), {}),
+    ("removesuffix", ("a",), {}),
+    # translating unicode points of "a" to "d"
+    ("translate", ({97: 100},), {}),
+    ("wrap", (2,), {}),
+    ("zfill", (10,), {}),
+] + [
+    # methods without positional arguments: pair each name with an empty
+    # tuple and its own empty dict (no padding length to keep in sync, and
+    # no single dict object shared between the entries)
+    (method_name, (), {})
+    for method_name in [
+        "capitalize",
+        "cat",
+        "get_dummies",
+        "isalnum",
+        "isalpha",
+        "isdecimal",
+        "isdigit",
+        "islower",
+        "isnumeric",
+        "isspace",
+        "istitle",
+        "isupper",
+        "len",
+        "lower",
+        "lstrip",
+        "partition",
+        "rpartition",
+        "rsplit",
+        "rstrip",
+        "slice",
+        "slice_replace",
+        "split",
+        "strip",
+        "swapcase",
+        "title",
+        "upper",
+        "casefold",
+    ]
+]
+ids, _, _ = zip(*_any_string_method)  # use method name as fixture-id
+missing_methods = {f for f in dir(StringMethods) if not f.startswith("_")} - set(ids)
+
+# test that the above list captures all methods of StringMethods
+assert not missing_methods, (
+    f"_any_string_method lacks entries for: {sorted(missing_methods)}"
+)
+
+
+@pytest.fixture(params=_any_string_method, ids=ids)
+def any_string_method(request):
+    """
+    Parametrized fixture covering the public methods of `StringMethods`
+
+    Each parameter is one entry of ``_any_string_method``: a method name
+    bundled with sample arguments sufficient to call that method.
+
+    Returns
+    -------
+    method_name : str
+        The name of the method in `StringMethods`
+    args : tuple
+        Sample values for the positional arguments
+    kwargs : dict
+        Sample values for the keyword arguments
+
+    Examples
+    --------
+    >>> def test_something(any_string_method):
+    ...     s = Series(['a', 'b', np.nan, 'd'])
+    ...
+    ...     method_name, args, kwargs = any_string_method
+    ...     method = getattr(s.str, method_name)
+    ...     # will not raise
+    ...     method(*args, **kwargs)
+    """
+    method_spec = request.param
+    return method_spec
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_api.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_api.py
@ -0,0 +1,205 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    CategoricalDtype,
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    _testing as tm,
+    option_context,
+)
+from pandas.core.strings.accessor import StringMethods
+
+# subset of the full set from pandas/conftest.py
+# Each entry is an (inferred dtype name, sample values) pair. Note that
+# "empty" appears twice: once with all-NaN values and once with no values.
+_any_allowed_skipna_inferred_dtype = [
+    ("string", ["a", np.nan, "c"]),
+    ("bytes", [b"a", np.nan, b"c"]),
+    ("empty", [np.nan, np.nan, np.nan]),
+    ("empty", []),
+    ("mixed-integer", ["a", np.nan, 2]),
+]
+ids, _ = zip(*_any_allowed_skipna_inferred_dtype)  # use inferred type as id
+
+
+@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)
+def any_allowed_skipna_inferred_dtype(request):
+    """
+    Fixture for (inferred) dtypes allowed in StringMethods.__init__
+
+    The parameters come from ``_any_allowed_skipna_inferred_dtype``, which
+    is a subset of the full set in pandas/conftest.py.
+
+    The covered (inferred) types are:
+    * 'string'
+    * 'bytes'
+    * 'empty' (once as all-NaN values, once as zero-length values)
+    * 'mixed-integer'
+
+    Returns
+    -------
+    inferred_dtype : str
+        The string for the inferred dtype from _libs.lib.infer_dtype
+    values : np.ndarray
+        An array of object dtype that will be inferred to have
+        `inferred_dtype`
+
+    Examples
+    --------
+    >>> from pandas._libs import lib
+    >>>
+    >>> def test_something(any_allowed_skipna_inferred_dtype):
+    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
+    ...     # will pass
+    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
+    ...
+    ...     # constructor for .str-accessor will also pass
+    ...     Series(values).str
+    """
+    inferred_dtype, values = request.param
+    values = np.array(values, dtype=object)  # object dtype to avoid casting
+
+    # correctness of inference tested in tests/dtypes/test_inference.py
+    return inferred_dtype, values
+
+
+def test_api(any_string_dtype):
+    """``.str`` is ``StringMethods`` on the class and yields one on instances."""
+    # GH 6106, GH 9322
+    assert Series.str is StringMethods
+
+    ser = Series([""], dtype=any_string_dtype)
+    accessor = ser.str
+    assert isinstance(accessor, StringMethods)
+
+
+def test_api_mi_raises():
+    """Accessing ``.str`` on a MultiIndex raises AttributeError (GH 23679)."""
+    # GH 23679
+    multi = MultiIndex.from_arrays([["a", "b", "c"]])
+    with pytest.raises(
+        AttributeError,
+        match="Can only use .str accessor with Index, not MultiIndex",
+    ):
+        multi.str
+    # hasattr must agree with the AttributeError raised above
+    assert not hasattr(multi, "str")
+
+
+@pytest.mark.parametrize("dtype", [object, "category"])
+def test_api_per_dtype(index_or_series, dtype, any_skipna_inferred_dtype):
+    """
+    ``.str`` is available exactly for the inferred dtypes listed below.
+
+    For every other inferred dtype, accessing ``.str`` must raise
+    AttributeError and ``hasattr`` must report the accessor as absent.
+    """
+    # one instance of parametrized fixture
+    inferred_dtype, values = any_skipna_inferred_dtype
+
+    # explicit dtype to avoid casting
+    obj = index_or_series(values, dtype=dtype)
+
+    types_passing_constructor = {
+        "string",
+        "unicode",
+        "empty",
+        "bytes",
+        "mixed",
+        "mixed-integer",
+    }
+    if inferred_dtype not in types_passing_constructor:
+        # GH 9184, GH 23011, GH 23163
+        msg = "Can only use .str accessor with string values.*"
+        with pytest.raises(AttributeError, match=msg):
+            obj.str
+        assert not hasattr(obj, "str")
+        return
+
+    # GH 6106
+    assert isinstance(obj.str, StringMethods)
+
+
+@pytest.mark.parametrize("dtype", [object, "category"])
+def test_api_per_method(
+    index_or_series,
+    dtype,
+    any_allowed_skipna_inferred_dtype,
+    any_string_method,
+    request,
+    using_infer_string,
+):
+    """
+    Call one string method on one (box, dtype, inferred dtype) combination.
+
+    The call must succeed when the inferred dtype is in the allow-list
+    computed below for that method, and must raise TypeError otherwise.
+    A few combinations are marked xfail before the call is made.
+    """
+    # this test does not check correctness of the different methods,
+    # just that the methods work on the specified (inferred) dtypes,
+    # and raise on all others
+    box = index_or_series
+
+    # one instance of each parametrized fixture
+    inferred_dtype, values = any_allowed_skipna_inferred_dtype
+    method_name, args, kwargs = any_string_method
+
+    # ``reason`` stays None unless one of the branches below matches;
+    # ``raises`` is only assigned together with ``reason``.
+    reason = None
+    if box is Index and values.size == 0:
+        # zero-length Index: if no inner branch matches, nothing is marked
+        # (the ``elif`` further down is not evaluated in that case)
+        if method_name in ["partition", "rpartition"] and kwargs.get("expand", True):
+            raises = TypeError
+            reason = "Method cannot deal with empty Index"
+        elif method_name == "split" and kwargs.get("expand", None):
+            raises = TypeError
+            reason = "Split fails on empty Series when expand=True"
+        elif method_name == "get_dummies":
+            raises = ValueError
+            reason = "Need to fortify get_dummies corner cases"
+
+    elif (
+        box is Index
+        and inferred_dtype == "empty"
+        and dtype == object
+        and method_name == "get_dummies"
+    ):
+        # non-zero-length Index whose values are inferred as "empty"
+        raises = ValueError
+        reason = "Need to fortify get_dummies corner cases"
+
+    if reason is not None:
+        # apply the xfail marker to this test item at run time
+        mark = pytest.mark.xfail(raises=raises, reason=reason)
+        request.applymarker(mark)
+
+    t = box(values, dtype=dtype)  # explicit dtype to avoid casting
+    method = getattr(t.str, method_name)
+
+    if using_infer_string and dtype == "category":
+        string_allowed = method_name not in ["decode"]
+    else:
+        string_allowed = True
+    bytes_allowed = method_name in ["decode", "get", "len", "slice"]
+    # as of v0.23.4, all methods except 'cat' are very lenient with the
+    # allowed data types, just returning NaN for entries that error.
+    # This could be changed with an 'errors'-kwarg to the `str`-accessor,
+    # see discussion in GH 13877
+    mixed_allowed = method_name not in ["cat"]
+
+    # multiplying a list by a bool keeps it (True) or empties it (False),
+    # so each group only contributes when its flag is set
+    allowed_types = (
+        ["empty"]
+        + ["string", "unicode"] * string_allowed
+        + ["bytes"] * bytes_allowed
+        + ["mixed", "mixed-integer"] * mixed_allowed
+    )
+
+    if inferred_dtype in allowed_types:
+        # xref GH 23555, GH 23556
+        with option_context("future.no_silent_downcasting", True):
+            method(*args, **kwargs)  # works!
+    else:
+        # GH 23011, GH 23163
+        # the pattern accepts either of two messages, separated by "|"
+        msg = (
+            f"Cannot use .str.{method_name} with values of "
+            f"inferred dtype {repr(inferred_dtype)}."
+            "|a bytes-like object is required, not 'str'"
+        )
+        with pytest.raises(TypeError, match=msg):
+            method(*args, **kwargs)
+
+
+def test_api_for_categorical(any_string_method, any_string_dtype):
+    """
+    A string method gives the same result on categorical and object data.
+
+    The method is called through ``.str`` on a categorical Series with
+    object-dtype categories and on the same values cast to object.
+    """
+    # https://github.com/pandas-dev/pandas/issues/10661
+    base = Series(list("aabb"), dtype=any_string_dtype)
+    s = base + " " + base
+    as_category = s.astype("category")
+    object_categories = as_category.dtype.categories.astype("object")
+    c = as_category.astype(CategoricalDtype(object_categories))
+    assert isinstance(c.str, StringMethods)
+
+    method_name, args, kwargs = any_string_method
+
+    result = getattr(c.str, method_name)(*args, **kwargs)
+    expected = getattr(s.astype("object").str, method_name)(*args, **kwargs)
+
+    if isinstance(result, DataFrame):
+        tm.assert_frame_equal(result, expected)
+        return
+    if isinstance(result, Series):
+        tm.assert_series_equal(result, expected)
+        return
+    # str.cat(others=None) returns string, for example
+    assert result == expected
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_case_justify.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_case_justify.py
@ -0,0 +1,423 @@
+from datetime import datetime
+import operator
+
+import numpy as np
+import pytest
+
+from pandas import (
+    Series,
+    _testing as tm,
+)
+
+
+def test_title(any_string_dtype):
+    """``str.title`` title-cases each string and keeps the missing value."""
+    data = ["FOO", "BAR", np.nan, "Blah", "blurg"]
+    titled = ["Foo", "Bar", np.nan, "Blah", "Blurg"]
+
+    result = Series(data, dtype=any_string_dtype).str.title()
+    tm.assert_series_equal(result, Series(titled, dtype=any_string_dtype))
+
+
+def test_title_mixed_object():
+    """
+    ``str.title`` on mixed object data title-cases the strings only.
+
+    The bool, datetime, int and float entries are expected as NaN in the
+    result, while the NaN and None entries are expected unchanged.
+    """
+    s = Series(["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0])
+    result = s.str.title()
+    expected = Series(
+        ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan],
+        dtype=object,
+    )
+    # Series-specific assertion, as used by the other *_mixed_object tests
+    tm.assert_series_equal(result, expected)
+
+
+def test_lower_upper(any_string_dtype):
+    """``str.upper`` upper-cases, and ``str.lower`` on that restores the input."""
+    original = Series(["om", np.nan, "nom", "nom"], dtype=any_string_dtype)
+
+    uppercased = original.str.upper()
+    tm.assert_series_equal(
+        uppercased, Series(["OM", np.nan, "NOM", "NOM"], dtype=any_string_dtype)
+    )
+
+    # lower-casing the upper-cased values must give back the all-lowercase input
+    tm.assert_series_equal(uppercased.str.lower(), original)
+
+
+def test_lower_upper_mixed_object():
+    """``str.upper``/``str.lower`` on mixed object data change only the strings."""
+    s = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0])
+
+    # accessor method name -> expected values, checked in this order
+    expected_by_method = {
+        "upper": ["A", np.nan, "B", np.nan, np.nan, "FOO", None, np.nan, np.nan],
+        "lower": ["a", np.nan, "b", np.nan, np.nan, "foo", None, np.nan, np.nan],
+    }
+    for method_name, values in expected_by_method.items():
+        result = getattr(s.str, method_name)()
+        tm.assert_series_equal(result, Series(values, dtype=object))
+
+
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        (
+            ["FOO", "BAR", np.nan, "Blah", "blurg"],
+            ["Foo", "Bar", np.nan, "Blah", "Blurg"],
+        ),
+        (["a", "b", "c"], ["A", "B", "C"]),
+        (["a b", "a bc. de"], ["A b", "A bc. de"]),
+    ],
+)
+def test_capitalize(data, expected, any_string_dtype):
+    """``str.capitalize`` maps each ``data`` list to its ``expected`` list."""
+    result = Series(data, dtype=any_string_dtype).str.capitalize()
+    tm.assert_series_equal(result, Series(expected, dtype=any_string_dtype))
+
+
+def test_capitalize_mixed_object():
+    """``str.capitalize`` on mixed object data changes only the strings."""
+    mixed = ["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0]
+    capitalized = ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", None, np.nan, np.nan]
+
+    result = Series(mixed).str.capitalize()
+    tm.assert_series_equal(result, Series(capitalized, dtype=object))
+
+
+def test_swapcase(any_string_dtype):
+    """``str.swapcase`` inverts the case of every letter and keeps NaN."""
+    data = ["FOO", "BAR", np.nan, "Blah", "blurg"]
+    swapped = ["foo", "bar", np.nan, "bLAH", "BLURG"]
+
+    result = Series(data, dtype=any_string_dtype).str.swapcase()
+    tm.assert_series_equal(result, Series(swapped, dtype=any_string_dtype))
+
+
+def test_swapcase_mixed_object():
+    """``str.swapcase`` on mixed object data changes only the strings."""
+    mixed = ["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0]
+    swapped = ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", None, np.nan, np.nan]
+
+    result = Series(mixed).str.swapcase()
+    tm.assert_series_equal(result, Series(swapped, dtype=object))
+
+
+def test_casefold():
+    """``str.casefold`` folds "ß" to "ss" and keeps the missing value."""
+    # GH25405
+    folded = Series(["ss", np.nan, "case", "ssd"])
+    raw = Series(["ß", np.nan, "case", "ßd"])
+
+    tm.assert_series_equal(raw.str.casefold(), folded)
+
+
+def test_casemethods(any_string_dtype):
+    """Each case method of ``.str`` agrees with the builtin ``str`` method."""
+    values = ["aaa", "bbb", "CCC", "Dddd", "eEEE"]
+    s = Series(values, dtype=any_string_dtype)
+
+    # the accessor and the builtin share these method names
+    for name in ("lower", "upper", "title", "capitalize", "swapcase"):
+        via_accessor = getattr(s.str, name)().tolist()
+        via_builtin = [getattr(v, name)() for v in values]
+        assert via_accessor == via_builtin
+
+
+def test_pad(any_string_dtype):
+    """``str.pad(5, side=...)`` pads with spaces on the requested side(s)."""
+    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
+
+    # side -> expected values; "eeeeee" already exceeds the width of 5
+    expected_by_side = {
+        "left": ["    a", "    b", np.nan, "    c", np.nan, "eeeeee"],
+        "right": ["a    ", "b    ", np.nan, "c    ", np.nan, "eeeeee"],
+        "both": ["  a  ", "  b  ", np.nan, "  c  ", np.nan, "eeeeee"],
+    }
+    for side, values in expected_by_side.items():
+        result = s.str.pad(5, side=side)
+        tm.assert_series_equal(result, Series(values, dtype=any_string_dtype))
+
+
+def test_pad_mixed_object():
+    """``str.pad`` on mixed object data pads only the strings."""
+    s = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0])
+
+    # side -> expected values for a width of 5
+    expected_by_side = {
+        "left": ["    a", np.nan, "    b", np.nan, np.nan, "   ee", None, np.nan, np.nan],
+        "right": ["a    ", np.nan, "b    ", np.nan, np.nan, "ee   ", None, np.nan, np.nan],
+        "both": ["  a  ", np.nan, "  b  ", np.nan, np.nan, "  ee ", None, np.nan, np.nan],
+    }
+    for side, values in expected_by_side.items():
+        result = s.str.pad(5, side=side)
+        tm.assert_series_equal(result, Series(values, dtype=object))
+
+
+def test_pad_fillchar(any_string_dtype):
+    """``str.pad`` fills with the given ``fillchar`` instead of spaces."""
+    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
+
+    # side -> expected values for width 5 and fillchar "X"
+    expected_by_side = {
+        "left": ["XXXXa", "XXXXb", np.nan, "XXXXc", np.nan, "eeeeee"],
+        "right": ["aXXXX", "bXXXX", np.nan, "cXXXX", np.nan, "eeeeee"],
+        "both": ["XXaXX", "XXbXX", np.nan, "XXcXX", np.nan, "eeeeee"],
+    }
+    for side, values in expected_by_side.items():
+        result = s.str.pad(5, side=side, fillchar="X")
+        tm.assert_series_equal(result, Series(values, dtype=any_string_dtype))
+
+
+def test_pad_fillchar_bad_arg_raises(any_string_dtype):
+    """``str.pad`` raises TypeError for a two-character or integer fillchar."""
+    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
+
+    # (invalid fillchar, expected error message)
+    bad_fillchars = [
+        ("XY", "fillchar must be a character, not str"),
+        (5, "fillchar must be a character, not int"),
+    ]
+    for fillchar, msg in bad_fillchars:
+        with pytest.raises(TypeError, match=msg):
+            s.str.pad(5, fillchar=fillchar)
+
+
+@pytest.mark.parametrize("method_name", ["center", "ljust", "rjust", "zfill", "pad"])
+def test_pad_width_bad_arg_raises(method_name, any_string_dtype):
+    """Each padding method raises TypeError when the width is a string."""
+    # see gh-13598
+    s = Series(["1", "22", "a", "bb"], dtype=any_string_dtype)
+
+    with pytest.raises(TypeError, match="width must be of integer type, not str"):
+        # look the method up on the accessor and call it with a str width
+        getattr(s.str, method_name)("f")
+
+
+def test_center_ljust_rjust(any_string_dtype):
+    """``str.center``/``ljust``/``rjust`` pad with spaces to a width of 5."""
+    s = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"], dtype=any_string_dtype)
+
+    # accessor method name -> expected values, checked in this order
+    expected_by_method = {
+        "center": ["  a  ", "  b  ", np.nan, "  c  ", np.nan, "eeeeee"],
+        "ljust": ["a    ", "b    ", np.nan, "c    ", np.nan, "eeeeee"],
+        "rjust": ["    a", "    b", np.nan, "    c", np.nan, "eeeeee"],
+    }
+    for method_name, values in expected_by_method.items():
+        result = getattr(s.str, method_name)(5)
+        tm.assert_series_equal(result, Series(values, dtype=any_string_dtype))
+
+
+def test_center_ljust_rjust_mixed_object():
+    """``str.center``/``ljust``/``rjust`` on mixed object data pad only strings."""
+    s = Series(["a", np.nan, "b", True, datetime.today(), "c", "eee", None, 1, 2.0])
+
+    # accessor method name -> the five padded strings, in input order
+    padded_by_method = {
+        "center": ("  a  ", "  b  ", "  c  ", " eee "),
+        "ljust": ("a    ", "b    ", "c    ", "eee  "),
+        "rjust": ("    a", "    b", "    c", "  eee"),
+    }
+    for method_name, (a, b, c, eee) in padded_by_method.items():
+        result = getattr(s.str, method_name)(5)
+        # non-string positions are the same for all three methods
+        expected = Series(
+            [a, np.nan, b, np.nan, np.nan, c, eee, None, np.nan, np.nan],
+            dtype=object,
+        )
+        tm.assert_series_equal(result, expected)
+
+
+def test_center_ljust_rjust_fillchar(any_string_dtype):
+    """
+    ``str.center``/``ljust``/``rjust`` honour ``fillchar``.
+
+    Each result is compared with literal expected values and with the
+    builtin ``str`` method of the same name applied element by element.
+    """
+    # GH#54533, GH#54792
+    s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
+
+    # accessor method name -> expected values for width 5 and fillchar "X"
+    expected_by_method = {
+        "center": ["XXaXX", "XXbbX", "Xcccc", "ddddd", "eeeeee"],
+        "ljust": ["aXXXX", "bbXXX", "ccccX", "ddddd", "eeeeee"],
+        "rjust": ["XXXXa", "XXXbb", "Xcccc", "ddddd", "eeeeee"],
+    }
+    for method_name, values in expected_by_method.items():
+        result = getattr(s.str, method_name)(5, fillchar="X")
+        tm.assert_series_equal(result, Series(values, dtype=any_string_dtype))
+
+        # cross-check against the builtin str method
+        builtin = np.array(
+            [getattr(v, method_name)(5, "X") for v in np.array(s)], dtype=np.object_
+        )
+        tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), builtin)
+
+
+def test_center_ljust_rjust_fillchar_bad_arg_raises(any_string_dtype):
+    """``center``/``ljust``/``rjust`` raise TypeError for an invalid fillchar."""
+    s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
+
+    # If fillchar is not a character, normal str raises TypeError
+    # 'aaa'.ljust(5, 'XY')
+    # TypeError: must be char, not str
+    template = "fillchar must be a character, not {dtype}"
+
+    # (invalid fillchar, type name expected in the message)
+    bad_fillchars = [("XY", "str"), (1, "int")]
+    for fillchar, type_name in bad_fillchars:
+        msg = template.format(dtype=type_name)
+        for method_name in ("center", "ljust", "rjust"):
+            with pytest.raises(TypeError, match=msg):
+                getattr(s.str, method_name)(5, fillchar=fillchar)
+
+
+def test_zfill(any_string_dtype):
+    """
+    ``str.zfill`` left-pads with zeros up to the requested width.
+
+    Widths 5 and 3 are compared with literal expected values and with the
+    builtin ``str.zfill``; a final case checks that NaN stays NaN.
+    """
+    s = Series(["1", "22", "aaa", "333", "45678"], dtype=any_string_dtype)
+
+    # width -> expected values, checked in this order
+    expected_by_width = {
+        5: ["00001", "00022", "00aaa", "00333", "45678"],
+        3: ["001", "022", "aaa", "333", "45678"],
+    }
+    for width, values in expected_by_width.items():
+        result = s.str.zfill(width)
+        tm.assert_series_equal(result, Series(values, dtype=any_string_dtype))
+
+        # cross-check against the builtin str.zfill
+        builtin = np.array([v.zfill(width) for v in np.array(s)], dtype=np.object_)
+        tm.assert_numpy_array_equal(np.array(result, dtype=np.object_), builtin)
+
+    with_nan = Series(["1", np.nan, "aaa", np.nan, "45678"], dtype=any_string_dtype)
+    expected = Series(
+        ["00001", np.nan, "00aaa", np.nan, "45678"], dtype=any_string_dtype
+    )
+    tm.assert_series_equal(with_nan.str.zfill(5), expected)
+
+
+def test_wrap(any_string_dtype):
+    """``str.wrap(12, break_long_words=True)`` wraps each input as listed."""
+    # (input, expected) pairs for a wrap width of 12
+    cases = [
+        # two words: shorter than, equal to and longer than the width
+        ("hello world", "hello world"),
+        ("hello world!", "hello world!"),
+        ("hello world!!", "hello\nworld!!"),
+        # one word: shorter than, equal to and longer than the width
+        ("abcdefabcde", "abcdefabcde"),
+        ("abcdefabcdef", "abcdefabcdef"),
+        ("abcdefabcdefa", "abcdefabcdef\na"),
+        # multiple tokens with trailing whitespace, equal to the width
+        ("ab ab ab ab ", "ab ab ab ab"),
+        # multiple tokens, longer than the width
+        ("ab ab ab ab a", "ab ab ab ab\na"),
+        # a single tab character
+        ("\t", ""),
+    ]
+    inputs = [given for given, _ in cases]
+    wrapped = [want for _, want in cases]
+
+    s = Series(inputs, dtype=any_string_dtype)
+    expected = Series(wrapped, dtype=any_string_dtype)
+
+    result = s.str.wrap(12, break_long_words=True)
+    tm.assert_series_equal(result, expected)
+
+
+def test_wrap_unicode(any_string_dtype):
+    """``str.wrap(6)`` handles surrounding whitespace, NaN and non-ASCII text."""
+    # test with pre and post whitespace (non-unicode), NaN, and non-ascii Unicode
+    data = ["  pre  ", np.nan, "\xac\u20ac\U00008000 abadcafe"]
+    wrapped = ["  pre", np.nan, "\xac\u20ac\U00008000 ab\nadcafe"]
+
+    result = Series(data, dtype=any_string_dtype).str.wrap(6)
+    tm.assert_series_equal(result, Series(wrapped, dtype=any_string_dtype))
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_cat.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_cat.py
@ -0,0 +1,427 @@
+import re
+
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    _testing as tm,
+    concat,
+    option_context,
+)
+
+
+@pytest.mark.parametrize("other", [None, Series, Index])
+def test_str_cat_name(index_or_series, other):
+    """The caller's name survives ``str.cat`` (GH 21053)."""
+    values = ["a", "b"]
+    # ``other`` is either None or a constructor; fall back to the plain list
+    others = other(values) if other else values
+    caller = index_or_series(values, name="name")
+    assert caller.str.cat(others, sep=",").name == "name"
+
+
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
+def test_str_cat(index_or_series, infer_string):
+    """Exercise ``str.cat`` from a Series/Index against an ndarray and a list."""
+    with option_context("future.infer_string", infer_string):
+        caller = index_or_series(["a", "a", "b", "b", "c", np.nan])
+
+        # without ``others`` the caller is joined into a single string
+        assert caller.str.cat() == "aabbc"
+        assert caller.str.cat(na_rep="-") == "aabbc-"
+        assert caller.str.cat(sep="_", na_rep="NA") == "a_a_b_b_c_NA"
+
+        target = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object)
+        expected = index_or_series(["aa", "a-", "bb", "bd", "cfoo", "--"])
+
+        # Series/Index with array, then with the same data as a list
+        for others in (target, list(target)):
+            tm.assert_equal(caller.str.cat(others, na_rep="-"), expected)
+
+        # errors for incorrect lengths
+        rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
+        too_short = Series(["1", "2", "3"])
+
+        for bad in (too_short.values, list(too_short)):
+            with pytest.raises(ValueError, match=rgx):
+                caller.str.cat(bad)
+
+
+def test_str_cat_raises_intuitive_error(index_or_series):
+    """Passing a lone string as ``others`` suggests using ``sep`` (GH 11334)."""
+    caller = index_or_series(["a", "b", "c", "d"])
+    message = "Did you mean to supply a `sep` keyword?"
+    for separator in ("|", "    "):
+        with pytest.raises(ValueError, match=message):
+            caller.str.cat(separator)
+
+
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
+@pytest.mark.parametrize("sep", ["", None])
+@pytest.mark.parametrize("dtype_target", ["object", "category"])
+@pytest.mark.parametrize("dtype_caller", ["object", "category"])
+def test_str_cat_categorical(
+    index_or_series, dtype_caller, dtype_target, sep, infer_string
+):
+    """
+    Check ``str.cat`` for every object/category pairing of caller and target.
+
+    The caller is an Index, or a Series indexed by its own values. The target
+    is passed as raw values, as a Series with a matching index, and finally as
+    a Series with a different index.
+    """
+    box = index_or_series
+
+    with option_context("future.infer_string", infer_string):
+        s = Index(["a", "a", "b", "a"], dtype=dtype_caller)
+        s = s if box == Index else Series(s, index=s, dtype=s.dtype)
+        t = Index(["b", "a", "b", "c"], dtype=dtype_target)
+
+        # object dtype is pinned only for an object caller; otherwise the
+        # dtype of the expected result is left to inference (dtype=None)
+        expected = Index(
+            ["ab", "aa", "bb", "ac"], dtype=object if dtype_caller == "object" else None
+        )
+        expected = (
+            expected
+            if box == Index
+            else Series(
+                expected, index=Index(s, dtype=dtype_caller), dtype=expected.dtype
+            )
+        )
+
+        # Series/Index with unaligned Index -> t.values
+        result = s.str.cat(t.values, sep=sep)
+        tm.assert_equal(result, expected)
+
+        # Series/Index with Series having matching Index
+        t = Series(t.values, index=Index(s, dtype=dtype_caller))
+        result = s.str.cat(t, sep=sep)
+        tm.assert_equal(result, expected)
+
+        # Series/Index with Series.values
+        result = s.str.cat(t.values, sep=sep)
+        tm.assert_equal(result, expected)
+
+        # Series/Index with Series having different Index
+        # t is now labelled by its own values; five rows are expected although
+        # the caller has four -- presumably because aligning on the duplicated
+        # labels "a" and "b" expands the result
+        t = Series(t.values, index=t.values)
+        expected = Index(
+            ["aa", "aa", "bb", "bb", "aa"],
+            dtype=object if dtype_caller == "object" else None,
+        )
+        # dtype of the expected index: object, or the dtype of the caller's
+        # categories when the caller is categorical
+        dtype = object if dtype_caller == "object" else s.dtype.categories.dtype
+        expected = (
+            expected
+            if box == Index
+            else Series(
+                expected,
+                index=Index(expected.str[:1], dtype=dtype),
+                dtype=expected.dtype,
+            )
+        )
+
+        result = s.str.cat(t, sep=sep)
+        tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [[1, 2, 3], [0.1, 0.2, 0.3], [1, 2, "b"]],
+    ids=["integers", "floats", "mixed"],
+)
+# without dtype=object, np.array would cast [1, 2, 'b'] to ['1', '2', 'b']
+@pytest.mark.parametrize(
+    "box",
+    [Series, Index, list, lambda x: np.array(x, dtype=object)],
+    ids=["Series", "Index", "list", "np.array"],
+)
+def test_str_cat_wrong_dtype_raises(box, data):
+    """``others`` holding non-string values raise a TypeError (GH 22722)."""
+    caller = Series(["a", "b", "c"])
+    others = box(data)
+
+    # need to use outer and na_rep, as otherwise Index would not raise
+    with pytest.raises(
+        TypeError, match="Concatenation requires list-likes containing only strings.*"
+    ):
+        caller.str.cat(others, join="outer", na_rep="-")
+
+
+def test_str_cat_mixed_inputs(index_or_series):
+    """
+    Check ``str.cat`` with DataFrame, 2-D ndarray and list ``others``.
+
+    The first half asserts concatenation results for aligned and re-labelled
+    inputs; the second half asserts the ValueError raised for length
+    mismatches and the TypeError raised for unsupported ``others``.
+    """
+    box = index_or_series
+    s = Index(["a", "b", "c", "d"])
+    s = s if box == Index else Series(s, index=s)
+
+    # t and d are both labelled with the values of s
+    t = Series(["A", "B", "C", "D"], index=s.values)
+    d = concat([t, Series(s, index=s)], axis=1)
+
+    expected = Index(["aAa", "bBb", "cCc", "dDd"])
+    expected = expected if box == Index else Series(expected.values, index=s.values)
+
+    # Series/Index with DataFrame
+    result = s.str.cat(d)
+    tm.assert_equal(result, expected)
+
+    # Series/Index with two-dimensional ndarray
+    result = s.str.cat(d.values)
+    tm.assert_equal(result, expected)
+
+    # Series/Index with list of Series
+    result = s.str.cat([t, s])
+    tm.assert_equal(result, expected)
+
+    # Series/Index with mixed list of Series/array
+    result = s.str.cat([t, s.values])
+    tm.assert_equal(result, expected)
+
+    # Series/Index with list of Series; different indexes
+    # t is relabelled in place, so its labels no longer match its position in s
+    t.index = ["b", "c", "d", "a"]
+    expected = box(["aDa", "bAb", "cBc", "dCd"])
+    expected = expected if box == Index else Series(expected.values, index=s.values)
+    result = s.str.cat([t, s])
+    tm.assert_equal(result, expected)
+
+    # Series/Index with mixed list; different index
+    result = s.str.cat([t, s.values])
+    tm.assert_equal(result, expected)
+
+    # Series/Index with DataFrame; different indexes
+    # d is relabelled in place in the same way as t above
+    d.index = ["b", "c", "d", "a"]
+    expected = box(["aDd", "bAa", "cBb", "dCc"])
+    expected = expected if box == Index else Series(expected.values, index=s.values)
+    result = s.str.cat(d)
+    tm.assert_equal(result, expected)
+
+    # errors for incorrect lengths
+    rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
+    z = Series(["1", "2", "3"])
+    e = concat([z, z], axis=1)
+
+    # two-dimensional ndarray
+    with pytest.raises(ValueError, match=rgx):
+        s.str.cat(e.values)
+
+    # list of list-likes
+    with pytest.raises(ValueError, match=rgx):
+        s.str.cat([z.values, s.values])
+
+    # mixed list of Series/list-like
+    with pytest.raises(ValueError, match=rgx):
+        s.str.cat([z.values, s])
+
+    # errors for incorrect arguments in list-like
+    rgx = "others must be Series, Index, DataFrame,.*"
+    # make sure None/NaN do not crash checks in _get_series_list
+    u = Series(["a", np.nan, "c", None])
+
+    # mix of string and Series
+    with pytest.raises(TypeError, match=rgx):
+        s.str.cat([u, "u"])
+
+    # DataFrame in list
+    with pytest.raises(TypeError, match=rgx):
+        s.str.cat([u, d])
+
+    # 2-dim ndarray in list
+    with pytest.raises(TypeError, match=rgx):
+        s.str.cat([u, d.values])
+
+    # nested lists
+    with pytest.raises(TypeError, match=rgx):
+        s.str.cat([u, [u, d]])
+
+    # forbidden input type: set
+    # GH 23009
+    with pytest.raises(TypeError, match=rgx):
+        s.str.cat(set(u))
+
+    # forbidden input type: set in list
+    # GH 23009
+    with pytest.raises(TypeError, match=rgx):
+        s.str.cat([u, set(u)])
+
+    # other forbidden input type, e.g. int
+    with pytest.raises(TypeError, match=rgx):
+        s.str.cat(1)
+
+    # nested list-likes
+    with pytest.raises(TypeError, match=rgx):
+        s.str.cat(iter([t.values, list(s)]))
+
+
+@pytest.mark.parametrize("join", ["left", "outer", "inner", "right"])
+def test_str_cat_align_indexed(index_or_series, join):
+    """``str.cat`` with ``join`` matches aligning by hand and then joining."""
+    # https://github.com/pandas-dev/pandas/issues/18657
+    caller = Series(["a", "b", "c", "d"], index=["a", "b", "c", "d"])
+    other = Series(["D", "A", "E", "B"], index=["d", "a", "e", "b"])
+
+    # reference result: align both inputs manually, then concatenate
+    caller_aligned, other_aligned = caller.align(other, join=join)
+    expected = caller_aligned.str.cat(other_aligned, na_rep="-")
+
+    if index_or_series == Index:
+        caller = Index(caller)
+        caller_aligned = Index(caller_aligned)
+        expected = Index(expected)
+
+    tm.assert_equal(caller.str.cat(other, join=join, na_rep="-"), expected)
+
+
+@pytest.mark.parametrize("join", ["left", "outer", "inner", "right"])
+def test_str_cat_align_mixed_inputs(join):
+    """Alignment of ``str.cat`` when ``others`` mixes indexed and unindexed."""
+    caller = Series(["a", "b", "c", "d"])
+    other = Series(["d", "a", "e", "b"], index=[3, 0, 4, 1])
+    frame = concat([other, other], axis=1)
+
+    outer = Series(["aaa", "bbb", "c--", "ddd", "-ee"])
+    expected = outer.loc[caller.index.join(other.index, how=join)]
+
+    # a list of Series and the equivalent DataFrame must agree
+    for others in ([other, other], frame):
+        result = caller.str.cat(others, join=join, na_rep="-")
+        tm.assert_series_equal(result, expected)
+
+    # mixed list of indexed/unindexed
+    unindexed = np.array(["A", "B", "C", "D"])
+    outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"])
+    # joint index of the right-hand side [other, unindexed]; the array will be
+    # forced to have the index of the caller
+    if join == "inner":
+        rhs_idx = other.index.intersection(caller.index)
+    elif join == "outer":
+        rhs_idx = other.index.union(caller.index)
+    else:
+        rhs_idx = other.index.append(caller.index.difference(other.index))
+
+    expected = outer.loc[caller.index.join(rhs_idx, how=join)]
+    result = caller.str.cat([other, unindexed], join=join, na_rep="-")
+    tm.assert_series_equal(result, expected)
+
+    with pytest.raises(TypeError, match="others must be Series,.*"):
+        # nested lists are forbidden
+        caller.str.cat([other, list(unindexed)], join=join)
+
+    # errors for incorrect lengths
+    rgx = r"If `others` contains arrays or lists \(or other list-likes.*"
+    too_short = Series(["1", "2", "3"]).values
+
+    # unindexed object of wrong length, on its own and inside a list
+    for bad in (too_short, [other, too_short]):
+        with pytest.raises(ValueError, match=rgx):
+            caller.str.cat(bad, join=join)
+
+
+def test_str_cat_all_na(index_or_series, index_or_series2):
+    """``str.cat`` works when the caller or the target is all-NaN (GH 24044)."""
+    box, other = index_or_series, index_or_series2
+
+    caller = Index(["a", "b", "c", "d"])
+    if box != Index:
+        caller = Series(caller, index=caller)
+
+    target = other([np.nan] * 4, dtype=object)
+    # a Series target gets the index of the caller so that the two align
+    if other != Index:
+        target = Series(target, index=caller)
+
+    # all-NA target
+    if box == Series:
+        expected = Series([np.nan] * 4, index=caller.index, dtype=caller.dtype)
+    else:  # box == Index
+        # TODO: String option, this should return string dtype
+        expected = Index([np.nan] * 4, dtype=object)
+    tm.assert_equal(caller.str.cat(target, join="left"), expected)
+
+    # all-NA caller (only for Series)
+    if other == Series:
+        expected = Series([np.nan] * 4, dtype=object, index=target.index)
+        tm.assert_series_equal(target.str.cat(caller, join="left"), expected)
+
+
+def test_str_cat_special_cases():
+    """Iterator ``others`` and right-joins over differently indexed pieces."""
+    caller = Series(["a", "b", "c", "d"])
+    other = Series(["d", "a", "e", "b"], index=[3, 0, 4, 1])
+
+    # iterator of elements with different types
+    mixed = iter([other, caller.values])
+    result = caller.str.cat(mixed, join="outer", na_rep="-")
+    tm.assert_series_equal(result, Series(["aaa", "bbb", "c-c", "ddd", "-e-"]))
+
+    # right-align with different indexes in others
+    pieces = [other.loc[[0]], other.loc[[3]]]
+    result = caller.str.cat(pieces, join="right", na_rep="-")
+    tm.assert_series_equal(result, Series(["aa-", "d-d"], index=[0, 3]))
+
+
+def test_cat_on_filtered_index():
+    """``str.cat`` on columns of a frame whose rows were filtered out."""
+    grid = MultiIndex.from_product(
+        [[2011, 2012], [1, 2, 3]], names=["year", "month"]
+    )
+    df = DataFrame(index=grid).reset_index()
+    # drop the month == 1 rows, leaving gaps in the row labels
+    df = df[df.month > 1]
+
+    years = df.year.astype("str")
+    months = df.month.astype("str")
+
+    assert years.str.cat(months, sep=" ").loc[1] == "2011 2"
+    assert years.str.cat([months, months], sep=" ").loc[1] == "2011 2 2"
+
+
+@pytest.mark.parametrize("klass", [tuple, list, np.array, Series, Index])
+def test_cat_different_classes(klass):
+    """Each parametrized container type gives the same ``str.cat`` result."""
+    # https://github.com/pandas-dev/pandas/issues/33425
+    others = klass(["x", "y", "z"])
+    result = Series(["a", "b", "c"]).str.cat(others)
+    tm.assert_series_equal(result, Series(["ax", "by", "cz"]))
+
+
+def test_cat_on_series_dot_str():
+    """Passing the ``.str`` accessor itself as ``others`` raises (GH 28277)."""
+    ps = Series(["AbC", "de", "FGHI", "j", "kLLLm"])
+
+    # literal error text; escaped below because it contains regex metacharacters
+    literal = (
+        "others must be Series, Index, DataFrame, np.ndarray "
+        "or list-like (either containing only strings or "
+        "containing only objects of type Series/Index/"
+        "np.ndarray[1-dim])"
+    )
+    with pytest.raises(TypeError, match=re.escape(literal)):
+        ps.str.cat(others=ps.str)
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_extract.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_extract.py
@ -0,0 +1,724 @@
+from datetime import datetime
+import re
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.dtypes import ArrowDtype
+
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    _testing as tm,
+)
+
+
+def test_extract_expand_kwarg_wrong_type_raises(any_string_dtype):
+    """``str.extract`` rejects ``expand=None`` with a ValueError."""
+    # TODO: should this raise TypeError
+    ser = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype)
+    pattern = ".*(BAD[_]+).*(BAD)"
+    with pytest.raises(ValueError, match="expand must be True or False"):
+        ser.str.extract(pattern, expand=None)
+
+
+def test_extract_expand_kwarg(any_string_dtype):
+    """Default ``expand``, ``expand=True`` and a two-group ``expand=False``."""
+    ser = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype)
+
+    # one group: omitting ``expand`` gives the same frame as ``expand=True``
+    one_group = DataFrame(["BAD__", np.nan, np.nan], dtype=any_string_dtype)
+    tm.assert_frame_equal(ser.str.extract(".*(BAD[_]+).*"), one_group)
+    tm.assert_frame_equal(ser.str.extract(".*(BAD[_]+).*", expand=True), one_group)
+
+    # two groups: a frame is returned even with ``expand=False``
+    two_groups = DataFrame(
+        [["BAD__", "BAD"], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    result = ser.str.extract(".*(BAD[_]+).*(BAD)", expand=False)
+    tm.assert_frame_equal(result, two_groups)
+
+
+def test_extract_expand_False_mixed_object():
+    """``expand=False`` on an object Series mixing strings with other types."""
+    ser = Series(
+        ["aBAD_BAD", np.nan, "BAD_b_BAD", True, datetime.today(), "foo", None, 1, 2.0]
+    )
+    matched = ["BAD_", "BAD"]
+    empty = [np.nan, np.nan]  # empty row
+
+    # two groups
+    two_groups = ser.str.extract(".*(BAD[_]+).*(BAD)", expand=False)
+    rows = [matched, empty, matched] + [empty] * 6
+    tm.assert_frame_equal(two_groups, DataFrame(rows, dtype=object))
+
+    # single group
+    one_group = ser.str.extract(".*(BAD[_]+).*BAD", expand=False)
+    expected = Series(
+        ["BAD_", np.nan, "BAD_", np.nan, np.nan, np.nan, None, np.nan, np.nan],
+        dtype=object,
+    )
+    tm.assert_series_equal(one_group, expected)
+
+
+def test_extract_expand_index_raises():
+    """An Index rejects a two-group pattern with ``expand=False`` (GH9980)."""
+    # Index only works with one regex group since
+    # multi-group would expand to a frame
+    labels = Index(["A1", "A2", "A3", "A4", "B5"])
+    with pytest.raises(
+        ValueError, match="only one regex group is supported with Index"
+    ):
+        labels.str.extract("([AB])([123])", expand=False)
+
+
+def test_extract_expand_no_capture_groups_raises(index_or_series, any_string_dtype):
+    """Patterns without a capture group are rejected with ``expand=False``."""
+    subject = index_or_series(["A1", "B2", "C3"], dtype=any_string_dtype)
+    msg = "pattern contains no capture groups"
+
+    # the first pattern has no groups, the second only a non-capturing group
+    for pattern in ("[ABC][123]", "(?:[AB]).*"):
+        with pytest.raises(ValueError, match=msg):
+            subject.str.extract(pattern, expand=False)
+
+
+def test_extract_expand_single_capture_group(index_or_series, any_string_dtype):
+    """A single named group names the ``expand=False`` Series/Index result."""
+    subject = index_or_series(["A1", "A2"], dtype=any_string_dtype)
+    expected = index_or_series(["A", "A"], name="uno", dtype=any_string_dtype)
+
+    result = subject.str.extract(r"(?P<uno>A)\d", expand=False)
+
+    # choose the comparison helper that matches the container under test
+    compare = (
+        tm.assert_series_equal if index_or_series == Series else tm.assert_index_equal
+    )
+    compare(result, expected)
+
+
+def test_extract_expand_capture_groups(any_string_dtype):
+    """
+    Check ``str.extract(..., expand=False)`` across capture-group layouts.
+
+    In every case below a single capture group is compared against a Series
+    and two capture groups against a DataFrame. Named groups supply the
+    result name / column labels; unnamed groups get integer column labels.
+    """
+    s = Series(["A1", "B2", "C3"], dtype=any_string_dtype)
+    # one group, no matches
+    result = s.str.extract("(_)", expand=False)
+    expected = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # two groups, no matches
+    result = s.str.extract("(_)(_)", expand=False)
+    expected = DataFrame(
+        [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one group, some matches
+    result = s.str.extract("([AB])[123]", expand=False)
+    expected = Series(["A", "B", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # two groups, some matches
+    result = s.str.extract("([AB])([123])", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one named group
+    result = s.str.extract("(?P<letter>[AB])", expand=False)
+    expected = Series(["A", "B", np.nan], name="letter", dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # two named groups
+    result = s.str.extract("(?P<letter>[AB])(?P<number>[123])", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # mix named and unnamed groups
+    result = s.str.extract("([AB])(?P<number>[123])", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
+        columns=[0, "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one normal group, one non-capturing group
+    result = s.str.extract("([AB])(?:[123])", expand=False)
+    expected = Series(["A", "B", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    # two normal groups, one non-capturing group
+    # (s is rebound here and again in each of the remaining cases)
+    s = Series(["A11", "B22", "C33"], dtype=any_string_dtype)
+    result = s.str.extract("([AB])([123])(?:[123])", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one optional group followed by one normal group
+    s = Series(["A1", "B2", "3"], dtype=any_string_dtype)
+    result = s.str.extract("(?P<letter>[AB])?(?P<number>[123])", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, "3"]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one normal group followed by one optional group
+    s = Series(["A1", "B2", "C"], dtype=any_string_dtype)
+    result = s.str.extract("(?P<letter>[ABC])(?P<number>[123])?", expand=False)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], ["C", np.nan]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extract_expand_capture_groups_index(index, any_string_dtype):
+    """``expand=False`` results keep the index of the calling Series."""
+    # https://github.com/pandas-dev/pandas/issues/6348
+    # not passing index to the extractor
+    values = ["A1", "B2", "C"]
+    needed = len(values)
+
+    if len(index) == 0:
+        pytest.skip("Test requires len(index) > 0")
+    # repeat the labels of a short index until there is one per value
+    while len(index) < needed:
+        index = index.repeat(2)
+    index = index[:needed]
+
+    ser = Series(values, index=index, dtype=any_string_dtype)
+
+    digits = Series(["1", "2", np.nan], index=index, dtype=any_string_dtype)
+    tm.assert_series_equal(ser.str.extract(r"(\d)", expand=False), digits)
+
+    frame = DataFrame(
+        [["A", "1"], ["B", "2"], ["C", np.nan]],
+        columns=["letter", "number"],
+        index=index,
+        dtype=any_string_dtype,
+    )
+    result = ser.str.extract(r"(?P<letter>\D)(?P<number>\d)?", expand=False)
+    tm.assert_frame_equal(result, frame)
+
+
+def test_extract_single_series_name_is_preserved(any_string_dtype):
+    """The named group, not the caller's name, labels the extracted Series."""
+    ser = Series(["a3", "b3", "c2"], name="bob", dtype=any_string_dtype)
+    expected = Series(["a", "b", "c"], name="sue", dtype=any_string_dtype)
+    tm.assert_series_equal(
+        ser.str.extract(r"(?P<sue>[a-z])", expand=False), expected
+    )
+
+
+def test_extract_expand_True(any_string_dtype):
+    """Two capture groups with ``expand=True`` give a two-column frame."""
+    # Contains tests like those in test_match and some others.
+    ser = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype)
+    missing = [np.nan, np.nan]
+    expected = DataFrame([["BAD__", "BAD"], missing, missing], dtype=any_string_dtype)
+
+    tm.assert_frame_equal(
+        ser.str.extract(".*(BAD[_]+).*(BAD)", expand=True), expected
+    )
+
+
+def test_extract_expand_True_mixed_object():
+    """``expand=True`` on an object Series mixing strings with other types."""
+    mixed = Series(
+        ["aBAD_BAD", np.nan, "BAD_b_BAD", True, datetime.today(), "foo", None, 1, 2.0]
+    )
+    matched = ["BAD_", "BAD"]
+    empty = [np.nan, np.nan]  # empty row
+
+    result = mixed.str.extract(".*(BAD[_]+).*(BAD)", expand=True)
+
+    rows = [matched, empty, matched] + [empty] * 6
+    tm.assert_frame_equal(result, DataFrame(rows, dtype=object))
+
+
+def test_extract_expand_True_single_capture_group_raises(
+    index_or_series, any_string_dtype
+):
+    """Patterns without a capture group are rejected with ``expand=True``."""
+    # these should work for both Series and Index
+    subject = index_or_series(["A1", "B2", "C3"], dtype=any_string_dtype)
+    msg = "pattern contains no capture groups"
+
+    # the first pattern has no groups, the second only a non-capturing group
+    for pattern in ("[ABC][123]", "(?:[AB]).*"):
+        with pytest.raises(ValueError, match=msg):
+            subject.str.extract(pattern, expand=True)
+
+
+def test_extract_expand_True_single_capture_group(index_or_series, any_string_dtype):
+    """A single named group with ``expand=True`` gives a one-column frame."""
+    # the group name becomes the column label
+    subject = index_or_series(["A1", "A2"], dtype=any_string_dtype)
+    expected = DataFrame({"uno": ["A", "A"]}, dtype=any_string_dtype)
+    tm.assert_frame_equal(
+        subject.str.extract(r"(?P<uno>A)\d", expand=True), expected
+    )
+
+
+@pytest.mark.parametrize("name", [None, "series_name"])
+def test_extract_series(name, any_string_dtype):
+    """
+    Check ``str.extract(..., expand=True)`` across capture-group layouts.
+
+    Every case is compared against a DataFrame. The expected frames are the
+    same for both parametrized values of ``name``.
+    """
+    # extract should give the same result whether or not the series has a name.
+    s = Series(["A1", "B2", "C3"], name=name, dtype=any_string_dtype)
+
+    # one group, no matches
+    result = s.str.extract("(_)", expand=True)
+    expected = DataFrame([np.nan, np.nan, np.nan], dtype=any_string_dtype)
+    tm.assert_frame_equal(result, expected)
+
+    # two groups, no matches
+    result = s.str.extract("(_)(_)", expand=True)
+    expected = DataFrame(
+        [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one group, some matches
+    result = s.str.extract("([AB])[123]", expand=True)
+    expected = DataFrame(["A", "B", np.nan], dtype=any_string_dtype)
+    tm.assert_frame_equal(result, expected)
+
+    # two groups, some matches
+    result = s.str.extract("([AB])([123])", expand=True)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one named group
+    result = s.str.extract("(?P<letter>[AB])", expand=True)
+    expected = DataFrame({"letter": ["A", "B", np.nan]}, dtype=any_string_dtype)
+    tm.assert_frame_equal(result, expected)
+
+    # two named groups
+    result = s.str.extract("(?P<letter>[AB])(?P<number>[123])", expand=True)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
+        columns=["letter", "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # mix named and unnamed groups
+    # (the unnamed group is expected under the integer column label 0)
+    result = s.str.extract("([AB])(?P<number>[123])", expand=True)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]],
+        columns=[0, "number"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # one normal group, one non-capturing group
+    result = s.str.extract("([AB])(?:[123])", expand=True)
+    expected = DataFrame(["A", "B", np.nan], dtype=any_string_dtype)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extract_optional_groups(any_string_dtype):
+    """``expand=True`` with non-capturing and optional groups in the pattern."""
+    # two normal groups, one non-capturing group
+    ser = Series(["A11", "B22", "C33"], dtype=any_string_dtype)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(
+        ser.str.extract("([AB])([123])(?:[123])", expand=True), expected
+    )
+
+    group_names = ["letter", "number"]
+
+    # one optional group followed by one normal group
+    ser = Series(["A1", "B2", "3"], dtype=any_string_dtype)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], [np.nan, "3"]],
+        columns=group_names,
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(
+        ser.str.extract("(?P<letter>[AB])?(?P<number>[123])", expand=True), expected
+    )
+
+    # one normal group followed by one optional group
+    ser = Series(["A1", "B2", "C"], dtype=any_string_dtype)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], ["C", np.nan]],
+        columns=group_names,
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(
+        ser.str.extract("(?P<letter>[ABC])(?P<number>[123])?", expand=True), expected
+    )
+
+
+def test_extract_dataframe_capture_groups_index(index, any_string_dtype):
+    """
+    Check that ``expand=True`` results keep the index of the calling Series.
+
+    The test is skipped for ``index`` fixtures with fewer labels than there
+    are data values; longer indexes are truncated to the data length.
+    """
+    # GH6348
+    # not passing index to the extractor
+
+    data = ["A1", "B2", "C"]
+
+    if len(index) < len(data):
+        # an index with exactly len(data) labels is sufficient, so the message
+        # says "at least" rather than "more than"
+        pytest.skip(f"Index needs at least {len(data)} values")
+
+    index = index[: len(data)]
+    s = Series(data, index=index, dtype=any_string_dtype)
+
+    # single unnamed group -> one-column frame
+    result = s.str.extract(r"(\d)", expand=True)
+    expected = DataFrame(["1", "2", np.nan], index=index, dtype=any_string_dtype)
+    tm.assert_frame_equal(result, expected)
+
+    # two named groups, the second optional -> columns named after the groups
+    result = s.str.extract(r"(?P<letter>\D)(?P<number>\d)?", expand=True)
+    expected = DataFrame(
+        [["A", "1"], ["B", "2"], ["C", np.nan]],
+        columns=["letter", "number"],
+        index=index,
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_extract_single_group_returns_frame(any_string_dtype):
+    """A single group with ``expand=True`` still yields a DataFrame."""
+    # GH11386 extract should always return DataFrame, even when
+    # there is only one group. Prior to v0.18.0, extract returned
+    # Series when there was only one group in the regex.
+    ser = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)
+    expected = DataFrame({"letter": ["a", "b", "c"]}, dtype=any_string_dtype)
+    tm.assert_frame_equal(
+        ser.str.extract(r"(?P<letter>[a-z])", expand=True), expected
+    )
+
+
+def test_extractall(any_string_dtype):
+    """
+    Check ``str.extractall`` on e-mail-like strings with three named groups.
+
+    The same data and pattern are run against a default index, an unnamed
+    MultiIndex and a named MultiIndex; each time the expected frame has one
+    row per match and a trailing ``"match"`` index level.
+    """
+    # the last two entries (NaN and the empty string) have no row in
+    # expected_tuples below
+    data = [
+        "dave@google.com",
+        "tdhock5@gmail.com",
+        "maudelaperriere@gmail.com",
+        "rob@gmail.com some text steve@gmail.com",
+        "a@b.com some text c@d.com and e@f.com",
+        np.nan,
+        "",
+    ]
+    expected_tuples = [
+        ("dave", "google", "com"),
+        ("tdhock5", "gmail", "com"),
+        ("maudelaperriere", "gmail", "com"),
+        ("rob", "gmail", "com"),
+        ("steve", "gmail", "com"),
+        ("a", "b", "com"),
+        ("c", "d", "com"),
+        ("e", "f", "com"),
+    ]
+    # the pattern is laid out over several lines and is therefore always
+    # passed together with flags=re.VERBOSE below
+    pat = r"""
+    (?P<user>[a-z0-9]+)
+    @
+    (?P<domain>[a-z]+)
+    \.
+    (?P<tld>[a-z]{2,4})
+    """
+    expected_columns = ["user", "domain", "tld"]
+    s = Series(data, dtype=any_string_dtype)
+    # extractall should return a DataFrame with one row for each match, indexed by the
+    # subject from which the match came.
+    expected_index = MultiIndex.from_tuples(
+        [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (4, 1), (4, 2)],
+        names=(None, "match"),
+    )
+    expected = DataFrame(
+        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
+    )
+    result = s.str.extractall(pat, flags=re.VERBOSE)
+    tm.assert_frame_equal(result, expected)
+
+    # The index of the input Series should be used to construct the index of the output
+    # DataFrame:
+    mi = MultiIndex.from_tuples(
+        [
+            ("single", "Dave"),
+            ("single", "Toby"),
+            ("single", "Maude"),
+            ("multiple", "robAndSteve"),
+            ("multiple", "abcdef"),
+            ("none", "missing"),
+            ("none", "empty"),
+        ]
+    )
+    s = Series(data, index=mi, dtype=any_string_dtype)
+    expected_index = MultiIndex.from_tuples(
+        [
+            ("single", "Dave", 0),
+            ("single", "Toby", 0),
+            ("single", "Maude", 0),
+            ("multiple", "robAndSteve", 0),
+            ("multiple", "robAndSteve", 1),
+            ("multiple", "abcdef", 0),
+            ("multiple", "abcdef", 1),
+            ("multiple", "abcdef", 2),
+        ],
+        names=(None, None, "match"),
+    )
+    expected = DataFrame(
+        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
+    )
+    result = s.str.extractall(pat, flags=re.VERBOSE)
+    tm.assert_frame_equal(result, expected)
+
+    # MultiIndexed subject with names.
+    s = Series(data, index=mi, dtype=any_string_dtype)
+    s.index.names = ("matches", "description")
+    # reuse the expected index built above, only renaming its levels
+    expected_index.names = ("matches", "description", "match")
+    expected = DataFrame(
+        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
+    )
+    result = s.str.extractall(pat, flags=re.VERBOSE)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "pat,expected_names",
+    [
+        # optional groups.
+        ("(?P<letter>[AB])?(?P<number>[123])", ["letter", "number"]),
+        # only one of two groups has a name.
+        ("([AB])?(?P<number>[123])", [0, "number"]),
+    ],
+)
+def test_extractall_column_names(pat, expected_names, any_string_dtype):
+    """Columns of ``extractall`` follow the group names of the pattern."""
+    ser = Series(["", "A1", "32"], dtype=any_string_dtype)
+
+    match_index = MultiIndex.from_tuples(
+        [(1, 0), (2, 0), (2, 1)], names=(None, "match")
+    )
+    expected = DataFrame(
+        [("A", "1"), (np.nan, "3"), (np.nan, "2")],
+        index=match_index,
+        columns=expected_names,
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(ser.str.extractall(pat), expected)
+
+
+def test_extractall_single_group(any_string_dtype):
+    """Check extractall with exactly one capture group, named and un-named."""
+    ser = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)
+    match_index = MultiIndex.from_tuples(
+        [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")
+    )
+
+    # One named group gives a DataFrame with one column carrying that name.
+    named = ser.str.extractall(r"(?P<letter>[a-z])")
+    want_named = DataFrame(
+        {"letter": ["a", "b", "d", "c"]}, index=match_index, dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(named, want_named)
+
+    # One un-named group gives a DataFrame with one un-named column.
+    unnamed = ser.str.extractall(r"([a-z])")
+    want_unnamed = DataFrame(
+        ["a", "b", "d", "c"], index=match_index, dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(unnamed, want_unnamed)
+
+
+def test_extractall_single_group_with_quantifier(any_string_dtype):
+    """Check extractall with one un-named, quantified group (GH#13382)."""
+    # The result must still be a DataFrame with a single un-named column, each
+    # cell holding the whole run of letters captured by the group.
+    ser = Series(["ab3", "abc3", "d4cd2"], name="series_name", dtype=any_string_dtype)
+    match_index = MultiIndex.from_tuples(
+        [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")
+    )
+    want = DataFrame(
+        ["ab", "abc", "d", "cd"], index=match_index, dtype=any_string_dtype
+    )
+
+    got = ser.str.extractall(r"([a-z]+)")
+    tm.assert_frame_equal(got, want)
+
+
+@pytest.mark.parametrize(
+    "data, names",
+    [
+        ([], (None,)),
+        ([], ("i1",)),
+        ([], (None, "i2")),
+        ([], ("i1", "i2")),
+        (["a3", "b3", "d4c2"], (None,)),
+        # named single-level index with data (previously a duplicate of the
+        # ("i1", "i2") case below, so this combination was never exercised)
+        (["a3", "b3", "d4c2"], ("i1",)),
+        (["a3", "b3", "d4c2"], (None, "i2")),
+        (["a3", "b3", "d4c2"], ("i1", "i2")),
+    ],
+)
+def test_extractall_no_matches(data, names, any_string_dtype):
+    """
+    Check that extractall without any match returns an empty, valid frame.
+
+    The subject gets a flat Index when ``names`` has one entry and a MultiIndex
+    otherwise.  For every pattern the result must be an empty DataFrame whose
+    index is an empty MultiIndex named ``names + ("match",)`` and whose columns
+    follow the pattern's groups.
+    """
+    # GH19075 extractall with no matches should return a valid MultiIndex
+    n = len(data)
+    if len(names) == 1:
+        index = Index(range(n), name=names[0])
+    else:
+        # One tuple per row with one entry per index level.  The width is tied
+        # to len(names) rather than to the number of rows.
+        tuples = [(i,) * len(names) for i in range(n)]
+        index = MultiIndex.from_tuples(tuples, names=names)
+    s = Series(data, name="series_name", index=index, dtype=any_string_dtype)
+    expected_index = MultiIndex.from_tuples([], names=(names + ("match",)))
+
+    # (pattern that never matches, expected column labels)
+    cases = [
+        # one un-named group.
+        ("(z)", [0]),
+        # two un-named groups.
+        ("(z)(z)", [0, 1]),
+        # one named group.
+        ("(?P<first>z)", ["first"]),
+        # two named groups.
+        ("(?P<first>z)(?P<second>z)", ["first", "second"]),
+        # one named, one un-named.
+        ("(z)(?P<second>z)", [0, "second"]),
+    ]
+    for pat, columns in cases:
+        result = s.str.extractall(pat)
+        expected = DataFrame(
+            columns=columns, index=expected_index, dtype=any_string_dtype
+        )
+        tm.assert_frame_equal(result, expected)
+
+
+def test_extractall_stringindex(any_string_dtype):
+    """Check extractall on a Series, on an Index and on a Series with named index."""
+    pat = r"[ab](?P<digit>\d)"
+
+    ser = Series(["a1a2", "b1", "c1"], name="xxx", dtype=any_string_dtype)
+    got = ser.str.extractall(pat)
+    want = DataFrame(
+        {"digit": ["1", "2", "1"]},
+        index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)], names=[None, "match"]),
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(got, want)
+
+    # Calling extractall on an Index must give the same frame as the Series with
+    # a default, unnamed index above: the name of the Index does not affect the
+    # result.
+    if any_string_dtype == "object":
+        candidates = [
+            Index(["a1a2", "b1", "c1"], dtype=object),
+            Index(["a1a2", "b1", "c1"], name="xxx", dtype=object),
+        ]
+        for idx in candidates:
+            got = idx.str.extractall(pat)
+            tm.assert_frame_equal(got, want)
+
+    # A named index on the subject is carried over as the first result level.
+    named_index = Index(["XX", "yy", "zz"], name="idx_name")
+    ser = Series(
+        ["a1a2", "b1", "c1"],
+        name="s_name",
+        index=named_index,
+        dtype=any_string_dtype,
+    )
+    got = ser.str.extractall(pat)
+    want_index = MultiIndex.from_tuples(
+        [("XX", 0), ("XX", 1), ("yy", 0)], names=["idx_name", "match"]
+    )
+    want = DataFrame(
+        {"digit": ["1", "2", "1"]}, index=want_index, dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(got, want)
+
+
+def test_extractall_no_capture_groups_raises(any_string_dtype):
+    """Check that extractall rejects a pattern without any capture group."""
+    # extractall returns one column per capture group, so a pattern that has
+    # none makes no sense and must raise.
+    ser = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)
+    groupless_pat = r"[a-z]"
+    with pytest.raises(ValueError, match="no capture groups"):
+        ser.str.extractall(groupless_pat)
+
+
+def test_extract_index_one_two_groups():
+    """Check Index.str.extract(expand=True) with one and with two groups."""
+    ser = Series(["a3", "b3", "d4c2"], index=["A3", "B3", "D4"], name="series_name")
+
+    one_group = ser.index.str.extract(r"([A-Z])", expand=True)
+    want_one = DataFrame(["A", "B", "D"])
+    tm.assert_frame_equal(one_group, want_one)
+
+    # Before v0.18.0, index.str.extract with a one-group regex returned an
+    # Index, and with more than one group it raised (GH9980).  extract now
+    # always returns a DataFrame.
+    two_groups = ser.index.str.extract(
+        r"(?P<letter>[A-Z])(?P<digit>[0-9])", expand=True
+    )
+    rows = [("A", "3"), ("B", "3"), ("D", "4")]
+    want_two = DataFrame(rows, columns=["letter", "digit"])
+    tm.assert_frame_equal(two_groups, want_two)
+
+
+def test_extractall_same_as_extract(any_string_dtype):
+    """Check that match 0 of extractall equals extract(expand=True)."""
+    ser = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)
+
+    patterns = [
+        r"([a-z])([0-9])",  # two un-named groups
+        r"(?P<letter>[a-z])(?P<digit>[0-9])",  # two named groups
+        r"(?P<group_name>[a-z])",  # one named group
+        r"([a-z])",  # one un-named group
+    ]
+    for pat in patterns:
+        from_extract = ser.str.extract(pat, expand=True)
+        all_matches = ser.str.extractall(pat)
+        # Keep only the first match of every subject string; selecting on the
+        # "match" level leaves the subject's own index behind.
+        first_matches = all_matches.xs(0, level="match")
+        tm.assert_frame_equal(from_extract, first_matches)
+
+
+def test_extractall_same_as_extract_subject_index(any_string_dtype):
+    """Check extract/extractall agreement when the subject has a MultiIndex."""
+    subject_index = MultiIndex.from_tuples(
+        [("A", "first"), ("B", "second"), ("C", "third")],
+        names=("capital", "ordinal"),
+    )
+    ser = Series(
+        ["a3", "b3", "c2"],
+        index=subject_index,
+        name="series_name",
+        dtype=any_string_dtype,
+    )
+
+    patterns = [
+        r"([a-z])([0-9])",  # two un-named groups
+        r"(?P<letter>[a-z])(?P<digit>[0-9])",  # two named groups
+        r"(?P<group_name>[a-z])",  # one named group
+        r"([a-z])",  # one un-named group
+    ]
+    for pat in patterns:
+        from_extract = ser.str.extract(pat, expand=True)
+        all_matches = ser.str.extractall(pat)
+        # Keep only the first match of every subject string.
+        first_matches = all_matches.xs(0, level="match")
+        tm.assert_frame_equal(from_extract, first_matches)
+
+
+def test_extractall_preserves_dtype():
+    """Check that extractall keeps the subject's dtype in the result column."""
+    # The check uses a pyarrow-backed string Series; the test is skipped when
+    # pyarrow cannot be imported.
+    pa = pytest.importorskip("pyarrow")
+
+    arrow_ser = Series(["abc", "ab"], dtype=ArrowDtype(pa.string()))
+    extracted = arrow_ser.str.extractall("(ab)")
+    assert extracted.dtypes[0] == "string[pyarrow]"
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_find_replace.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_find_replace.py
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_get_dummies.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_get_dummies.py
@ -0,0 +1,53 @@
+import numpy as np
+
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    _testing as tm,
+)
+
+
+def test_get_dummies(any_string_dtype):
+    """Check Series.str.get_dummies with a missing value and with a non-string."""
+    # A missing entry yields a row of zeros.
+    with_nan = Series(["a|b", "a|c", np.nan], dtype=any_string_dtype)
+    got = with_nan.str.get_dummies("|")
+    want = DataFrame([[1, 1, 0], [1, 0, 1], [0, 0, 0]], columns=list("abc"))
+    tm.assert_frame_equal(got, want)
+
+    # The integer 7 shows up as the column "7".
+    with_int = Series(["a;b", "a", 7], dtype=any_string_dtype)
+    got = with_int.str.get_dummies(";")
+    want = DataFrame([[0, 1, 1], [0, 1, 0], [1, 0, 0]], columns=list("7ab"))
+    tm.assert_frame_equal(got, want)
+
+
+def test_get_dummies_index():
+    """Check that Index.str.get_dummies returns a MultiIndex (GH9980, GH8028)."""
+    labels = Index(["a|b", "a|c", "b|c"])
+    # One level per distinct token, each level named after its token.
+    want = MultiIndex.from_tuples(
+        [(1, 1, 0), (1, 0, 1), (0, 1, 1)], names=("a", "b", "c")
+    )
+
+    got = labels.str.get_dummies("|")
+    tm.assert_index_equal(got, want)
+
+
+def test_get_dummies_with_name_dummy(any_string_dtype):
+    """Check that a dummy literally called "name" works as expected (GH 12180)."""
+    ser = Series(["a", "b,name", "b"], dtype=any_string_dtype)
+    want = DataFrame([[1, 0, 0], [0, 1, 1], [0, 1, 0]], columns=["a", "b", "name"])
+
+    got = ser.str.get_dummies(",")
+    tm.assert_frame_equal(got, want)
+
+
+def test_get_dummies_with_name_dummy_index():
+    """Check Index.str.get_dummies with a dummy called "name" (GH 12180)."""
+    labels = Index(["a|b", "name|c", "b|name"])
+    want = MultiIndex.from_tuples(
+        [(1, 1, 0, 0), (0, 0, 1, 1), (0, 1, 0, 1)], names=("a", "b", "c", "name")
+    )
+
+    got = labels.str.get_dummies("|")
+    tm.assert_index_equal(got, want)
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_split_partition.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_split_partition.py
@ -0,0 +1,734 @@
+from datetime import datetime
+import re
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    _testing as tm,
+)
+from pandas.tests.strings import (
+    _convert_na_value,
+    is_object_or_nan_string_dtype,
+)
+
+
+@pytest.mark.parametrize("method", ["split", "rsplit"])
+def test_split(any_string_dtype, method):
+    """Check splitting on a one-character separator, keeping missing values."""
+    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
+    splitter = getattr(ser.str, method)
+
+    got = splitter("_")
+    want = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]])
+    # Use the missing-value marker that matches the dtype of the input.
+    want = _convert_na_value(ser, want)
+    tm.assert_series_equal(got, want)
+
+
+@pytest.mark.parametrize("method", ["split", "rsplit"])
+def test_split_more_than_one_char(any_string_dtype, method):
+    """Check splitting on a separator that is longer than one character."""
+    ser = Series(["a__b__c", "c__d__e", np.nan, "f__g__h"], dtype=any_string_dtype)
+    want = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]])
+    want = _convert_na_value(ser, want)
+
+    # The default call and an explicit expand=False must give the same Series.
+    for kwargs in ({}, {"expand": False}):
+        got = getattr(ser.str, method)("__", **kwargs)
+        tm.assert_series_equal(got, want)
+
+
+def test_split_more_regex_split(any_string_dtype):
+    """Check that a multi-character pattern is applied as a regex by split."""
+    ser = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"], dtype=any_string_dtype)
+    # The character class splits on either "," or "_".
+    got = ser.str.split("[,_]")
+
+    want = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]])
+    want = _convert_na_value(ser, want)
+    tm.assert_series_equal(got, want)
+
+
+def test_split_regex(any_string_dtype):
+    """Check split with an explicit regex=True (GH 43563)."""
+    ser = Series("xxxjpgzzz.jpg", dtype=any_string_dtype)
+    # The escaped dot only matches the literal ".jpg" suffix, not "xjpg".
+    got = ser.str.split(r"\.jpg", regex=True)
+    want = Series([["xxxjpgzzz", ""]])
+    tm.assert_series_equal(got, want)
+
+
+def test_split_regex_explicit(any_string_dtype):
+    """Check how ``regex`` and compiled patterns steer Series.str.split."""
+    compiled = re.compile(r".jpg")
+    ser = Series("xxxjpgzzz.jpg", dtype=any_string_dtype)
+
+    # A compiled pattern is applied as a regex: "." matches any character.
+    got = ser.str.split(compiled)
+    want = Series([["xx", "zzz", ""]])
+    tm.assert_series_equal(got, want)
+
+    # regex=False takes the pattern literally, so nothing is split here.
+    got = ser.str.split(r"\.jpg", regex=False)
+    want = Series([["xxxjpgzzz.jpg"]])
+    tm.assert_series_equal(got, want)
+
+    # regex not given, pattern of length 1: "." is taken literally.
+    got = ser.str.split(r".")
+    want = Series([["xxxjpgzzz", "jpg"]])
+    tm.assert_series_equal(got, want)
+
+    # regex not given, pattern longer than 1: applied as a regex.
+    got = ser.str.split(r".jpg")
+    want = Series([["xx", "zzz", ""]])
+    tm.assert_series_equal(got, want)
+
+    # A compiled pattern together with regex=False is rejected.
+    with pytest.raises(
+        ValueError,
+        match="Cannot use a compiled regex as replacement pattern with regex=False",
+    ):
+        ser.str.split(compiled, regex=False)
+
+
+@pytest.mark.parametrize("expand", [None, False])
+@pytest.mark.parametrize("method", ["split", "rsplit"])
+def test_split_object_mixed(expand, method):
+    """Check split/rsplit on an object Series that mixes strings and non-strings."""
+    mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0])
+    got = getattr(mixed.str, method)("_", expand=expand)
+
+    # Only the two real strings are split; None stays None and every other
+    # entry becomes NaN.
+    want = Series(
+        [
+            ["a", "b", "c"],
+            np.nan,
+            ["d", "e", "f"],
+            np.nan,
+            np.nan,
+            None,
+            np.nan,
+            np.nan,
+        ]
+    )
+    assert isinstance(got, Series)
+    tm.assert_almost_equal(got, want)
+
+
+@pytest.mark.parametrize("method", ["split", "rsplit"])
+@pytest.mark.parametrize("n", [None, 0])
+def test_split_n(any_string_dtype, method, n):
+    """Check that n=None and n=0 both split at every separator."""
+    ser = Series(["a b", pd.NA, "b c"], dtype=any_string_dtype)
+    want = Series([["a", "b"], pd.NA, ["b", "c"]])
+
+    got = getattr(ser.str, method)(" ", n=n)
+    # Use the missing-value marker that matches the dtype of the input.
+    want = _convert_na_value(ser, want)
+    tm.assert_series_equal(got, want)
+
+
+def test_rsplit(any_string_dtype):
+    """Check that rsplit takes its pattern literally (no regex support)."""
+    ser = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"], dtype=any_string_dtype)
+    got = ser.str.rsplit("[,_]")
+
+    # The literal text "[,_]" never occurs, so each string comes back whole.
+    want = Series([["a,b_c"], ["c_d,e"], np.nan, ["f,g,h"]])
+    want = _convert_na_value(ser, want)
+    tm.assert_series_equal(got, want)
+
+
+def test_rsplit_max_number(any_string_dtype):
+    """Check that rsplit with a split limit counts from the right-hand side."""
+    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
+    got = ser.str.rsplit("_", n=1)
+
+    # With n=1 only the last separator is used.
+    want = Series([["a_b", "c"], ["c_d", "e"], np.nan, ["f_g", "h"]])
+    want = _convert_na_value(ser, want)
+    tm.assert_series_equal(got, want)
+
+
+def test_split_blank_string(any_string_dtype):
+    """Check split(expand=True) on a Series holding one blank string (GH 20067)."""
+    ser = Series([""], name="test", dtype=any_string_dtype)
+    got = ser.str.split(expand=True)
+
+    # NOTE: this is NOT an empty df
+    want = DataFrame([[]], dtype=any_string_dtype)
+    tm.assert_frame_equal(got, want)
+
+
+def test_split_blank_string_with_non_empty(any_string_dtype):
+    """Check split(expand=True) when blank strings sit next to real content."""
+    ser = Series(["a b c", "a b", "", " "], name="test", dtype=any_string_dtype)
+    got = ser.str.split(expand=True)
+
+    # Shorter rows are padded with None; blank rows are None throughout.
+    rows = [
+        ["a", "b", "c"],
+        ["a", "b", None],
+        [None, None, None],
+        [None, None, None],
+    ]
+    want = DataFrame(rows, dtype=any_string_dtype)
+    tm.assert_frame_equal(got, want)
+
+
+@pytest.mark.parametrize("method", ["split", "rsplit"])
+def test_split_noargs(any_string_dtype, method):
+    """Check split/rsplit called without arguments (#1859)."""
+    ser = Series(["Wes McKinney", "Travis  Oliphant"], dtype=any_string_dtype)
+    got = getattr(ser.str, method)()
+
+    # The second name has two spaces; they must not produce an empty token.
+    want_second = ["Travis", "Oliphant"]
+    assert got[1] == want_second
+
+
+@pytest.mark.parametrize(
+    "data, pat",
+    [
+        (["bd asdf jfg", "kjasdflqw asdfnfk"], None),
+        (["bd asdf jfg", "kjasdflqw asdfnfk"], "asdf"),
+        (["bd_asdf_jfg", "kjasdflqw_asdfnfk"], "_"),
+    ],
+)
+@pytest.mark.parametrize("n", [-1, 0])
+def test_split_maxsplit(data, pat, any_string_dtype, n):
+    """Check that n=-1 and n=0 behave like a split without any limit."""
+    # "No limit" is spelled 0 by re.split and -1 by str.split.
+    ser = Series(data, dtype=any_string_dtype)
+
+    limited = ser.str.split(pat=pat, n=n)
+    unlimited = ser.str.split(pat=pat)
+    tm.assert_series_equal(limited, unlimited)
+
+
+@pytest.mark.parametrize(
+    "data, pat, expected",
+    [
+        (
+            ["split once", "split once too!"],
+            None,
+            Series({0: ["split", "once"], 1: ["split", "once too!"]}),
+        ),
+        (
+            ["split_once", "split_once_too!"],
+            "_",
+            Series({0: ["split", "once"], 1: ["split", "once_too!"]}),
+        ),
+    ],
+)
+def test_split_no_pat_with_nonzero_n(data, pat, expected, any_string_dtype):
+    """
+    Check that n=1 splits only once, with and without an explicit pattern.
+
+    ``pat`` is None for the whitespace case and "_" for the explicit case; in
+    both the remainder after the first separator must stay in one piece.
+    """
+    s = Series(data, dtype=any_string_dtype)
+    result = s.str.split(pat=pat, n=1)
+    # Pass the result first and the expectation second, as the other tests in
+    # this file do, so a failure labels the two sides consistently.  The index
+    # type is not compared because ``expected`` is built from a dict.
+    tm.assert_series_equal(result, expected, check_index_type=False)
+
+
+def test_split_to_dataframe_no_splits(any_string_dtype):
+    """Check split(expand=True) when the separator never occurs."""
+    ser = Series(["nosplit", "alsonosplit"], dtype=any_string_dtype)
+    got = ser.str.split("_", expand=True)
+
+    # Nothing is split, so the frame has the input as its only column.
+    only_column = Series(["nosplit", "alsonosplit"], dtype=any_string_dtype)
+    want = DataFrame({0: only_column})
+    tm.assert_frame_equal(got, want)
+
+
+def test_split_to_dataframe(any_string_dtype):
+    """Check split(expand=True) when every row splits into equally many parts."""
+    ser = Series(["some_equal_splits", "with_no_nans"], dtype=any_string_dtype)
+    got = ser.str.split("_", expand=True)
+
+    columns = {0: ["some", "with"], 1: ["equal", "no"], 2: ["splits", "nans"]}
+    want = DataFrame(columns, dtype=any_string_dtype)
+    tm.assert_frame_equal(got, want)
+
+
+def test_split_to_dataframe_unequal_splits(any_string_dtype):
+    """Check split(expand=True) when rows split into different numbers of parts."""
+    ser = Series(
+        ["some_unequal_splits", "one_of_these_things_is_not"], dtype=any_string_dtype
+    )
+    got = ser.str.split("_", expand=True)
+
+    # The shorter row is padded with None up to the width of the longest row.
+    columns = {
+        0: ["some", "one"],
+        1: ["unequal", "of"],
+        2: ["splits", "these"],
+        3: [None, "things"],
+        4: [None, "is"],
+        5: [None, "not"],
+    }
+    want = DataFrame(columns, dtype=any_string_dtype)
+    tm.assert_frame_equal(got, want)
+
+
+def test_split_to_dataframe_with_index(any_string_dtype):
+    """Check that split(expand=True) keeps the index and validates ``expand``."""
+    row_labels = ["preserve", "me"]
+    ser = Series(
+        ["some_splits", "with_index"], index=row_labels, dtype=any_string_dtype
+    )
+    got = ser.str.split("_", expand=True)
+    want = DataFrame(
+        {0: ["some", "with"], 1: ["splits", "index"]},
+        index=["preserve", "me"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_frame_equal(got, want)
+
+    # Anything other than a boolean for ``expand`` is rejected.
+    with pytest.raises(ValueError, match="expand must be"):
+        ser.str.split("_", expand="not_a_boolean")
+
+
+def test_split_to_multiindex_expand_no_splits():
+    """Check Index.str.split(expand=True) when the separator never occurs."""
+    # https://github.com/pandas-dev/pandas/issues/23677
+    labels = Index(["nosplit", "alsonosplit", np.nan])
+    got = labels.str.split("_", expand=True)
+
+    # Nothing is split, so the result equals the input and stays single-level.
+    tm.assert_index_equal(got, labels)
+    assert got.nlevels == 1
+
+
+def test_split_to_multiindex_expand():
+    """Check Index.str.split(expand=True) with equal splits and missing entries."""
+    labels = Index(["some_equal_splits", "with_no_nans", np.nan, None])
+    got = labels.str.split("_", expand=True)
+
+    # A missing label is repeated on every level of its row.
+    rows = [
+        ("some", "equal", "splits"),
+        ("with", "no", "nans"),
+        [np.nan, np.nan, np.nan],
+        [None, None, None],
+    ]
+    want = MultiIndex.from_tuples(rows)
+    tm.assert_index_equal(got, want)
+    assert got.nlevels == 3
+
+
+def test_split_to_multiindex_expand_unequal_splits():
+    """Check Index.str.split(expand=True) with rows of different lengths."""
+    labels = Index(
+        ["some_unequal_splits", "one_of_these_things_is_not", np.nan, None]
+    )
+    got = labels.str.split("_", expand=True)
+
+    # The shorter row is padded with NaN up to the width of the longest row.
+    rows = [
+        ("some", "unequal", "splits", np.nan, np.nan, np.nan),
+        ("one", "of", "these", "things", "is", "not"),
+        (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan),
+        (None, None, None, None, None, None),
+    ]
+    want = MultiIndex.from_tuples(rows)
+    tm.assert_index_equal(got, want)
+    assert got.nlevels == 6
+
+    # Anything other than a boolean for ``expand`` is rejected.
+    with pytest.raises(ValueError, match="expand must be"):
+        labels.str.split("_", expand="not_a_boolean")
+
+
+def test_rsplit_to_dataframe_expand_no_splits(any_string_dtype):
+    """Check rsplit(expand=True) when the separator never occurs."""
+    ser = Series(["nosplit", "alsonosplit"], dtype=any_string_dtype)
+    got = ser.str.rsplit("_", expand=True)
+
+    # Nothing is split, so the frame has the input as its only column.
+    only_column = Series(["nosplit", "alsonosplit"])
+    want = DataFrame({0: only_column}, dtype=any_string_dtype)
+    tm.assert_frame_equal(got, want)
+
+
+def test_rsplit_to_dataframe_expand(any_string_dtype):
+    """Check rsplit(expand=True) without a limit and with n=2 and n=1."""
+    ser = Series(["some_equal_splits", "with_no_nans"], dtype=any_string_dtype)
+
+    # Both strings hold two separators, so n=2 is the same as no limit.
+    for kwargs in ({}, {"n": 2}):
+        got = ser.str.rsplit("_", expand=True, **kwargs)
+        want = DataFrame(
+            {0: ["some", "with"], 1: ["equal", "no"], 2: ["splits", "nans"]},
+            dtype=any_string_dtype,
+        )
+        tm.assert_frame_equal(got, want)
+
+    # n=1 only uses the right-most separator.
+    got = ser.str.rsplit("_", expand=True, n=1)
+    want = DataFrame(
+        {0: ["some_equal", "with_no"], 1: ["splits", "nans"]}, dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(got, want)
+
+
+def test_rsplit_to_dataframe_expand_with_index(any_string_dtype):
+    """Check that rsplit(expand=True) keeps the index of the subject."""
+    ser = Series(
+        ["some_splits", "with_index"], index=["preserve", "me"], dtype=any_string_dtype
+    )
+    got = ser.str.rsplit("_", expand=True)
+
+    columns = {0: ["some", "with"], 1: ["splits", "index"]}
+    want = DataFrame(columns, index=["preserve", "me"], dtype=any_string_dtype)
+    tm.assert_frame_equal(got, want)
+
+
+def test_rsplit_to_multiindex_expand_no_split():
+    """Check Index.str.rsplit(expand=True) when the separator never occurs."""
+    labels = Index(["nosplit", "alsonosplit"])
+    got = labels.str.rsplit("_", expand=True)
+
+    # Nothing is split, so the result equals the input and stays single-level.
+    tm.assert_index_equal(got, labels)
+    assert got.nlevels == 1
+
+
+def test_rsplit_to_multiindex_expand():
+    """Check that Index.str.rsplit(expand=True) builds a three-level MultiIndex."""
+    labels = Index(["some_equal_splits", "with_no_nans"])
+    got = labels.str.rsplit("_", expand=True)
+
+    rows = [("some", "equal", "splits"), ("with", "no", "nans")]
+    want = MultiIndex.from_tuples(rows)
+    tm.assert_index_equal(got, want)
+    assert got.nlevels == 3
+
+
+def test_rsplit_to_multiindex_expand_n():
+    """Check Index.str.rsplit(expand=True, n=1): only the last separator is used."""
+    labels = Index(["some_equal_splits", "with_no_nans"])
+    got = labels.str.rsplit("_", expand=True, n=1)
+
+    rows = [("some_equal", "splits"), ("with_no", "nans")]
+    want = MultiIndex.from_tuples(rows)
+    tm.assert_index_equal(got, want)
+    assert got.nlevels == 2
+
+
+def test_split_nan_expand(any_string_dtype):
+    """Check that a missing row expands to missing values everywhere (gh-18450)."""
+    ser = Series(["foo,bar,baz", np.nan], dtype=any_string_dtype)
+    got = ser.str.split(",", expand=True)
+    want = DataFrame(
+        [["foo", "bar", "baz"], [np.nan, np.nan, np.nan]], dtype=any_string_dtype
+    )
+    tm.assert_frame_equal(got, want)
+
+    # tm.assert_frame_equal does not differentiate np.nan/pd.NA from None, so
+    # check the cells of the missing row directly.
+    # TODO see GH 18463
+    if is_object_or_nan_string_dtype(any_string_dtype):
+        assert all(np.isnan(cell) for cell in got.iloc[1])
+    else:
+        assert all(cell is pd.NA for cell in got.iloc[1])
+
+
+def test_split_with_name_series(any_string_dtype):
+    """Check how split treats the name of the subject Series (GH 12617)."""
+    ser = Series(["a,b", "c,d"], name="xxx", dtype=any_string_dtype)
+
+    # Without expand the result is a Series and should preserve the name.
+    as_lists = ser.str.split(",")
+    want_lists = Series([["a", "b"], ["c", "d"]], name="xxx")
+    tm.assert_series_equal(as_lists, want_lists)
+
+    # With expand=True the result is a DataFrame with default column labels.
+    as_frame = ser.str.split(",", expand=True)
+    want_frame = DataFrame([["a", "b"], ["c", "d"]], dtype=any_string_dtype)
+    tm.assert_frame_equal(as_frame, want_frame)
+
+
+def test_split_with_name_index():
+    """Check how split treats the name of the subject Index (GH 12617)."""
+    labels = Index(["a,b", "c,d"], name="xxx")
+
+    # Without expand the result is a single-level Index that keeps the name.
+    as_lists = labels.str.split(",")
+    want_lists = Index([["a", "b"], ["c", "d"]], name="xxx")
+    assert as_lists.nlevels == 1
+    tm.assert_index_equal(as_lists, want_lists)
+
+    # With expand=True the result is a two-level MultiIndex without names.
+    as_levels = labels.str.split(",", expand=True)
+    want_levels = MultiIndex.from_tuples([("a", "b"), ("c", "d")])
+    assert as_levels.nlevels == 2
+    tm.assert_index_equal(as_levels, want_levels)
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        [
+            "partition",
+            [
+                ("a", "__", "b__c"),
+                ("c", "__", "d__e"),
+                np.nan,
+                ("f", "__", "g__h"),
+                None,
+            ],
+        ],
+        [
+            "rpartition",
+            [
+                ("a__b", "__", "c"),
+                ("c__d", "__", "e"),
+                np.nan,
+                ("f__g", "__", "h"),
+                None,
+            ],
+        ],
+    ],
+)
+def test_partition_series_more_than_one_char(method, exp, any_string_dtype):
+    """Check partition/rpartition with a separator longer than one character."""
+    # https://github.com/pandas-dev/pandas/issues/23558
+    ser = Series(["a__b__c", "c__d__e", np.nan, "f__g__h", None], dtype=any_string_dtype)
+    partitioner = getattr(ser.str, method)
+
+    got = partitioner("__", expand=False)
+    # Use the missing-value marker that matches the dtype of the input.
+    want = _convert_na_value(ser, Series(exp))
+    tm.assert_series_equal(got, want)
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        [
+            "partition",
+            [("a", " ", "b c"), ("c", " ", "d e"), np.nan, ("f", " ", "g h"), None],
+        ],
+        [
+            "rpartition",
+            [("a b", " ", "c"), ("c d", " ", "e"), np.nan, ("f g", " ", "h"), None],
+        ],
+    ],
+)
+def test_partition_series_none(any_string_dtype, method, exp):
+    """Check partition/rpartition called without a separator argument."""
+    # https://github.com/pandas-dev/pandas/issues/23558
+    ser = Series(["a b c", "c d e", np.nan, "f g h", None], dtype=any_string_dtype)
+    partitioner = getattr(ser.str, method)
+
+    # With no separator given, the strings are partitioned on a space.
+    got = partitioner(expand=False)
+    want = _convert_na_value(ser, Series(exp))
+    tm.assert_series_equal(got, want)
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        [
+            "partition",
+            [("abc", "", ""), ("cde", "", ""), np.nan, ("fgh", "", ""), None],
+        ],
+        [
+            "rpartition",
+            [("", "", "abc"), ("", "", "cde"), np.nan, ("", "", "fgh"), None],
+        ],
+    ],
+)
+def test_partition_series_not_split(any_string_dtype, method, exp):
+    """Check partition/rpartition when the separator does not occur at all."""
+    # https://github.com/pandas-dev/pandas/issues/23558
+    ser = Series(["abc", "cde", np.nan, "fgh", None], dtype=any_string_dtype)
+    partitioner = getattr(ser.str, method)
+
+    # The whole string lands in the first (partition) or last (rpartition) slot.
+    got = partitioner("_", expand=False)
+    want = _convert_na_value(ser, Series(exp))
+    tm.assert_series_equal(got, want)
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        [
+            "partition",
+            [("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h")],
+        ],
+        [
+            "rpartition",
+            [("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h")],
+        ],
+    ],
+)
+def test_partition_series_unicode(any_string_dtype, method, exp):
+    """Check partition/rpartition on "_" with expand=False."""
+    # https://github.com/pandas-dev/pandas/issues/23558
+    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
+    partitioner = getattr(ser.str, method)
+
+    got = partitioner("_", expand=False)
+    # Use the missing-value marker that matches the dtype of the input.
+    want = _convert_na_value(ser, Series(exp))
+    tm.assert_series_equal(got, want)
+
+
+@pytest.mark.parametrize("method", ["partition", "rpartition"])
+def test_partition_series_stdlib(any_string_dtype, method):
+    """Check that the accessor agrees with the str method of the same name."""
+    # https://github.com/pandas-dev/pandas/issues/23558
+    ser = Series(["A_B_C", "B_C_D", "E_F_G", "EFGHEF"], dtype=any_string_dtype)
+
+    via_accessor = getattr(ser.str, method)("_", expand=False).tolist()
+    # Reference: call the built-in method on every element.
+    via_stdlib = [getattr(value, method)("_") for value in ser]
+    assert via_accessor == via_stdlib
+
+
+@pytest.mark.parametrize(
+    "method, expand, exp, exp_levels",
+    [
+        [
+            "partition",
+            False,
+            np.array(
+                [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None],
+                dtype=object,
+            ),
+            1,
+        ],
+        [
+            "rpartition",
+            False,
+            np.array(
+                [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None],
+                dtype=object,
+            ),
+            1,
+        ],
+    ],
+)
+def test_partition_index(method, expand, exp, exp_levels):
+    """Check partition/rpartition on an Index that contains missing labels."""
+    # https://github.com/pandas-dev/pandas/issues/23558
+    labels = Index(["a_b_c", "c_d_e", "f_g_h", np.nan, None])
+    want = Index(exp)
+
+    got = getattr(labels.str, method)("_", expand=expand)
+    tm.assert_index_equal(got, want)
+    assert got.nlevels == exp_levels
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        [
+            "partition",
+            {
+                0: ["a", "c", np.nan, "f", None],
+                1: ["_", "_", np.nan, "_", None],
+                2: ["b_c", "d_e", np.nan, "g_h", None],
+            },
+        ],
+        [
+            "rpartition",
+            {
+                0: ["a_b", "c_d", np.nan, "f_g", None],
+                1: ["_", "_", np.nan, "_", None],
+                2: ["c", "e", np.nan, "h", None],
+            },
+        ],
+    ],
+)
+def test_partition_to_dataframe(any_string_dtype, method, exp):
+    """Check that partition/rpartition return a DataFrame when expand is unset."""
+    # https://github.com/pandas-dev/pandas/issues/23558
+    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None], dtype=any_string_dtype)
+    want = DataFrame(exp, dtype=any_string_dtype)
+
+    got = getattr(ser.str, method)("_")
+    tm.assert_frame_equal(got, want)
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        [
+            "partition",
+            {
+                0: ["a", "c", np.nan, "f", None],
+                1: ["_", "_", np.nan, "_", None],
+                2: ["b_c", "d_e", np.nan, "g_h", None],
+            },
+        ],
+        [
+            "rpartition",
+            {
+                0: ["a_b", "c_d", np.nan, "f_g", None],
+                1: ["_", "_", np.nan, "_", None],
+                2: ["c", "e", np.nan, "h", None],
+            },
+        ],
+    ],
+)
+def test_partition_to_dataframe_from_series(any_string_dtype, method, exp):
+    """Check that partition/rpartition return a DataFrame with expand=True."""
+    # https://github.com/pandas-dev/pandas/issues/23558
+    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None], dtype=any_string_dtype)
+    want = DataFrame(exp, dtype=any_string_dtype)
+
+    got = getattr(ser.str, method)("_", expand=True)
+    tm.assert_frame_equal(got, want)
+
+
+def test_partition_with_name(any_string_dtype):
+    """Check partition on a named Series with the default expand (GH 12617)."""
+    ser = Series(["a,b", "c,d"], name="xxx", dtype=any_string_dtype)
+    got = ser.str.partition(",")
+
+    columns = {0: ["a", "c"], 1: [",", ","], 2: ["b", "d"]}
+    want = DataFrame(columns, dtype=any_string_dtype)
+    tm.assert_frame_equal(got, want)
+
+
+def test_partition_with_name_expand(any_string_dtype):
+    """Check that partition(expand=False) preserves the Series name (GH 12617)."""
+    ser = Series(["a,b", "c,d"], name="xxx", dtype=any_string_dtype)
+    got = ser.str.partition(",", expand=False)
+
+    want = Series([("a", ",", "b"), ("c", ",", "d")], name="xxx")
+    tm.assert_series_equal(got, want)
+
+
+def test_partition_index_with_name():
+    """Check partition on a named Index with the default expand."""
+    labels = Index(["a,b", "c,d"], name="xxx")
+    got = labels.str.partition(",")
+
+    # The result is a three-level MultiIndex without names.
+    want = MultiIndex.from_tuples([("a", ",", "b"), ("c", ",", "d")])
+    assert got.nlevels == 3
+    tm.assert_index_equal(got, want)
+
+
+def test_partition_index_with_name_expand_false():
+    """Check that Index.str.partition(expand=False) preserves the Index name."""
+    labels = Index(["a,b", "c,d"], name="xxx")
+    got = labels.str.partition(",", expand=False)
+
+    parts = np.array([("a", ",", "b"), ("c", ",", "d")])
+    want = Index(parts, name="xxx")
+    assert got.nlevels == 1
+    tm.assert_index_equal(got, want)
+
+
+@pytest.mark.parametrize("method", ["partition", "rpartition"])
+def test_partition_sep_kwarg(any_string_dtype, method):
+    """Check that the separator may be passed as the ``sep`` keyword (GH 22676)."""
+    # GH 22676 deprecated the keyword "pat" in favor of "sep".
+    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
+    partitioner = getattr(ser.str, method)
+
+    by_keyword = partitioner(sep="_")
+    by_position = partitioner("_")
+    tm.assert_frame_equal(by_position, by_keyword)
+
+
+def test_get():
+    """Check str.get on the lists produced by str.split."""
+    ser = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"])
+    # Element 1 of every split list; the missing row stays missing.
+    got = ser.str.split("_").str.get(1)
+    want = Series(["b", "d", np.nan, "g"], dtype=object)
+    tm.assert_series_equal(got, want)
+
+
+def test_get_mixed_object():
+    """Check split followed by get on an object Series with non-string entries."""
+    mixed = Series(["a_b_c", np.nan, "c_d_e", True, datetime.today(), None, 1, 2.0])
+    got = mixed.str.split("_").str.get(1)
+
+    # Only the two real strings give a value; None stays None, the rest is NaN.
+    want = Series(
+        ["b", np.nan, "d", np.nan, np.nan, None, np.nan, np.nan], dtype=object
+    )
+    tm.assert_series_equal(got, want)
+
+
+@pytest.mark.parametrize("idx", [2, -3])
+def test_get_bounds(idx):
+    """Check that str.get gives NaN for a position outside the element."""
+    ser = Series(["1_2_3_4_5", "6_7_8_9_10", "11_12"])
+    # The last string only splits into two parts, so both positions miss it.
+    got = ser.str.split("_").str.get(idx)
+    want = Series(["3", "8", np.nan], dtype=object)
+    tm.assert_series_equal(got, want)
+
+
+@pytest.mark.parametrize(
+    "idx, exp", [[2, [3, 3, np.nan, "b"]], [-1, [3, 3, np.nan, np.nan]]]
+)
+def test_get_complex(idx, exp):
+    """Check str.get on tuple, list, set and dict elements (GH 20671)."""
+    # GH 20671: getting a key that is not in the dict used to raise `KeyError`.
+    containers = Series([(1, 2, 3), [1, 2, 3], {1, 2, 3}, {1: "a", 2: "b", 3: "c"}])
+
+    got = containers.str.get(idx)
+    want = Series(exp)
+    tm.assert_series_equal(got, want)
+
+
+@pytest.mark.parametrize("to_type", [tuple, list, np.array])
+def test_get_complex_nested(to_type):
+    """Check str.get on elements that are containers nested in containers."""
+    inner = to_type([1, 2])
+    nested = Series([to_type([inner])])
+
+    # Position 0 returns the inner container itself.
+    got = nested.str.get(0)
+    want = Series([to_type([1, 2])])
+    tm.assert_series_equal(got, want)
+
+    # Position 1 does not exist in the outer container and yields NaN.
+    got = nested.str.get(1)
+    want = Series([np.nan])
+    tm.assert_series_equal(got, want)
+
+
+def test_get_strings(any_string_dtype):
+    """Check str.get on plain strings of different lengths."""
+    ser = Series(["a", "ab", np.nan, "abc"], dtype=any_string_dtype)
+    # Only "abc" is long enough to have a character at position 2.
+    got = ser.str.get(2)
+    want = Series([np.nan, np.nan, np.nan, "c"], dtype=any_string_dtype)
+    tm.assert_series_equal(got, want)
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_string_array.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_string_array.py
@ -0,0 +1,111 @@
+import numpy as np
+import pytest
+
+from pandas._libs import lib
+
+from pandas import (
+    NA,
+    DataFrame,
+    Series,
+    _testing as tm,
+    option_context,
+)
+
+
+def test_string_array(nullable_string_dtype, any_string_method):
+    """
+    Run one string method on a nullable-string Series and on the same data
+    held as object dtype, then check both results agree once their dtypes
+    have been reconciled.
+    """
+    # the fixture supplies (method name, positional args, keyword args)
+    method_name, args, kwargs = any_string_method
+
+    data = ["a", "bb", np.nan, "ccc"]
+    a = Series(data, dtype=object)
+    b = Series(data, dtype=nullable_string_dtype)
+
+    if method_name == "decode":
+        # decode is only checked for raising on the string-dtype Series
+        with pytest.raises(TypeError, match="a bytes-like object is required"):
+            getattr(b.str, method_name)(*args, **kwargs)
+        return
+
+    # the object-dtype result is the reference; the nullable one is under test
+    expected = getattr(a.str, method_name)(*args, **kwargs)
+    result = getattr(b.str, method_name)(*args, **kwargs)
+
+    if isinstance(expected, Series):
+        if expected.dtype == "object" and lib.is_string_array(
+            expected.dropna().values,
+        ):
+            # string-valued reference: result must keep the nullable dtype
+            assert result.dtype == nullable_string_dtype
+            result = result.astype(object)
+
+        elif expected.dtype == "object" and lib.is_bool_array(
+            expected.values, skipna=True
+        ):
+            # booleans with missing values: result must be nullable "boolean"
+            assert result.dtype == "boolean"
+            expected = expected.astype("boolean")
+
+        elif expected.dtype == "bool":
+            assert result.dtype == "boolean"
+            result = result.astype("bool")
+
+        elif expected.dtype == "float" and expected.isna().any():
+            # reference is float with NaN; the nullable result must be "Int64"
+            assert result.dtype == "Int64"
+            result = result.astype("float")
+
+        if expected.dtype == object:
+            # GH#18463
+            # replace missing entries in the reference with NA before comparing
+            expected[expected.isna()] = NA
+
+    elif isinstance(expected, DataFrame):
+        # only the object-dtype columns of the reference are reconciled
+        columns = expected.select_dtypes(include="object").columns
+        assert all(result[columns].dtypes == nullable_string_dtype)
+        result[columns] = result[columns].astype(object)
+        with option_context("future.no_silent_downcasting", True):
+            expected[columns] = expected[columns].fillna(NA)  # GH#18463
+
+    tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "method,expected",
+    [
+        ("count", [2, None]),
+        ("find", [0, None]),
+        ("index", [0, None]),
+        ("rindex", [2, None]),
+    ],
+)
+def test_string_array_numeric_integer_array(nullable_string_dtype, method, expected):
+    """Integer-returning methods on a nullable string Series give ``Int64``."""
+    strings = Series(["aba", None], dtype=nullable_string_dtype)
+    str_method = getattr(strings.str, method)
+    tm.assert_series_equal(str_method("a"), Series(expected, dtype="Int64"))
+
+
+@pytest.mark.parametrize(
+    "method,expected",
+    [
+        ("isdigit", [False, None, True]),
+        ("isalpha", [True, None, False]),
+        ("isalnum", [True, None, True]),
+        ("isnumeric", [False, None, True]),
+    ],
+)
+def test_string_array_boolean_array(nullable_string_dtype, method, expected):
+    """``is*`` predicates on a nullable string Series give ``boolean`` dtype."""
+    strings = Series(["a", None, "1"], dtype=nullable_string_dtype)
+    predicate = getattr(strings.str, method)
+    tm.assert_series_equal(predicate(), Series(expected, dtype="boolean"))
+
+
+def test_string_array_extract(nullable_string_dtype):
+    """Two-group ``extract(expand=False)`` keeps the nullable string dtype."""
+    # https://github.com/pandas-dev/pandas/issues/30969
+    # Only expand=False & multiple groups was failing
+    values = ["a1", "b2", "cc"]
+    pat = r"(\w)(\d)"
+
+    result = Series(values, dtype=nullable_string_dtype).str.extract(
+        pat, expand=False
+    )
+    assert all(result.dtypes == nullable_string_dtype)
+
+    # GH#18463
+    expected = Series(values, dtype="object").str.extract(pat, expand=False)
+    expected = expected.fillna(NA)
+    tm.assert_equal(result.astype(object), expected)
--- a/lib/python3.11/site-packages/pandas/tests/strings/test_strings.py
+++ b/lib/python3.11/site-packages/pandas/tests/strings/test_strings.py
@ -0,0 +1,802 @@
+from datetime import (
+    datetime,
+    timedelta,
+)
+
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+)
+import pandas._testing as tm
+from pandas.core.strings.accessor import StringMethods
+from pandas.tests.strings import is_object_or_nan_string_dtype
+
+
+@pytest.mark.parametrize("pattern", [0, True, Series(["foo", "bar"])])
+def test_startswith_endswith_non_str_patterns(pattern):
+    """``startswith``/``endswith`` reject patterns that are not str or tuple."""
+    # GH3485
+    accessor = Series(["foo", "bar"]).str
+    msg = f"expected a string or tuple, not {type(pattern).__name__}"
+    for check in (accessor.startswith, accessor.endswith):
+        with pytest.raises(TypeError, match=msg):
+            check(pattern)
+
+
+def test_iter_raises():
+    """Iterating over the ``.str`` accessor raises ``TypeError``."""
+    # GH 54173
+    accessor = Series(["foo", "bar"]).str
+    expected_msg = "'StringMethods' object is not iterable"
+    with pytest.raises(TypeError, match=expected_msg):
+        iter(accessor)
+
+
+# test integer/float dtypes (inferred by constructor) and mixed
+
+
+def test_count(any_string_dtype):
+    """Count regex matches per element; the missing entry stays missing."""
+    if is_object_or_nan_string_dtype(any_string_dtype):
+        expected_dtype = np.float64
+    else:
+        expected_dtype = "Int64"
+    data = ["foo", "foofoo", np.nan, "foooofooofommmfoo"]
+    result = Series(data, dtype=any_string_dtype).str.count("f[o]+")
+    tm.assert_series_equal(result, Series([1, 2, np.nan, 4], dtype=expected_dtype))
+
+
+def test_count_mixed_object():
+    """``count`` on a mixed object Series gives NaN for non-string entries."""
+    mixed = ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]
+    counts = Series(mixed, dtype=object).str.count("a")
+    wanted = [1, np.nan, 0, np.nan, np.nan, 0, np.nan, np.nan, np.nan]
+    tm.assert_series_equal(counts, Series(wanted))
+
+
+def test_repeat(any_string_dtype):
+    """``repeat`` with a scalar count and with one count per element."""
+    ser = Series(["a", "b", np.nan, "c", np.nan, "d"], dtype=any_string_dtype)
+    cases = [
+        (3, ["aaa", "bbb", np.nan, "ccc", np.nan, "ddd"]),
+        ([1, 2, 3, 4, 5, 6], ["a", "bb", np.nan, "cccc", np.nan, "dddddd"]),
+    ]
+    for repeats, wanted in cases:
+        result = ser.str.repeat(repeats)
+        tm.assert_series_equal(result, Series(wanted, dtype=any_string_dtype))
+
+
+def test_repeat_mixed_object():
+    """``repeat`` on a mixed object Series only repeats the string entries."""
+    mixed = ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]
+    repeated = Series(mixed).str.repeat(3)
+    wanted = ["aaa", np.nan, "bbb", np.nan, np.nan, "foofoofoo", None, np.nan, np.nan]
+    tm.assert_series_equal(repeated, Series(wanted, dtype=object))
+
+
+@pytest.mark.parametrize("arg, repeat", [[None, 4], ["b", None]])
+def test_repeat_with_null(any_string_dtype, arg, repeat):
+    """A null value or a null repeat count gives a null result element."""
+    # GH: 31632
+    result = Series(["a", arg], dtype=any_string_dtype).str.repeat([3, repeat])
+    tm.assert_series_equal(result, Series(["aaa", None], dtype=any_string_dtype))
+
+
+def test_empty_str_methods(any_string_dtype):
+    """
+    Call the string methods on an empty Series and check that each returns
+    an empty result of the expected type and dtype.
+    """
+    # ``empty_str`` and ``empty`` are two names for the same Series object
+    empty_str = empty = Series(dtype=any_string_dtype)
+    empty_inferred_str = Series(dtype="str")
+    # expected integer/boolean result dtypes depend on the input's NA semantics
+    if is_object_or_nan_string_dtype(any_string_dtype):
+        empty_int = Series(dtype="int64")
+        empty_bool = Series(dtype=bool)
+    else:
+        empty_int = Series(dtype="Int64")
+        empty_bool = Series(dtype="boolean")
+    empty_object = Series(dtype=object)
+    empty_bytes = Series(dtype=object)
+    empty_df = DataFrame()
+
+    # GH7241
+    # (extract) on empty series
+
+    tm.assert_series_equal(empty_str, empty.str.cat(empty))
+    # cat() without ``others`` returns a plain string rather than a Series
+    assert "" == empty.str.cat()
+    tm.assert_series_equal(empty_str, empty.str.title())
+    tm.assert_series_equal(empty_int, empty.str.count("a"))
+    tm.assert_series_equal(empty_bool, empty.str.contains("a"))
+    tm.assert_series_equal(empty_bool, empty.str.startswith("a"))
+    tm.assert_series_equal(empty_bool, empty.str.endswith("a"))
+    tm.assert_series_equal(empty_str, empty.str.lower())
+    tm.assert_series_equal(empty_str, empty.str.upper())
+    tm.assert_series_equal(empty_str, empty.str.replace("a", "b"))
+    tm.assert_series_equal(empty_str, empty.str.repeat(3))
+    tm.assert_series_equal(empty_bool, empty.str.match("^a"))
+    # extract: one capture group with expand=False gives a Series,
+    # every other combination checked here gives a DataFrame
+    tm.assert_frame_equal(
+        DataFrame(columns=[0], dtype=any_string_dtype),
+        empty.str.extract("()", expand=True),
+    )
+    tm.assert_frame_equal(
+        DataFrame(columns=[0, 1], dtype=any_string_dtype),
+        empty.str.extract("()()", expand=True),
+    )
+    tm.assert_series_equal(empty_str, empty.str.extract("()", expand=False))
+    tm.assert_frame_equal(
+        DataFrame(columns=[0, 1], dtype=any_string_dtype),
+        empty.str.extract("()()", expand=False),
+    )
+    tm.assert_frame_equal(empty_df.set_axis([], axis=1), empty.str.get_dummies())
+    tm.assert_series_equal(empty_str, empty_str.str.join(""))
+    tm.assert_series_equal(empty_int, empty.str.len())
+    tm.assert_series_equal(empty_object, empty_str.str.findall("a"))
+    tm.assert_series_equal(empty_int, empty.str.find("a"))
+    tm.assert_series_equal(empty_int, empty.str.rfind("a"))
+    tm.assert_series_equal(empty_str, empty.str.pad(42))
+    tm.assert_series_equal(empty_str, empty.str.center(42))
+    tm.assert_series_equal(empty_object, empty.str.split("a"))
+    tm.assert_series_equal(empty_object, empty.str.rsplit("a"))
+    tm.assert_series_equal(empty_object, empty.str.partition("a", expand=False))
+    tm.assert_frame_equal(empty_df, empty.str.partition("a"))
+    tm.assert_series_equal(empty_object, empty.str.rpartition("a", expand=False))
+    tm.assert_frame_equal(empty_df, empty.str.rpartition("a"))
+    tm.assert_series_equal(empty_str, empty.str.slice(stop=1))
+    tm.assert_series_equal(empty_str, empty.str.slice(step=1))
+    tm.assert_series_equal(empty_str, empty.str.strip())
+    tm.assert_series_equal(empty_str, empty.str.lstrip())
+    tm.assert_series_equal(empty_str, empty.str.rstrip())
+    tm.assert_series_equal(empty_str, empty.str.wrap(42))
+    tm.assert_series_equal(empty_str, empty.str.get(0))
+    # decode is called on the object-dtype Series; encode on the string one
+    tm.assert_series_equal(empty_inferred_str, empty_bytes.str.decode("ascii"))
+    tm.assert_series_equal(empty_bytes, empty.str.encode("ascii"))
+    # ismethods should always return boolean (GH 29624)
+    tm.assert_series_equal(empty_bool, empty.str.isalnum())
+    tm.assert_series_equal(empty_bool, empty.str.isalpha())
+    tm.assert_series_equal(empty_bool, empty.str.isdigit())
+    tm.assert_series_equal(empty_bool, empty.str.isspace())
+    tm.assert_series_equal(empty_bool, empty.str.islower())
+    tm.assert_series_equal(empty_bool, empty.str.isupper())
+    tm.assert_series_equal(empty_bool, empty.str.istitle())
+    tm.assert_series_equal(empty_bool, empty.str.isnumeric())
+    tm.assert_series_equal(empty_bool, empty.str.isdecimal())
+    tm.assert_series_equal(empty_str, empty.str.capitalize())
+    tm.assert_series_equal(empty_str, empty.str.swapcase())
+    tm.assert_series_equal(empty_str, empty.str.normalize("NFC"))
+
+    table = str.maketrans("a", "b")
+    tm.assert_series_equal(empty_str, empty.str.translate(table))
+
+
+@pytest.mark.parametrize(
+    "method, expected",
+    [
+        ("isalnum", [True, True, True, True, True, False, True, True, False, False]),
+        ("isalpha", [True, True, True, False, False, False, True, False, False, False]),
+        (
+            "isdigit",
+            [False, False, False, True, False, False, False, True, False, False],
+        ),
+        (
+            "isnumeric",
+            [False, False, False, True, False, False, False, True, False, False],
+        ),
+        (
+            "isspace",
+            [False, False, False, False, False, False, False, False, False, True],
+        ),
+        (
+            "islower",
+            [False, True, False, False, False, False, False, False, False, False],
+        ),
+        (
+            "isupper",
+            [True, False, False, False, True, False, True, False, False, False],
+        ),
+        (
+            "istitle",
+            [True, False, True, False, True, False, False, False, False, False],
+        ),
+    ],
+)
+def test_ismethods(method, expected, any_string_dtype):
+    """
+    Check an ``is*`` predicate three ways: against the parametrized
+    expectation, against the matching builtin ``str`` method, and again
+    after missing values have been written into the Series.
+    """
+    ser = Series(
+        ["A", "b", "Xy", "4", "3A", "", "TT", "55", "-", "  "], dtype=any_string_dtype
+    )
+    expected_dtype = (
+        "bool" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
+    )
+    expected = Series(expected, dtype=expected_dtype)
+    result = getattr(ser.str, method)()
+    tm.assert_series_equal(result, expected)
+
+    # compare with standard library
+    expected_stdlib = [getattr(item, method)() for item in ser]
+    assert list(result) == expected_stdlib
+
+    # with missing value
+    ser.iloc[[1, 2, 3, 4]] = np.nan
+    result = getattr(ser.str, method)()
+    if ser.dtype == "object":
+        expected = expected.astype(object)
+        expected.iloc[[1, 2, 3, 4]] = np.nan
+    elif ser.dtype == "str":
+        # NaN propagates as False
+        expected.iloc[[1, 2, 3, 4]] = False
+    else:
+        # nullable dtypes propagate NaN
+        expected.iloc[[1, 2, 3, 4]] = np.nan
+    # without this comparison the missing-value branch above checks nothing
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "method, expected",
+    [
+        ("isnumeric", [False, True, True, False, True, True, False]),
+        ("isdecimal", [False, True, False, False, False, True, False]),
+    ],
+)
+def test_isnumeric_unicode(method, expected, any_string_dtype):
+    """``isnumeric``/``isdecimal`` on non-ASCII numerals match builtin ``str``."""
+    # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER
+    # 0x2605: ★ not number
+    # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
+    # 0xFF13: ３ Em 3  # noqa: RUF003
+    chars = ["A", "3", "¼", "★", "፸", "３", "four"]  # noqa: RUF001
+    ser = Series(chars, dtype=any_string_dtype)
+    if is_object_or_nan_string_dtype(any_string_dtype):
+        bool_dtype = "bool"
+    else:
+        bool_dtype = "boolean"
+    result = getattr(ser.str, method)()
+    tm.assert_series_equal(result, Series(expected, dtype=bool_dtype))
+
+    # compare with standard library
+    assert list(result) == [getattr(item, method)() for item in ser]
+
+
+@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
+@pytest.mark.parametrize(
+    "method, expected",
+    [
+        ("isnumeric", [False, np.nan, True, False, np.nan, True, False]),
+        ("isdecimal", [False, np.nan, False, False, np.nan, True, False]),
+    ],
+)
+def test_isnumeric_unicode_missing(method, expected, any_string_dtype):
+    """``isnumeric``/``isdecimal`` when the input contains missing values."""
+    values = ["A", np.nan, "¼", "★", np.nan, "３", "four"]  # noqa: RUF001
+    result = getattr(Series(values, dtype=any_string_dtype).str, method)()
+    if any_string_dtype == "str":
+        # NaN propagates as False
+        wanted = Series(expected, dtype=object).fillna(False).astype(bool)
+    elif is_object_or_nan_string_dtype(any_string_dtype):
+        wanted = Series(expected, dtype="object")
+    else:
+        wanted = Series(expected, dtype="boolean")
+    tm.assert_series_equal(result, wanted)
+
+
+def test_spilt_join_roundtrip(any_string_dtype):
+    """Splitting on "_" then joining with "_" gives the input as object dtype."""
+    original = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"], dtype=any_string_dtype)
+    pieces = original.str.split("_")
+    tm.assert_series_equal(pieces.str.join("_"), original.astype(object))
+
+
+def test_spilt_join_roundtrip_mixed_object():
+    """Split/join round trip on a mixed object Series; non-strings go missing."""
+    mixed = [
+        "a_b", np.nan, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0
+    ]
+    roundtripped = Series(mixed).str.split("_").str.join("_")
+    wanted = [
+        "a_b", np.nan, "asdf_cas_asdf", np.nan, np.nan, "foo", None, np.nan, np.nan
+    ]
+    tm.assert_series_equal(roundtripped, Series(wanted, dtype=object))
+
+
+def test_len(any_string_dtype):
+    """``len`` gives string lengths; the missing entry stays missing."""
+    texts = ["foo", "fooo", "fooooo", np.nan, "fooooooo", "foo\n", "あ"]
+    if is_object_or_nan_string_dtype(any_string_dtype):
+        expected_dtype = "float64"
+    else:
+        expected_dtype = "Int64"
+    lengths = Series(texts, dtype=any_string_dtype).str.len()
+    wanted = Series([3, 4, 6, np.nan, 8, 4, 1], dtype=expected_dtype)
+    tm.assert_series_equal(lengths, wanted)
+
+
+def test_len_mixed():
+    """``len`` on a mixed object Series gives NaN for non-string entries."""
+    mixed = [
+        "a_b", np.nan, "asdf_cas_asdf", True, datetime.today(), "foo", None, 1, 2.0
+    ]
+    lengths = Series(mixed).str.len()
+    wanted = [3, np.nan, 13, np.nan, np.nan, 3, np.nan, np.nan, np.nan]
+    tm.assert_series_equal(lengths, Series(wanted))
+
+
+@pytest.mark.parametrize(
+    "method,sub,start,end,expected",
+    [
+        ("index", "EF", None, None, [4, 3, 1, 0]),
+        ("rindex", "EF", None, None, [4, 5, 7, 4]),
+        ("index", "EF", 3, None, [4, 3, 7, 4]),
+        ("rindex", "EF", 3, None, [4, 5, 7, 4]),
+        ("index", "E", 4, 8, [4, 5, 7, 4]),
+        ("rindex", "E", 0, 5, [4, 3, 1, 4]),
+    ],
+)
+def test_index(method, sub, start, end, index_or_series, any_string_dtype, expected):
+    """``index``/``rindex`` agree with the expectation and with builtin ``str``."""
+    box = index_or_series
+    obj = box(["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"], dtype=any_string_dtype)
+    if is_object_or_nan_string_dtype(any_string_dtype):
+        expected_dtype = np.int64
+    else:
+        expected_dtype = "Int64"
+    wanted = box(expected, dtype=expected_dtype)
+
+    result = getattr(obj.str, method)(sub, start, end)
+
+    # pick the comparison helper that matches the container under test
+    assert_equal = tm.assert_series_equal if box is Series else tm.assert_index_equal
+    assert_equal(result, wanted)
+
+    # compare with standard library
+    assert list(result) == [getattr(item, method)(sub, start, end) for item in obj]
+
+
+def test_index_not_found_raises(index_or_series, any_string_dtype):
+    """``index`` raises ``ValueError`` when the substring cannot be found."""
+    words = ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"]
+    obj = index_or_series(words, dtype=any_string_dtype)
+    with pytest.raises(ValueError, match="substring not found"):
+        obj.str.index("DE")
+
+
+@pytest.mark.parametrize("method", ["index", "rindex"])
+def test_index_wrong_type_raises(index_or_series, any_string_dtype, method):
+    """Passing an int as the substring raises ``TypeError``."""
+    empty_obj = index_or_series([], dtype=any_string_dtype)
+    str_method = getattr(empty_obj.str, method)
+
+    with pytest.raises(TypeError, match="expected a string object, not int"):
+        str_method(0)
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        ["index", [1, 1, 0]],
+        ["rindex", [3, 1, 2]],
+    ],
+)
+def test_index_missing(any_string_dtype, method, exp):
+    """``index``/``rindex`` keep a trailing missing value missing."""
+    data = Series(["abcb", "ab", "bcbe", np.nan], dtype=any_string_dtype)
+    if is_object_or_nan_string_dtype(any_string_dtype):
+        expected_dtype = np.float64
+    else:
+        expected_dtype = "Int64"
+
+    wanted = Series([*exp, np.nan], dtype=expected_dtype)
+    tm.assert_series_equal(getattr(data.str, method)("b"), wanted)
+
+
+def test_pipe_failures(any_string_dtype):
+    """A literal "|" works as a separator for ``split`` and ``replace``."""
+    # #2119
+    piped = Series(["A|B|C"], dtype=any_string_dtype)
+
+    tm.assert_series_equal(
+        piped.str.split("|"),
+        Series([["A", "B", "C"]], dtype=object),
+    )
+    tm.assert_series_equal(
+        piped.str.replace("|", " ", regex=False),
+        Series(["A B C"], dtype=any_string_dtype),
+    )
+
+
+@pytest.mark.parametrize(
+    "start, stop, step, expected",
+    [
+        (2, 5, None, ["foo", "bar", np.nan, "baz"]),
+        (0, 3, -1, ["", "", np.nan, ""]),
+        (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]),
+        (None, 2, -1, ["owtoo", "owtra", np.nan, "xuqza"]),
+        (3, 10, 2, ["oto", "ato", np.nan, "aqx"]),
+        (3, 0, -1, ["ofa", "aba", np.nan, "aba"]),
+    ],
+)
+def test_slice(start, stop, step, expected, any_string_dtype):
+    """``slice`` with assorted start/stop/step values; NaN stays NaN."""
+    words = ["aafootwo", "aabartwo", np.nan, "aabazqux"]
+    sliced = Series(words, dtype=any_string_dtype).str.slice(start, stop, step)
+    tm.assert_series_equal(sliced, Series(expected, dtype=any_string_dtype))
+
+
+@pytest.mark.parametrize(
+    "start, stop, step, expected",
+    [
+        (2, 5, None, ["foo", np.nan, "bar", np.nan, np.nan, None, np.nan, np.nan]),
+        (4, 1, -1, ["oof", np.nan, "rab", np.nan, np.nan, None, np.nan, np.nan]),
+    ],
+)
+def test_slice_mixed_object(start, stop, step, expected):
+    """``slice`` on a mixed object Series only slices the string entries."""
+    mixed = ["aafootwo", np.nan, "aabartwo", True, datetime.today(), None, 1, 2.0]
+    sliced = Series(mixed).str.slice(start, stop, step)
+    tm.assert_series_equal(sliced, Series(expected, dtype=object))
+
+
+@pytest.mark.parametrize(
+    "start,stop,repl,expected",
+    [
+        (2, 3, None, ["shrt", "a it longer", "evnlongerthanthat", "", np.nan]),
+        (2, 3, "z", ["shzrt", "a zit longer", "evznlongerthanthat", "z", np.nan]),
+        (2, 2, "z", ["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]),
+        (2, 1, "z", ["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]),
+        (-1, None, "z", ["shorz", "a bit longez", "evenlongerthanthaz", "z", np.nan]),
+        (None, -2, "z", ["zrt", "zer", "zat", "z", np.nan]),
+        (6, 8, "z", ["shortz", "a bit znger", "evenlozerthanthat", "z", np.nan]),
+        (-10, 3, "z", ["zrt", "a zit longer", "evenlongzerthanthat", "z", np.nan]),
+    ],
+)
+def test_slice_replace(start, stop, repl, expected, any_string_dtype):
+    """``slice_replace`` swaps the [start:stop] slice for ``repl``."""
+    texts = ["short", "a bit longer", "evenlongerthanthat", "", np.nan]
+    ser = Series(texts, dtype=any_string_dtype)
+    wanted = Series(expected, dtype=any_string_dtype)
+    tm.assert_series_equal(ser.str.slice_replace(start, stop, repl), wanted)
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        ["strip", ["aa", "bb", np.nan, "cc"]],
+        ["lstrip", ["aa   ", "bb \n", np.nan, "cc  "]],
+        ["rstrip", ["  aa", " bb", np.nan, "cc"]],
+    ],
+)
+def test_strip_lstrip_rstrip(any_string_dtype, method, exp):
+    """Argument-less ``strip``/``lstrip``/``rstrip`` remove whitespace."""
+    padded = Series(["  aa   ", " bb \n", np.nan, "cc  "], dtype=any_string_dtype)
+    stripper = getattr(padded.str, method)
+    tm.assert_series_equal(stripper(), Series(exp, dtype=any_string_dtype))
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        ["strip", ["aa", np.nan, "bb"]],
+        ["lstrip", ["aa  ", np.nan, "bb \t\n"]],
+        ["rstrip", ["  aa", np.nan, " bb"]],
+    ],
+)
+def test_strip_lstrip_rstrip_mixed_object(method, exp):
+    """Strip variants on a mixed object Series leave non-strings missing."""
+    mixed = ["  aa  ", np.nan, " bb \t\n", True, datetime.today(), None, 1, 2.0]
+    stripped = getattr(Series(mixed).str, method)()
+
+    # the five trailing non-string entries come back as NaN/None
+    wanted = [*exp, np.nan, np.nan, None, np.nan, np.nan]
+    tm.assert_series_equal(stripped, Series(wanted, dtype=object))
+
+
+@pytest.mark.parametrize(
+    "method, exp",
+    [
+        ["strip", ["ABC", " BNSD", "LDFJH "]],
+        ["lstrip", ["ABCxx", " BNSD", "LDFJH xx"]],
+        ["rstrip", ["xxABC", "xx BNSD", "LDFJH "]],
+    ],
+)
+def test_strip_lstrip_rstrip_args(any_string_dtype, method, exp):
+    """Strip variants called with an explicit character to remove ("x")."""
+    wrapped = Series(["xxABCxx", "xx BNSD", "LDFJH xx"], dtype=any_string_dtype)
+    stripper = getattr(wrapped.str, method)
+    tm.assert_series_equal(stripper("x"), Series(exp, dtype=any_string_dtype))
+
+
+@pytest.mark.parametrize(
+    "prefix, expected", [("a", ["b", " b c", "bc"]), ("ab", ["", "a b c", "bc"])]
+)
+def test_removeprefix(any_string_dtype, prefix, expected):
+    """``removeprefix`` strips the prefix only from strings that start with it."""
+    source = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
+    wanted = Series(expected, dtype=any_string_dtype)
+    tm.assert_series_equal(source.str.removeprefix(prefix), wanted)
+
+
+@pytest.mark.parametrize(
+    "suffix, expected", [("c", ["ab", "a b ", "b"]), ("bc", ["ab", "a b c", ""])]
+)
+def test_removesuffix(any_string_dtype, suffix, expected):
+    """``removesuffix`` strips the suffix only from strings that end with it."""
+    source = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
+    wanted = Series(expected, dtype=any_string_dtype)
+    tm.assert_series_equal(source.str.removesuffix(suffix), wanted)
+
+
+def test_string_slice_get_syntax(any_string_dtype):
+    """Indexing and slicing ``.str`` match explicit ``get`` and ``slice`` calls."""
+    values = [
+        "YYY",
+        "B",
+        "C",
+        "YYYYYYbYYY",
+        "BYYYcYYY",
+        np.nan,
+        "CYYYBYYY",
+        "dog",
+        "cYYYt",
+    ]
+    accessor = Series(values, dtype=any_string_dtype).str
+
+    tm.assert_series_equal(accessor[0], accessor.get(0))
+    tm.assert_series_equal(accessor[:3], accessor.slice(stop=3))
+    tm.assert_series_equal(accessor[2::-1], accessor.slice(start=2, step=-1))
+
+
+def test_string_slice_out_of_bounds_nested():
+    """Indexing past the end of a shorter tuple gives NaN for that row."""
+    tuples = Series([(1, 2), (1,), (3, 4, 5)])
+    tm.assert_series_equal(tuples.str[1], Series([2, np.nan, 4]))
+
+
+def test_string_slice_out_of_bounds(any_string_dtype):
+    """Indexing past the end of a shorter string gives a missing value."""
+    words = Series(["foo", "b", "ba"], dtype=any_string_dtype)
+    wanted = Series(["o", np.nan, "a"], dtype=any_string_dtype)
+    tm.assert_series_equal(words.str[1], wanted)
+
+
+def test_encode_decode(any_string_dtype):
+    """UTF-8 ``encode`` followed by ``decode`` gives the text back as "str"."""
+    texts = ["a", "b", "a\xe4"]
+    encoded = Series(texts, dtype=any_string_dtype).str.encode("utf-8")
+    decoded = encoded.str.decode("utf-8")
+    tm.assert_series_equal(decoded, Series(["a", "b", "a\xe4"], dtype="str"))
+
+
+def test_encode_errors_kwarg(any_string_dtype):
+    """Default ``encode`` raises on an unencodable character; "ignore" does not."""
+    ser = Series(["a", "b", "a\x9d"], dtype=any_string_dtype)
+
+    msg = (
+        r"'charmap' codec can't encode character '\\x9d' in position 1: "
+        "character maps to <undefined>"
+    )
+    with pytest.raises(UnicodeEncodeError, match=msg):
+        ser.str.encode("cp1252")
+
+    def encode_ignoring(text):
+        return text.encode("cp1252", "ignore")
+
+    result = ser.str.encode("cp1252", "ignore")
+    tm.assert_series_equal(result, ser.map(encode_ignoring))
+
+
+def test_decode_errors_kwarg():
+    """Default ``decode`` raises on an undecodable byte; "ignore" does not."""
+    ser = Series([b"a", b"b", b"a\x9d"])
+
+    msg = (
+        "'charmap' codec can't decode byte 0x9d in position 1: "
+        "character maps to <undefined>"
+    )
+    with pytest.raises(UnicodeDecodeError, match=msg):
+        ser.str.decode("cp1252")
+
+    def decode_ignoring(raw):
+        return raw.decode("cp1252", "ignore")
+
+    result = ser.str.decode("cp1252", "ignore")
+    tm.assert_series_equal(result, ser.map(decode_ignoring).astype("str"))
+
+
+def test_decode_string_dtype(string_dtype):
+    """``decode(dtype=...)`` returns the requested string dtype."""
+    # https://github.com/pandas-dev/pandas/pull/60940
+    decoded = Series([b"a", b"b"]).str.decode("utf-8", dtype=string_dtype)
+    tm.assert_series_equal(decoded, Series(["a", "b"], dtype=string_dtype))
+
+
+def test_decode_object_dtype(object_dtype):
+    """``decode(dtype=...)`` returns the requested object dtype."""
+    # https://github.com/pandas-dev/pandas/pull/60940
+    raw = Series([b"a", rb"\ud800"])
+    decoded = raw.str.decode("utf-8", dtype=object_dtype)
+    tm.assert_series_equal(decoded, Series(["a", r"\ud800"], dtype=object_dtype))
+
+
+def test_decode_bad_dtype():
+    """``decode`` rejects a ``dtype`` that is neither string nor object."""
+    # https://github.com/pandas-dev/pandas/pull/60940
+    raw = Series([b"a", b"b"])
+    expected_msg = "dtype must be string or object, got dtype='int64'"
+    with pytest.raises(ValueError, match=expected_msg):
+        raw.str.decode("utf-8", dtype="int64")
+
+
+@pytest.mark.parametrize(
+    "form, expected",
+    [
+        ("NFKC", ["ABC", "ABC", "123", np.nan, "アイエ"]),
+        ("NFC", ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"]),  # noqa: RUF001
+    ],
+)
+def test_normalize(form, expected, any_string_dtype):
+    """``normalize`` applies the given Unicode form and keeps the index."""
+    labels = ["a", "b", "c", "d", "e"]
+    raw = ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"]  # noqa: RUF001
+    ser = Series(raw, index=labels, dtype=any_string_dtype)
+    wanted = Series(expected, index=labels, dtype=any_string_dtype)
+    tm.assert_series_equal(ser.str.normalize(form), wanted)
+
+
+def test_normalize_bad_arg_raises(any_string_dtype):
+    """An unknown normalization form raises ``ValueError``."""
+    raw = ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"]  # noqa: RUF001
+    ser = Series(raw, index=["a", "b", "c", "d", "e"], dtype=any_string_dtype)
+    with pytest.raises(ValueError, match="invalid normalization form"):
+        ser.str.normalize("xxx")
+
+
+def test_normalize_index():
+    """``normalize`` is also available on an ``Index``."""
+    wide = Index(["ＡＢＣ", "１２３", "ｱｲｴ"])  # noqa: RUF001
+    tm.assert_index_equal(wide.str.normalize("NFKC"), Index(["ABC", "123", "アイエ"]))
+
+
+@pytest.mark.parametrize(
+    "values,inferred_type",
+    [
+        (["a", "b"], "string"),
+        (["a", "b", 1], "mixed-integer"),
+        (["a", "b", 1.3], "mixed"),
+        (["a", "b", 1.3, 1], "mixed-integer"),
+        (["aa", datetime(2011, 1, 1)], "mixed"),
+    ],
+)
+def test_index_str_accessor_visibility(values, inferred_type, index_or_series):
+    """``.str`` is available when at least some of the values are strings."""
+    container = index_or_series(values)
+    if index_or_series is Index:
+        assert container.inferred_type == inferred_type
+
+    assert isinstance(container.str, StringMethods)
+
+
+@pytest.mark.parametrize(
+    "values,inferred_type",
+    [
+        ([1, np.nan], "floating"),
+        ([datetime(2011, 1, 1)], "datetime64"),
+        ([timedelta(1)], "timedelta64"),
+    ],
+)
+def test_index_str_accessor_non_string_values_raises(
+    values, inferred_type, index_or_series
+):
+    """``.str`` raises ``AttributeError`` on float/datetime/timedelta data."""
+    container = index_or_series(values)
+    if index_or_series is Index:
+        assert container.inferred_type == inferred_type
+
+    with pytest.raises(
+        AttributeError, match="Can only use .str accessor with string values"
+    ):
+        container.str
+
+
+def test_index_str_accessor_multiindex_raises():
+    """``.str`` raises ``AttributeError`` on a ``MultiIndex``."""
+    # A MultiIndex reports a "mixed" inferred type, yet the accessor is not
+    # allowed on it
+    multi = MultiIndex.from_tuples([("a", "b"), ("a", "b")])
+    assert multi.inferred_type == "mixed"
+
+    with pytest.raises(
+        AttributeError, match="Can only use .str accessor with Index, not MultiIndex"
+    ):
+        multi.str
+
+
+def test_str_accessor_no_new_attributes(any_string_dtype):
+    """Setting an unknown attribute on the ``.str`` accessor raises."""
+    # https://github.com/pandas-dev/pandas/issues/10673
+    letters = Series(list("aabbcde"), dtype=any_string_dtype)
+    expected_msg = "You cannot add any new attribute"
+    with pytest.raises(AttributeError, match=expected_msg):
+        letters.str.xlabel = "a"
+
+
+def test_cat_on_bytes_raises():
+    """``cat`` refuses to concatenate Series that hold bytes."""
+
+    def bytes_series(chars):
+        # one-byte values stored in an object-dtype Series
+        return Series(np.array(list(chars), "S1").astype(object))
+
+    lhs = bytes_series("abc")
+    rhs = bytes_series("def")
+    msg = "Cannot use .str.cat with values of inferred dtype 'bytes'"
+    with pytest.raises(TypeError, match=msg):
+        lhs.str.cat(rhs)
+
+
+def test_str_accessor_in_apply_func():
+    """The ``.str`` accessor works on the rows passed to ``apply(axis=1)``."""
+    # https://github.com/pandas-dev/pandas/issues/38979
+
+    def join_upper(row):
+        return "/".join(row.str.upper())
+
+    frame = DataFrame(zip("abc", "def"))
+    joined = frame.apply(join_upper, axis=1)
+    tm.assert_series_equal(joined, Series(["A/D", "B/E", "C/F"]))
+
+
+def test_zfill():
+    """``zfill`` pads after a leading sign; non-string entries become NaN."""
+    # https://github.com/pandas-dev/pandas/issues/20868
+    mixed = Series(["-1", "1", "1000", 10, np.nan])
+    wanted = Series(["-01", "001", "1000", np.nan, np.nan], dtype=object)
+    tm.assert_series_equal(mixed.str.zfill(3), wanted)
+
+    signed = Series(["-2", "+5"])
+    tm.assert_series_equal(signed.str.zfill(5), Series(["-0002", "+0005"]))
+
+
+def test_zfill_with_non_integer_argument():
+    """A non-integer ``width`` raises ``TypeError`` naming the offending type."""
+    signed = Series(["-2", "+5"])
+    wid = "a"
+    with pytest.raises(
+        TypeError, match=f"width must be of integer type, not {type(wid).__name__}"
+    ):
+        signed.str.zfill(wid)
+
+
+def test_zfill_with_leading_sign():
+    """Zeros go after a leading "+"/"-", even when the rest is not numeric."""
+    signed = Series(["-cat", "-1", "+dog"])
+    padded = signed.str.zfill(5)
+    tm.assert_series_equal(padded, Series(["-0cat", "-0001", "+0dog"]))
+
+
+def test_get_with_dict_label():
+    """``get`` with a dict key returns the value, or None when the key is absent."""
+    # GH47911
+    records = Series(
+        [
+            {"name": "Hello", "value": "World"},
+            {"name": "Goodbye", "value": "Planet"},
+            {"value": "Sea"},
+        ]
+    )
+    cases = (
+        ("name", ["Hello", "Goodbye", None]),
+        ("value", ["World", "Planet", "Sea"]),
+    )
+    for key, wanted in cases:
+        tm.assert_series_equal(records.str.get(key), Series(wanted, dtype=object))
+
+
+def test_series_str_decode():
+    """``decode`` accepts ``encoding`` and ``errors`` as keyword arguments."""
+    # GH 22613
+    raw = Series([b"x", b"y"])
+    decoded = raw.str.decode(encoding="UTF-8", errors="strict")
+    tm.assert_series_equal(decoded, Series(["x", "y"], dtype="str"))
+
+
+def test_reversed_logical_ops(any_string_dtype):
+    """
+    ``bool_series <op> string_series`` for ``|``, ``&`` and ``^`` equals the
+    same operation against the string Series cast to bool; a
+    DeprecationWarning is expected unless the dtype is object.
+    """
+    # GH#60234
+    dtype = any_string_dtype
+    warn = None if dtype == object else DeprecationWarning
+    left = Series([True, False, False, True])
+    right = Series(["", "", "b", "c"], dtype=dtype)
+
+    msg = "operations between boolean dtype and"
+    binary_ops = (
+        lambda lhs, rhs: lhs | rhs,
+        lambda lhs, rhs: lhs & rhs,
+        lambda lhs, rhs: lhs ^ rhs,
+    )
+    for apply_op in binary_ops:
+        with tm.assert_produces_warning(warn, match=msg):
+            result = apply_op(left, right)
+        expected = apply_op(left, right.astype(bool))
+        tm.assert_series_equal(result, expected)