commit 2fc0d000b6 (parent e1b817252c)
2025-09-07 22:09:54 +02:00
7796 changed files with 2159515 additions and 933 deletions


@@ -0,0 +1,225 @@
import shlex
import subprocess
import time
import uuid
import pytest
from pandas.compat import (
is_ci_environment,
is_platform_arm,
is_platform_mac,
is_platform_windows,
)
import pandas.util._test_decorators as td
import pandas.io.common as icom
from pandas.io.parsers import read_csv
@pytest.fixture
def compression_to_extension():
return {value: key for key, value in icom.extension_to_compression.items()}
@pytest.fixture
def tips_file(datapath):
"""Path to the tips dataset"""
return datapath("io", "data", "csv", "tips.csv")
@pytest.fixture
def jsonl_file(datapath):
"""Path to a JSONL dataset"""
return datapath("io", "parser", "data", "items.jsonl")
@pytest.fixture
def salaries_table(datapath):
"""DataFrame with the salaries dataset"""
return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t")
@pytest.fixture
def feather_file(datapath):
    """Path to the feather dataset"""
    return datapath("io", "data", "feather", "feather-0_3_1.feather")
@pytest.fixture
def xml_file(datapath):
    """Path to the books.xml dataset"""
    return datapath("io", "data", "xml", "books.xml")
@pytest.fixture
def s3_base(worker_id, monkeypatch):
"""
Fixture for mocking S3 interaction.
    Starts a moto server in a separate process when running locally.
    In CI, returns the URL of the motoserver/moto container service.
"""
pytest.importorskip("s3fs")
pytest.importorskip("boto3")
# temporary workaround as moto fails for botocore >= 1.11 otherwise,
# see https://github.com/spulec/moto/issues/1924 & 1952
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
if is_ci_environment():
if is_platform_arm() or is_platform_mac() or is_platform_windows():
# NOT RUN on Windows/macOS, only Ubuntu
# - subprocess in CI can cause timeouts
# - GitHub Actions do not support
# container services for the above OSs
pytest.skip(
"S3 tests do not have a corresponding service on "
"Windows or macOS platforms"
)
else:
# set in .github/workflows/unit-tests.yml
yield "http://localhost:5000"
else:
requests = pytest.importorskip("requests")
pytest.importorskip("moto")
pytest.importorskip("flask") # server mode needs flask too
# Launching moto in server mode, i.e., as a separate process
# with an S3 endpoint on localhost
worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
endpoint_port = f"555{worker_id}"
endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"
# pipe to null to avoid logging in terminal
with subprocess.Popen(
shlex.split(f"moto_server s3 -p {endpoint_port}"),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
) as proc:
timeout = 5
while timeout > 0:
try:
# OK to go once server is accepting connections
r = requests.get(endpoint_uri)
if r.ok:
break
except Exception:
pass
timeout -= 0.1
time.sleep(0.1)
yield endpoint_uri
proc.terminate()
@pytest.fixture
def s3so(s3_base):
return {"client_kwargs": {"endpoint_url": s3_base}}
@pytest.fixture
def s3_resource(s3_base):
import boto3
s3 = boto3.resource("s3", endpoint_url=s3_base)
return s3
@pytest.fixture
def s3_public_bucket(s3_resource):
bucket = s3_resource.Bucket(f"pandas-test-{uuid.uuid4()}")
bucket.create()
yield bucket
bucket.objects.delete()
bucket.delete()
@pytest.fixture
def s3_public_bucket_with_data(
s3_public_bucket, tips_file, jsonl_file, feather_file, xml_file
):
"""
    The following datasets are loaded into the public bucket:
    - tips#1.csv
    - tips.csv
    - tips.csv.gz
    - tips.csv.bz2
    - items.jsonl
    - simple_dataset.feather
    - books.xml
"""
test_s3_files = [
("tips#1.csv", tips_file),
("tips.csv", tips_file),
("tips.csv.gz", tips_file + ".gz"),
("tips.csv.bz2", tips_file + ".bz2"),
("items.jsonl", jsonl_file),
("simple_dataset.feather", feather_file),
("books.xml", xml_file),
]
for s3_key, file_name in test_s3_files:
with open(file_name, "rb") as f:
s3_public_bucket.put_object(Key=s3_key, Body=f)
return s3_public_bucket
@pytest.fixture
def s3_private_bucket(s3_resource):
bucket = s3_resource.Bucket(f"cant_get_it-{uuid.uuid4()}")
bucket.create(ACL="private")
yield bucket
bucket.objects.delete()
bucket.delete()
@pytest.fixture
def s3_private_bucket_with_data(
s3_private_bucket, tips_file, jsonl_file, feather_file, xml_file
):
"""
    The following datasets are loaded into the private bucket:
    - tips#1.csv
    - tips.csv
    - tips.csv.gz
    - tips.csv.bz2
    - items.jsonl
    - simple_dataset.feather
    - books.xml
"""
test_s3_files = [
("tips#1.csv", tips_file),
("tips.csv", tips_file),
("tips.csv.gz", tips_file + ".gz"),
("tips.csv.bz2", tips_file + ".bz2"),
("items.jsonl", jsonl_file),
("simple_dataset.feather", feather_file),
("books.xml", xml_file),
]
for s3_key, file_name in test_s3_files:
with open(file_name, "rb") as f:
s3_private_bucket.put_object(Key=s3_key, Body=f)
return s3_private_bucket
_compression_formats_params = [
(".no_compress", None),
("", None),
(".gz", "gzip"),
(".GZ", "gzip"),
(".bz2", "bz2"),
(".BZ2", "bz2"),
(".zip", "zip"),
(".ZIP", "zip"),
(".xz", "xz"),
(".XZ", "xz"),
pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")),
pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")),
]
@pytest.fixture(params=_compression_formats_params[1:])
def compression_format(request):
return request.param
@pytest.fixture(params=_compression_formats_params)
def compression_ext(request):
return request.param[0]
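For orientation, a minimal sketch (not part of the diff) of how the S3 and storage-options fixtures defined in this conftest might be combined in a downstream test; the test name and assertion are illustrative:

def test_read_csv_from_mock_s3(s3_public_bucket_with_data, s3so):
    # tips.csv was uploaded by the fixture above; s3so points s3fs/fsspec
    # at the local moto endpoint instead of real AWS
    from pandas.io.parsers import read_csv

    df = read_csv(
        f"s3://{s3_public_bucket_with_data.name}/tips.csv",
        storage_options=s3so,
    )
    assert not df.empty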


@@ -0,0 +1,77 @@
import functools
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
import pandas._testing as tm
pytest.importorskip("odf")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture(autouse=True)
def cd_and_set_engine(monkeypatch, datapath):
func = functools.partial(pd.read_excel, engine="odf")
monkeypatch.setattr(pd, "read_excel", func)
monkeypatch.chdir(datapath("io", "data", "excel"))
def test_read_invalid_types_raises():
    # the invalid_value_type.ods file required manual editing
    # of the included content.xml file
with pytest.raises(ValueError, match="Unrecognized type awesome_new_type"):
pd.read_excel("invalid_value_type.ods")
def test_read_writer_table():
    # Also test reading tables from a text OpenDocument file
    # (.odt)
index = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header")
expected = pd.DataFrame(
[[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]],
index=index,
columns=["Column 1", "Unnamed: 2", "Column 3"],
)
result = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0)
tm.assert_frame_equal(result, expected)
def test_read_newlines_between_xml_elements_table():
# GH#45598
expected = pd.DataFrame(
[[1.0, 4.0, 7], [np.nan, np.nan, 8], [3.0, 6.0, 9]],
columns=["Column 1", "Column 2", "Column 3"],
)
result = pd.read_excel("test_newlines.ods")
tm.assert_frame_equal(result, expected)
def test_read_unempty_cells():
expected = pd.DataFrame(
[1, np.nan, 3, np.nan, 5],
columns=["Column 1"],
)
result = pd.read_excel("test_unempty_cells.ods")
tm.assert_frame_equal(result, expected)
def test_read_cell_annotation():
expected = pd.DataFrame(
["test", np.nan, "test 3"],
columns=["Column 1"],
)
result = pd.read_excel("test_cell_annotation.ods")
tm.assert_frame_equal(result, expected)
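Because of the autouse fixture above, these tests call pd.read_excel with no engine argument and with relative paths. Written out explicitly, the equivalent call would look like the sketch below (not part of the diff, path illustrative), assuming odfpy is installed:

import pandas as pd

# explicit form of what the monkeypatched pd.read_excel does in this module
result = pd.read_excel(
    "pandas/tests/io/data/excel/writertable.odt",  # illustrative path
    engine="odf",
    sheet_name="Table1",
    index_col=0,
)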


@@ -0,0 +1,106 @@
from datetime import (
date,
datetime,
)
import re
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
odf = pytest.importorskip("odf")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture
def ext():
return ".ods"
def test_write_append_mode_raises(ext):
msg = "Append mode is not supported with odf!"
with tm.ensure_clean(ext) as f:
with pytest.raises(ValueError, match=msg):
ExcelWriter(f, engine="odf", mode="a")
@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}])
def test_engine_kwargs(ext, engine_kwargs):
# GH 42286
# GH 43445
# test for error: OpenDocumentSpreadsheet does not accept any arguments
with tm.ensure_clean(ext) as f:
if engine_kwargs is not None:
error = re.escape(
"OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
)
with pytest.raises(
TypeError,
match=error,
):
ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs)
else:
with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _:
pass
def test_book_and_sheets_consistent(ext):
# GH#45687 - Ensure sheets is updated if user modifies book
with tm.ensure_clean(ext) as f:
with ExcelWriter(f) as writer:
assert writer.sheets == {}
table = odf.table.Table(name="test_name")
writer.book.spreadsheet.addElement(table)
assert writer.sheets == {"test_name": table}
@pytest.mark.parametrize(
["value", "cell_value_type", "cell_value_attribute", "cell_value"],
argvalues=[
(True, "boolean", "boolean-value", "true"),
("test string", "string", "string-value", "test string"),
(1, "float", "value", "1"),
(1.5, "float", "value", "1.5"),
(
datetime(2010, 10, 10, 10, 10, 10),
"date",
"date-value",
"2010-10-10T10:10:10",
),
(date(2010, 10, 10), "date", "date-value", "2010-10-10"),
],
)
def test_cell_value_type(ext, value, cell_value_type, cell_value_attribute, cell_value):
# GH#54994 ODS: cell attributes should follow specification
# http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
from odf.namespaces import OFFICENS
from odf.table import (
TableCell,
TableRow,
)
table_cell_name = TableCell().qname
with tm.ensure_clean(ext) as f:
pd.DataFrame([[value]]).to_excel(f, header=False, index=False)
with pd.ExcelFile(f) as wb:
sheet = wb._reader.get_sheet_by_index(0)
sheet_rows = sheet.getElementsByType(TableRow)
sheet_cells = [
x
for x in sheet_rows[0].childNodes
if hasattr(x, "qname") and x.qname == table_cell_name
]
cell = sheet_cells[0]
assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type
assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value


@@ -0,0 +1,432 @@
import contextlib
from pathlib import Path
import re
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import (
ExcelWriter,
_OpenpyxlWriter,
)
from pandas.io.excel._openpyxl import OpenpyxlReader
openpyxl = pytest.importorskip("openpyxl")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture
def ext():
return ".xlsx"
def test_to_excel_styleconverter():
from openpyxl import styles
hstyle = {
"font": {"color": "00FF0000", "bold": True},
"borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
"alignment": {"horizontal": "center", "vertical": "top"},
"fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
"number_format": {"format_code": "0.00"},
"protection": {"locked": True, "hidden": False},
}
font_color = styles.Color("00FF0000")
font = styles.Font(bold=True, color=font_color)
side = styles.Side(style=styles.borders.BORDER_THIN)
border = styles.Border(top=side, right=side, bottom=side, left=side)
alignment = styles.Alignment(horizontal="center", vertical="top")
fill_color = styles.Color(rgb="006666FF", tint=0.3)
fill = styles.PatternFill(patternType="solid", fgColor=fill_color)
number_format = "0.00"
protection = styles.Protection(locked=True, hidden=False)
kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
assert kw["font"] == font
assert kw["border"] == border
assert kw["alignment"] == alignment
assert kw["fill"] == fill
assert kw["number_format"] == number_format
assert kw["protection"] == protection
def test_write_cells_merge_styled(ext):
from pandas.io.formats.excel import ExcelCell
sheet_name = "merge_styled"
sty_b1 = {"font": {"color": "00FF0000"}}
sty_a2 = {"font": {"color": "0000FF00"}}
initial_cells = [
ExcelCell(col=1, row=0, val=42, style=sty_b1),
ExcelCell(col=0, row=1, val=99, style=sty_a2),
]
sty_merged = {"font": {"color": "000000FF", "bold": True}}
sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged)
openpyxl_sty_merged = sty_kwargs["font"]
merge_cells = [
ExcelCell(
col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged
)
]
with tm.ensure_clean(ext) as path:
with _OpenpyxlWriter(path) as writer:
writer._write_cells(initial_cells, sheet_name=sheet_name)
writer._write_cells(merge_cells, sheet_name=sheet_name)
wks = writer.sheets[sheet_name]
xcell_b1 = wks["B1"]
xcell_a2 = wks["A2"]
assert xcell_b1.font == openpyxl_sty_merged
assert xcell_a2.font == openpyxl_sty_merged
@pytest.mark.parametrize("iso_dates", [True, False])
def test_engine_kwargs_write(ext, iso_dates):
# GH 42286 GH 43445
engine_kwargs = {"iso_dates": iso_dates}
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
assert writer.book.iso_dates == iso_dates
# ExcelWriter won't allow us to close without writing something
DataFrame().to_excel(writer)
def test_engine_kwargs_append_invalid(ext):
# GH 43445
    # test whether an invalid engine kwarg actually raises
with tm.ensure_clean(ext) as f:
DataFrame(["hello", "world"]).to_excel(f)
with pytest.raises(
TypeError,
match=re.escape(
"load_workbook() got an unexpected keyword argument 'apple_banana'"
),
):
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"}
) as writer:
# ExcelWriter needs us to write something to close properly
DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
def test_engine_kwargs_append_data_only(ext, data_only, expected):
# GH 43445
# tests whether the data_only engine_kwarg actually works well for
# openpyxl's load_workbook
with tm.ensure_clean(ext) as f:
DataFrame(["=1+1"]).to_excel(f)
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only}
) as writer:
assert writer.sheets["Sheet1"]["B2"].value == expected
            # ExcelWriter needs us to write something to close properly
DataFrame().to_excel(writer, sheet_name="Sheet2")
# ensure that data_only also works for reading
# and that formulas/values roundtrip
assert (
pd.read_excel(
f,
sheet_name="Sheet1",
engine="openpyxl",
engine_kwargs={"data_only": data_only},
).iloc[0, 1]
== expected
)
@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
@pytest.mark.parametrize("kwarg_value", [True, False])
def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
# GH 55027
# test that `read_only` and `data_only` can be passed to
# `openpyxl.reader.excel.load_workbook` via `engine_kwargs`
filename = datapath("io", "data", "excel", "test1" + ext)
with contextlib.closing(
OpenpyxlReader(filename, engine_kwargs={kwarg_name: kwarg_value})
) as reader:
assert getattr(reader.book, kwarg_name) == kwarg_value
@pytest.mark.parametrize(
"mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
)
def test_write_append_mode(ext, mode, expected):
df = DataFrame([1], columns=["baz"])
with tm.ensure_clean(ext) as f:
wb = openpyxl.Workbook()
wb.worksheets[0].title = "foo"
wb.worksheets[0]["A1"].value = "foo"
wb.create_sheet("bar")
wb.worksheets[1]["A1"].value = "bar"
wb.save(f)
with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
df.to_excel(writer, sheet_name="baz", index=False)
with contextlib.closing(openpyxl.load_workbook(f)) as wb2:
result = [sheet.title for sheet in wb2.worksheets]
assert result == expected
for index, cell_value in enumerate(expected):
assert wb2.worksheets[index]["A1"].value == cell_value
@pytest.mark.parametrize(
"if_sheet_exists,num_sheets,expected",
[
("new", 2, ["apple", "banana"]),
("replace", 1, ["pear"]),
("overlay", 1, ["pear", "banana"]),
],
)
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
# GH 40230
df1 = DataFrame({"fruit": ["apple", "banana"]})
df2 = DataFrame({"fruit": ["pear"]})
with tm.ensure_clean(ext) as f:
df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
with ExcelWriter(
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
) as writer:
df2.to_excel(writer, sheet_name="foo", index=False)
with contextlib.closing(openpyxl.load_workbook(f)) as wb:
assert len(wb.sheetnames) == num_sheets
assert wb.sheetnames[0] == "foo"
result = pd.read_excel(wb, "foo", engine="openpyxl")
assert list(result["fruit"]) == expected
if len(wb.sheetnames) == 2:
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
tm.assert_frame_equal(result, df2)
@pytest.mark.parametrize(
"startrow, startcol, greeting, goodbye",
[
(0, 0, ["poop", "world"], ["goodbye", "people"]),
(0, 1, ["hello", "world"], ["poop", "people"]),
(1, 0, ["hello", "poop"], ["goodbye", "people"]),
(1, 1, ["hello", "world"], ["goodbye", "poop"]),
],
)
def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
df1 = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
df2 = DataFrame(["poop"])
with tm.ensure_clean(ext) as f:
df1.to_excel(f, engine="openpyxl", sheet_name="poo", index=False)
with ExcelWriter(
f, engine="openpyxl", mode="a", if_sheet_exists="overlay"
) as writer:
# use startrow+1 because we don't have a header
df2.to_excel(
writer,
index=False,
header=False,
startrow=startrow + 1,
startcol=startcol,
sheet_name="poo",
)
result = pd.read_excel(f, sheet_name="poo", engine="openpyxl")
expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"if_sheet_exists,msg",
[
(
"invalid",
"'invalid' is not valid for if_sheet_exists. Valid options "
"are 'error', 'new', 'replace' and 'overlay'.",
),
(
"error",
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
),
(
None,
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
),
],
)
def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
# GH 40230
df = DataFrame({"fruit": ["pear"]})
with tm.ensure_clean(ext) as f:
with pytest.raises(ValueError, match=re.escape(msg)):
df.to_excel(f, sheet_name="foo", engine="openpyxl")
with ExcelWriter(
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
) as writer:
df.to_excel(writer, sheet_name="foo")
def test_to_excel_with_openpyxl_engine(ext):
# GH 29854
with tm.ensure_clean(ext) as filename:
df1 = DataFrame({"A": np.linspace(1, 10, 10)})
df2 = DataFrame({"B": np.linspace(1, 20, 10)})
df = pd.concat([df1, df2], axis=1)
styled = df.style.map(
lambda val: f"color: {'red' if val < 0 else 'black'}"
).highlight_max()
styled.to_excel(filename, engine="openpyxl")
@pytest.mark.parametrize("read_only", [True, False])
def test_read_workbook(datapath, ext, read_only):
# GH 39528
filename = datapath("io", "data", "excel", "test1" + ext)
with contextlib.closing(
openpyxl.load_workbook(filename, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = pd.read_excel(filename)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"header, expected_data",
[
(
0,
{
"Title": [np.nan, "A", 1, 2, 3],
"Unnamed: 1": [np.nan, "B", 4, 5, 6],
"Unnamed: 2": [np.nan, "C", 7, 8, 9],
},
),
(2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
],
)
@pytest.mark.parametrize(
"filename", ["dimension_missing", "dimension_small", "dimension_large"]
)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_bad_dimension(
datapath, ext, header, expected_data, filename, read_only
):
# GH 38956, 39001 - no/incorrect dimension information
path = datapath("io", "data", "excel", f"{filename}{ext}")
if read_only is None:
result = pd.read_excel(path, header=header)
else:
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl", header=header)
expected = DataFrame(expected_data)
tm.assert_frame_equal(result, expected)
def test_append_mode_file(ext):
# GH 39576
df = DataFrame()
with tm.ensure_clean(ext) as f:
df.to_excel(f, engine="openpyxl")
with ExcelWriter(
f, mode="a", engine="openpyxl", if_sheet_exists="new"
) as writer:
df.to_excel(writer)
        # ensure the zip archives were not simply concatenated, by checking that
        # "docProps/app.xml" occurs exactly twice in the file
data = Path(f).read_bytes()
first = data.find(b"docProps/app.xml")
second = data.find(b"docProps/app.xml", first + 1)
third = data.find(b"docProps/app.xml", second + 1)
assert second != -1 and third == -1
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_with_empty_trailing_rows(datapath, ext, read_only):
# GH 39181
path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
if read_only is None:
result = pd.read_excel(path)
else:
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = DataFrame(
{
"Title": [np.nan, "A", 1, 2, 3],
"Unnamed: 1": [np.nan, "B", 4, 5, 6],
"Unnamed: 2": [np.nan, "C", 7, 8, 9],
}
)
tm.assert_frame_equal(result, expected)
# When read_only is None, use read_excel instead of a workbook
@pytest.mark.parametrize("read_only", [True, False, None])
def test_read_empty_with_blank_row(datapath, ext, read_only):
# GH 39547 - empty excel file with a row that has no data
path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
if read_only is None:
result = pd.read_excel(path)
else:
with contextlib.closing(
openpyxl.load_workbook(path, read_only=read_only)
) as wb:
result = pd.read_excel(wb, engine="openpyxl")
expected = DataFrame()
tm.assert_frame_equal(result, expected)
def test_book_and_sheets_consistent(ext):
# GH#45687 - Ensure sheets is updated if user modifies book
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="openpyxl") as writer:
assert writer.sheets == {}
sheet = writer.book.create_sheet("test_name", 0)
assert writer.sheets == {"test_name": sheet}
def test_ints_spelled_with_decimals(datapath, ext):
# GH 46988 - openpyxl returns this sheet with floats
path = datapath("io", "data", "excel", f"ints_spelled_with_decimals{ext}")
result = pd.read_excel(path)
expected = DataFrame(range(2, 12), columns=[1])
tm.assert_frame_equal(result, expected)
def test_read_multiindex_header_no_index_names(datapath, ext):
# GH#47487
path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}")
result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2])
expected = DataFrame(
[[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]],
columns=pd.MultiIndex.from_tuples(
[("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")]
),
index=pd.MultiIndex.from_tuples([("A", "AA", "AAA"), ("A", "BB", "BBB")]),
)
tm.assert_frame_equal(result, expected)
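For reference, a minimal sketch (not part of the diff) of the append-mode API these tests exercise, with an illustrative file name and assuming openpyxl is installed:

from pandas import DataFrame
from pandas.io.excel import ExcelWriter

df1 = DataFrame({"fruit": ["apple", "banana"]})
df1.to_excel("report.xlsx", sheet_name="foo", index=False)  # illustrative path

# reopen the workbook in append mode and overwrite cells of sheet "foo" in place
with ExcelWriter(
    "report.xlsx", engine="openpyxl", mode="a", if_sheet_exists="overlay"
) as writer:
    DataFrame({"fruit": ["pear"]}).to_excel(writer, sheet_name="foo", index=False)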

File diff suppressed because it is too large.


@@ -0,0 +1,298 @@
import contextlib
import time
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
read_excel,
)
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
from pandas.io.formats.excel import ExcelFormatter
pytest.importorskip("jinja2")
# jinja2 is currently required for Styler.__init__(). Technically Styler.to_excel
# could compute styles and render to excel without jinja2, since there is no
# 'template' file, but this would need the import error to be delayed until render time.
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
def assert_equal_cell_styles(cell1, cell2):
# TODO: should find a better way to check equality
assert cell1.alignment.__dict__ == cell2.alignment.__dict__
assert cell1.border.__dict__ == cell2.border.__dict__
assert cell1.fill.__dict__ == cell2.fill.__dict__
assert cell1.font.__dict__ == cell2.font.__dict__
assert cell1.number_format == cell2.number_format
assert cell1.protection.__dict__ == cell2.protection.__dict__
@pytest.mark.parametrize(
"engine",
["xlsxwriter", "openpyxl"],
)
def test_styler_to_excel_unstyled(engine):
# compare DataFrame.to_excel and Styler.to_excel when no styles applied
pytest.importorskip(engine)
df = DataFrame(np.random.default_rng(2).standard_normal((2, 2)))
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine=engine) as writer:
df.to_excel(writer, sheet_name="dataframe")
df.style.to_excel(writer, sheet_name="unstyled")
openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns):
assert len(col1) == len(col2)
for cell1, cell2 in zip(col1, col2):
assert cell1.value == cell2.value
assert_equal_cell_styles(cell1, cell2)
shared_style_params = [
(
"background-color: #111222",
["fill", "fgColor", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
(
"color: #111222",
["font", "color", "value"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("font-family: Arial;", ["font", "name"], "arial"),
("font-weight: bold;", ["font", "b"], True),
("font-style: italic;", ["font", "i"], True),
("text-decoration: underline;", ["font", "u"], "single"),
("number-format: $??,???.00;", ["number_format"], "$??,???.00"),
("text-align: left;", ["alignment", "horizontal"], "left"),
(
"vertical-align: bottom;",
["alignment", "vertical"],
{"xlsxwriter": None, "openpyxl": "bottom"}, # xlsxwriter Fails
),
("vertical-align: middle;", ["alignment", "vertical"], "center"),
# Border widths
("border-left: 2pt solid red", ["border", "left", "style"], "medium"),
("border-left: 1pt dotted red", ["border", "left", "style"], "dotted"),
("border-left: 2pt dotted red", ["border", "left", "style"], "mediumDashDotDot"),
("border-left: 1pt dashed red", ["border", "left", "style"], "dashed"),
("border-left: 2pt dashed red", ["border", "left", "style"], "mediumDashed"),
("border-left: 1pt solid red", ["border", "left", "style"], "thin"),
("border-left: 3pt solid red", ["border", "left", "style"], "thick"),
# Border expansion
(
"border-left: 2pt solid #111222",
["border", "left", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("border: 1pt solid red", ["border", "top", "style"], "thin"),
(
"border: 1pt solid #111222",
["border", "top", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("border: 1pt solid red", ["border", "right", "style"], "thin"),
(
"border: 1pt solid #111222",
["border", "right", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("border: 1pt solid red", ["border", "bottom", "style"], "thin"),
(
"border: 1pt solid #111222",
["border", "bottom", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
("border: 1pt solid red", ["border", "left", "style"], "thin"),
(
"border: 1pt solid #111222",
["border", "left", "color", "rgb"],
{"xlsxwriter": "FF111222", "openpyxl": "00111222"},
),
# Border styles
(
"border-left-style: hair; border-left-color: black",
["border", "left", "style"],
"hair",
),
]
@pytest.mark.parametrize(
"engine",
["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic(engine, css, attrs, expected):
pytest.importorskip(engine)
df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
styler = df.style.map(lambda x: css)
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine=engine) as writer:
df.to_excel(writer, sheet_name="dataframe")
styler.to_excel(writer, sheet_name="styled")
openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
# test unstyled data cell does not have expected styles
# test styled cell has expected styles
u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
for attr in attrs:
u_cell, s_cell = getattr(u_cell, attr, None), getattr(s_cell, attr)
if isinstance(expected, dict):
assert u_cell is None or u_cell != expected[engine]
assert s_cell == expected[engine]
else:
assert u_cell is None or u_cell != expected
assert s_cell == expected
@pytest.mark.parametrize(
"engine",
["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("css, attrs, expected", shared_style_params)
def test_styler_to_excel_basic_indexes(engine, css, attrs, expected):
pytest.importorskip(engine)
df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
styler = df.style
styler.map_index(lambda x: css, axis=0)
styler.map_index(lambda x: css, axis=1)
null_styler = df.style
null_styler.map(lambda x: "null: css;")
null_styler.map_index(lambda x: "null: css;", axis=0)
null_styler.map_index(lambda x: "null: css;", axis=1)
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine=engine) as writer:
null_styler.to_excel(writer, sheet_name="null_styled")
styler.to_excel(writer, sheet_name="styled")
openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
# test null styled index cells does not have expected styles
# test styled cell has expected styles
ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1)
uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2)
for attr in attrs:
ui_cell, si_cell = getattr(ui_cell, attr, None), getattr(si_cell, attr)
uc_cell, sc_cell = getattr(uc_cell, attr, None), getattr(sc_cell, attr)
if isinstance(expected, dict):
assert ui_cell is None or ui_cell != expected[engine]
assert si_cell == expected[engine]
assert uc_cell is None or uc_cell != expected[engine]
assert sc_cell == expected[engine]
else:
assert ui_cell is None or ui_cell != expected
assert si_cell == expected
assert uc_cell is None or uc_cell != expected
assert sc_cell == expected
# From https://openpyxl.readthedocs.io/en/stable/api/openpyxl.styles.borders.html
# Note: Leaving behavior of "width"-type styles undefined; user should use border-width
# instead
excel_border_styles = [
# "thin",
"dashed",
"mediumDashDot",
"dashDotDot",
"hair",
"dotted",
"mediumDashDotDot",
# "medium",
"double",
"dashDot",
"slantDashDot",
# "thick",
"mediumDashed",
]
@pytest.mark.parametrize(
"engine",
["xlsxwriter", "openpyxl"],
)
@pytest.mark.parametrize("border_style", excel_border_styles)
def test_styler_to_excel_border_style(engine, border_style):
css = f"border-left: {border_style} black thin"
attrs = ["border", "left", "style"]
expected = border_style
pytest.importorskip(engine)
df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
styler = df.style.map(lambda x: css)
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine=engine) as writer:
df.to_excel(writer, sheet_name="dataframe")
styler.to_excel(writer, sheet_name="styled")
openpyxl = pytest.importorskip("openpyxl") # test loading only with openpyxl
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
# test unstyled data cell does not have expected styles
# test styled cell has expected styles
u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
for attr in attrs:
u_cell, s_cell = getattr(u_cell, attr, None), getattr(s_cell, attr)
if isinstance(expected, dict):
assert u_cell is None or u_cell != expected[engine]
assert s_cell == expected[engine]
else:
assert u_cell is None or u_cell != expected
assert s_cell == expected
def test_styler_custom_converter():
openpyxl = pytest.importorskip("openpyxl")
def custom_converter(css):
return {"font": {"color": {"rgb": "111222"}}}
df = DataFrame(np.random.default_rng(2).standard_normal((1, 1)))
styler = df.style.map(lambda x: "color: #888999")
with tm.ensure_clean(".xlsx") as path:
with ExcelWriter(path, engine="openpyxl") as writer:
ExcelFormatter(styler, style_converter=custom_converter).write(
writer, sheet_name="custom"
)
with contextlib.closing(openpyxl.load_workbook(path)) as wb:
assert wb["custom"].cell(2, 2).font.color.value == "00111222"
@pytest.mark.single_cpu
@td.skip_if_not_us_locale
def test_styler_to_s3(s3_public_bucket, s3so):
# GH#46381
mock_bucket_name, target_file = s3_public_bucket.name, "test.xlsx"
df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
styler = df.style.set_sticky(axis="index")
styler.to_excel(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
timeout = 5
while True:
if target_file in (obj.key for obj in s3_public_bucket.objects.all()):
break
time.sleep(0.1)
timeout -= 0.1
assert timeout > 0, "Timed out waiting for file to appear on moto"
result = read_excel(
f"s3://{mock_bucket_name}/{target_file}", index_col=0, storage_options=s3so
)
tm.assert_frame_equal(result, df)
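A minimal sketch (not part of the diff) of the Styler.to_excel path these tests compare against plain DataFrame.to_excel, with an illustrative file name and assuming openpyxl is installed:

import pandas as pd

df = pd.DataFrame({"x": [1, -2, 3]})
styled = df.style.map(
    lambda v: "font-weight: bold; background-color: #111222" if v < 0 else ""
)
# the CSS string is translated into an openpyxl font/fill when the sheet is written
styled.to_excel("styled.xlsx", engine="openpyxl")  # illustrative path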

File diff suppressed because it is too large.


@@ -0,0 +1,76 @@
import io
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
import pandas._testing as tm
from pandas.io.excel import ExcelFile
from pandas.io.excel._base import inspect_excel_format
xlrd = pytest.importorskip("xlrd")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture(params=[".xls"])
def read_ext_xlrd(request):
"""
Valid extensions for reading Excel files with xlrd.
    Similar to read_ext, but excludes .ods and .xlsb, and, for xlrd >= 2.0, also .xlsx and .xlsm.
"""
return request.param
def test_read_xlrd_book(read_ext_xlrd, datapath):
engine = "xlrd"
sheet_name = "Sheet1"
pth = datapath("io", "data", "excel", "test1.xls")
with xlrd.open_workbook(pth) as book:
with ExcelFile(book, engine=engine) as xl:
result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)
expected = pd.read_excel(
book, sheet_name=sheet_name, engine=engine, index_col=0
)
tm.assert_frame_equal(result, expected)
def test_read_xlsx_fails(datapath):
# GH 29375
from xlrd.biffh import XLRDError
path = datapath("io", "data", "excel", "test1.xlsx")
with pytest.raises(XLRDError, match="Excel xlsx file; not supported"):
pd.read_excel(path, engine="xlrd")
def test_nan_in_xls(datapath):
# GH 54564
path = datapath("io", "data", "excel", "test6.xls")
expected = pd.DataFrame({0: np.r_[0, 2].astype("int64"), 1: np.r_[1, np.nan]})
result = pd.read_excel(path, header=None)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"file_header",
[
b"\x09\x00\x04\x00\x07\x00\x10\x00",
b"\x09\x02\x06\x00\x00\x00\x10\x00",
b"\x09\x04\x06\x00\x00\x00\x10\x00",
b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1",
],
)
def test_read_old_xls_files(file_header):
# GH 41226
f = io.BytesIO(file_header)
assert inspect_excel_format(f) == "xls"
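As the read_ext_xlrd docstring notes, xlrd 2.x only reads the legacy .xls format; the call being exercised is essentially the sketch below (not part of the diff, path illustrative):

import pandas as pd

# newer formats (.xlsx, .xlsm, .xlsb, .ods) need a different engine with xlrd >= 2.0
result = pd.read_excel(
    "pandas/tests/io/data/excel/test1.xls", engine="xlrd", index_col=0
)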


@@ -0,0 +1,86 @@
import contextlib
import pytest
from pandas.compat import is_platform_windows
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.excel import ExcelWriter
xlsxwriter = pytest.importorskip("xlsxwriter")
if is_platform_windows():
pytestmark = pytest.mark.single_cpu
@pytest.fixture
def ext():
return ".xlsx"
def test_column_format(ext):
# Test that column formats are applied to cells. Test for issue #9167.
# Applicable to xlsxwriter only.
openpyxl = pytest.importorskip("openpyxl")
with tm.ensure_clean(ext) as path:
frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]})
with ExcelWriter(path) as writer:
frame.to_excel(writer)
# Add a number format to col B and ensure it is applied to cells.
num_format = "#,##0"
write_workbook = writer.book
write_worksheet = write_workbook.worksheets()[0]
col_format = write_workbook.add_format({"num_format": num_format})
write_worksheet.set_column("B:B", None, col_format)
with contextlib.closing(openpyxl.load_workbook(path)) as read_workbook:
try:
read_worksheet = read_workbook["Sheet1"]
except TypeError:
# compat
read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")
# Get the number format from the cell.
try:
cell = read_worksheet["B2"]
except TypeError:
# compat
cell = read_worksheet.cell("B2")
try:
read_num_format = cell.number_format
except AttributeError:
read_num_format = cell.style.number_format._format_code
assert read_num_format == num_format
def test_write_append_mode_raises(ext):
msg = "Append mode is not supported with xlsxwriter!"
with tm.ensure_clean(ext) as f:
with pytest.raises(ValueError, match=msg):
ExcelWriter(f, engine="xlsxwriter", mode="a")
@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_engine_kwargs(ext, nan_inf_to_errors):
# GH 42286
engine_kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}}
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="xlsxwriter", engine_kwargs=engine_kwargs) as writer:
assert writer.book.nan_inf_to_errors == nan_inf_to_errors
def test_book_and_sheets_consistent(ext):
# GH#45687 - Ensure sheets is updated if user modifies book
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="xlsxwriter") as writer:
assert writer.sheets == {}
sheet = writer.book.add_worksheet("test_name")
assert writer.sheets == {"test_name": sheet}
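A minimal sketch (not part of the diff) of forwarding constructor options to xlsxwriter via engine_kwargs, as the test above does, with an illustrative file name:

from pandas import DataFrame
from pandas.io.excel import ExcelWriter

with ExcelWriter(
    "out.xlsx",  # illustrative path
    engine="xlsxwriter",
    engine_kwargs={"options": {"nan_inf_to_errors": True}},
) as writer:
    # the options dict is passed straight to the xlsxwriter Workbook constructor
    DataFrame({"A": [1.0, 2.0]}).to_excel(writer, index=False)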


@@ -0,0 +1,359 @@
import io
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
read_csv,
)
pytest.importorskip("jinja2")
def bar_grad(a=None, b=None, c=None, d=None):
"""Used in multiple tests to simplify formatting of expected result"""
ret = [("width", "10em")]
if all(x is None for x in [a, b, c, d]):
return ret
return ret + [
(
"background",
f"linear-gradient(90deg,{','.join([x for x in [a, b, c, d] if x])})",
)
]
def no_bar():
return bar_grad()
def bar_to(x, color="#d65f5f"):
return bar_grad(f" {color} {x:.1f}%", f" transparent {x:.1f}%")
def bar_from_to(x, y, color="#d65f5f"):
return bar_grad(
f" transparent {x:.1f}%",
f" {color} {x:.1f}%",
f" {color} {y:.1f}%",
f" transparent {y:.1f}%",
)
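# Worked example (not part of the test module), derived from the helpers above:
#   no_bar()   == [("width", "10em")]
#   bar_to(50) == [
#       ("width", "10em"),
#       ("background", "linear-gradient(90deg, #d65f5f 50.0%, transparent 50.0%)"),
#   ]
# i.e. each expected ctx entry is the CSS that Styler.bar is expected to emit.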
@pytest.fixture
def df_pos():
return DataFrame([[1], [2], [3]])
@pytest.fixture
def df_neg():
return DataFrame([[-1], [-2], [-3]])
@pytest.fixture
def df_mix():
return DataFrame([[-3], [1], [2]])
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(50), bar_to(100)]),
("right", [bar_to(100), bar_from_to(50, 100), no_bar()]),
("mid", [bar_to(33.33), bar_to(66.66), bar_to(100)]),
("zero", [bar_from_to(50, 66.7), bar_from_to(50, 83.3), bar_from_to(50, 100)]),
("mean", [bar_to(50), no_bar(), bar_from_to(50, 100)]),
(2.0, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
(np.median, [bar_to(50), no_bar(), bar_from_to(50, 100)]),
],
)
def test_align_positive_cases(df_pos, align, exp):
# test different align cases for all positive values
result = df_pos.style.bar(align=align)._compute().ctx
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
("left", [bar_to(100), bar_to(50), no_bar()]),
("right", [no_bar(), bar_from_to(50, 100), bar_to(100)]),
("mid", [bar_from_to(66.66, 100), bar_from_to(33.33, 100), bar_to(100)]),
("zero", [bar_from_to(33.33, 50), bar_from_to(16.66, 50), bar_to(50)]),
("mean", [bar_from_to(50, 100), no_bar(), bar_to(50)]),
(-2.0, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
(np.median, [bar_from_to(50, 100), no_bar(), bar_to(50)]),
],
)
def test_align_negative_cases(df_neg, align, exp):
# test different align cases for all negative values
result = df_neg.style.bar(align=align)._compute().ctx
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(80), bar_to(100)]),
("right", [bar_to(100), bar_from_to(80, 100), no_bar()]),
("mid", [bar_to(60), bar_from_to(60, 80), bar_from_to(60, 100)]),
("zero", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
("mean", [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
(-0.0, [bar_to(50), bar_from_to(50, 66.66), bar_from_to(50, 83.33)]),
(np.nanmedian, [bar_to(50), no_bar(), bar_from_to(50, 62.5)]),
],
)
@pytest.mark.parametrize("nans", [True, False])
def test_align_mixed_cases(df_mix, align, exp, nans):
# test different align cases for mixed positive and negative values
# also test no impact of NaNs and no_bar
expected = {(0, 0): exp[0], (1, 0): exp[1], (2, 0): exp[2]}
if nans:
df_mix.loc[3, :] = np.nan
expected.update({(3, 0): no_bar()})
result = df_mix.style.bar(align=align)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"align, exp",
[
(
"left",
{
"index": [[no_bar(), no_bar()], [bar_to(100), bar_to(100)]],
"columns": [[no_bar(), bar_to(100)], [no_bar(), bar_to(100)]],
"none": [[no_bar(), bar_to(33.33)], [bar_to(66.66), bar_to(100)]],
},
),
(
"mid",
{
"index": [[bar_to(33.33), bar_to(50)], [bar_to(100), bar_to(100)]],
"columns": [[bar_to(50), bar_to(100)], [bar_to(75), bar_to(100)]],
"none": [[bar_to(25), bar_to(50)], [bar_to(75), bar_to(100)]],
},
),
(
"zero",
{
"index": [
[bar_from_to(50, 66.66), bar_from_to(50, 75)],
[bar_from_to(50, 100), bar_from_to(50, 100)],
],
"columns": [
[bar_from_to(50, 75), bar_from_to(50, 100)],
[bar_from_to(50, 87.5), bar_from_to(50, 100)],
],
"none": [
[bar_from_to(50, 62.5), bar_from_to(50, 75)],
[bar_from_to(50, 87.5), bar_from_to(50, 100)],
],
},
),
(
2,
{
"index": [
[bar_to(50), no_bar()],
[bar_from_to(50, 100), bar_from_to(50, 100)],
],
"columns": [
[bar_to(50), no_bar()],
[bar_from_to(50, 75), bar_from_to(50, 100)],
],
"none": [
[bar_from_to(25, 50), no_bar()],
[bar_from_to(50, 75), bar_from_to(50, 100)],
],
},
),
],
)
@pytest.mark.parametrize("axis", ["index", "columns", "none"])
def test_align_axis(align, exp, axis):
# test all axis combinations with positive values and different aligns
data = DataFrame([[1, 2], [3, 4]])
result = (
data.style.bar(align=align, axis=None if axis == "none" else axis)
._compute()
.ctx
)
expected = {
(0, 0): exp[axis][0][0],
(0, 1): exp[axis][0][1],
(1, 0): exp[axis][1][0],
(1, 1): exp[axis][1][1],
}
assert result == expected
@pytest.mark.parametrize(
"values, vmin, vmax",
[
("positive", 1.5, 2.5),
("negative", -2.5, -1.5),
("mixed", -2.5, 1.5),
],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_clipping(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
# test that clipping occurs if any vmin > data_values or vmax < data_values
if align == "mid": # mid acts as left or right in each case
if values == "positive":
align = "left"
elif values == "negative":
align = "right"
df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values]
vmin = None if nullify == "vmin" else vmin
vmax = None if nullify == "vmax" else vmax
clip_df = df.where(df <= (vmax if vmax else 999), other=vmax)
clip_df = clip_df.where(clip_df >= (vmin if vmin else -999), other=vmin)
result = (
df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
._compute()
.ctx
)
expected = clip_df.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"values, vmin, vmax",
[
("positive", 0.5, 4.5),
("negative", -4.5, -0.5),
("mixed", -4.5, 4.5),
],
)
@pytest.mark.parametrize("nullify", [None, "vmin", "vmax"]) # test min/max separately
@pytest.mark.parametrize("align", ["left", "right", "zero", "mid"])
def test_vmin_vmax_widening(df_pos, df_neg, df_mix, values, vmin, vmax, nullify, align):
# test that widening occurs if any vmax > data_values or vmin < data_values
if align == "mid": # mid acts as left or right in each case
if values == "positive":
align = "left"
elif values == "negative":
align = "right"
df = {"positive": df_pos, "negative": df_neg, "mixed": df_mix}[values]
vmin = None if nullify == "vmin" else vmin
vmax = None if nullify == "vmax" else vmax
expand_df = df.copy()
expand_df.loc[3, :], expand_df.loc[4, :] = vmin, vmax
result = (
df.style.bar(align=align, vmin=vmin, vmax=vmax, color=["red", "green"])
._compute()
.ctx
)
expected = expand_df.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result.items() <= expected.items()
def test_numerics():
# test data is pre-selected for numeric values
data = DataFrame([[1, "a"], [2, "b"]])
result = data.style.bar()._compute().ctx
assert (0, 1) not in result
assert (1, 1) not in result
@pytest.mark.parametrize(
"align, exp",
[
("left", [no_bar(), bar_to(100, "green")]),
("right", [bar_to(100, "red"), no_bar()]),
("mid", [bar_to(25, "red"), bar_from_to(25, 100, "green")]),
("zero", [bar_from_to(33.33, 50, "red"), bar_from_to(50, 100, "green")]),
],
)
def test_colors_mixed(align, exp):
data = DataFrame([[-1], [3]])
result = data.style.bar(align=align, color=["red", "green"])._compute().ctx
assert result == {(0, 0): exp[0], (1, 0): exp[1]}
def test_bar_align_height():
    # test that when the height keyword is used, 'no-repeat center' and 'background-size' are present
data = DataFrame([[1], [2]])
result = data.style.bar(align="left", height=50)._compute().ctx
bg_s = "linear-gradient(90deg, #d65f5f 100.0%, transparent 100.0%) no-repeat center"
expected = {
(0, 0): [("width", "10em")],
(1, 0): [
("width", "10em"),
("background", bg_s),
("background-size", "100% 50.0%"),
],
}
assert result == expected
def test_bar_value_error_raises():
df = DataFrame({"A": [-100, -60, -30, -20]})
msg = "`align` should be in {'left', 'right', 'mid', 'mean', 'zero'} or"
with pytest.raises(ValueError, match=msg):
df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]).to_html()
msg = r"`width` must be a value in \[0, 100\]"
with pytest.raises(ValueError, match=msg):
df.style.bar(width=200).to_html()
msg = r"`height` must be a value in \[0, 100\]"
with pytest.raises(ValueError, match=msg):
df.style.bar(height=200).to_html()
def test_bar_color_and_cmap_error_raises():
df = DataFrame({"A": [1, 2, 3, 4]})
msg = "`color` and `cmap` cannot both be given"
# Test that providing both color and cmap raises a ValueError
with pytest.raises(ValueError, match=msg):
df.style.bar(color="#d65f5f", cmap="viridis").to_html()
def test_bar_invalid_color_type_error_raises():
df = DataFrame({"A": [1, 2, 3, 4]})
msg = (
r"`color` must be string or list or tuple of 2 strings,"
r"\(eg: color=\['#d65f5f', '#5fba7d'\]\)"
)
# Test that providing an invalid color type raises a ValueError
with pytest.raises(ValueError, match=msg):
df.style.bar(color=123).to_html()
# Test that providing a color list with more than two elements raises a ValueError
with pytest.raises(ValueError, match=msg):
df.style.bar(color=["#d65f5f", "#5fba7d", "#abcdef"]).to_html()
def test_styler_bar_with_NA_values():
df1 = DataFrame({"A": [1, 2, NA, 4]})
df2 = DataFrame([[NA, NA], [NA, NA]])
expected_substring = "style type="
html_output1 = df1.style.bar(subset="A").to_html()
html_output2 = df2.style.bar(align="left", axis=None).to_html()
assert expected_substring in html_output1
assert expected_substring in html_output2
def test_style_bar_with_pyarrow_NA_values():
pytest.importorskip("pyarrow")
data = """name,age,test1,test2,teacher
Adam,15,95.0,80,Ashby
Bob,16,81.0,82,Ashby
Dave,16,89.0,84,Jones
Fred,15,,88,Jones"""
df = read_csv(io.StringIO(data), dtype_backend="pyarrow")
expected_substring = "style type="
html_output = df.style.bar(subset="test1").to_html()
assert expected_substring in html_output


@@ -0,0 +1,44 @@
import pytest
jinja2 = pytest.importorskip("jinja2")
from pandas import (
DataFrame,
MultiIndex,
)
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
data=[[0, -0.609], [1, -1.228]],
columns=["A", "B"],
index=["x", "y"],
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_concat_bad_columns(styler):
msg = "`other.data` must have same columns as `Styler.data"
with pytest.raises(ValueError, match=msg):
styler.concat(DataFrame([[1, 2]]).style)
def test_concat_bad_type(styler):
msg = "`other` must be of type `Styler`"
with pytest.raises(TypeError, match=msg):
styler.concat(DataFrame([[1, 2]]))
def test_concat_bad_index_levels(styler, df):
df = df.copy()
df.index = MultiIndex.from_tuples([(0, 0), (1, 1)])
msg = "number of index levels must be same in `other`"
with pytest.raises(ValueError, match=msg):
styler.concat(df.style)
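For contrast with the error cases above, a minimal sketch (not part of the diff) of a successful Styler.concat call, using illustrative data with matching columns and index levels:

from pandas import DataFrame

df = DataFrame([[0, -0.609], [1, -1.228]], columns=["A", "B"], index=["x", "y"])
totals = DataFrame([[1, -1.837]], columns=["A", "B"], index=["total"])

# `other` must be a Styler whose data has the same columns and index nlevels
combined = df.style.concat(totals.style)
html = combined.to_html()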


@@ -0,0 +1,562 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
MultiIndex,
NaT,
Timestamp,
option_context,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
from pandas.io.formats.style_render import _str_escape
@pytest.fixture
def df():
return DataFrame(
data=[[0, -0.609], [1, -1.228]],
columns=["A", "B"],
index=["x", "y"],
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
@pytest.fixture
def df_multi():
return DataFrame(
data=np.arange(16).reshape(4, 4),
columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]),
index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]),
)
@pytest.fixture
def styler_multi(df_multi):
return Styler(df_multi, uuid_len=0)
def test_display_format(styler):
ctx = styler.format("{:0.1f}")._translate(True, True)
assert all(["display_value" in c for c in row] for row in ctx["body"])
assert all([len(c["display_value"]) <= 3 for c in row[1:]] for row in ctx["body"])
assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3
@pytest.mark.parametrize("index", [True, False])
@pytest.mark.parametrize("columns", [True, False])
def test_display_format_index(styler, index, columns):
exp_index = ["x", "y"]
if index:
styler.format_index(lambda v: v.upper(), axis=0) # test callable
exp_index = ["X", "Y"]
exp_columns = ["A", "B"]
if columns:
styler.format_index("*{}*", axis=1) # test string
exp_columns = ["*A*", "*B*"]
ctx = styler._translate(True, True)
for r, row in enumerate(ctx["body"]):
assert row[0]["display_value"] == exp_index[r]
for c, col in enumerate(ctx["head"][1:]):
assert col["display_value"] == exp_columns[c]
def test_format_dict(styler):
ctx = styler.format({"A": "{:0.1f}", "B": "{0:.2%}"})._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "0.0"
assert ctx["body"][0][2]["display_value"] == "-60.90%"
def test_format_index_dict(styler):
ctx = styler.format_index({0: lambda v: v.upper()})._translate(True, True)
for i, val in enumerate(["X", "Y"]):
assert ctx["body"][i][0]["display_value"] == val
def test_format_string(styler):
ctx = styler.format("{:.2f}")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "0.00"
assert ctx["body"][0][2]["display_value"] == "-0.61"
assert ctx["body"][1][1]["display_value"] == "1.00"
assert ctx["body"][1][2]["display_value"] == "-1.23"
def test_format_callable(styler):
ctx = styler.format(lambda v: "neg" if v < 0 else "pos")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "pos"
assert ctx["body"][0][2]["display_value"] == "neg"
assert ctx["body"][1][1]["display_value"] == "pos"
assert ctx["body"][1][2]["display_value"] == "neg"
def test_format_with_na_rep():
# GH 21527 28358
df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
ctx = df.style.format(None, na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
ctx = df.style.format("{:.2%}", na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][1]["display_value"] == "110.00%"
assert ctx["body"][1][2]["display_value"] == "120.00%"
ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate(True, True)
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "120.00%"
def test_format_index_with_na_rep():
df = DataFrame([[1, 2, 3, 4, 5]], columns=["A", None, np.nan, NaT, NA])
ctx = df.style.format_index(None, na_rep="--", axis=1)._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "A"
for i in [2, 3, 4, 5]:
assert ctx["head"][0][i]["display_value"] == "--"
def test_format_non_numeric_na():
# GH 21527 28358
df = DataFrame(
{
"object": [None, np.nan, "foo"],
"datetime": [None, NaT, Timestamp("20120101")],
}
)
ctx = df.style.format(None, na_rep="-")._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "-"
assert ctx["body"][0][2]["display_value"] == "-"
assert ctx["body"][1][1]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "-"
@pytest.mark.parametrize(
"func, attr, kwargs",
[
("format", "_display_funcs", {}),
("format_index", "_display_funcs_index", {"axis": 0}),
("format_index", "_display_funcs_columns", {"axis": 1}),
],
)
def test_format_clear(styler, func, attr, kwargs):
assert (0, 0) not in getattr(styler, attr) # using default
getattr(styler, func)("{:.2f}", **kwargs)
assert (0, 0) in getattr(styler, attr) # formatter is specified
getattr(styler, func)(**kwargs)
assert (0, 0) not in getattr(styler, attr) # formatter cleared to default
@pytest.mark.parametrize(
"escape, exp",
[
("html", "&lt;&gt;&amp;&#34;%$#_{}~^\\~ ^ \\ "),
(
"latex",
'<>\\&"\\%\\$\\#\\_\\{\\}\\textasciitilde \\textasciicircum '
"\\textbackslash \\textasciitilde \\space \\textasciicircum \\space "
"\\textbackslash \\space ",
),
],
)
def test_format_escape_html(escape, exp):
chars = '<>&"%$#_{}~^\\~ ^ \\ '
df = DataFrame([[chars]])
s = Styler(df, uuid_len=0).format("&{0}&", escape=None)
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{chars}&</td>'
assert expected in s.to_html()
# only the value should be escaped before passing to the formatter
s = Styler(df, uuid_len=0).format("&{0}&", escape=escape)
expected = f'<td id="T__row0_col0" class="data row0 col0" >&{exp}&</td>'
assert expected in s.to_html()
# also test format_index()
styler = Styler(DataFrame(columns=[chars]), uuid_len=0)
styler.format_index("&{0}&", escape=None, axis=1)
assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{chars}&"
styler.format_index("&{0}&", escape=escape, axis=1)
assert styler._translate(True, True)["head"][0][1]["display_value"] == f"&{exp}&"
@pytest.mark.parametrize(
"chars, expected",
[
(
r"$ \$&%#_{}~^\ $ &%#_{}~^\ $",
"".join(
[
r"$ \$&%#_{}~^\ $ ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum ",
r"\textbackslash \space \$",
]
),
),
(
r"\( &%#_{}~^\ \) &%#_{}~^\ \(",
"".join(
[
r"\( &%#_{}~^\ \) ",
r"\&\%\#\_\{\}\textasciitilde \textasciicircum ",
r"\textbackslash \space \textbackslash (",
]
),
),
(
r"$\&%#_{}^\$",
r"\$\textbackslash \&\%\#\_\{\}\textasciicircum \textbackslash \$",
),
(
r"$ \frac{1}{2} $ \( \frac{1}{2} \)",
"".join(
[
r"$ \frac{1}{2} $",
r" \textbackslash ( \textbackslash frac\{1\}\{2\} \textbackslash )",
]
),
),
],
)
def test_format_escape_latex_math(chars, expected):
# GH 51903
# latex-math escape works for each DataFrame cell separately. If we have
# a combination of dollar signs and brackets, the dollar sign would apply.
df = DataFrame([[chars]])
s = df.style.format("{0}", escape="latex-math")
assert s._translate(True, True)["body"][0][1]["display_value"] == expected
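# Worked illustration (not part of the test module), summarizing the cases above:
# with escape="latex-math", text inside $...$ or \( ... \) spans is passed through
# unchanged, while everything outside those spans is escaped as escape="latex" would.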
def test_format_escape_na_rep():
# tests the na_rep is not escaped
df = DataFrame([['<>&"', None]])
s = Styler(df, uuid_len=0).format("X&{0}>X", escape="html", na_rep="&")
ex = '<td id="T__row0_col0" class="data row0 col0" >X&&lt;&gt;&amp;&#34;>X</td>'
expected2 = '<td id="T__row0_col1" class="data row0 col1" >&</td>'
assert ex in s.to_html()
assert expected2 in s.to_html()
# also test for format_index()
df = DataFrame(columns=['<>&"', None])
styler = Styler(df, uuid_len=0)
styler.format_index("X&{0}>X", escape="html", na_rep="&", axis=1)
ctx = styler._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "X&&lt;&gt;&amp;&#34;>X"
assert ctx["head"][0][2]["display_value"] == "&"
def test_format_escape_floats(styler):
# test given formatter for number format is not impacted by escape
s = styler.format("{:.1f}", escape="html")
for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]:
assert expected in s.to_html()
# tests precision of floats is not impacted by escape
s = styler.format(precision=1, escape="html")
for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
assert expected in s.to_html()
@pytest.mark.parametrize("formatter", [5, True, [2.0]])
@pytest.mark.parametrize("func", ["format", "format_index"])
def test_format_raises(styler, formatter, func):
with pytest.raises(TypeError, match="expected str or callable"):
getattr(styler, func)(formatter)
@pytest.mark.parametrize(
"precision, expected",
[
(1, ["1.0", "2.0", "3.2", "4.6"]),
(2, ["1.00", "2.01", "3.21", "4.57"]),
(3, ["1.000", "2.009", "3.212", "4.566"]),
],
)
def test_format_with_precision(precision, expected):
# Issue #13257
df = DataFrame([[1.0, 2.0090, 3.2121, 4.566]], columns=[1.0, 2.0090, 3.2121, 4.566])
styler = Styler(df)
styler.format(precision=precision)
styler.format_index(precision=precision, axis=1)
ctx = styler._translate(True, True)
for col, exp in enumerate(expected):
assert ctx["body"][0][col + 1]["display_value"] == exp # format test
assert ctx["head"][0][col + 1]["display_value"] == exp # format_index test
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
"level, expected",
[
(0, ["X", "X", "_", "_"]), # level int
("zero", ["X", "X", "_", "_"]), # level name
(1, ["_", "_", "X", "X"]), # other level int
("one", ["_", "_", "X", "X"]), # other level name
([0, 1], ["X", "X", "X", "X"]), # both levels
([0, "zero"], ["X", "X", "_", "_"]), # level int and name simultaneous
([0, "one"], ["X", "X", "X", "X"]), # both levels as int and name
(["one", "zero"], ["X", "X", "X", "X"]), # both level names, reversed
],
)
def test_format_index_level(axis, level, expected):
midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"])
df = DataFrame([[1, 2], [3, 4]])
if axis == 0:
df.index = midx
else:
df.columns = midx
styler = df.style.format_index(lambda v: "X", level=level, axis=axis)
ctx = styler._translate(True, True)
if axis == 0: # compare index
result = [ctx["body"][s][0]["display_value"] for s in range(2)]
result += [ctx["body"][s][1]["display_value"] for s in range(2)]
else: # compare columns
result = [ctx["head"][0][s + 1]["display_value"] for s in range(2)]
result += [ctx["head"][1][s + 1]["display_value"] for s in range(2)]
assert expected == result
def test_format_subset():
df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
ctx = df.style.format(
{"a": "{:0.1f}", "b": "{0:.2%}"}, subset=IndexSlice[0, :]
)._translate(True, True)
expected = "0.1"
raw_11 = "1.123400"
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
assert ctx["body"][0][2]["display_value"] == "12.34%"
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, :])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
ctx = df.style.format("{:0.1f}", subset=IndexSlice["a"])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][0][2]["display_value"] == "0.123400"
ctx = df.style.format("{:0.1f}", subset=IndexSlice[0, "a"])._translate(True, True)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == raw_11
ctx = df.style.format("{:0.1f}", subset=IndexSlice[[0, 1], ["a"]])._translate(
True, True
)
assert ctx["body"][0][1]["display_value"] == expected
assert ctx["body"][1][1]["display_value"] == "1.1"
assert ctx["body"][0][2]["display_value"] == "0.123400"
assert ctx["body"][1][2]["display_value"] == raw_11
@pytest.mark.parametrize("formatter", [None, "{:,.1f}"])
@pytest.mark.parametrize("decimal", [".", "*"])
@pytest.mark.parametrize("precision", [None, 2])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_thousands(formatter, decimal, precision, func, col):
styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style
result = getattr(styler, func)( # testing float
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
styler = DataFrame([[1000000]], index=[1000000]).style
result = getattr(styler, func)( # testing int
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style
result = getattr(styler, func)( # testing complex
thousands="_", formatter=formatter, decimal=decimal, precision=precision
)._translate(True, True)
assert "1_000_000" in result["body"][0][col]["display_value"]
@pytest.mark.parametrize("formatter", [None, "{:,.4f}"])
@pytest.mark.parametrize("thousands", [None, ",", "*"])
@pytest.mark.parametrize("precision", [None, 4])
@pytest.mark.parametrize("func, col", [("format", 1), ("format_index", 0)])
def test_format_decimal(formatter, thousands, precision, func, col):
styler = DataFrame([[1000000.123456789]], index=[1000000.123456789]).style
result = getattr(styler, func)( # testing float
decimal="_", formatter=formatter, thousands=thousands, precision=precision
)._translate(True, True)
assert "000_123" in result["body"][0][col]["display_value"]
styler = DataFrame([[1 + 1000000.123456789j]], index=[1 + 1000000.123456789j]).style
result = getattr(styler, func)( # testing complex
decimal="_", formatter=formatter, thousands=thousands, precision=precision
)._translate(True, True)
assert "000_123" in result["body"][0][col]["display_value"]
def test_str_escape_error():
msg = "`escape` only permitted in {'html', 'latex', 'latex-math'}, got "
with pytest.raises(ValueError, match=msg):
_str_escape("text", "bad_escape")
with pytest.raises(ValueError, match=msg):
_str_escape("text", [])
_str_escape(2.00, "bad_escape") # OK since dtype is float
def test_long_int_formatting():
df = DataFrame(data=[[1234567890123456789]], columns=["test"])
styler = df.style
ctx = styler._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "1234567890123456789"
styler = df.style.format(thousands="_")
ctx = styler._translate(True, True)
assert ctx["body"][0][1]["display_value"] == "1_234_567_890_123_456_789"
def test_format_options():
df = DataFrame({"int": [2000, 1], "float": [1.009, None], "str": ["&<", "&~"]})
ctx = df.style._translate(True, True)
# test option: na_rep
assert ctx["body"][1][2]["display_value"] == "nan"
with option_context("styler.format.na_rep", "MISSING"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][2]["display_value"] == "MISSING"
# test option: decimal and precision
assert ctx["body"][0][2]["display_value"] == "1.009000"
with option_context("styler.format.decimal", "_"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][2]["display_value"] == "1_009000"
with option_context("styler.format.precision", 2):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][2]["display_value"] == "1.01"
# test option: thousands
assert ctx["body"][0][1]["display_value"] == "2000"
with option_context("styler.format.thousands", "_"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][1]["display_value"] == "2_000"
# test option: escape
assert ctx["body"][0][3]["display_value"] == "&<"
assert ctx["body"][1][3]["display_value"] == "&~"
with option_context("styler.format.escape", "html"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][3]["display_value"] == "&amp;&lt;"
with option_context("styler.format.escape", "latex"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
with option_context("styler.format.escape", "latex-math"):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][1][3]["display_value"] == "\\&\\textasciitilde "
# test option: formatter
with option_context("styler.format.formatter", {"int": "{:,.2f}"}):
ctx_with_op = df.style._translate(True, True)
assert ctx_with_op["body"][0][1]["display_value"] == "2,000.00"
def test_precision_zero(df):
styler = Styler(df, precision=0)
ctx = styler._translate(True, True)
assert ctx["body"][0][2]["display_value"] == "-1"
assert ctx["body"][1][2]["display_value"] == "-1"
@pytest.mark.parametrize(
"formatter, exp",
[
(lambda x: f"{x:.3f}", "9.000"),
("{:.2f}", "9.00"),
({0: "{:.1f}"}, "9.0"),
(None, "9"),
],
)
def test_formatter_options_validator(formatter, exp):
df = DataFrame([[9]])
with option_context("styler.format.formatter", formatter):
assert f" {exp} " in df.style.to_latex()
def test_formatter_options_raises():
msg = "Value must be an instance of"
with pytest.raises(ValueError, match=msg):
with option_context("styler.format.formatter", ["bad", "type"]):
DataFrame().style.to_latex()
def test_1level_multiindex():
# GH 43383
midx = MultiIndex.from_product([[1, 2]], names=[""])
df = DataFrame(-1, index=midx, columns=[0, 1])
ctx = df.style._translate(True, True)
assert ctx["body"][0][0]["display_value"] == "1"
assert ctx["body"][0][0]["is_visible"] is True
assert ctx["body"][1][0]["display_value"] == "2"
assert ctx["body"][1][0]["is_visible"] is True
def test_boolean_format():
# gh 46384: booleans do not collapse to integer representation on display
df = DataFrame([[True, False]])
ctx = df.style._translate(True, True)
assert ctx["body"][0][1]["display_value"] is True
assert ctx["body"][0][2]["display_value"] is False
@pytest.mark.parametrize(
"hide, labels",
[
(False, [1, 2]),
(True, [1, 2, 3, 4]),
],
)
def test_relabel_raise_length(styler_multi, hide, labels):
if hide:
styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")])
with pytest.raises(ValueError, match="``labels`` must be of length equal"):
styler_multi.relabel_index(labels=labels)
def test_relabel_index(styler_multi):
labels = [(1, 2), (3, 4)]
styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")])
styler_multi.relabel_index(labels=labels)
ctx = styler_multi._translate(True, True)
assert {"value": "X", "display_value": 1}.items() <= ctx["body"][0][0].items()
assert {"value": "y", "display_value": 2}.items() <= ctx["body"][0][1].items()
assert {"value": "Y", "display_value": 3}.items() <= ctx["body"][1][0].items()
assert {"value": "x", "display_value": 4}.items() <= ctx["body"][1][1].items()
def test_relabel_columns(styler_multi):
labels = [(1, 2), (3, 4)]
styler_multi.hide(axis=1, subset=[("A", "a"), ("B", "b")])
styler_multi.relabel_index(axis=1, labels=labels)
ctx = styler_multi._translate(True, True)
assert {"value": "A", "display_value": 1}.items() <= ctx["head"][0][3].items()
assert {"value": "B", "display_value": 3}.items() <= ctx["head"][0][4].items()
assert {"value": "b", "display_value": 2}.items() <= ctx["head"][1][3].items()
assert {"value": "a", "display_value": 4}.items() <= ctx["head"][1][4].items()
def test_relabel_roundtrip(styler):
styler.relabel_index(["{}", "{}"])
ctx = styler._translate(True, True)
assert {"value": "x", "display_value": "x"}.items() <= ctx["body"][0][0].items()
assert {"value": "y", "display_value": "y"}.items() <= ctx["body"][1][0].items()

View File

@ -0,0 +1,218 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
IndexSlice,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture(params=[(None, "float64"), (NA, "Int64")])
def df(request):
# GH 45804
return DataFrame(
{"A": [0, np.nan, 10], "B": [1, request.param[0], 2]}, dtype=request.param[1]
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_highlight_null(styler):
result = styler.highlight_null()._compute().ctx
expected = {
(1, 0): [("background-color", "red")],
(1, 1): [("background-color", "red")],
}
assert result == expected
def test_highlight_null_subset(styler):
# GH 31345
result = (
styler.highlight_null(color="red", subset=["A"])
.highlight_null(color="green", subset=["B"])
._compute()
.ctx
)
expected = {
(1, 0): [("background-color", "red")],
(1, 1): [("background-color", "green")],
}
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
def test_highlight_minmax_basic(df, f):
expected = {
(0, 1): [("background-color", "red")],
# ignores NaN row,
(2, 0): [("background-color", "red")],
}
if f == "highlight_min":
df = -df
result = getattr(df.style, f)(axis=1, color="red")._compute().ctx
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize(
"kwargs",
[
{"axis": None, "color": "red"}, # test axis
{"axis": 0, "subset": ["A"], "color": "red"}, # test subset and ignores NaN
{"axis": None, "props": "background-color: red"}, # test props
],
)
def test_highlight_minmax_ext(df, f, kwargs):
expected = {(2, 0): [("background-color", "red")]}
if f == "highlight_min":
df = -df
result = getattr(df.style, f)(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
@pytest.mark.parametrize("axis", [None, 0, 1])
def test_highlight_minmax_nulls(f, axis):
# GH 42750
expected = {
(1, 0): [("background-color", "yellow")],
(1, 1): [("background-color", "yellow")],
}
if axis == 1:
expected.update({(2, 1): [("background-color", "yellow")]})
if f == "highlight_max":
df = DataFrame({"a": [NA, 1, None], "b": [np.nan, 1, -1]})
else:
df = DataFrame({"a": [NA, -1, None], "b": [np.nan, -1, 1]})
result = getattr(df.style, f)(axis=axis)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"kwargs",
[
{"left": 0, "right": 1}, # test basic range
{"left": 0, "right": 1, "props": "background-color: yellow"}, # test props
{"left": -100, "right": 100, "subset": IndexSlice[[0, 1], :]}, # test subset
{"left": 0, "subset": IndexSlice[[0, 1], :]}, # test no right
{"right": 1}, # test no left
{"left": [0, 0, 11], "axis": 0}, # test left as sequence
{"left": DataFrame({"A": [0, 0, 11], "B": [1, 1, 11]}), "axis": None}, # axis
{"left": 0, "right": [0, 1], "axis": 1}, # test sequence right
],
)
def test_highlight_between(styler, kwargs):
expected = {
(0, 0): [("background-color", "yellow")],
(0, 1): [("background-color", "yellow")],
}
result = styler.highlight_between(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"arg, map, axis",
[
("left", [1, 2], 0), # 0 axis has 3 elements not 2
("left", [1, 2, 3], 1), # 1 axis has 2 elements not 3
("left", np.array([[1, 2], [1, 2]]), None), # df is (2,3) not (2,2)
("right", [1, 2], 0), # same tests as above for 'right' not 'left'
("right", [1, 2, 3], 1), # ..
("right", np.array([[1, 2], [1, 2]]), None), # ..
],
)
def test_highlight_between_raises(arg, styler, map, axis):
msg = f"supplied '{arg}' is not correct shape"
with pytest.raises(ValueError, match=msg):
styler.highlight_between(**{arg: map, "axis": axis})._compute()
def test_highlight_between_raises2(styler):
msg = "values can be 'both', 'left', 'right', or 'neither'"
with pytest.raises(ValueError, match=msg):
styler.highlight_between(inclusive="badstring")._compute()
with pytest.raises(ValueError, match=msg):
styler.highlight_between(inclusive=1)._compute()
@pytest.mark.parametrize(
"inclusive, expected",
[
(
"both",
{
(0, 0): [("background-color", "yellow")],
(0, 1): [("background-color", "yellow")],
},
),
("neither", {}),
("left", {(0, 0): [("background-color", "yellow")]}),
("right", {(0, 1): [("background-color", "yellow")]}),
],
)
def test_highlight_between_inclusive(styler, inclusive, expected):
kwargs = {"left": 0, "right": 1, "subset": IndexSlice[[0, 1], :]}
result = styler.highlight_between(**kwargs, inclusive=inclusive)._compute()
assert result.ctx == expected
@pytest.mark.parametrize(
"kwargs",
[
{"q_left": 0.5, "q_right": 1, "axis": 0}, # base case
{"q_left": 0.5, "q_right": 1, "axis": None}, # test axis
{"q_left": 0, "q_right": 1, "subset": IndexSlice[2, :]}, # test subset
{"q_left": 0.5, "axis": 0}, # test no high
{"q_right": 1, "subset": IndexSlice[2, :], "axis": 1}, # test no low
{"q_left": 0.5, "axis": 0, "props": "background-color: yellow"}, # tst prop
],
)
def test_highlight_quantile(styler, kwargs):
expected = {
(2, 0): [("background-color", "yellow")],
(2, 1): [("background-color", "yellow")],
}
result = styler.highlight_quantile(**kwargs)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"f,kwargs",
[
("highlight_min", {"axis": 1, "subset": IndexSlice[1, :]}),
("highlight_max", {"axis": 0, "subset": [0]}),
("highlight_quantile", {"axis": None, "q_left": 0.6, "q_right": 0.8}),
("highlight_between", {"subset": [0]}),
],
)
@pytest.mark.parametrize(
"df",
[
DataFrame([[0, 10], [20, 30]], dtype=int),
DataFrame([[0, 10], [20, 30]], dtype=float),
DataFrame([[0, 10], [20, 30]], dtype="datetime64[ns]"),
DataFrame([[0, 10], [20, 30]], dtype=str),
DataFrame([[0, 10], [20, 30]], dtype="timedelta64[ns]"),
],
)
def test_all_highlight_dtypes(f, kwargs, df):
if f == "highlight_quantile" and isinstance(df.iloc[0, 0], (str)):
return None # quantile incompatible with str
if f == "highlight_between":
kwargs["left"] = df.iloc[1, 0] # set the range low for testing
expected = {(1, 0): [("background-color", "yellow")]}
result = getattr(df.style, f)(**kwargs)._compute().ctx
assert result == expected
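# Hedged sketch (helper name ours, colors and bounds arbitrary) chaining the
# builtin highlighters exercised above on a small frame.
def _example_chained_highlights():
    from pandas import DataFrame

    df = DataFrame({"a": [1.0, None, 3.0], "b": [4.0, 5.0, 6.0]})
    return (
        df.style.highlight_null(color="red")
        .highlight_max(axis=0, color="yellow")
        .highlight_between(left=2, right=5, props="font-weight: bold;")
    )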

File diff suppressed because it is too large

View File

@ -0,0 +1,335 @@
import gc
import numpy as np
import pytest
from pandas import (
DataFrame,
IndexSlice,
Series,
)
pytest.importorskip("matplotlib")
pytest.importorskip("jinja2")
import matplotlib as mpl
from pandas.io.formats.style import Styler
@pytest.fixture(autouse=True)
def mpl_cleanup():
# matplotlib/testing/decorators.py#L24
# 1) Resets units registry
# 2) Resets rc_context
# 3) Closes all figures
mpl = pytest.importorskip("matplotlib")
mpl_units = pytest.importorskip("matplotlib.units")
plt = pytest.importorskip("matplotlib.pyplot")
orig_units_registry = mpl_units.registry.copy()
with mpl.rc_context():
mpl.use("template")
yield
mpl_units.registry.clear()
mpl_units.registry.update(orig_units_registry)
plt.close("all")
# https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close # noqa: E501
gc.collect(1)
@pytest.fixture
def df():
return DataFrame([[1, 2], [2, 4]], columns=["A", "B"])
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
@pytest.fixture
def df_blank():
return DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"])
@pytest.fixture
def styler_blank(df_blank):
return Styler(df_blank, uuid_len=0)
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_function_gradient(styler, f):
for c_map in [None, "YlOrRd"]:
result = getattr(styler, f)(cmap=c_map)._compute().ctx
assert all("#" in x[0][1] for x in result.values())
assert result[(0, 0)] == result[(0, 1)]
assert result[(1, 0)] == result[(1, 1)]
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_color(styler, f):
result = getattr(styler, f)(subset=IndexSlice[1, "A"])._compute().ctx
if f == "background_gradient":
assert result[(1, 0)] == [("background-color", "#fff7fb"), ("color", "#000000")]
elif f == "text_gradient":
assert result[(1, 0)] == [("color", "#fff7fb")]
@pytest.mark.parametrize(
"axis, expected",
[
(0, ["low", "low", "high", "high"]),
(1, ["low", "high", "low", "high"]),
(None, ["low", "mid", "mid", "high"]),
],
)
@pytest.mark.parametrize("f", ["background_gradient", "text_gradient"])
def test_background_gradient_axis(styler, axis, expected, f):
if f == "background_gradient":
colors = {
"low": [("background-color", "#f7fbff"), ("color", "#000000")],
"mid": [("background-color", "#abd0e6"), ("color", "#000000")],
"high": [("background-color", "#08306b"), ("color", "#f1f1f1")],
}
elif f == "text_gradient":
colors = {
"low": [("color", "#f7fbff")],
"mid": [("color", "#abd0e6")],
"high": [("color", "#08306b")],
}
result = getattr(styler, f)(cmap="Blues", axis=axis)._compute().ctx
for i, cell in enumerate([(0, 0), (0, 1), (1, 0), (1, 1)]):
assert result[cell] == colors[expected[i]]
@pytest.mark.parametrize(
"cmap, expected",
[
(
"PuBu",
{
(4, 5): [("background-color", "#86b0d3"), ("color", "#000000")],
(4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")],
},
),
(
"YlOrRd",
{
(4, 8): [("background-color", "#fd913e"), ("color", "#000000")],
(4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")],
},
),
(
None,
{
(7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")],
(7, 1): [("background-color", "#4cc26c"), ("color", "#000000")],
},
),
],
)
def test_text_color_threshold(cmap, expected):
# GH 39888
df = DataFrame(np.arange(100).reshape(10, 10))
result = df.style.background_gradient(cmap=cmap, axis=None)._compute().ctx
for k in expected.keys():
assert result[k] == expected[k]
def test_background_gradient_vmin_vmax():
# GH 12145
df = DataFrame(range(5))
ctx = df.style.background_gradient(vmin=1, vmax=3)._compute().ctx
assert ctx[(0, 0)] == ctx[(1, 0)]
assert ctx[(4, 0)] == ctx[(3, 0)]
def test_background_gradient_int64():
# GH 28869
df1 = Series(range(3)).to_frame()
df2 = Series(range(3), dtype="Int64").to_frame()
ctx1 = df1.style.background_gradient()._compute().ctx
ctx2 = df2.style.background_gradient()._compute().ctx
assert ctx2[(0, 0)] == ctx1[(0, 0)]
assert ctx2[(1, 0)] == ctx1[(1, 0)]
assert ctx2[(2, 0)] == ctx1[(2, 0)]
@pytest.mark.parametrize(
"axis, gmap, expected",
[
(
0,
[1, 2],
{
(0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
(0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
(
1,
[1, 2],
{
(0, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
(
None,
np.array([[2, 1], [1, 2]]),
{
(0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")],
(1, 0): [("background-color", "#fff7fb"), ("color", "#000000")],
(0, 1): [("background-color", "#fff7fb"), ("color", "#000000")],
(1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")],
},
),
],
)
def test_background_gradient_gmap_array(styler_blank, axis, gmap, expected):
# tests when gmap is given as a sequence and converted to ndarray
result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute().ctx
assert result == expected
@pytest.mark.parametrize(
"gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)]
)
def test_background_gradient_gmap_array_raises(gmap, axis):
# test when gmap as converted ndarray is bad shape
df = DataFrame([[0, 0, 0], [0, 0, 0]])
msg = "supplied 'gmap' is not correct shape"
with pytest.raises(ValueError, match=msg):
df.style.background_gradient(gmap=gmap, axis=axis)._compute()
@pytest.mark.parametrize(
"gmap",
[
DataFrame( # reverse the columns
[[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"]
),
DataFrame( # reverse the index
[[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"]
),
DataFrame( # reverse the index and columns
[[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"]
),
DataFrame( # add unnecessary columns
[[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"]
),
DataFrame( # add unnecessary index
[[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"]
),
],
)
@pytest.mark.parametrize(
"subset, exp_gmap", # exp_gmap is underlying map DataFrame should conform to
[
(None, [[1, 2], [2, 1]]),
(["A"], [[1], [2]]), # slice only column "A" in data and gmap
(["B", "A"], [[2, 1], [1, 2]]), # reverse the columns in data
(IndexSlice["X", :], [[1, 2]]), # slice only index "X" in data and gmap
(IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]), # reverse the index in data
],
)
def test_background_gradient_gmap_dataframe_align(styler_blank, gmap, subset, exp_gmap):
# test gmap given as DataFrame that it aligns to the data including subset
expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap, subset=subset)
result = styler_blank.background_gradient(axis=None, gmap=gmap, subset=subset)
assert expected._compute().ctx == result._compute().ctx
@pytest.mark.parametrize(
"gmap, axis, exp_gmap",
[
(Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # revrse the index
(Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # revrse the cols
(Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx
(Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col
],
)
def test_background_gradient_gmap_series_align(styler_blank, gmap, axis, exp_gmap):
# test gmap given as Series that it aligns to the data including subset
expected = styler_blank.background_gradient(axis=None, gmap=exp_gmap)._compute()
result = styler_blank.background_gradient(axis=axis, gmap=gmap)._compute()
assert expected.ctx == result.ctx
@pytest.mark.parametrize(
"gmap, axis",
[
(DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1),
(DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0),
],
)
def test_background_gradient_gmap_wrong_dataframe(styler_blank, gmap, axis):
# test giving a gmap in DataFrame but with wrong axis
msg = "'gmap' is a DataFrame but underlying data for operations is a Series"
with pytest.raises(ValueError, match=msg):
styler_blank.background_gradient(gmap=gmap, axis=axis)._compute()
def test_background_gradient_gmap_wrong_series(styler_blank):
# test giving a gmap in Series form but with wrong axis
msg = "'gmap' is a Series but underlying data for operations is a DataFrame"
gmap = Series([1, 2], index=["X", "Y"])
with pytest.raises(ValueError, match=msg):
styler_blank.background_gradient(gmap=gmap, axis=None)._compute()
def test_background_gradient_nullable_dtypes():
# GH 50712
df1 = DataFrame([[1], [0], [np.nan]], dtype=float)
df2 = DataFrame([[1], [0], [None]], dtype="Int64")
ctx1 = df1.style.background_gradient()._compute().ctx
ctx2 = df2.style.background_gradient()._compute().ctx
assert ctx1 == ctx2
@pytest.mark.parametrize(
"cmap",
["PuBu", mpl.colormaps["PuBu"]],
)
def test_bar_colormap(cmap):
data = DataFrame([[1, 2], [3, 4]])
ctx = data.style.bar(cmap=cmap, axis=None)._compute().ctx
pubu_colors = {
(0, 0): "#d0d1e6",
(1, 0): "#056faf",
(0, 1): "#73a9cf",
(1, 1): "#023858",
}
for k, v in pubu_colors.items():
assert v in ctx[k][1][1]
def test_bar_color_raises(df):
msg = "`color` must be string or list or tuple of 2 strings"
with pytest.raises(ValueError, match=msg):
df.style.bar(color={"a", "b"}).to_html()
with pytest.raises(ValueError, match=msg):
df.style.bar(color=["a", "b", "c"]).to_html()
msg = "`color` and `cmap` cannot both be given"
with pytest.raises(ValueError, match=msg):
df.style.bar(color="something", cmap="something else").to_html()
@pytest.mark.parametrize(
"plot_method",
["scatter", "hexbin"],
)
def test_pass_colormap_instance(df, plot_method):
# https://github.com/pandas-dev/pandas/issues/49374
cmap = mpl.colors.ListedColormap([[1, 1, 1], [0, 0, 0]])
df["c"] = df.A + df.B
kwargs = {"x": "A", "y": "B", "c": "c", "colormap": cmap}
if plot_method == "hexbin":
kwargs["C"] = kwargs.pop("c")
getattr(df.plot, plot_method)(**kwargs)
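# Sketch (helper name ours, values arbitrary): gmap supplies the values used
# for shading while the displayed data is untouched, matching the alignment
# tests above.
def _example_gradient_with_gmap():
    from pandas import DataFrame

    df = DataFrame({"A": [0, 0], "B": [0, 0]}, index=["X", "Y"])
    gmap = DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"])
    return df.style.background_gradient(axis=None, gmap=gmap, cmap="PuBu")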

View File

@ -0,0 +1,140 @@
from textwrap import dedent
import pytest
from pandas import (
DataFrame,
IndexSlice,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["i", "j", "j"],
columns=["c", "d", "d"],
dtype=float,
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
def test_format_non_unique(df):
# GH 41269
# test dict
html = df.style.format({"d": "{:.1f}"}).to_html()
for val in ["1.000000<", "4.000000<", "7.000000<"]:
assert val in html
for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]:
assert val in html
# test subset
html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).to_html()
for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]:
assert val in html
for val in ["5.0<", "6.0<", "8.0<", "9.0<"]:
assert val in html
@pytest.mark.parametrize("func", ["apply", "map"])
def test_apply_map_non_unique_raises(df, func):
# GH 41269
if func == "apply":
op = lambda s: ["color: red;"] * len(s)
else:
op = lambda v: "color: red;"
with pytest.raises(KeyError, match="`Styler.apply` and `.map` are not"):
getattr(df.style, func)(op)._compute()
def test_table_styles_dict_non_unique_index(styler):
styles = styler.set_table_styles(
{"j": [{"selector": "td", "props": "a: v;"}]}, axis=1
).table_styles
assert styles == [
{"selector": "td.row1", "props": [("a", "v")]},
{"selector": "td.row2", "props": [("a", "v")]},
]
def test_table_styles_dict_non_unique_columns(styler):
styles = styler.set_table_styles(
{"d": [{"selector": "td", "props": "a: v;"}]}, axis=0
).table_styles
assert styles == [
{"selector": "td.col1", "props": [("a", "v")]},
{"selector": "td.col2", "props": [("a", "v")]},
]
def test_tooltips_non_unique_raises(styler):
# ttips has unique keys
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"])
styler.set_tooltips(ttips=ttips) # OK
# ttips has non-unique columns
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"])
with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"):
styler.set_tooltips(ttips=ttips)
# ttips has non-unique index
ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"])
with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"):
styler.set_tooltips(ttips=ttips)
def test_set_td_classes_non_unique_raises(styler):
# classes has unique keys
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"])
styler.set_td_classes(classes=classes) # OK
# classes has non-unique columns
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"])
with pytest.raises(KeyError, match="Classes render only if `classes` has unique"):
styler.set_td_classes(classes=classes)
# classes has non-unique index
classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"])
with pytest.raises(KeyError, match="Classes render only if `classes` has unique"):
styler.set_td_classes(classes=classes)
def test_hide_columns_non_unique(styler):
ctx = styler.hide(["d"], axis="columns")._translate(True, True)
assert ctx["head"][0][1]["display_value"] == "c"
assert ctx["head"][0][1]["is_visible"] is True
assert ctx["head"][0][2]["display_value"] == "d"
assert ctx["head"][0][2]["is_visible"] is False
assert ctx["head"][0][3]["display_value"] == "d"
assert ctx["head"][0][3]["is_visible"] is False
assert ctx["body"][0][1]["is_visible"] is True
assert ctx["body"][0][2]["is_visible"] is False
assert ctx["body"][0][3]["is_visible"] is False
def test_latex_non_unique(styler):
result = styler.to_latex()
assert result == dedent(
"""\
\\begin{tabular}{lrrr}
& c & d & d \\\\
i & 1.000000 & 2.000000 & 3.000000 \\\\
j & 4.000000 & 5.000000 & 6.000000 \\\\
j & 7.000000 & 8.000000 & 9.000000 \\\\
\\end{tabular}
"""
)

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,96 @@
from textwrap import dedent
import pytest
from pandas import (
DataFrame,
Series,
)
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
{"A": [0, 1], "B": [-0.61, -1.22], "C": Series(["ab", "cd"], dtype=object)}
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0, precision=2)
def test_basic_string(styler):
result = styler.to_string()
expected = dedent(
"""\
A B C
0 0 -0.61 ab
1 1 -1.22 cd
"""
)
assert result == expected
def test_string_delimiter(styler):
result = styler.to_string(delimiter=";")
expected = dedent(
"""\
;A;B;C
0;0;-0.61;ab
1;1;-1.22;cd
"""
)
assert result == expected
def test_concat(styler):
result = styler.concat(styler.data.agg(["sum"]).style).to_string()
expected = dedent(
"""\
A B C
0 0 -0.61 ab
1 1 -1.22 cd
sum 1 -1.830000 abcd
"""
)
assert result == expected
def test_concat_recursion(styler):
df = styler.data
styler1 = styler
styler2 = Styler(df.agg(["sum"]), uuid_len=0, precision=3)
styler3 = Styler(df.agg(["sum"]), uuid_len=0, precision=4)
result = styler1.concat(styler2.concat(styler3)).to_string()
expected = dedent(
"""\
A B C
0 0 -0.61 ab
1 1 -1.22 cd
sum 1 -1.830 abcd
sum 1 -1.8300 abcd
"""
)
assert result == expected
def test_concat_chain(styler):
df = styler.data
styler1 = styler
styler2 = Styler(df.agg(["sum"]), uuid_len=0, precision=3)
styler3 = Styler(df.agg(["sum"]), uuid_len=0, precision=4)
result = styler1.concat(styler2).concat(styler3).to_string()
expected = dedent(
"""\
A B C
0 0 -0.61 ab
1 1 -1.22 cd
sum 1 -1.830 abcd
sum 1 -1.8300 abcd
"""
)
assert result == expected
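# Sketch of the pattern relied on above (helper name ours): append an
# aggregated row by concatenating a second Styler built from df.agg(...).
def _example_concat_total_row():
    from pandas import DataFrame
    from pandas.io.formats.style import Styler

    df = DataFrame({"A": [0, 1], "B": [-0.61, -1.22]})
    return Styler(df, precision=2).concat(df.agg(["sum"]).style).to_string()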

View File

@ -0,0 +1,85 @@
import numpy as np
import pytest
from pandas import DataFrame
pytest.importorskip("jinja2")
from pandas.io.formats.style import Styler
@pytest.fixture
def df():
return DataFrame(
data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
columns=["A", "B", "C"],
index=["x", "y", "z"],
)
@pytest.fixture
def styler(df):
return Styler(df, uuid_len=0)
@pytest.mark.parametrize(
"ttips",
[
DataFrame( # Test basic reindex and ignoring blank
data=[["Min", "Max"], [np.nan, ""]],
columns=["A", "C"],
index=["x", "y"],
),
DataFrame( # Test non-referenced columns, reversed col names, short index
data=[["Max", "Min", "Bad-Col"]], columns=["C", "A", "D"], index=["x"]
),
],
)
def test_tooltip_render(ttips, styler):
# GH 21266
result = styler.set_tooltips(ttips).to_html()
# test tooltip table level class
assert "#T_ .pd-t {\n visibility: hidden;\n" in result
# test 'Min' tooltip added
assert "#T_ #T__row0_col0:hover .pd-t {\n visibility: visible;\n}" in result
assert '#T_ #T__row0_col0 .pd-t::after {\n content: "Min";\n}' in result
assert 'class="data row0 col0" >0<span class="pd-t"></span></td>' in result
# test 'Max' tooltip added
assert "#T_ #T__row0_col2:hover .pd-t {\n visibility: visible;\n}" in result
assert '#T_ #T__row0_col2 .pd-t::after {\n content: "Max";\n}' in result
assert 'class="data row0 col2" >2<span class="pd-t"></span></td>' in result
# test Nan, empty string and bad column ignored
assert "#T_ #T__row1_col0:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row1_col1:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row0_col1:hover .pd-t {\n visibility: visible;\n}" not in result
assert "#T_ #T__row1_col2:hover .pd-t {\n visibility: visible;\n}" not in result
assert "Bad-Col" not in result
def test_tooltip_ignored(styler):
# GH 21266
result = styler.to_html() # no set_tooltips() creates no <span>
assert '<style type="text/css">\n</style>' in result
assert '<span class="pd-t"></span>' not in result
def test_tooltip_css_class(styler):
# GH 21266
result = styler.set_tooltips(
DataFrame([["tooltip"]], index=["x"], columns=["A"]),
css_class="other-class",
props=[("color", "green")],
).to_html()
assert "#T_ .other-class {\n color: green;\n" in result
assert '#T_ #T__row0_col0 .other-class::after {\n content: "tooltip";\n' in result
# GH 39563
result = styler.set_tooltips( # set_tooltips overwrites previous
DataFrame([["tooltip"]], index=["x"], columns=["A"]),
css_class="another-class",
props="color:green;color:red;",
).to_html()
assert "#T_ .another-class {\n color: green;\n color: red;\n}" in result

View File

@ -0,0 +1,72 @@
import locale
import pytest
from pandas._config import detect_console_encoding
class MockEncoding:
"""
Used to add a side effect when accessing the 'encoding' property. If the
side effect is a str, that value is returned. Otherwise, the side effect
should be an exception, which is raised.
"""
def __init__(self, encoding) -> None:
super().__init__()
self.val = encoding
@property
def encoding(self):
return self.raise_or_return(self.val)
@staticmethod
def raise_or_return(val):
if isinstance(val, str):
return val
else:
raise val
@pytest.mark.parametrize("empty,filled", [["stdin", "stdout"], ["stdout", "stdin"]])
def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
# Ensures that sys.stdout.encoding or sys.stdin.encoding is used when
# they have values filled.
# GH 21552
with monkeypatch.context() as context:
context.setattr(f"sys.{empty}", MockEncoding(""))
context.setattr(f"sys.{filled}", MockEncoding(filled))
assert detect_console_encoding() == filled
@pytest.mark.parametrize("encoding", [AttributeError, OSError, "ascii"])
def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
# GH 21552
with monkeypatch.context() as context:
context.setattr("locale.getpreferredencoding", lambda: "foo")
context.setattr("sys.stdout", MockEncoding(encoding))
assert detect_console_encoding() == "foo"
@pytest.mark.parametrize(
"std,locale",
[
["ascii", "ascii"],
["ascii", locale.Error],
[AttributeError, "ascii"],
[AttributeError, locale.Error],
[OSError, "ascii"],
[OSError, locale.Error],
],
)
def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
# When both the stdout/stdin encoding and locale preferred encoding checks
# fail (or return 'ascii'), we should default to the sys default encoding.
# GH 21552
with monkeypatch.context() as context:
context.setattr(
"locale.getpreferredencoding", lambda: MockEncoding.raise_or_return(locale)
)
context.setattr("sys.stdout", MockEncoding(std))
context.setattr("sys.getdefaultencoding", lambda: "sysDefaultEncoding")
assert detect_console_encoding() == "sysDefaultEncoding"
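# Rough sketch (ours, not the pandas implementation) of the fallback chain the
# tests above pin down: stdout/stdin encoding first, then
# locale.getpreferredencoding(), then sys.getdefaultencoding().
def _example_encoding_fallback(stream):
    import locale
    import sys

    try:
        encoding = stream.encoding
    except (AttributeError, OSError):
        encoding = None
    if not encoding or encoding.lower() == "ascii":
        try:
            encoding = locale.getpreferredencoding()
        except locale.Error:
            encoding = None
    if not encoding or encoding.lower() == "ascii":
        encoding = sys.getdefaultencoding()
    return encoding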

View File

@ -0,0 +1,289 @@
import pytest
from pandas.errors import CSSWarning
import pandas._testing as tm
from pandas.io.formats.css import CSSResolver
def assert_resolves(css, props, inherited=None):
resolve = CSSResolver()
actual = resolve(css, inherited=inherited)
assert props == actual
def assert_same_resolution(css1, css2, inherited=None):
resolve = CSSResolver()
resolved1 = resolve(css1, inherited=inherited)
resolved2 = resolve(css2, inherited=inherited)
assert resolved1 == resolved2
@pytest.mark.parametrize(
"name,norm,abnorm",
[
(
"whitespace",
"hello: world; foo: bar",
" \t hello \t :\n world \n ; \n foo: \tbar\n\n",
),
("case", "hello: world; foo: bar", "Hello: WORLD; foO: bar"),
("empty-decl", "hello: world; foo: bar", "; hello: world;; foo: bar;\n; ;"),
("empty-list", "", ";"),
],
)
def test_css_parse_normalisation(name, norm, abnorm):
assert_same_resolution(norm, abnorm)
@pytest.mark.parametrize(
"invalid_css,remainder",
[
# No colon
("hello-world", ""),
("border-style: solid; hello-world", "border-style: solid"),
(
"border-style: solid; hello-world; font-weight: bold",
"border-style: solid; font-weight: bold",
),
# Unclosed string fail
# Invalid size
("font-size: blah", "font-size: 1em"),
("font-size: 1a2b", "font-size: 1em"),
("font-size: 1e5pt", "font-size: 1em"),
("font-size: 1+6pt", "font-size: 1em"),
("font-size: 1unknownunit", "font-size: 1em"),
("font-size: 10", "font-size: 1em"),
("font-size: 10 pt", "font-size: 1em"),
# Too many args
("border-top: 1pt solid red green", "border-top: 1pt solid green"),
],
)
def test_css_parse_invalid(invalid_css, remainder):
with tm.assert_produces_warning(CSSWarning):
assert_same_resolution(invalid_css, remainder)
@pytest.mark.parametrize(
"shorthand,expansions",
[
("margin", ["margin-top", "margin-right", "margin-bottom", "margin-left"]),
("padding", ["padding-top", "padding-right", "padding-bottom", "padding-left"]),
(
"border-width",
[
"border-top-width",
"border-right-width",
"border-bottom-width",
"border-left-width",
],
),
(
"border-color",
[
"border-top-color",
"border-right-color",
"border-bottom-color",
"border-left-color",
],
),
(
"border-style",
[
"border-top-style",
"border-right-style",
"border-bottom-style",
"border-left-style",
],
),
],
)
def test_css_side_shorthands(shorthand, expansions):
top, right, bottom, left = expansions
assert_resolves(
f"{shorthand}: 1pt", {top: "1pt", right: "1pt", bottom: "1pt", left: "1pt"}
)
assert_resolves(
f"{shorthand}: 1pt 4pt", {top: "1pt", right: "4pt", bottom: "1pt", left: "4pt"}
)
assert_resolves(
f"{shorthand}: 1pt 4pt 2pt",
{top: "1pt", right: "4pt", bottom: "2pt", left: "4pt"},
)
assert_resolves(
f"{shorthand}: 1pt 4pt 2pt 0pt",
{top: "1pt", right: "4pt", bottom: "2pt", left: "0pt"},
)
with tm.assert_produces_warning(CSSWarning):
assert_resolves(f"{shorthand}: 1pt 1pt 1pt 1pt 1pt", {})
@pytest.mark.parametrize(
"shorthand,sides",
[
("border-top", ["top"]),
("border-right", ["right"]),
("border-bottom", ["bottom"]),
("border-left", ["left"]),
("border", ["top", "right", "bottom", "left"]),
],
)
def test_css_border_shorthand_sides(shorthand, sides):
def create_border_dict(sides, color=None, style=None, width=None):
resolved = {}
for side in sides:
if color:
resolved[f"border-{side}-color"] = color
if style:
resolved[f"border-{side}-style"] = style
if width:
resolved[f"border-{side}-width"] = width
return resolved
assert_resolves(
f"{shorthand}: 1pt red solid", create_border_dict(sides, "red", "solid", "1pt")
)
@pytest.mark.parametrize(
"prop, expected",
[
("1pt red solid", ("red", "solid", "1pt")),
("red 1pt solid", ("red", "solid", "1pt")),
("red solid 1pt", ("red", "solid", "1pt")),
("solid 1pt red", ("red", "solid", "1pt")),
("red solid", ("red", "solid", "1.500000pt")),
# Note: color=black is not CSS conforming
# (See https://drafts.csswg.org/css-backgrounds/#border-shorthands)
("1pt solid", ("black", "solid", "1pt")),
("1pt red", ("red", "none", "1pt")),
("red", ("red", "none", "1.500000pt")),
("1pt", ("black", "none", "1pt")),
("solid", ("black", "solid", "1.500000pt")),
# Sizes
("1em", ("black", "none", "12pt")),
],
)
def test_css_border_shorthands(prop, expected):
color, style, width = expected
assert_resolves(
f"border-left: {prop}",
{
"border-left-color": color,
"border-left-style": style,
"border-left-width": width,
},
)
@pytest.mark.parametrize(
"style,inherited,equiv",
[
("margin: 1px; margin: 2px", "", "margin: 2px"),
("margin: 1px", "margin: 2px", "margin: 1px"),
("margin: 1px; margin: inherit", "margin: 2px", "margin: 2px"),
(
"margin: 1px; margin-top: 2px",
"",
"margin-left: 1px; margin-right: 1px; "
"margin-bottom: 1px; margin-top: 2px",
),
("margin-top: 2px", "margin: 1px", "margin: 1px; margin-top: 2px"),
("margin: 1px", "margin-top: 2px", "margin: 1px"),
(
"margin: 1px; margin-top: inherit",
"margin: 2px",
"margin: 1px; margin-top: 2px",
),
],
)
def test_css_precedence(style, inherited, equiv):
resolve = CSSResolver()
inherited_props = resolve(inherited)
style_props = resolve(style, inherited=inherited_props)
equiv_props = resolve(equiv)
assert style_props == equiv_props
@pytest.mark.parametrize(
"style,equiv",
[
(
"margin: 1px; margin-top: inherit",
"margin-bottom: 1px; margin-right: 1px; margin-left: 1px",
),
("margin-top: inherit", ""),
("margin-top: initial", ""),
],
)
def test_css_none_absent(style, equiv):
assert_same_resolution(style, equiv)
@pytest.mark.parametrize(
"size,resolved",
[
("xx-small", "6pt"),
("x-small", f"{7.5:f}pt"),
("small", f"{9.6:f}pt"),
("medium", "12pt"),
("large", f"{13.5:f}pt"),
("x-large", "18pt"),
("xx-large", "24pt"),
("8px", "6pt"),
("1.25pc", "15pt"),
(".25in", "18pt"),
("02.54cm", "72pt"),
("25.4mm", "72pt"),
("101.6q", "72pt"),
("101.6q", "72pt"),
],
)
@pytest.mark.parametrize("relative_to", [None, "16pt"]) # invariant to inherited size
def test_css_absolute_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {"font-size": relative_to}
assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)
@pytest.mark.parametrize(
"size,relative_to,resolved",
[
("1em", None, "12pt"),
("1.0em", None, "12pt"),
("1.25em", None, "15pt"),
("1em", "16pt", "16pt"),
("1.0em", "16pt", "16pt"),
("1.25em", "16pt", "20pt"),
("1rem", "16pt", "12pt"),
("1.0rem", "16pt", "12pt"),
("1.25rem", "16pt", "15pt"),
("100%", None, "12pt"),
("125%", None, "15pt"),
("100%", "16pt", "16pt"),
("125%", "16pt", "20pt"),
("2ex", None, "12pt"),
("2.0ex", None, "12pt"),
("2.50ex", None, "15pt"),
("inherit", "16pt", "16pt"),
("smaller", None, "10pt"),
("smaller", "18pt", "15pt"),
("larger", None, f"{14.4:f}pt"),
("larger", "15pt", "18pt"),
],
)
def test_css_relative_font_size(size, relative_to, resolved):
if relative_to is None:
inherited = None
else:
inherited = {"font-size": relative_to}
assert_resolves(f"font-size: {size}", {"font-size": resolved}, inherited=inherited)

View File

@ -0,0 +1,254 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
reset_option,
set_eng_float_format,
)
from pandas.io.formats.format import EngFormatter
@pytest.fixture(autouse=True)
def reset_float_format():
yield
reset_option("display.float_format")
class TestEngFormatter:
def test_eng_float_formatter2(self, float_frame):
df = float_frame
df.loc[5] = 0
set_eng_float_format()
repr(df)
set_eng_float_format(use_eng_prefix=True)
repr(df)
set_eng_float_format(accuracy=0)
repr(df)
def test_eng_float_formatter(self):
df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})
set_eng_float_format()
result = df.to_string()
expected = (
" A\n"
"0 1.410E+00\n"
"1 141.000E+00\n"
"2 14.100E+03\n"
"3 1.410E+06"
)
assert result == expected
set_eng_float_format(use_eng_prefix=True)
result = df.to_string()
expected = " A\n0 1.410\n1 141.000\n2 14.100k\n3 1.410M"
assert result == expected
set_eng_float_format(accuracy=0)
result = df.to_string()
expected = " A\n0 1E+00\n1 141E+00\n2 14E+03\n3 1E+06"
assert result == expected
def compare(self, formatter, input, output):
formatted_input = formatter(input)
assert formatted_input == output
def compare_all(self, formatter, in_out):
"""
Parameters
----------
formatter : EngFormatter
    Formatter under test.
in_out : list of tuples
    Each tuple = (number, expected_formatting). It is tested that
    'formatter(number) == expected_formatting'. *number* should be >= 0
    because 'formatter(-number) == "-" + expected_formatting[1:]' is also
    tested.
"""
for input, output in in_out:
self.compare(formatter, input, output)
self.compare(formatter, -input, "-" + output[1:])
def test_exponents_with_eng_prefix(self):
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
f = np.sqrt(2)
in_out = [
(f * 10**-24, " 1.414y"),
(f * 10**-23, " 14.142y"),
(f * 10**-22, " 141.421y"),
(f * 10**-21, " 1.414z"),
(f * 10**-20, " 14.142z"),
(f * 10**-19, " 141.421z"),
(f * 10**-18, " 1.414a"),
(f * 10**-17, " 14.142a"),
(f * 10**-16, " 141.421a"),
(f * 10**-15, " 1.414f"),
(f * 10**-14, " 14.142f"),
(f * 10**-13, " 141.421f"),
(f * 10**-12, " 1.414p"),
(f * 10**-11, " 14.142p"),
(f * 10**-10, " 141.421p"),
(f * 10**-9, " 1.414n"),
(f * 10**-8, " 14.142n"),
(f * 10**-7, " 141.421n"),
(f * 10**-6, " 1.414u"),
(f * 10**-5, " 14.142u"),
(f * 10**-4, " 141.421u"),
(f * 10**-3, " 1.414m"),
(f * 10**-2, " 14.142m"),
(f * 10**-1, " 141.421m"),
(f * 10**0, " 1.414"),
(f * 10**1, " 14.142"),
(f * 10**2, " 141.421"),
(f * 10**3, " 1.414k"),
(f * 10**4, " 14.142k"),
(f * 10**5, " 141.421k"),
(f * 10**6, " 1.414M"),
(f * 10**7, " 14.142M"),
(f * 10**8, " 141.421M"),
(f * 10**9, " 1.414G"),
(f * 10**10, " 14.142G"),
(f * 10**11, " 141.421G"),
(f * 10**12, " 1.414T"),
(f * 10**13, " 14.142T"),
(f * 10**14, " 141.421T"),
(f * 10**15, " 1.414P"),
(f * 10**16, " 14.142P"),
(f * 10**17, " 141.421P"),
(f * 10**18, " 1.414E"),
(f * 10**19, " 14.142E"),
(f * 10**20, " 141.421E"),
(f * 10**21, " 1.414Z"),
(f * 10**22, " 14.142Z"),
(f * 10**23, " 141.421Z"),
(f * 10**24, " 1.414Y"),
(f * 10**25, " 14.142Y"),
(f * 10**26, " 141.421Y"),
]
self.compare_all(formatter, in_out)
def test_exponents_without_eng_prefix(self):
formatter = EngFormatter(accuracy=4, use_eng_prefix=False)
f = np.pi
in_out = [
(f * 10**-24, " 3.1416E-24"),
(f * 10**-23, " 31.4159E-24"),
(f * 10**-22, " 314.1593E-24"),
(f * 10**-21, " 3.1416E-21"),
(f * 10**-20, " 31.4159E-21"),
(f * 10**-19, " 314.1593E-21"),
(f * 10**-18, " 3.1416E-18"),
(f * 10**-17, " 31.4159E-18"),
(f * 10**-16, " 314.1593E-18"),
(f * 10**-15, " 3.1416E-15"),
(f * 10**-14, " 31.4159E-15"),
(f * 10**-13, " 314.1593E-15"),
(f * 10**-12, " 3.1416E-12"),
(f * 10**-11, " 31.4159E-12"),
(f * 10**-10, " 314.1593E-12"),
(f * 10**-9, " 3.1416E-09"),
(f * 10**-8, " 31.4159E-09"),
(f * 10**-7, " 314.1593E-09"),
(f * 10**-6, " 3.1416E-06"),
(f * 10**-5, " 31.4159E-06"),
(f * 10**-4, " 314.1593E-06"),
(f * 10**-3, " 3.1416E-03"),
(f * 10**-2, " 31.4159E-03"),
(f * 10**-1, " 314.1593E-03"),
(f * 10**0, " 3.1416E+00"),
(f * 10**1, " 31.4159E+00"),
(f * 10**2, " 314.1593E+00"),
(f * 10**3, " 3.1416E+03"),
(f * 10**4, " 31.4159E+03"),
(f * 10**5, " 314.1593E+03"),
(f * 10**6, " 3.1416E+06"),
(f * 10**7, " 31.4159E+06"),
(f * 10**8, " 314.1593E+06"),
(f * 10**9, " 3.1416E+09"),
(f * 10**10, " 31.4159E+09"),
(f * 10**11, " 314.1593E+09"),
(f * 10**12, " 3.1416E+12"),
(f * 10**13, " 31.4159E+12"),
(f * 10**14, " 314.1593E+12"),
(f * 10**15, " 3.1416E+15"),
(f * 10**16, " 31.4159E+15"),
(f * 10**17, " 314.1593E+15"),
(f * 10**18, " 3.1416E+18"),
(f * 10**19, " 31.4159E+18"),
(f * 10**20, " 314.1593E+18"),
(f * 10**21, " 3.1416E+21"),
(f * 10**22, " 31.4159E+21"),
(f * 10**23, " 314.1593E+21"),
(f * 10**24, " 3.1416E+24"),
(f * 10**25, " 31.4159E+24"),
(f * 10**26, " 314.1593E+24"),
]
self.compare_all(formatter, in_out)
def test_rounding(self):
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
in_out = [
(5.55555, " 5.556"),
(55.5555, " 55.556"),
(555.555, " 555.555"),
(5555.55, " 5.556k"),
(55555.5, " 55.556k"),
(555555, " 555.555k"),
]
self.compare_all(formatter, in_out)
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
in_out = [
(5.55555, " 5.6"),
(55.5555, " 55.6"),
(555.555, " 555.6"),
(5555.55, " 5.6k"),
(55555.5, " 55.6k"),
(555555, " 555.6k"),
]
self.compare_all(formatter, in_out)
formatter = EngFormatter(accuracy=0, use_eng_prefix=True)
in_out = [
(5.55555, " 6"),
(55.5555, " 56"),
(555.555, " 556"),
(5555.55, " 6k"),
(55555.5, " 56k"),
(555555, " 556k"),
]
self.compare_all(formatter, in_out)
formatter = EngFormatter(accuracy=3, use_eng_prefix=True)
result = formatter(0)
assert result == " 0.000"
def test_nan(self):
# Issue #11981
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.nan)
assert result == "NaN"
df = DataFrame(
{
"a": [1.5, 10.3, 20.5],
"b": [50.3, 60.67, 70.12],
"c": [100.2, 101.33, 120.33],
}
)
pt = df.pivot_table(values="a", index="b", columns="c")
set_eng_float_format(accuracy=1)
result = pt.to_string()
assert "NaN" in result
def test_inf(self):
# Issue #11981
formatter = EngFormatter(accuracy=1, use_eng_prefix=True)
result = formatter(np.inf)
assert result == "inf"

File diff suppressed because it is too large

View File

@ -0,0 +1,90 @@
import numpy as np
import pandas._config.config as cf
from pandas import (
DataFrame,
MultiIndex,
)
class TestTableSchemaRepr:
def test_publishes(self, ip):
ipython = ip.instance(config=ip.config)
df = DataFrame({"A": [1, 2]})
objects = [df["A"], df] # dataframe / series
expected_keys = [
{"text/plain", "application/vnd.dataresource+json"},
{"text/plain", "text/html", "application/vnd.dataresource+json"},
]
opt = cf.option_context("display.html.table_schema", True)
last_obj = None
for obj, expected in zip(objects, expected_keys):
last_obj = obj
with opt:
formatted = ipython.display_formatter.format(obj)
assert set(formatted[0].keys()) == expected
with_latex = cf.option_context("styler.render.repr", "latex")
with opt, with_latex:
formatted = ipython.display_formatter.format(last_obj)
expected = {
"text/plain",
"text/html",
"text/latex",
"application/vnd.dataresource+json",
}
assert set(formatted[0].keys()) == expected
def test_publishes_not_implemented(self, ip):
# column MultiIndex
# GH#15996
midx = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
df = DataFrame(
np.random.default_rng(2).standard_normal((5, len(midx))), columns=midx
)
opt = cf.option_context("display.html.table_schema", True)
with opt:
formatted = ip.instance(config=ip.config).display_formatter.format(df)
expected = {"text/plain", "text/html"}
assert set(formatted[0].keys()) == expected
def test_config_on(self):
df = DataFrame({"A": [1, 2]})
with cf.option_context("display.html.table_schema", True):
result = df._repr_data_resource_()
assert result is not None
def test_config_default_off(self):
df = DataFrame({"A": [1, 2]})
with cf.option_context("display.html.table_schema", False):
result = df._repr_data_resource_()
assert result is None
def test_enable_data_resource_formatter(self, ip):
# GH#10491
formatters = ip.instance(config=ip.config).display_formatter.formatters
mimetype = "application/vnd.dataresource+json"
with cf.option_context("display.html.table_schema", True):
assert "application/vnd.dataresource+json" in formatters
assert formatters[mimetype].enabled
# still there, just disabled
assert "application/vnd.dataresource+json" in formatters
assert not formatters[mimetype].enabled
# able to re-set
with cf.option_context("display.html.table_schema", True):
assert "application/vnd.dataresource+json" in formatters
assert formatters[mimetype].enabled
# smoke test that it works
ip.instance(config=ip.config).display_formatter.format(cf)

View File

@ -0,0 +1,129 @@
# Note! This file is aimed specifically at pandas.io.formats.printing utility
# functions, not the general printing of pandas objects.
import string
import pandas._config.config as cf
from pandas.io.formats import printing
def test_adjoin():
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
expected = "a dd ggg\nb ee hhh\nc ff iii"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
class TestPPrintThing:
def test_repr_binary_type(self):
letters = string.ascii_letters
try:
raw = bytes(letters, encoding=cf.get_option("display.encoding"))
except TypeError:
raw = bytes(letters)
b = str(raw.decode("utf-8"))
res = printing.pprint_thing(b, quote_strings=True)
assert res == repr(b)
res = printing.pprint_thing(b, quote_strings=False)
assert res == b
def test_repr_obeys_max_seq_limit(self):
with cf.option_context("display.max_seq_items", 2000):
assert len(printing.pprint_thing(list(range(1000)))) > 1000
with cf.option_context("display.max_seq_items", 5):
assert len(printing.pprint_thing(list(range(1000)))) < 100
with cf.option_context("display.max_seq_items", 1):
assert len(printing.pprint_thing(list(range(1000)))) < 9
def test_repr_set(self):
assert printing.pprint_thing({1}) == "{1}"
class TestFormatBase:
def test_adjoin(self):
data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]]
expected = "a dd ggg\nb ee hhh\nc ff iii"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
def test_adjoin_unicode(self):
data = [["", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "hhh", "いいい"]]
expected = "あ dd ggg\nb ええ hhh\nc ff いいい"
adjoined = printing.adjoin(2, *data)
assert adjoined == expected
adj = printing._EastAsianTextAdjustment()
expected = """あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
cols = adjoined.split("\n")
assert adj.len(cols[0]) == 13
assert adj.len(cols[1]) == 13
assert adj.len(cols[2]) == 16
expected = """あ dd ggg
b ええ hhh
c ff いいい"""
adjoined = adj.adjoin(7, *data)
assert adjoined == expected
cols = adjoined.split("\n")
assert adj.len(cols[0]) == 23
assert adj.len(cols[1]) == 23
assert adj.len(cols[2]) == 26
def test_justify(self):
adj = printing._EastAsianTextAdjustment()
def just(x, *args, **kwargs):
# wrapper to test single str
return adj.justify([x], *args, **kwargs)[0]
assert just("abc", 5, mode="left") == "abc "
assert just("abc", 5, mode="center") == " abc "
assert just("abc", 5, mode="right") == " abc"
assert just("abc", 5, mode="left") == "abc "
assert just("abc", 5, mode="center") == " abc "
assert just("abc", 5, mode="right") == " abc"
assert just("パンダ", 5, mode="left") == "パンダ"
assert just("パンダ", 5, mode="center") == "パンダ"
assert just("パンダ", 5, mode="right") == "パンダ"
assert just("パンダ", 10, mode="left") == "パンダ "
assert just("パンダ", 10, mode="center") == " パンダ "
assert just("パンダ", 10, mode="right") == " パンダ"
def test_east_asian_len(self):
adj = printing._EastAsianTextAdjustment()
assert adj.len("abc") == 3
assert adj.len("abc") == 3
assert adj.len("パンダ") == 6
assert adj.len("パンダ") == 5
assert adj.len("パンダpanda") == 11
assert adj.len("パンダpanda") == 10
def test_ambiguous_width(self):
adj = printing._EastAsianTextAdjustment()
assert adj.len("¡¡ab") == 4
with cf.option_context("display.unicode.ambiguous_as_wide", True):
adj = printing._EastAsianTextAdjustment()
assert adj.len("¡¡ab") == 6
data = [["", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]]
expected = "あ dd ggg \nb ええ ¡¡ab\nc ff いいい"
adjoined = adj.adjoin(2, *data)
assert adjoined == expected
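# The width-2 accounting asserted above follows unicodedata's East Asian width
# classes ("W"/"F" count as two terminal columns); a minimal sketch (ours):
def _example_east_asian_width():
    import unicodedata

    assert unicodedata.east_asian_width("パ") == "W"  # wide: 2 columns
    assert unicodedata.east_asian_width("ﾊ") == "H"  # half-width: 1 column
    assert unicodedata.east_asian_width("a") == "Na"  # narrow: 1 column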

View File

@ -0,0 +1,758 @@
import io
import os
import sys
from zipfile import ZipFile
from _csv import Error
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
compat,
)
import pandas._testing as tm
class TestToCSV:
def test_to_csv_with_single_column(self):
# see gh-18676, https://bugs.python.org/issue32255
#
# Python's CSV library adds an extraneous '""'
# before the newline when the NaN-value is in
# the first row. Otherwise, only the newline
# character is added. This behavior is inconsistent
# and was patched in https://bugs.python.org/pull_request4672.
df1 = DataFrame([None, 1])
expected1 = """\
""
1.0
"""
with tm.ensure_clean("test.csv") as path:
df1.to_csv(path, header=None, index=None)
with open(path, encoding="utf-8") as f:
assert f.read() == expected1
df2 = DataFrame([1, None])
expected2 = """\
1.0
""
"""
with tm.ensure_clean("test.csv") as path:
df2.to_csv(path, header=None, index=None)
with open(path, encoding="utf-8") as f:
assert f.read() == expected2
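    # Illustrative aside (not a test): the stdlib quirk referenced above is
    # visible with the csv module directly:
    #
    #   import csv, io
    #   buf = io.StringIO()
    #   csv.writer(buf).writerow([""])
    #   buf.getvalue()  # '""\r\n' -- a lone empty field is quoted so the
    #                   # row is distinguishable from a blank line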
def test_to_csv_default_encoding(self):
# GH17097
df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]})
with tm.ensure_clean("test.csv") as path:
# the default to_csv encoding is utf-8.
df.to_csv(path)
tm.assert_frame_equal(pd.read_csv(path, index_col=0), df)
def test_to_csv_quotechar(self):
df = DataFrame({"col": [1, 2]})
expected = """\
"","col"
"0","1"
"1","2"
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1) # 1=QUOTE_ALL
with open(path, encoding="utf-8") as f:
assert f.read() == expected
expected = """\
$$,$col$
$0$,$1$
$1$,$2$
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, quotechar="$")
with open(path, encoding="utf-8") as f:
assert f.read() == expected
with tm.ensure_clean("test.csv") as path:
with pytest.raises(TypeError, match="quotechar"):
df.to_csv(path, quoting=1, quotechar=None)
def test_to_csv_doublequote(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = '''\
"","col"
"0","a""a"
"1","""bb"""
'''
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL
with open(path, encoding="utf-8") as f:
assert f.read() == expected
with tm.ensure_clean("test.csv") as path:
with pytest.raises(Error, match="escapechar"):
df.to_csv(path, doublequote=False) # no escapechar set
def test_to_csv_escapechar(self):
df = DataFrame({"col": ['a"a', '"bb"']})
expected = """\
"","col"
"0","a\\"a"
"1","\\"bb\\""
"""
with tm.ensure_clean("test.csv") as path: # QUOTE_ALL
df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
with open(path, encoding="utf-8") as f:
assert f.read() == expected
df = DataFrame({"col": ["a,a", ",bb,"]})
expected = """\
,col
0,a\\,a
1,\\,bb\\,
"""
with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE
with open(path, encoding="utf-8") as f:
assert f.read() == expected
def test_csv_to_string(self):
df = DataFrame({"col": [1, 2]})
expected_rows = [",col", "0,1", "1,2"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected
def test_to_csv_decimal(self):
# see gh-781
df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]})
expected_rows = [",col1,col2,col3", "0,1,a,10.1"]
expected_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv() == expected_default
expected_rows = [";col1;col2;col3", "0;1;a;10,1"]
expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(decimal=",", sep=";") == expected_european_excel
expected_rows = [",col1,col2,col3", "0,1,a,10.10"]
expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(float_format="%.2f") == expected_float_format_default
expected_rows = [";col1;col2;col3", "0;1;a;10,10"]
expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows)
assert (
df.to_csv(decimal=",", sep=";", float_format="%.2f")
== expected_float_format
)
# see gh-11553: testing if decimal is taken into account for '0.0'
df = DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False, decimal="^") == expected
# same but for an index
assert df.set_index("a").to_csv(decimal="^") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected
def test_to_csv_float_format(self):
# testing if float_format is taken into account for the index
# GH 11553
df = DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1})
expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(float_format="%.2f") == expected
# same for a multi-index
assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected
def test_to_csv_na_rep(self):
# see gh-11553
#
# Testing if NaN values are correctly represented in the index.
df = DataFrame({"a": [0, np.nan], "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# now with an index containing only NaNs
df = DataFrame({"a": np.nan, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "_,0,2", "_,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
# check that the na_rep parameter does not break anything when there are no NaNs
df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]})
expected_rows = ["a,b,c", "0,0,2", "0,1,3"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.set_index("a").to_csv(na_rep="_") == expected
assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected
csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ")
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
assert expected == csv
def test_to_csv_na_rep_nullable_string(self, nullable_string_dtype):
# GH 29975
# Make sure full na_rep shows up when a dtype is provided
expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"])
csv = pd.Series(["a", pd.NA, "c"], dtype=nullable_string_dtype).to_csv(
na_rep="ZZZZZ"
)
assert expected == csv
def test_to_csv_date_format(self):
# GH 10209
df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")})
df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")})
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-01 00:00:01",
"2,2013-01-01 00:00:02",
"3,2013-01-01 00:00:03",
"4,2013-01-01 00:00:04",
]
expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv() == expected_default_sec
expected_rows = [
",A",
"0,2013-01-01 00:00:00",
"1,2013-01-02 00:00:00",
"2,2013-01-03 00:00:00",
"3,2013-01-04 00:00:00",
"4,2013-01-05 00:00:00",
]
expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-01",
"2,2013-01-01",
"3,2013-01-01",
"4,2013-01-01",
]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
expected_rows = [
",A",
"0,2013-01-01",
"1,2013-01-02",
"2,2013-01-03",
"3,2013-01-04",
"4,2013-01-05",
]
expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows)
assert df_day.to_csv() == expected_default_day
assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day
# see gh-7791
#
# Testing if date_format parameter is taken into account
# for multi-indexed DataFrames.
df_sec["B"] = 0
df_sec["C"] = 1
expected_rows = ["A,B,C", "2013-01-01,0,1.0"]
expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows)
df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"])
assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec
def test_to_csv_different_datetime_formats(self):
# GH#21734
df = DataFrame(
{
"date": pd.to_datetime("1970-01-01"),
"datetime": pd.date_range("1970-01-01", periods=2, freq="h"),
}
)
expected_rows = [
"date,datetime",
"1970-01-01,1970-01-01 00:00:00",
"1970-01-01,1970-01-01 01:00:00",
]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert df.to_csv(index=False) == expected
def test_to_csv_date_format_in_categorical(self):
# GH#40754
ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d"))
ser = ser.astype("category")
expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""'])
assert ser.to_csv(index=False) == expected
ser = pd.Series(
pd.date_range(
start="2021-03-27", freq="D", periods=1, tz="Europe/Berlin"
).append(pd.DatetimeIndex([pd.NaT]))
)
ser = ser.astype("category")
assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected
def test_to_csv_float_ea_float_format(self):
# GH#45991
df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"})
df["a"] = df["a"].astype("Float64")
result = df.to_csv(index=False, float_format="%.5f")
expected = tm.convert_rows_list_to_csv_str(
["a,b", "1.10000,c", "2.02000,c", ",c", "6.00001,c"]
)
assert result == expected
def test_to_csv_float_ea_no_float_format(self):
# GH#45991
df = DataFrame({"a": [1.1, 2.02, pd.NA, 6.000006], "b": "c"})
df["a"] = df["a"].astype("Float64")
result = df.to_csv(index=False)
expected = tm.convert_rows_list_to_csv_str(
["a,b", "1.1,c", "2.02,c", ",c", "6.000006,c"]
)
assert result == expected
def test_to_csv_multi_index(self):
# see gh-6618
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]]))
exp_rows = [",1", ",2", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame(
[1],
columns=pd.MultiIndex.from_arrays([[1], [2]]),
index=pd.MultiIndex.from_arrays([[1], [2]]),
)
exp_rows = [",,1", ",,2", "1,2,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["1", "2", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]]))
exp_rows = [",foo", ",bar", "0,1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv() == exp
exp_rows = ["foo", "bar", "1"]
exp = tm.convert_rows_list_to_csv_str(exp_rows)
assert df.to_csv(index=False) == exp
@pytest.mark.parametrize(
"ind,expected",
[
(
pd.MultiIndex(levels=[[1.0]], codes=[[0]], names=["x"]),
"x,data\n1.0,1\n",
),
(
pd.MultiIndex(
levels=[[1.0], [2.0]], codes=[[0], [0]], names=["x", "y"]
),
"x,y,data\n1.0,2.0,1\n",
),
],
)
def test_to_csv_single_level_multi_index(self, ind, expected, frame_or_series):
# see gh-19589
obj = frame_or_series(pd.Series([1], ind, name="data"))
result = obj.to_csv(lineterminator="\n", header=True)
assert result == expected
def test_to_csv_string_array_ascii(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = DataFrame(str_array)
expected_ascii = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("str_test.csv") as path:
df.to_csv(path, encoding="ascii")
with open(path, encoding="utf-8") as f:
assert f.read() == expected_ascii
def test_to_csv_string_array_utf8(self):
# GH 10813
str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}]
df = DataFrame(str_array)
expected_utf8 = """\
,names
0,"['foo', 'bar']"
1,"['baz', 'qux']"
"""
with tm.ensure_clean("unicode_test.csv") as path:
df.to_csv(path, encoding="utf-8")
with open(path, encoding="utf-8") as f:
assert f.read() == expected_utf8
def test_to_csv_string_with_lf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]}
df = DataFrame(data)
with tm.ensure_clean("lf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_lf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\nef"'
+ os_linesep
+ b'3,"g\nh\n\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("lf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n'
df.to_csv(path, lineterminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("lf_test.csv") as path:
# case 3: CRLF as line terminator
# 'lineterminator' should not change inner element
expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n'
df.to_csv(path, lineterminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_string_with_crlf(self):
# GH 20353
data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]}
df = DataFrame(data)
with tm.ensure_clean("crlf_test.csv") as path:
# case 1: The default line terminator(=os.linesep)(PR 21406)
os_linesep = os.linesep.encode("utf-8")
expected_noarg = (
b"int,str_crlf"
+ os_linesep
+ b"1,abc"
+ os_linesep
+ b'2,"d\r\nef"'
+ os_linesep
+ b'3,"g\r\nh\r\n\r\ni"'
+ os_linesep
)
df.to_csv(path, index=False)
with open(path, "rb") as f:
assert f.read() == expected_noarg
with tm.ensure_clean("crlf_test.csv") as path:
# case 2: LF as line terminator
expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n'
df.to_csv(path, lineterminator="\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_lf
with tm.ensure_clean("crlf_test.csv") as path:
# case 3: CRLF as line terminator
# 'lineterminator' should not change inner element
expected_crlf = (
b"int,str_crlf\r\n"
b"1,abc\r\n"
b'2,"d\r\nef"\r\n'
b'3,"g\r\nh\r\n\r\ni"\r\n'
)
df.to_csv(path, lineterminator="\r\n", index=False)
with open(path, "rb") as f:
assert f.read() == expected_crlf
def test_to_csv_stdout_file(self, capsys):
# GH 21561
df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["name_1", "name_2"])
expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"]
expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows)
df.to_csv(sys.stdout, encoding="ascii")
captured = capsys.readouterr()
assert captured.out == expected_ascii
assert not sys.stdout.closed
@pytest.mark.xfail(
compat.is_platform_windows(),
reason=(
"Especially in Windows, file stream should not be passed"
"to csv writer without newline='' option."
"(https://docs.python.org/3/library/csv.html#csv.writer)"
),
)
def test_to_csv_write_to_open_file(self):
# GH 21696
df = DataFrame({"a": ["x", "y", "z"]})
expected = """\
manual header
x
y
z
"""
with tm.ensure_clean("test.txt") as path:
with open(path, "w", encoding="utf-8") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path, encoding="utf-8") as f:
assert f.read() == expected
def test_to_csv_write_to_open_file_with_newline_py3(self):
# see gh-21696
# see gh-20353
df = DataFrame({"a": ["x", "y", "z"]})
expected_rows = ["x", "y", "z"]
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
with tm.ensure_clean("test.txt") as path:
with open(path, "w", newline="", encoding="utf-8") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path, "rb") as f:
assert f.read() == bytes(expected, "utf-8")
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_csv_compression(
self, compression_only, read_infer, to_infer, compression_to_extension
):
# see gh-15008
compression = compression_only
# We'll complete file extension subsequently.
filename = "test."
filename += compression_to_extension[compression]
df = DataFrame({"A": [1]})
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression=to_compression)
result = pd.read_csv(path, index_col=0, compression=read_compression)
tm.assert_frame_equal(result, df)
def test_to_csv_compression_dict(self, compression_only):
# GH 26023
method = compression_only
df = DataFrame({"ABC": [1]})
filename = "to_csv_compress_as_dict."
extension = {
"gzip": "gz",
"zstd": "zst",
}.get(method, method)
filename += extension
with tm.ensure_clean(filename) as path:
df.to_csv(path, compression={"method": method})
read_df = pd.read_csv(path, index_col=0)
tm.assert_frame_equal(read_df, df)
def test_to_csv_compression_dict_no_method_raises(self):
# GH 26023
df = DataFrame({"ABC": [1]})
compression = {"some_option": True}
msg = "must have key 'method'"
with tm.ensure_clean("out.zip") as path:
with pytest.raises(ValueError, match=msg):
df.to_csv(path, compression=compression)
@pytest.mark.parametrize("compression", ["zip", "infer"])
@pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
def test_to_csv_zip_arguments(self, compression, archive_name):
# GH 26023
df = DataFrame({"ABC": [1]})
with tm.ensure_clean("to_csv_archive_name.zip") as path:
df.to_csv(
path, compression={"method": compression, "archive_name": archive_name}
)
with ZipFile(path) as zp:
assert len(zp.filelist) == 1
archived_file = zp.filelist[0].filename
assert archived_file == archive_name
@pytest.mark.parametrize(
"filename,expected_arcname",
[
("archive.csv", "archive.csv"),
("archive.tsv", "archive.tsv"),
("archive.csv.zip", "archive.csv"),
("archive.tsv.zip", "archive.tsv"),
("archive.zip", "archive"),
],
)
def test_to_csv_zip_infer_name(self, tmp_path, filename, expected_arcname):
# GH 39465
df = DataFrame({"ABC": [1]})
path = tmp_path / filename
df.to_csv(path, compression="zip")
with ZipFile(path) as zp:
assert len(zp.filelist) == 1
archived_file = zp.filelist[0].filename
assert archived_file == expected_arcname
@pytest.mark.parametrize("df_new_type", ["Int64"])
def test_to_csv_na_rep_long_string(self, df_new_type):
# see gh-25099
df = DataFrame({"c": [float("nan")] * 3})
df = df.astype(df_new_type)
expected_rows = ["c", "mynull", "mynull", "mynull"]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
result = df.to_csv(index=False, na_rep="mynull", encoding="ascii")
assert expected == result
def test_to_csv_timedelta_precision(self):
# GH 6783
s = pd.Series([1, 1]).astype("timedelta64[ns]")
buf = io.StringIO()
s.to_csv(buf)
result = buf.getvalue()
expected_rows = [
",0",
"0,0 days 00:00:00.000000001",
"1,0 days 00:00:00.000000001",
]
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert result == expected
def test_na_rep_truncated(self):
# https://github.com/pandas-dev/pandas/issues/31447
result = pd.Series(range(8, 12)).to_csv(na_rep="-")
expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"])
assert result == expected
result = pd.Series([True, False]).to_csv(na_rep="nan")
expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"])
assert result == expected
result = pd.Series([1.1, 2.2]).to_csv(na_rep=".")
expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"])
assert result == expected
@pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"])
def test_to_csv_errors(self, errors):
# GH 22610
data = ["\ud800foo"]
ser = pd.Series(data, index=Index(data, dtype=object), dtype=object)
with tm.ensure_clean("test.csv") as path:
ser.to_csv(path, errors=errors)
# No use in reading back the data as it is not the same anymore
# due to the error handling
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_binary_handle(self, mode):
"""
Binary file objects should work (if 'mode' contains a 'b') or even without
it in most cases.
GH 35058 and GH 19827
"""
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
with tm.ensure_clean() as path:
with open(path, mode="w+b") as handle:
df.to_csv(handle, mode=mode)
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
@pytest.mark.parametrize("mode", ["wb", "w"])
def test_to_csv_encoding_binary_handle(self, mode):
"""
Binary file objects should honor a specified encoding.
GH 23854 and GH 13068 with binary handles
"""
# example from GH 23854
content = "a, b, 🐟".encode("utf-8-sig")
buffer = io.BytesIO(content)
df = pd.read_csv(buffer, encoding="utf-8-sig")
buffer = io.BytesIO()
df.to_csv(buffer, mode=mode, encoding="utf-8-sig", index=False)
buffer.seek(0) # tests whether file handle wasn't closed
assert buffer.getvalue().startswith(content)
# example from GH 13068
with tm.ensure_clean() as path:
with open(path, "w+b") as handle:
DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig")
handle.seek(0)
assert handle.read().startswith(b'\xef\xbb\xbf""')
def test_to_csv_iterative_compression_name(compression):
# GH 38714
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
with tm.ensure_clean() as path:
df.to_csv(path, compression=compression, chunksize=1)
tm.assert_frame_equal(
pd.read_csv(path, compression=compression, index_col=0), df
)
def test_to_csv_iterative_compression_buffer(compression):
# GH 38714
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
with io.BytesIO() as buffer:
df.to_csv(buffer, compression=compression, chunksize=1)
buffer.seek(0)
tm.assert_frame_equal(
pd.read_csv(buffer, compression=compression, index_col=0), df
)
assert not buffer.closed
def test_to_csv_pos_args_deprecation():
# GH-54229
df = DataFrame({"a": [1, 2, 3]})
msg = (
r"Starting with pandas version 3.0 all arguments of to_csv except for the "
r"argument 'path_or_buf' will be keyword-only."
)
with tm.assert_produces_warning(FutureWarning, match=msg):
buffer = io.BytesIO()
df.to_csv(buffer, ";")
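# Illustrative sketch (not part of the committed test file): the keyword form is
# what the warning above steers callers towards and stays valid in pandas 3.0.
sketch_buffer = io.BytesIO()
df.to_csv(sketch_buffer, sep=";")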

View File

@ -0,0 +1,429 @@
"""Tests formatting as writer-agnostic ExcelCells
ExcelFormatter is tested implicitly in pandas/tests/io/excel
"""
import string
import pytest
from pandas.errors import CSSWarning
import pandas._testing as tm
from pandas.io.formats.excel import (
CssExcelCell,
CSSToExcelConverter,
)
@pytest.mark.parametrize(
"css,expected",
[
# FONT
# - name
("font-family: foo,bar", {"font": {"name": "foo"}}),
('font-family: "foo bar",baz', {"font": {"name": "foo bar"}}),
("font-family: foo,\nbar", {"font": {"name": "foo"}}),
("font-family: foo, bar, baz", {"font": {"name": "foo"}}),
("font-family: bar, foo", {"font": {"name": "bar"}}),
("font-family: 'foo bar', baz", {"font": {"name": "foo bar"}}),
("font-family: 'foo \\'bar', baz", {"font": {"name": "foo 'bar"}}),
('font-family: "foo \\"bar", baz', {"font": {"name": 'foo "bar'}}),
('font-family: "foo ,bar", baz', {"font": {"name": "foo ,bar"}}),
# - family
("font-family: serif", {"font": {"name": "serif", "family": 1}}),
("font-family: Serif", {"font": {"name": "serif", "family": 1}}),
("font-family: roman, serif", {"font": {"name": "roman", "family": 1}}),
("font-family: roman, sans-serif", {"font": {"name": "roman", "family": 2}}),
("font-family: roman, sans serif", {"font": {"name": "roman"}}),
("font-family: roman, sansserif", {"font": {"name": "roman"}}),
("font-family: roman, cursive", {"font": {"name": "roman", "family": 4}}),
("font-family: roman, fantasy", {"font": {"name": "roman", "family": 5}}),
# - size
("font-size: 1em", {"font": {"size": 12}}),
("font-size: xx-small", {"font": {"size": 6}}),
("font-size: x-small", {"font": {"size": 7.5}}),
("font-size: small", {"font": {"size": 9.6}}),
("font-size: medium", {"font": {"size": 12}}),
("font-size: large", {"font": {"size": 13.5}}),
("font-size: x-large", {"font": {"size": 18}}),
("font-size: xx-large", {"font": {"size": 24}}),
("font-size: 50%", {"font": {"size": 6}}),
# - bold
("font-weight: 100", {"font": {"bold": False}}),
("font-weight: 200", {"font": {"bold": False}}),
("font-weight: 300", {"font": {"bold": False}}),
("font-weight: 400", {"font": {"bold": False}}),
("font-weight: normal", {"font": {"bold": False}}),
("font-weight: lighter", {"font": {"bold": False}}),
("font-weight: bold", {"font": {"bold": True}}),
("font-weight: bolder", {"font": {"bold": True}}),
("font-weight: 700", {"font": {"bold": True}}),
("font-weight: 800", {"font": {"bold": True}}),
("font-weight: 900", {"font": {"bold": True}}),
# - italic
("font-style: italic", {"font": {"italic": True}}),
("font-style: oblique", {"font": {"italic": True}}),
# - underline
("text-decoration: underline", {"font": {"underline": "single"}}),
("text-decoration: overline", {}),
("text-decoration: none", {}),
# - strike
("text-decoration: line-through", {"font": {"strike": True}}),
(
"text-decoration: underline line-through",
{"font": {"strike": True, "underline": "single"}},
),
(
"text-decoration: underline; text-decoration: line-through",
{"font": {"strike": True}},
),
# - color
("color: red", {"font": {"color": "FF0000"}}),
("color: #ff0000", {"font": {"color": "FF0000"}}),
("color: #f0a", {"font": {"color": "FF00AA"}}),
# - shadow
("text-shadow: none", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #CCC", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px #999", {"font": {"shadow": False}}),
("text-shadow: 0px -0em 0px", {"font": {"shadow": False}}),
("text-shadow: 2px -0em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -2em 0px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px #CCC", {"font": {"shadow": True}}),
("text-shadow: 0px -0em 2px", {"font": {"shadow": True}}),
("text-shadow: 0px -2em", {"font": {"shadow": True}}),
# FILL
# - color, fillType
(
"background-color: red",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #ff0000",
{"fill": {"fgColor": "FF0000", "patternType": "solid"}},
),
(
"background-color: #f0a",
{"fill": {"fgColor": "FF00AA", "patternType": "solid"}},
),
# BORDER
# - style
(
"border-style: solid",
{
"border": {
"top": {"style": "medium"},
"bottom": {"style": "medium"},
"left": {"style": "medium"},
"right": {"style": "medium"},
}
},
),
(
"border-style: solid; border-width: thin",
{
"border": {
"top": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
"right": {"style": "thin"},
}
},
),
(
"border-top-style: solid; border-top-width: thin",
{"border": {"top": {"style": "thin"}}},
),
(
"border-top-style: solid; border-top-width: 1pt",
{"border": {"top": {"style": "thin"}}},
),
("border-top-style: solid", {"border": {"top": {"style": "medium"}}}),
(
"border-top-style: solid; border-top-width: medium",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: 2pt",
{"border": {"top": {"style": "medium"}}},
),
(
"border-top-style: solid; border-top-width: thick",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: solid; border-top-width: 4pt",
{"border": {"top": {"style": "thick"}}},
),
(
"border-top-style: dotted",
{"border": {"top": {"style": "mediumDashDotDot"}}},
),
(
"border-top-style: dotted; border-top-width: thin",
{"border": {"top": {"style": "dotted"}}},
),
("border-top-style: dashed", {"border": {"top": {"style": "mediumDashed"}}}),
(
"border-top-style: dashed; border-top-width: thin",
{"border": {"top": {"style": "dashed"}}},
),
("border-top-style: double", {"border": {"top": {"style": "double"}}}),
# - color
(
"border-style: solid; border-color: #0000ff",
{
"border": {
"top": {"style": "medium", "color": "0000FF"},
"right": {"style": "medium", "color": "0000FF"},
"bottom": {"style": "medium", "color": "0000FF"},
"left": {"style": "medium", "color": "0000FF"},
}
},
),
(
"border-top-style: double; border-top-color: blue",
{"border": {"top": {"style": "double", "color": "0000FF"}}},
),
(
"border-top-style: solid; border-top-color: #06c",
{"border": {"top": {"style": "medium", "color": "0066CC"}}},
),
(
"border-top-color: blue",
{"border": {"top": {"color": "0000FF", "style": "none"}}},
),
# ALIGNMENT
# - horizontal
("text-align: center", {"alignment": {"horizontal": "center"}}),
("text-align: left", {"alignment": {"horizontal": "left"}}),
("text-align: right", {"alignment": {"horizontal": "right"}}),
("text-align: justify", {"alignment": {"horizontal": "justify"}}),
# - vertical
("vertical-align: top", {"alignment": {"vertical": "top"}}),
("vertical-align: text-top", {"alignment": {"vertical": "top"}}),
("vertical-align: middle", {"alignment": {"vertical": "center"}}),
("vertical-align: bottom", {"alignment": {"vertical": "bottom"}}),
("vertical-align: text-bottom", {"alignment": {"vertical": "bottom"}}),
# - wrap_text
("white-space: nowrap", {"alignment": {"wrap_text": False}}),
("white-space: pre", {"alignment": {"wrap_text": False}}),
("white-space: pre-line", {"alignment": {"wrap_text": False}}),
("white-space: normal", {"alignment": {"wrap_text": True}}),
# NUMBER FORMAT
("number-format: 0%", {"number_format": {"format_code": "0%"}}),
(
"number-format: 0§[Red](0)§-§@;",
{"number_format": {"format_code": "0;[red](0);-;@"}}, # GH 46152
),
],
)
def test_css_to_excel(css, expected):
convert = CSSToExcelConverter()
assert expected == convert(css)
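# Illustrative sketch (not part of the committed test file): declarations from a
# single compound string are merged into one style dict, reusing mappings that
# the parametrized cases above already cover.
sketch_convert = CSSToExcelConverter()
assert sketch_convert("font-weight: bold; color: #f0a") == {
    "font": {"bold": True, "color": "FF00AA"}
}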
def test_css_to_excel_multiple():
convert = CSSToExcelConverter()
actual = convert(
"""
font-weight: bold;
text-decoration: underline;
color: red;
border-width: thin;
text-align: center;
vertical-align: top;
unused: something;
"""
)
assert {
"font": {"bold": True, "underline": "single", "color": "FF0000"},
"border": {
"top": {"style": "thin"},
"right": {"style": "thin"},
"bottom": {"style": "thin"},
"left": {"style": "thin"},
},
"alignment": {"horizontal": "center", "vertical": "top"},
} == actual
@pytest.mark.parametrize(
"css,inherited,expected",
[
("font-weight: bold", "", {"font": {"bold": True}}),
("", "font-weight: bold", {"font": {"bold": True}}),
(
"font-weight: bold",
"font-style: italic",
{"font": {"bold": True, "italic": True}},
),
("font-style: normal", "font-style: italic", {"font": {"italic": False}}),
("font-style: inherit", "", {}),
(
"font-style: normal; font-style: inherit",
"font-style: italic",
{"font": {"italic": True}},
),
],
)
def test_css_to_excel_inherited(css, inherited, expected):
convert = CSSToExcelConverter(inherited)
assert expected == convert(css)
@pytest.mark.parametrize(
"input_color,output_color",
(
list(CSSToExcelConverter.NAMED_COLORS.items())
+ [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()]
+ [("#F0F", "FF00FF"), ("#ABC", "AABBCC")]
),
)
def test_css_to_excel_good_colors(input_color, output_color):
# see gh-18392
css = (
f"border-top-color: {input_color}; "
f"border-right-color: {input_color}; "
f"border-bottom-color: {input_color}; "
f"border-left-color: {input_color}; "
f"background-color: {input_color}; "
f"color: {input_color}"
)
expected = {}
expected["fill"] = {"patternType": "solid", "fgColor": output_color}
expected["font"] = {"color": output_color}
expected["border"] = {
k: {"color": output_color, "style": "none"}
for k in ("top", "right", "bottom", "left")
}
with tm.assert_produces_warning(None):
convert = CSSToExcelConverter()
assert expected == convert(css)
@pytest.mark.parametrize("input_color", [None, "not-a-color"])
def test_css_to_excel_bad_colors(input_color):
# see gh-18392
css = (
f"border-top-color: {input_color}; "
f"border-right-color: {input_color}; "
f"border-bottom-color: {input_color}; "
f"border-left-color: {input_color}; "
f"background-color: {input_color}; "
f"color: {input_color}"
)
expected = {}
if input_color is not None:
expected["fill"] = {"patternType": "solid"}
with tm.assert_produces_warning(CSSWarning):
convert = CSSToExcelConverter()
assert expected == convert(css)
def tests_css_named_colors_valid():
upper_hexs = set(map(str.upper, string.hexdigits))
for color in CSSToExcelConverter.NAMED_COLORS.values():
assert len(color) == 6 and all(c in upper_hexs for c in color)
def test_css_named_colors_from_mpl_present():
mpl_colors = pytest.importorskip("matplotlib.colors")
pd_colors = CSSToExcelConverter.NAMED_COLORS
for name, color in mpl_colors.CSS4_COLORS.items():
assert name in pd_colors and pd_colors[name] == color[1:]
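# Illustrative sketch (not part of the committed test file): NAMED_COLORS maps
# CSS color names to bare six-digit hex strings, matching the "color: red" case
# near the top of this file.
assert CSSToExcelConverter.NAMED_COLORS["red"] == "FF0000"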
@pytest.mark.parametrize(
"styles,expected",
[
([("color", "green"), ("color", "red")], "color: red;"),
([("font-weight", "bold"), ("font-weight", "normal")], "font-weight: normal;"),
([("text-align", "center"), ("TEXT-ALIGN", "right")], "text-align: right;"),
],
)
def test_css_excel_cell_precedence(styles, expected):
"""It applies favors latter declarations over former declarations"""
# See GH 47371
converter = CSSToExcelConverter()
converter._call_cached.cache_clear()
css_styles = {(0, 0): styles}
cell = CssExcelCell(
row=0,
col=0,
val="",
style=None,
css_styles=css_styles,
css_row=0,
css_col=0,
css_converter=converter,
)
converter._call_cached.cache_clear()
assert cell.style == converter(expected)
@pytest.mark.parametrize(
"styles,cache_hits,cache_misses",
[
([[("color", "green"), ("color", "red"), ("color", "green")]], 0, 1),
(
[
[("font-weight", "bold")],
[("font-weight", "normal"), ("font-weight", "bold")],
],
1,
1,
),
([[("text-align", "center")], [("TEXT-ALIGN", "center")]], 1, 1),
(
[
[("font-weight", "bold"), ("text-align", "center")],
[("font-weight", "bold"), ("text-align", "left")],
],
0,
2,
),
(
[
[("font-weight", "bold"), ("text-align", "center")],
[("font-weight", "bold"), ("text-align", "left")],
[("font-weight", "bold"), ("text-align", "center")],
],
1,
2,
),
],
)
def test_css_excel_cell_cache(styles, cache_hits, cache_misses):
"""It caches unique cell styles"""
# See GH 47371
converter = CSSToExcelConverter()
converter._call_cached.cache_clear()
css_styles = {(0, i): _style for i, _style in enumerate(styles)}
for css_row, css_col in css_styles:
CssExcelCell(
row=0,
col=0,
val="",
style=None,
css_styles=css_styles,
css_row=css_row,
css_col=css_col,
css_converter=converter,
)
cache_info = converter._call_cached.cache_info()
converter._call_cached.cache_clear()
assert cache_info.hits == cache_hits
assert cache_info.misses == cache_misses

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,106 @@
from io import (
BytesIO,
StringIO,
)
import pytest
import pandas as pd
import pandas._testing as tm
pytest.importorskip("tabulate")
def test_simple():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf)
result = buf.getvalue()
assert (
result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
def test_empty_frame():
buf = StringIO()
df = pd.DataFrame({"id": [], "first_name": [], "last_name": []}).set_index("id")
df.to_markdown(buf=buf)
result = buf.getvalue()
assert result == (
"| id | first_name | last_name |\n"
"|------|--------------|-------------|"
)
def test_other_tablefmt():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf, tablefmt="jira")
result = buf.getvalue()
assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
def test_other_headers():
buf = StringIO()
df = pd.DataFrame([1, 2, 3])
df.to_markdown(buf=buf, headers=["foo", "bar"])
result = buf.getvalue()
assert result == (
"| foo | bar |\n|------:|------:|\n| 0 "
"| 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
def test_series():
buf = StringIO()
s = pd.Series([1, 2, 3], name="foo")
s.to_markdown(buf=buf)
result = buf.getvalue()
assert result == (
"| | foo |\n|---:|------:|\n| 0 | 1 "
"|\n| 1 | 2 |\n| 2 | 3 |"
)
def test_no_buf():
df = pd.DataFrame([1, 2, 3])
result = df.to_markdown()
assert (
result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
@pytest.mark.parametrize("index", [True, False])
def test_index(index):
# GH 32667
df = pd.DataFrame([1, 2, 3])
result = df.to_markdown(index=index)
if index:
expected = (
"| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |"
)
else:
expected = "| 0 |\n|----:|\n| 1 |\n| 2 |\n| 3 |"
assert result == expected
def test_showindex_disallowed_in_kwargs():
# GH 32667; disallowing showindex in kwargs enforced in 2.0
df = pd.DataFrame([1, 2, 3])
with pytest.raises(ValueError, match="Pass 'index' instead of 'showindex"):
df.to_markdown(index=True, showindex=True)
def test_markdown_pos_args_deprecation():
# GH-54229
df = pd.DataFrame({"a": [1, 2, 3]})
msg = (
r"Starting with pandas version 3.0 all arguments of to_markdown except for the "
r"argument 'buf' will be keyword-only."
)
with tm.assert_produces_warning(FutureWarning, match=msg):
buffer = BytesIO()
df.to_markdown(buffer, "grid")

File diff suppressed because it is too large

View File

@ -0,0 +1,350 @@
"""
Self-contained script to write legacy storage pickle files.
To use this script, create an environment where you want to
generate pickles, say it's for 0.20.3, with your pandas clone
in ~/pandas
. activate pandas_0.20.3
cd ~/pandas/pandas
$ python -m tests.io.generate_legacy_storage_files \
tests/io/data/legacy_pickle/0.20.3/ pickle
This script generates a storage file for the current arch, system,
and python version
pandas version: 0.20.3
output dir : pandas/pandas/tests/io/data/legacy_pickle/0.20.3/
storage format: pickle
created pickle file: 0.20.3_x86_64_darwin_3.5.2.pickle
The idea here is you are using the *current* version of the
generate_legacy_storage_files with an *older* version of pandas to
generate a pickle file. We will then check this file into a current
branch, and test using test_pickle.py. This will load the *older*
pickles and test versus the current data that is generated
(with main). These are then compared.
If we have cases where we changed the signature (e.g. we renamed
offset -> freq in Timestamp), then we have to conditionally execute
in generate_legacy_storage_files.py to make it
run under the older AND the newer version.
"""
from datetime import timedelta
import os
import pickle
import platform as pl
import sys
# Remove script directory from path, otherwise Python will try to
# import the JSON test directory as the json module
sys.path.pop(0)
import numpy as np
import pandas
from pandas import (
Categorical,
DataFrame,
Index,
MultiIndex,
NaT,
Period,
RangeIndex,
Series,
Timestamp,
bdate_range,
date_range,
interval_range,
period_range,
timedelta_range,
)
from pandas.arrays import SparseArray
from pandas.tseries.offsets import (
FY5253,
BusinessDay,
BusinessHour,
CustomBusinessDay,
DateOffset,
Day,
Easter,
Hour,
LastWeekOfMonth,
Minute,
MonthBegin,
MonthEnd,
QuarterBegin,
QuarterEnd,
SemiMonthBegin,
SemiMonthEnd,
Week,
WeekOfMonth,
YearBegin,
YearEnd,
)
def _create_sp_series():
nan = np.nan
# nan-based
arr = np.arange(15, dtype=np.float64)
arr[7:12] = nan
arr[-1:] = nan
bseries = Series(SparseArray(arr, kind="block"))
bseries.name = "bseries"
return bseries
def _create_sp_tsseries():
nan = np.nan
# nan-based
arr = np.arange(15, dtype=np.float64)
arr[7:12] = nan
arr[-1:] = nan
date_index = bdate_range("1/1/2011", periods=len(arr))
bseries = Series(SparseArray(arr, kind="block"), index=date_index)
bseries.name = "btsseries"
return bseries
def _create_sp_frame():
nan = np.nan
data = {
"A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
"B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
"C": np.arange(10).astype(np.int64),
"D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
}
dates = bdate_range("1/1/2011", periods=10)
return DataFrame(data, index=dates).apply(SparseArray)
def create_pickle_data():
"""create the pickle data"""
data = {
"A": [0.0, 1.0, 2.0, 3.0, np.nan],
"B": [0, 1, 0, 1, 0],
"C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
"D": date_range("1/1/2009", periods=5),
"E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
}
scalars = {"timestamp": Timestamp("20130101"), "period": Period("2012", "M")}
index = {
"int": Index(np.arange(10)),
"date": date_range("20130101", periods=10),
"period": period_range("2013-01-01", freq="M", periods=10),
"float": Index(np.arange(10, dtype=np.float64)),
"uint": Index(np.arange(10, dtype=np.uint64)),
"timedelta": timedelta_range("00:00:00", freq="30min", periods=10),
"string": Index(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
}
index["range"] = RangeIndex(10)
index["interval"] = interval_range(0, periods=10)
mi = {
"reg2": MultiIndex.from_tuples(
tuple(
zip(
*[
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
)
),
names=["first", "second"],
)
}
series = {
"float": Series(data["A"]),
"int": Series(data["B"]),
"mixed": Series(data["E"]),
"ts": Series(
np.arange(10).astype(np.int64), index=date_range("20130101", periods=10)
),
"mi": Series(
np.arange(5).astype(np.float64),
index=MultiIndex.from_tuples(
tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"]
),
),
"dup": Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]),
"cat": Series(Categorical(["foo", "bar", "baz"])),
"dt": Series(date_range("20130101", periods=5)),
"dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
"period": Series([Period("2000Q1")] * 5),
"string": Series(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
}
mixed_dup_df = DataFrame(data)
mixed_dup_df.columns = list("ABCDA")
frame = {
"float": DataFrame({"A": series["float"], "B": series["float"] + 1}),
"int": DataFrame({"A": series["int"], "B": series["int"] + 1}),
"mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
"mi": DataFrame(
{"A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64)},
index=MultiIndex.from_tuples(
tuple(
zip(
*[
["bar", "bar", "baz", "baz", "baz"],
["one", "two", "one", "two", "three"],
]
)
),
names=["first", "second"],
),
),
"dup": DataFrame(
np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"]
),
"cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
"cat_and_float": DataFrame(
{
"A": Categorical(["foo", "bar", "baz"]),
"B": np.arange(3).astype(np.int64),
}
),
"mixed_dup": mixed_dup_df,
"dt_mixed_tzs": DataFrame(
{
"A": Timestamp("20130102", tz="US/Eastern"),
"B": Timestamp("20130603", tz="CET"),
},
index=range(5),
),
"dt_mixed2_tzs": DataFrame(
{
"A": Timestamp("20130102", tz="US/Eastern"),
"B": Timestamp("20130603", tz="CET"),
"C": Timestamp("20130603", tz="UTC"),
},
index=range(5),
),
"string": DataFrame(
{
"A": Series(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
"B": Series(["one", "two", "one", "two", "three"], dtype="string"),
}
),
}
cat = {
"int8": Categorical(list("abcdefg")),
"int16": Categorical(np.arange(1000)),
"int32": Categorical(np.arange(10000)),
}
timestamp = {
"normal": Timestamp("2011-01-01"),
"nat": NaT,
"tz": Timestamp("2011-01-01", tz="US/Eastern"),
}
off = {
"DateOffset": DateOffset(years=1),
"DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
"BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
"BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
"CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
"SemiMonthBegin": SemiMonthBegin(day_of_month=9),
"SemiMonthEnd": SemiMonthEnd(day_of_month=24),
"MonthBegin": MonthBegin(1),
"MonthEnd": MonthEnd(1),
"QuarterBegin": QuarterBegin(1),
"QuarterEnd": QuarterEnd(1),
"Day": Day(1),
"YearBegin": YearBegin(1),
"YearEnd": YearEnd(1),
"Week": Week(1),
"Week_Tues": Week(2, normalize=False, weekday=1),
"WeekOfMonth": WeekOfMonth(week=3, weekday=4),
"LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
"FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
"Easter": Easter(),
"Hour": Hour(1),
"Minute": Minute(1),
}
return {
"series": series,
"frame": frame,
"index": index,
"scalars": scalars,
"mi": mi,
"sp_series": {"float": _create_sp_series(), "ts": _create_sp_tsseries()},
"sp_frame": {"float": _create_sp_frame()},
"cat": cat,
"timestamp": timestamp,
"offsets": off,
}
def platform_name():
return "_".join(
[
str(pandas.__version__),
str(pl.machine()),
str(pl.system().lower()),
str(pl.python_version()),
]
)
def write_legacy_pickles(output_dir):
version = pandas.__version__
print(
"This script generates a storage file for the current arch, system, "
"and python version"
)
print(f" pandas version: {version}")
print(f" output dir : {output_dir}")
print(" storage format: pickle")
pth = f"{platform_name()}.pickle"
with open(os.path.join(output_dir, pth), "wb") as fh:
pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)
print(f"created pickle file: {pth}")
def write_legacy_file():
# force our cwd to be the first searched
sys.path.insert(0, "")
if not 3 <= len(sys.argv) <= 4:
sys.exit(
"Specify output directory and storage type: generate_legacy_"
"storage_files.py <output_dir> <storage_type> "
)
output_dir = str(sys.argv[1])
storage_type = str(sys.argv[2])
if not os.path.exists(output_dir):
os.mkdir(output_dir)
if storage_type == "pickle":
write_legacy_pickles(output_dir=output_dir)
else:
sys.exit("storage_type must be one of {'pickle'}")
if __name__ == "__main__":
write_legacy_file()
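# Illustrative sketch (not part of the committed script): loading a generated
# file back under a current pandas, with a path from a hypothetical earlier run.
def read_legacy_pickle(path):
    # pandas.read_pickle falls back to the stdlib pickle machinery, so the dict
    # returned by create_pickle_data() round-trips as a plain dict of objects.
    return pandas.read_pickle(path)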

View File

@ -0,0 +1,9 @@
import pytest
@pytest.fixture(params=["split", "records", "index", "columns", "values"])
def orient(request):
"""
Fixture for orients excluding the table format.
"""
return request.param

View File

@ -0,0 +1,130 @@
from io import (
BytesIO,
StringIO,
)
import pytest
import pandas.util._test_decorators as td
import pandas as pd
import pandas._testing as tm
def test_compression_roundtrip(compression):
df = pd.DataFrame(
[[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
index=["A", "B"],
columns=["X", "Y", "Z"],
)
with tm.ensure_clean() as path:
df.to_json(path, compression=compression)
tm.assert_frame_equal(df, pd.read_json(path, compression=compression))
# explicitly ensure file was compressed.
with tm.decompress_file(path, compression) as fh:
result = fh.read().decode("utf8")
data = StringIO(result)
tm.assert_frame_equal(df, pd.read_json(data))
def test_read_zipped_json(datapath):
uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
uncompressed_df = pd.read_json(uncompressed_path)
compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
compressed_df = pd.read_json(compressed_path, compression="zip")
tm.assert_frame_equal(uncompressed_df, compressed_df)
@td.skip_if_not_us_locale
@pytest.mark.single_cpu
def test_with_s3_url(compression, s3_public_bucket, s3so):
# Bucket created in tests/io/conftest.py
df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
with tm.ensure_clean() as path:
df.to_json(path, compression=compression)
with open(path, "rb") as f:
s3_public_bucket.put_object(Key="test-1", Body=f)
roundtripped_df = pd.read_json(
f"s3://{s3_public_bucket.name}/test-1",
compression=compression,
storage_options=s3so,
)
tm.assert_frame_equal(df, roundtripped_df)
def test_lines_with_compression(compression):
with tm.ensure_clean() as path:
df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
df.to_json(path, orient="records", lines=True, compression=compression)
roundtripped_df = pd.read_json(path, lines=True, compression=compression)
tm.assert_frame_equal(df, roundtripped_df)
def test_chunksize_with_compression(compression):
with tm.ensure_clean() as path:
df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
df.to_json(path, orient="records", lines=True, compression=compression)
with pd.read_json(
path, lines=True, chunksize=1, compression=compression
) as res:
roundtripped_df = pd.concat(res)
tm.assert_frame_equal(df, roundtripped_df)
def test_write_unsupported_compression_type():
df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
with tm.ensure_clean() as path:
msg = "Unrecognized compression type: unsupported"
with pytest.raises(ValueError, match=msg):
df.to_json(path, compression="unsupported")
def test_read_unsupported_compression_type():
with tm.ensure_clean() as path:
msg = "Unrecognized compression type: unsupported"
with pytest.raises(ValueError, match=msg):
pd.read_json(path, compression="unsupported")
@pytest.mark.parametrize(
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_json_compression(
compression_only, read_infer, to_infer, compression_to_extension, infer_string
):
with pd.option_context("future.infer_string", infer_string):
# see gh-15008
compression = compression_only
# We'll complete file extension subsequently.
filename = "test."
filename += compression_to_extension[compression]
df = pd.DataFrame({"A": [1]})
to_compression = "infer" if to_infer else compression
read_compression = "infer" if read_infer else compression
with tm.ensure_clean(filename) as path:
df.to_json(path, compression=to_compression)
result = pd.read_json(path, compression=read_compression)
tm.assert_frame_equal(result, df)
def test_to_json_compression_mode(compression):
# GH 39985 (read_json does not support user-provided binary files)
expected = pd.DataFrame({"A": [1]})
with BytesIO() as buffer:
expected.to_json(buffer, compression=compression)
# df = pd.read_json(buffer, compression=compression)
# tm.assert_frame_equal(expected, df)
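# Illustrative sketch (not part of the committed test file): because read_json
# cannot consume the binary buffer directly, a round-trip check would go through
# a real path instead, letting read_json undo the compression itself.
def roundtrip_via_path(frame, compression):
    with tm.ensure_clean() as path:
        frame.to_json(path, compression=compression)
        return pd.read_json(path, compression=compression)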

View File

@ -0,0 +1,21 @@
"""
Tests for the deprecated keyword arguments for `read_json`.
"""
from io import StringIO
import pandas as pd
import pandas._testing as tm
from pandas.io.json import read_json
def test_good_kwargs():
df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2])
with tm.assert_produces_warning(None):
data1 = StringIO(df.to_json(orient="split"))
tm.assert_frame_equal(df, read_json(data1, orient="split"))
data2 = StringIO(df.to_json(orient="columns"))
tm.assert_frame_equal(df, read_json(data2, orient="columns"))
data3 = StringIO(df.to_json(orient="index"))
tm.assert_frame_equal(df, read_json(data3, orient="index"))

View File

@ -0,0 +1,873 @@
"""Tests for Table Schema integration."""
from collections import OrderedDict
from io import StringIO
import json
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
PeriodDtype,
)
import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.json._table_schema import (
as_json_table_type,
build_table_schema,
convert_json_field_to_pandas_type,
convert_pandas_type_to_json_field,
set_default_names,
)
@pytest.fixture
def df_schema():
return DataFrame(
{
"A": [1, 2, 3, 4],
"B": ["a", "b", "c", "c"],
"C": pd.date_range("2016-01-01", freq="d", periods=4),
"D": pd.timedelta_range("1h", periods=4, freq="min"),
},
index=pd.Index(range(4), name="idx"),
)
@pytest.fixture
def df_table():
return DataFrame(
{
"A": [1, 2, 3, 4],
"B": ["a", "b", "c", "c"],
"C": pd.date_range("2016-01-01", freq="d", periods=4),
"D": pd.timedelta_range("1h", periods=4, freq="min"),
"E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
"F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
"G": [1.0, 2.0, 3, 4.0],
"H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
},
index=pd.Index(range(4), name="idx"),
)
class TestBuildSchema:
def test_build_table_schema(self, df_schema, using_infer_string):
result = build_table_schema(df_schema, version=False)
expected = {
"fields": [
{"name": "idx", "type": "integer"},
{"name": "A", "type": "integer"},
{"name": "B", "type": "string"},
{"name": "C", "type": "datetime"},
{"name": "D", "type": "duration"},
],
"primaryKey": ["idx"],
}
if using_infer_string:
expected["fields"][2] = {"name": "B", "type": "string", "extDtype": "str"}
assert result == expected
result = build_table_schema(df_schema)
assert "pandas_version" in result
def test_series(self):
s = pd.Series([1, 2, 3], name="foo")
result = build_table_schema(s, version=False)
expected = {
"fields": [
{"name": "index", "type": "integer"},
{"name": "foo", "type": "integer"},
],
"primaryKey": ["index"],
}
assert result == expected
result = build_table_schema(s)
assert "pandas_version" in result
def test_series_unnamed(self):
result = build_table_schema(pd.Series([1, 2, 3]), version=False)
expected = {
"fields": [
{"name": "index", "type": "integer"},
{"name": "values", "type": "integer"},
],
"primaryKey": ["index"],
}
assert result == expected
def test_multiindex(self, df_schema, using_infer_string):
df = df_schema
idx = pd.MultiIndex.from_product([("a", "b"), (1, 2)])
df.index = idx
result = build_table_schema(df, version=False)
expected = {
"fields": [
{"name": "level_0", "type": "string"},
{"name": "level_1", "type": "integer"},
{"name": "A", "type": "integer"},
{"name": "B", "type": "string"},
{"name": "C", "type": "datetime"},
{"name": "D", "type": "duration"},
],
"primaryKey": ["level_0", "level_1"],
}
if using_infer_string:
expected["fields"][0] = {
"name": "level_0",
"type": "string",
"extDtype": "str",
}
expected["fields"][3] = {"name": "B", "type": "string", "extDtype": "str"}
assert result == expected
df.index.names = ["idx0", None]
expected["fields"][0]["name"] = "idx0"
expected["primaryKey"] = ["idx0", "level_1"]
result = build_table_schema(df, version=False)
assert result == expected
class TestTableSchemaType:
@pytest.mark.parametrize("int_type", [int, np.int16, np.int32, np.int64])
def test_as_json_table_type_int_data(self, int_type):
int_data = [1, 2, 3]
assert as_json_table_type(np.array(int_data, dtype=int_type).dtype) == "integer"
@pytest.mark.parametrize("float_type", [float, np.float16, np.float32, np.float64])
def test_as_json_table_type_float_data(self, float_type):
float_data = [1.0, 2.0, 3.0]
assert (
as_json_table_type(np.array(float_data, dtype=float_type).dtype) == "number"
)
@pytest.mark.parametrize("bool_type", [bool, np.bool_])
def test_as_json_table_type_bool_data(self, bool_type):
bool_data = [True, False]
assert (
as_json_table_type(np.array(bool_data, dtype=bool_type).dtype) == "boolean"
)
@pytest.mark.parametrize(
"date_data",
[
pd.to_datetime(["2016"]),
pd.to_datetime(["2016"], utc=True),
pd.Series(pd.to_datetime(["2016"])),
pd.Series(pd.to_datetime(["2016"], utc=True)),
pd.period_range("2016", freq="Y", periods=3),
],
)
def test_as_json_table_type_date_data(self, date_data):
assert as_json_table_type(date_data.dtype) == "datetime"
@pytest.mark.parametrize(
"str_data",
[pd.Series(["a", "b"], dtype=object), pd.Index(["a", "b"], dtype=object)],
)
def test_as_json_table_type_string_data(self, str_data):
assert as_json_table_type(str_data.dtype) == "string"
@pytest.mark.parametrize(
"cat_data",
[
pd.Categorical(["a"]),
pd.Categorical([1]),
pd.Series(pd.Categorical([1])),
pd.CategoricalIndex([1]),
pd.Categorical([1]),
],
)
def test_as_json_table_type_categorical_data(self, cat_data):
assert as_json_table_type(cat_data.dtype) == "any"
# ------
# dtypes
# ------
@pytest.mark.parametrize("int_dtype", [int, np.int16, np.int32, np.int64])
def test_as_json_table_type_int_dtypes(self, int_dtype):
assert as_json_table_type(int_dtype) == "integer"
@pytest.mark.parametrize("float_dtype", [float, np.float16, np.float32, np.float64])
def test_as_json_table_type_float_dtypes(self, float_dtype):
assert as_json_table_type(float_dtype) == "number"
@pytest.mark.parametrize("bool_dtype", [bool, np.bool_])
def test_as_json_table_type_bool_dtypes(self, bool_dtype):
assert as_json_table_type(bool_dtype) == "boolean"
@pytest.mark.parametrize(
"date_dtype",
[
np.dtype("<M8[ns]"),
PeriodDtype("D"),
DatetimeTZDtype("ns", "US/Central"),
],
)
def test_as_json_table_type_date_dtypes(self, date_dtype):
# TODO: datetime.date? datetime.time?
assert as_json_table_type(date_dtype) == "datetime"
@pytest.mark.parametrize("td_dtype", [np.dtype("<m8[ns]")])
def test_as_json_table_type_timedelta_dtypes(self, td_dtype):
assert as_json_table_type(td_dtype) == "duration"
@pytest.mark.parametrize("str_dtype", [object]) # TODO(GH#14904) flesh out dtypes?
def test_as_json_table_type_string_dtypes(self, str_dtype):
assert as_json_table_type(str_dtype) == "string"
def test_as_json_table_type_categorical_dtypes(self):
assert as_json_table_type(pd.Categorical(["a"]).dtype) == "any"
assert as_json_table_type(CategoricalDtype()) == "any"
class TestTableOrient:
def test_build_series(self):
s = pd.Series([1, 2], name="a")
s.index.name = "id"
result = s.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result["schema"]
result["schema"].pop("pandas_version")
fields = [{"name": "id", "type": "integer"}, {"name": "a", "type": "integer"}]
schema = {"fields": fields, "primaryKey": ["id"]}
expected = OrderedDict(
[
("schema", schema),
(
"data",
[
OrderedDict([("id", 0), ("a", 1)]),
OrderedDict([("id", 1), ("a", 2)]),
],
),
]
)
assert result == expected
def test_read_json_from_to_json_results(self):
# GH32383
df = DataFrame(
{
"_id": {"row_0": 0},
"category": {"row_0": "Goods"},
"recommender_id": {"row_0": 3},
"recommender_name_jp": {"row_0": "浦田"},
"recommender_name_en": {"row_0": "Urata"},
"name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"},
"name_en": {"row_0": "Hakata Dolls Matsuo"},
}
)
result1 = pd.read_json(StringIO(df.to_json()))
result2 = DataFrame.from_dict(json.loads(df.to_json()))
tm.assert_frame_equal(result1, df)
tm.assert_frame_equal(result2, df)
def test_to_json(self, df_table, using_infer_string):
df = df_table
df.index.name = "idx"
result = df.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result["schema"]
result["schema"].pop("pandas_version")
fields = [
{"name": "idx", "type": "integer"},
{"name": "A", "type": "integer"},
{"name": "B", "type": "string"},
{"name": "C", "type": "datetime"},
{"name": "D", "type": "duration"},
{
"constraints": {"enum": ["a", "b", "c"]},
"name": "E",
"ordered": False,
"type": "any",
},
{
"constraints": {"enum": ["a", "b", "c"]},
"name": "F",
"ordered": True,
"type": "any",
},
{"name": "G", "type": "number"},
{"name": "H", "type": "datetime", "tz": "US/Central"},
]
if using_infer_string:
fields[2] = {"name": "B", "type": "string", "extDtype": "str"}
schema = {"fields": fields, "primaryKey": ["idx"]}
data = [
OrderedDict(
[
("idx", 0),
("A", 1),
("B", "a"),
("C", "2016-01-01T00:00:00.000"),
("D", "P0DT1H0M0S"),
("E", "a"),
("F", "a"),
("G", 1.0),
("H", "2016-01-01T06:00:00.000Z"),
]
),
OrderedDict(
[
("idx", 1),
("A", 2),
("B", "b"),
("C", "2016-01-02T00:00:00.000"),
("D", "P0DT1H1M0S"),
("E", "b"),
("F", "b"),
("G", 2.0),
("H", "2016-01-02T06:00:00.000Z"),
]
),
OrderedDict(
[
("idx", 2),
("A", 3),
("B", "c"),
("C", "2016-01-03T00:00:00.000"),
("D", "P0DT1H2M0S"),
("E", "c"),
("F", "c"),
("G", 3.0),
("H", "2016-01-03T06:00:00.000Z"),
]
),
OrderedDict(
[
("idx", 3),
("A", 4),
("B", "c"),
("C", "2016-01-04T00:00:00.000"),
("D", "P0DT1H3M0S"),
("E", "c"),
("F", "c"),
("G", 4.0),
("H", "2016-01-04T06:00:00.000Z"),
]
),
]
expected = OrderedDict([("schema", schema), ("data", data)])
assert result == expected
def test_to_json_float_index(self):
data = pd.Series(1, index=[1.0, 2.0])
result = data.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
result["schema"].pop("pandas_version")
expected = OrderedDict(
[
(
"schema",
{
"fields": [
{"name": "index", "type": "number"},
{"name": "values", "type": "integer"},
],
"primaryKey": ["index"],
},
),
(
"data",
[
OrderedDict([("index", 1.0), ("values", 1)]),
OrderedDict([("index", 2.0), ("values", 1)]),
],
),
]
)
assert result == expected
def test_to_json_period_index(self):
idx = pd.period_range("2016", freq="Q-JAN", periods=2)
data = pd.Series(1, idx)
result = data.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
result["schema"].pop("pandas_version")
fields = [
{"freq": "QE-JAN", "name": "index", "type": "datetime"},
{"name": "values", "type": "integer"},
]
schema = {"fields": fields, "primaryKey": ["index"]}
data = [
OrderedDict([("index", "2015-11-01T00:00:00.000"), ("values", 1)]),
OrderedDict([("index", "2016-02-01T00:00:00.000"), ("values", 1)]),
]
expected = OrderedDict([("schema", schema), ("data", data)])
assert result == expected
def test_to_json_categorical_index(self):
data = pd.Series(1, pd.CategoricalIndex(["a", "b"]))
result = data.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
result["schema"].pop("pandas_version")
expected = OrderedDict(
[
(
"schema",
{
"fields": [
{
"name": "index",
"type": "any",
"constraints": {"enum": ["a", "b"]},
"ordered": False,
},
{"name": "values", "type": "integer"},
],
"primaryKey": ["index"],
},
),
(
"data",
[
OrderedDict([("index", "a"), ("values", 1)]),
OrderedDict([("index", "b"), ("values", 1)]),
],
),
]
)
assert result == expected
def test_date_format_raises(self, df_table):
msg = (
"Trying to write with `orient='table'` and `date_format='epoch'`. Table "
"Schema requires dates to be formatted with `date_format='iso'`"
)
with pytest.raises(ValueError, match=msg):
df_table.to_json(orient="table", date_format="epoch")
# others work
df_table.to_json(orient="table", date_format="iso")
df_table.to_json(orient="table")
def test_convert_pandas_type_to_json_field_int(self, index_or_series):
kind = index_or_series
data = [1, 2, 3]
result = convert_pandas_type_to_json_field(kind(data, name="name"))
expected = {"name": "name", "type": "integer"}
assert result == expected
def test_convert_pandas_type_to_json_field_float(self, index_or_series):
kind = index_or_series
data = [1.0, 2.0, 3.0]
result = convert_pandas_type_to_json_field(kind(data, name="name"))
expected = {"name": "name", "type": "number"}
assert result == expected
@pytest.mark.parametrize(
"dt_args,extra_exp", [({}, {}), ({"utc": True}, {"tz": "UTC"})]
)
@pytest.mark.parametrize("wrapper", [None, pd.Series])
def test_convert_pandas_type_to_json_field_datetime(
self, dt_args, extra_exp, wrapper
):
data = [1.0, 2.0, 3.0]
data = pd.to_datetime(data, **dt_args)
if wrapper is pd.Series:
data = pd.Series(data, name="values")
result = convert_pandas_type_to_json_field(data)
expected = {"name": "values", "type": "datetime"}
expected.update(extra_exp)
assert result == expected
def test_convert_pandas_type_to_json_period_range(self):
arr = pd.period_range("2016", freq="Y-DEC", periods=4)
result = convert_pandas_type_to_json_field(arr)
expected = {"name": "values", "type": "datetime", "freq": "YE-DEC"}
assert result == expected
@pytest.mark.parametrize("kind", [pd.Categorical, pd.CategoricalIndex])
@pytest.mark.parametrize("ordered", [True, False])
def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
data = ["a", "b", "c"]
if kind is pd.Categorical:
arr = pd.Series(kind(data, ordered=ordered), name="cats")
elif kind is pd.CategoricalIndex:
arr = kind(data, ordered=ordered, name="cats")
result = convert_pandas_type_to_json_field(arr)
expected = {
"name": "cats",
"type": "any",
"constraints": {"enum": data},
"ordered": ordered,
}
assert result == expected
@pytest.mark.parametrize(
"inp,exp",
[
({"type": "integer"}, "int64"),
({"type": "number"}, "float64"),
({"type": "boolean"}, "bool"),
({"type": "duration"}, "timedelta64"),
({"type": "datetime"}, "datetime64[ns]"),
({"type": "datetime", "tz": "US/Hawaii"}, "datetime64[ns, US/Hawaii]"),
({"type": "any"}, "object"),
(
{
"type": "any",
"constraints": {"enum": ["a", "b", "c"]},
"ordered": False,
},
CategoricalDtype(categories=["a", "b", "c"], ordered=False),
),
(
{
"type": "any",
"constraints": {"enum": ["a", "b", "c"]},
"ordered": True,
},
CategoricalDtype(categories=["a", "b", "c"], ordered=True),
),
({"type": "string"}, None),
],
)
def test_convert_json_field_to_pandas_type(self, inp, exp):
field = {"name": "foo"}
field.update(inp)
assert convert_json_field_to_pandas_type(field) == exp
@pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
def test_convert_json_field_to_pandas_type_raises(self, inp):
field = {"type": inp}
with pytest.raises(
ValueError, match=f"Unsupported or invalid field type: {inp}"
):
convert_json_field_to_pandas_type(field)
def test_categorical(self):
s = pd.Series(pd.Categorical(["a", "b", "a"]))
s.index.name = "idx"
result = s.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
result["schema"].pop("pandas_version")
fields = [
{"name": "idx", "type": "integer"},
{
"constraints": {"enum": ["a", "b"]},
"name": "values",
"ordered": False,
"type": "any",
},
]
expected = OrderedDict(
[
("schema", {"fields": fields, "primaryKey": ["idx"]}),
(
"data",
[
OrderedDict([("idx", 0), ("values", "a")]),
OrderedDict([("idx", 1), ("values", "b")]),
OrderedDict([("idx", 2), ("values", "a")]),
],
),
]
)
assert result == expected
@pytest.mark.parametrize(
"idx,nm,prop",
[
(pd.Index([1]), "index", "name"),
(pd.Index([1], name="myname"), "myname", "name"),
(
pd.MultiIndex.from_product([("a", "b"), ("c", "d")]),
["level_0", "level_1"],
"names",
),
(
pd.MultiIndex.from_product(
[("a", "b"), ("c", "d")], names=["n1", "n2"]
),
["n1", "n2"],
"names",
),
(
pd.MultiIndex.from_product(
[("a", "b"), ("c", "d")], names=["n1", None]
),
["n1", "level_1"],
"names",
),
],
)
def test_set_names_unset(self, idx, nm, prop):
data = pd.Series(1, idx)
result = set_default_names(data)
assert getattr(result.index, prop) == nm
@pytest.mark.parametrize(
"idx",
[
pd.Index([], name="index"),
pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("level_0", "level_1")),
pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("foo", "level_1")),
],
)
def test_warns_non_roundtrippable_names(self, idx):
# GH 19130
df = DataFrame(index=idx)
df.index.name = "index"
with tm.assert_produces_warning():
set_default_names(df)
def test_timestamp_in_columns(self):
df = DataFrame(
[[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")]
)
result = df.to_json(orient="table")
js = json.loads(result)
assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000"
assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"
@pytest.mark.parametrize(
"case",
[
pd.Series([1], index=pd.Index([1], name="a"), name="a"),
DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
DataFrame(
{"A": [1]},
index=pd.MultiIndex.from_arrays([["a"], [1]], names=["A", "a"]),
),
],
)
def test_overlapping_names(self, case):
with pytest.raises(ValueError, match="Overlapping"):
case.to_json(orient="table")
def test_mi_falsey_name(self):
# GH 16203
df = DataFrame(
np.random.default_rng(2).standard_normal((4, 4)),
index=pd.MultiIndex.from_product([("A", "B"), ("a", "b")]),
)
result = [x["name"] for x in build_table_schema(df)["fields"]]
assert result == ["level_0", "level_1", 0, 1, 2, 3]
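# Round-trip tests: output written with orient="table" should be read back
# unchanged by pd.read_json(..., orient="table").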
class TestTableOrientReader:
@pytest.mark.parametrize(
"index_nm",
[None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"],
)
@pytest.mark.parametrize(
"vals",
[
{"ints": [1, 2, 3, 4]},
{"objects": ["a", "b", "c", "d"]},
{"objects": ["1", "2", "3", "4"]},
{"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
{"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
{
"ordered_cats": pd.Series(
pd.Categorical(["a", "b", "c", "c"], ordered=True)
)
},
{"floats": [1.0, 2.0, 3.0, 4.0]},
{"floats": [1.1, 2.2, 3.3, 4.4]},
{"bools": [True, False, False, True]},
{
"timezones": pd.date_range(
"2016-01-01", freq="d", periods=4, tz="US/Central"
) # added in GH 35973
},
],
)
def test_read_json_table_orient(self, index_nm, vals, recwarn):
df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
out = df.to_json(orient="table")
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
@pytest.mark.parametrize("index_nm", [None, "idx", "index"])
@pytest.mark.parametrize(
"vals",
[{"timedeltas": pd.timedelta_range("1h", periods=4, freq="min")}],
)
def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
out = df.to_json(orient="table")
with pytest.raises(NotImplementedError, match="can not yet read "):
pd.read_json(out, orient="table")
@pytest.mark.parametrize(
"index_nm",
[None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"],
)
@pytest.mark.parametrize(
"vals",
[
{"ints": [1, 2, 3, 4]},
{"objects": ["a", "b", "c", "d"]},
{"objects": ["1", "2", "3", "4"]},
{"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
{"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
{
"ordered_cats": pd.Series(
pd.Categorical(["a", "b", "c", "c"], ordered=True)
)
},
{"floats": [1.0, 2.0, 3.0, 4.0]},
{"floats": [1.1, 2.2, 3.3, 4.4]},
{"bools": [True, False, False, True]},
{
"timezones": pd.date_range(
"2016-01-01", freq="d", periods=4, tz="US/Central"
) # added in GH 35973
},
],
)
def test_read_json_table_period_orient(self, index_nm, vals, recwarn):
df = DataFrame(
vals,
index=pd.Index(
(pd.Period(f"2022Q{q}") for q in range(1, 5)), name=index_nm
),
)
out = df.to_json(orient="table")
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
@pytest.mark.parametrize(
"idx",
[
pd.Index(range(4)),
pd.date_range(
"2020-08-30",
freq="d",
periods=4,
)._with_freq(None),
pd.date_range(
"2020-08-30", freq="d", periods=4, tz="US/Central"
)._with_freq(None),
pd.MultiIndex.from_product(
[
pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),
["x", "y"],
],
),
],
)
@pytest.mark.parametrize(
"vals",
[
{"floats": [1.1, 2.2, 3.3, 4.4]},
{"dates": pd.date_range("2020-08-30", freq="d", periods=4)},
{
"timezones": pd.date_range(
"2020-08-30", freq="d", periods=4, tz="Europe/London"
)
},
],
)
def test_read_json_table_timezones_orient(self, idx, vals, recwarn):
# GH 35973
df = DataFrame(vals, index=idx)
out = df.to_json(orient="table")
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
def test_comprehensive(self):
df = DataFrame(
{
"A": [1, 2, 3, 4],
"B": ["a", "b", "c", "c"],
"C": pd.date_range("2016-01-01", freq="d", periods=4),
# 'D': pd.timedelta_range('1h', periods=4, freq='min'),
"E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
"F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
"G": [1.1, 2.2, 3.3, 4.4],
"H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
"I": [True, False, False, True],
},
index=pd.Index(range(4), name="idx"),
)
out = StringIO(df.to_json(orient="table"))
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
@pytest.mark.parametrize(
"index_names",
[[None, None], ["foo", "bar"], ["foo", None], [None, "foo"], ["index", "foo"]],
)
def test_multiindex(self, index_names):
# GH 18912
df = DataFrame(
[["Arr", "alpha", [1, 2, 3, 4]], ["Bee", "Beta", [10, 20, 30, 40]]],
index=[["A", "B"], ["Null", "Eins"]],
columns=["Aussprache", "Griechisch", "Args"],
)
df.index.names = index_names
out = StringIO(df.to_json(orient="table"))
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)
def test_empty_frame_roundtrip(self):
# GH 21287
df = DataFrame(columns=["a", "b", "c"])
expected = df.copy()
out = StringIO(df.to_json(orient="table"))
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(expected, result)
def test_read_json_orient_table_old_schema_version(self):
df_json = """
{
"schema":{
"fields":[
{"name":"index","type":"integer"},
{"name":"a","type":"string"}
],
"primaryKey":["index"],
"pandas_version":"0.20.0"
},
"data":[
{"index":0,"a":1},
{"index":1,"a":2.0},
{"index":2,"a":"s"}
]
}
"""
expected = DataFrame({"a": [1, 2.0, "s"]})
result = pd.read_json(StringIO(df_json), orient="table")
tm.assert_frame_equal(expected, result)
@pytest.mark.parametrize("freq", ["M", "2M", "Q", "2Q", "Y", "2Y"])
def test_read_json_table_orient_period_depr_freq(self, freq, recwarn):
# GH#9586
df = DataFrame(
{"ints": [1, 2]},
index=pd.PeriodIndex(["2020-01", "2021-06"], freq=freq),
)
out = df.to_json(orient="table")
result = pd.read_json(out, orient="table")
tm.assert_frame_equal(df, result)

View File

@ -0,0 +1,317 @@
"""Tests for ExtensionDtype Table Schema integration."""
from collections import OrderedDict
import datetime as dt
import decimal
from io import StringIO
import json
import pytest
from pandas import (
NA,
DataFrame,
Index,
array,
read_json,
)
import pandas._testing as tm
from pandas.core.arrays.integer import Int64Dtype
from pandas.core.arrays.string_ import StringDtype
from pandas.core.series import Series
from pandas.tests.extension.date import (
DateArray,
DateDtype,
)
from pandas.tests.extension.decimal.array import (
DecimalArray,
DecimalDtype,
)
from pandas.io.json._table_schema import (
as_json_table_type,
build_table_schema,
)
class TestBuildSchema:
def test_build_table_schema(self):
df = DataFrame(
{
"A": DateArray([dt.date(2021, 10, 10)]),
"B": DecimalArray([decimal.Decimal(10)]),
"C": array(["pandas"], dtype="string"),
"D": array([10], dtype="Int64"),
}
)
result = build_table_schema(df, version=False)
expected = {
"fields": [
{"name": "index", "type": "integer"},
{"name": "A", "type": "any", "extDtype": "DateDtype"},
{"name": "B", "type": "number", "extDtype": "decimal"},
{"name": "C", "type": "string", "extDtype": "string"},
{"name": "D", "type": "integer", "extDtype": "Int64"},
],
"primaryKey": ["index"],
}
assert result == expected
result = build_table_schema(df)
assert "pandas_version" in result
class TestTableSchemaType:
@pytest.mark.parametrize(
"date_data",
[
DateArray([dt.date(2021, 10, 10)]),
DateArray(dt.date(2021, 10, 10)),
Series(DateArray(dt.date(2021, 10, 10))),
],
)
def test_as_json_table_type_ext_date_array_dtype(self, date_data):
assert as_json_table_type(date_data.dtype) == "any"
def test_as_json_table_type_ext_date_dtype(self):
assert as_json_table_type(DateDtype()) == "any"
@pytest.mark.parametrize(
"decimal_data",
[
DecimalArray([decimal.Decimal(10)]),
Series(DecimalArray([decimal.Decimal(10)])),
],
)
def test_as_json_table_type_ext_decimal_array_dtype(self, decimal_data):
assert as_json_table_type(decimal_data.dtype) == "number"
def test_as_json_table_type_ext_decimal_dtype(self):
assert as_json_table_type(DecimalDtype()) == "number"
@pytest.mark.parametrize(
"string_data",
[
array(["pandas"], dtype="string"),
Series(array(["pandas"], dtype="string")),
],
)
def test_as_json_table_type_ext_string_array_dtype(self, string_data):
assert as_json_table_type(string_data.dtype) == "string"
def test_as_json_table_type_ext_string_dtype(self):
assert as_json_table_type(StringDtype()) == "string"
@pytest.mark.parametrize(
"integer_data",
[
array([10], dtype="Int64"),
Series(array([10], dtype="Int64")),
],
)
def test_as_json_table_type_ext_integer_array_dtype(self, integer_data):
assert as_json_table_type(integer_data.dtype) == "integer"
def test_as_json_table_type_ext_integer_dtype(self):
assert as_json_table_type(Int64Dtype()) == "integer"
class TestTableOrient:
@pytest.fixture
def da(self):
return DateArray([dt.date(2021, 10, 10)])
@pytest.fixture
def dc(self):
return DecimalArray([decimal.Decimal(10)])
@pytest.fixture
def sa(self):
return array(["pandas"], dtype="string")
@pytest.fixture
def ia(self):
return array([10], dtype="Int64")
@pytest.fixture
def df(self, da, dc, sa, ia):
return DataFrame(
{
"A": da,
"B": dc,
"C": sa,
"D": ia,
}
)
def test_build_date_series(self, da):
s = Series(da, name="a")
s.index.name = "id"
result = s.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result["schema"]
result["schema"].pop("pandas_version")
fields = [
{"name": "id", "type": "integer"},
{"name": "a", "type": "any", "extDtype": "DateDtype"},
]
schema = {"fields": fields, "primaryKey": ["id"]}
expected = OrderedDict(
[
("schema", schema),
("data", [OrderedDict([("id", 0), ("a", "2021-10-10T00:00:00.000")])]),
]
)
assert result == expected
def test_build_decimal_series(self, dc):
s = Series(dc, name="a")
s.index.name = "id"
result = s.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result["schema"]
result["schema"].pop("pandas_version")
fields = [
{"name": "id", "type": "integer"},
{"name": "a", "type": "number", "extDtype": "decimal"},
]
schema = {"fields": fields, "primaryKey": ["id"]}
expected = OrderedDict(
[
("schema", schema),
("data", [OrderedDict([("id", 0), ("a", 10.0)])]),
]
)
assert result == expected
def test_build_string_series(self, sa):
s = Series(sa, name="a")
s.index.name = "id"
result = s.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result["schema"]
result["schema"].pop("pandas_version")
fields = [
{"name": "id", "type": "integer"},
{"name": "a", "type": "string", "extDtype": "string"},
]
schema = {"fields": fields, "primaryKey": ["id"]}
expected = OrderedDict(
[
("schema", schema),
("data", [OrderedDict([("id", 0), ("a", "pandas")])]),
]
)
assert result == expected
def test_build_int64_series(self, ia):
s = Series(ia, name="a")
s.index.name = "id"
result = s.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result["schema"]
result["schema"].pop("pandas_version")
fields = [
{"name": "id", "type": "integer"},
{"name": "a", "type": "integer", "extDtype": "Int64"},
]
schema = {"fields": fields, "primaryKey": ["id"]}
expected = OrderedDict(
[
("schema", schema),
("data", [OrderedDict([("id", 0), ("a", 10)])]),
]
)
assert result == expected
def test_to_json(self, df):
df = df.copy()
df.index.name = "idx"
result = df.to_json(orient="table", date_format="iso")
result = json.loads(result, object_pairs_hook=OrderedDict)
assert "pandas_version" in result["schema"]
result["schema"].pop("pandas_version")
fields = [
OrderedDict({"name": "idx", "type": "integer"}),
OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}),
OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}),
OrderedDict({"name": "C", "type": "string", "extDtype": "string"}),
OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}),
]
schema = OrderedDict({"fields": fields, "primaryKey": ["idx"]})
data = [
OrderedDict(
[
("idx", 0),
("A", "2021-10-10T00:00:00.000"),
("B", 10.0),
("C", "pandas"),
("D", 10),
]
)
]
expected = OrderedDict([("schema", schema), ("data", data)])
assert result == expected
def test_json_ext_dtype_reading_roundtrip(self):
# GH#40255
df = DataFrame(
{
"a": Series([2, NA], dtype="Int64"),
"b": Series([1.5, NA], dtype="Float64"),
"c": Series([True, NA], dtype="boolean"),
},
index=Index([1, NA], dtype="Int64"),
)
expected = df.copy()
data_json = df.to_json(orient="table", indent=4)
result = read_json(StringIO(data_json), orient="table")
tm.assert_frame_equal(result, expected)
def test_json_ext_dtype_reading(self):
# GH#40255
data_json = """{
"schema":{
"fields":[
{
"name":"a",
"type":"integer",
"extDtype":"Int64"
}
],
},
"data":[
{
"a":2
},
{
"a":null
}
]
}"""
result = read_json(StringIO(data_json), orient="table")
expected = DataFrame({"a": Series([2, NA], dtype="Int64")})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,907 @@
import json
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
Series,
json_normalize,
)
import pandas._testing as tm
from pandas.io.json._normalize import nested_to_record
@pytest.fixture
def deep_nested():
# deeply nested data
return [
{
"country": "USA",
"states": [
{
"name": "California",
"cities": [
{"name": "San Francisco", "pop": 12345},
{"name": "Los Angeles", "pop": 12346},
],
},
{
"name": "Ohio",
"cities": [
{"name": "Columbus", "pop": 1234},
{"name": "Cleveland", "pop": 1236},
],
},
],
},
{
"country": "Germany",
"states": [
{"name": "Bayern", "cities": [{"name": "Munich", "pop": 12347}]},
{
"name": "Nordrhein-Westfalen",
"cities": [
{"name": "Duesseldorf", "pop": 1238},
{"name": "Koeln", "pop": 1239},
],
},
],
},
]
@pytest.fixture
def state_data():
return [
{
"counties": [
{"name": "Dade", "population": 12345},
{"name": "Broward", "population": 40000},
{"name": "Palm Beach", "population": 60000},
],
"info": {"governor": "Rick Scott"},
"shortname": "FL",
"state": "Florida",
},
{
"counties": [
{"name": "Summit", "population": 1234},
{"name": "Cuyahoga", "population": 1337},
],
"info": {"governor": "John Kasich"},
"shortname": "OH",
"state": "Ohio",
},
]
@pytest.fixture
def author_missing_data():
return [
{"info": None},
{
"info": {"created_at": "11/08/1993", "last_updated": "26/05/2012"},
"author_name": {"first": "Jane", "last_name": "Doe"},
},
]
@pytest.fixture
def missing_metadata():
return [
{
"name": "Alice",
"addresses": [
{
"number": 9562,
"street": "Morris St.",
"city": "Massillon",
"state": "OH",
"zip": 44646,
}
],
"previous_residences": {"cities": [{"city_name": "Foo York City"}]},
},
{
"addresses": [
{
"number": 8449,
"street": "Spring St.",
"city": "Elizabethton",
"state": "TN",
"zip": 37643,
}
],
"previous_residences": {"cities": [{"city_name": "Barmingham"}]},
},
]
@pytest.fixture
def max_level_test_input_data():
"""
input data to test json_normalize with max_level param
"""
return [
{
"CreatedBy": {"Name": "User001"},
"Lookup": {
"TextField": "Some text",
"UserField": {"Id": "ID001", "Name": "Name001"},
},
"Image": {"a": "b"},
}
]
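# json_normalize flattens nested JSON-like records into columns; record_path
# selects the list of records to expand and meta carries sibling fields along.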
class TestJSONNormalize:
def test_simple_records(self):
recs = [
{"a": 1, "b": 2, "c": 3},
{"a": 4, "b": 5, "c": 6},
{"a": 7, "b": 8, "c": 9},
{"a": 10, "b": 11, "c": 12},
]
result = json_normalize(recs)
expected = DataFrame(recs)
tm.assert_frame_equal(result, expected)
def test_simple_normalize(self, state_data):
result = json_normalize(state_data[0], "counties")
expected = DataFrame(state_data[0]["counties"])
tm.assert_frame_equal(result, expected)
result = json_normalize(state_data, "counties")
expected = []
for rec in state_data:
expected.extend(rec["counties"])
expected = DataFrame(expected)
tm.assert_frame_equal(result, expected)
result = json_normalize(state_data, "counties", meta="state")
expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2])
tm.assert_frame_equal(result, expected)
def test_fields_list_type_normalize(self):
parse_metadata_fields_list_type = [
{"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}}
]
result = json_normalize(
parse_metadata_fields_list_type,
record_path=["values"],
meta=[["metadata", "listdata"]],
)
expected = DataFrame(
{0: [1, 2, 3], "metadata.listdata": [[1, 2], [1, 2], [1, 2]]}
)
tm.assert_frame_equal(result, expected)
def test_empty_array(self):
result = json_normalize([])
expected = DataFrame()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data, record_path, exception_type",
[
([{"a": 0}, {"a": 1}], None, None),
({"a": [{"a": 0}, {"a": 1}]}, "a", None),
('{"a": [{"a": 0}, {"a": 1}]}', None, NotImplementedError),
(None, None, NotImplementedError),
],
)
def test_accepted_input(self, data, record_path, exception_type):
if exception_type is not None:
with pytest.raises(exception_type, match=""):
json_normalize(data, record_path=record_path)
else:
result = json_normalize(data, record_path=record_path)
expected = DataFrame([0, 1], columns=["a"])
tm.assert_frame_equal(result, expected)
def test_simple_normalize_with_separator(self, deep_nested):
# GH 14883
result = json_normalize({"A": {"A": 1, "B": 2}})
expected = DataFrame([[1, 2]], columns=["A.A", "A.B"])
tm.assert_frame_equal(result.reindex_like(expected), expected)
result = json_normalize({"A": {"A": 1, "B": 2}}, sep="_")
expected = DataFrame([[1, 2]], columns=["A_A", "A_B"])
tm.assert_frame_equal(result.reindex_like(expected), expected)
result = json_normalize({"A": {"A": 1, "B": 2}}, sep="\u03c3")
expected = DataFrame([[1, 2]], columns=["A\u03c3A", "A\u03c3B"])
tm.assert_frame_equal(result.reindex_like(expected), expected)
result = json_normalize(
deep_nested,
["states", "cities"],
meta=["country", ["states", "name"]],
sep="_",
)
expected = Index(["name", "pop", "country", "states_name"]).sort_values()
assert result.columns.sort_values().equals(expected)
def test_normalize_with_multichar_separator(self):
# GH #43831
data = {"a": [1, 2], "b": {"b_1": 2, "b_2": (3, 4)}}
result = json_normalize(data, sep="__")
expected = DataFrame([[[1, 2], 2, (3, 4)]], columns=["a", "b__b_1", "b__b_2"])
tm.assert_frame_equal(result, expected)
def test_value_array_record_prefix(self):
# GH 21536
result = json_normalize({"A": [1, 2]}, "A", record_prefix="Prefix.")
expected = DataFrame([[1], [2]], columns=["Prefix.0"])
tm.assert_frame_equal(result, expected)
def test_nested_object_record_path(self):
# GH 22706
data = {
"state": "Florida",
"info": {
"governor": "Rick Scott",
"counties": [
{"name": "Dade", "population": 12345},
{"name": "Broward", "population": 40000},
{"name": "Palm Beach", "population": 60000},
],
},
}
result = json_normalize(data, record_path=["info", "counties"])
expected = DataFrame(
[["Dade", 12345], ["Broward", 40000], ["Palm Beach", 60000]],
columns=["name", "population"],
)
tm.assert_frame_equal(result, expected)
def test_more_deeply_nested(self, deep_nested):
result = json_normalize(
deep_nested, ["states", "cities"], meta=["country", ["states", "name"]]
)
ex_data = {
"country": ["USA"] * 4 + ["Germany"] * 3,
"states.name": [
"California",
"California",
"Ohio",
"Ohio",
"Bayern",
"Nordrhein-Westfalen",
"Nordrhein-Westfalen",
],
"name": [
"San Francisco",
"Los Angeles",
"Columbus",
"Cleveland",
"Munich",
"Duesseldorf",
"Koeln",
],
"pop": [12345, 12346, 1234, 1236, 12347, 1238, 1239],
}
expected = DataFrame(ex_data, columns=result.columns)
tm.assert_frame_equal(result, expected)
def test_shallow_nested(self):
data = [
{
"state": "Florida",
"shortname": "FL",
"info": {"governor": "Rick Scott"},
"counties": [
{"name": "Dade", "population": 12345},
{"name": "Broward", "population": 40000},
{"name": "Palm Beach", "population": 60000},
],
},
{
"state": "Ohio",
"shortname": "OH",
"info": {"governor": "John Kasich"},
"counties": [
{"name": "Summit", "population": 1234},
{"name": "Cuyahoga", "population": 1337},
],
},
]
result = json_normalize(
data, "counties", ["state", "shortname", ["info", "governor"]]
)
ex_data = {
"name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"],
"state": ["Florida"] * 3 + ["Ohio"] * 2,
"shortname": ["FL", "FL", "FL", "OH", "OH"],
"info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2,
"population": [12345, 40000, 60000, 1234, 1337],
}
expected = DataFrame(ex_data, columns=result.columns)
tm.assert_frame_equal(result, expected)
def test_nested_meta_path_with_nested_record_path(self, state_data):
# GH 27220
result = json_normalize(
data=state_data,
record_path=["counties"],
meta=["state", "shortname", ["info", "governor"]],
errors="ignore",
)
ex_data = {
"name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"],
"population": [12345, 40000, 60000, 1234, 1337],
"state": ["Florida"] * 3 + ["Ohio"] * 2,
"shortname": ["FL"] * 3 + ["OH"] * 2,
"info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2,
}
expected = DataFrame(ex_data)
tm.assert_frame_equal(result, expected)
def test_meta_name_conflict(self):
data = [
{
"foo": "hello",
"bar": "there",
"data": [
{"foo": "something", "bar": "else"},
{"foo": "something2", "bar": "else2"},
],
}
]
msg = r"Conflicting metadata name (foo|bar), need distinguishing prefix"
with pytest.raises(ValueError, match=msg):
json_normalize(data, "data", meta=["foo", "bar"])
result = json_normalize(data, "data", meta=["foo", "bar"], meta_prefix="meta")
for val in ["metafoo", "metabar", "foo", "bar"]:
assert val in result
def test_meta_parameter_not_modified(self):
# GH 18610
data = [
{
"foo": "hello",
"bar": "there",
"data": [
{"foo": "something", "bar": "else"},
{"foo": "something2", "bar": "else2"},
],
}
]
COLUMNS = ["foo", "bar"]
result = json_normalize(data, "data", meta=COLUMNS, meta_prefix="meta")
assert COLUMNS == ["foo", "bar"]
for val in ["metafoo", "metabar", "foo", "bar"]:
assert val in result
def test_record_prefix(self, state_data):
result = json_normalize(state_data[0], "counties")
expected = DataFrame(state_data[0]["counties"])
tm.assert_frame_equal(result, expected)
result = json_normalize(
state_data, "counties", meta="state", record_prefix="county_"
)
expected = []
for rec in state_data:
expected.extend(rec["counties"])
expected = DataFrame(expected)
expected = expected.rename(columns=lambda x: "county_" + x)
expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2])
tm.assert_frame_equal(result, expected)
def test_non_ascii_key(self):
testjson = (
b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
).decode("utf8")
testdata = {
b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1],
"sub.A": [1, 3],
"sub.B": [2, 4],
}
expected = DataFrame(testdata)
result = json_normalize(json.loads(testjson))
tm.assert_frame_equal(result, expected)
def test_missing_field(self, author_missing_data):
# GH20030:
result = json_normalize(author_missing_data)
ex_data = [
{
"info": np.nan,
"info.created_at": np.nan,
"info.last_updated": np.nan,
"author_name.first": np.nan,
"author_name.last_name": np.nan,
},
{
"info": None,
"info.created_at": "11/08/1993",
"info.last_updated": "26/05/2012",
"author_name.first": "Jane",
"author_name.last_name": "Doe",
},
]
expected = DataFrame(ex_data)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"max_level,expected",
[
(
0,
[
{
"TextField": "Some text",
"UserField": {"Id": "ID001", "Name": "Name001"},
"CreatedBy": {"Name": "User001"},
"Image": {"a": "b"},
},
{
"TextField": "Some text",
"UserField": {"Id": "ID001", "Name": "Name001"},
"CreatedBy": {"Name": "User001"},
"Image": {"a": "b"},
},
],
),
(
1,
[
{
"TextField": "Some text",
"UserField.Id": "ID001",
"UserField.Name": "Name001",
"CreatedBy": {"Name": "User001"},
"Image": {"a": "b"},
},
{
"TextField": "Some text",
"UserField.Id": "ID001",
"UserField.Name": "Name001",
"CreatedBy": {"Name": "User001"},
"Image": {"a": "b"},
},
],
),
],
)
def test_max_level_with_records_path(self, max_level, expected):
# GH23843: Enhanced JSON normalize
test_input = [
{
"CreatedBy": {"Name": "User001"},
"Lookup": [
{
"TextField": "Some text",
"UserField": {"Id": "ID001", "Name": "Name001"},
},
{
"TextField": "Some text",
"UserField": {"Id": "ID001", "Name": "Name001"},
},
],
"Image": {"a": "b"},
"tags": [
{"foo": "something", "bar": "else"},
{"foo": "something2", "bar": "else2"},
],
}
]
result = json_normalize(
test_input,
record_path=["Lookup"],
meta=[["CreatedBy"], ["Image"]],
max_level=max_level,
)
expected_df = DataFrame(data=expected, columns=result.columns.values)
tm.assert_equal(expected_df, result)
def test_nested_flattening_consistent(self):
# see gh-21537
df1 = json_normalize([{"A": {"B": 1}}])
df2 = json_normalize({"dummy": [{"A": {"B": 1}}]}, "dummy")
# They should be the same.
tm.assert_frame_equal(df1, df2)
def test_nonetype_record_path(self, nulls_fixture):
# see gh-30148
# should not raise TypeError
result = json_normalize(
[
{"state": "Texas", "info": nulls_fixture},
{"state": "Florida", "info": [{"i": 2}]},
],
record_path=["info"],
)
expected = DataFrame({"i": 2}, index=[0])
tm.assert_equal(result, expected)
@pytest.mark.parametrize("value", ["false", "true", "{}", "1", '"text"'])
def test_non_list_record_path_errors(self, value):
# see gh-30148, GH 26284
parsed_value = json.loads(value)
test_input = {"state": "Texas", "info": parsed_value}
test_path = "info"
msg = (
f"{test_input} has non list value {parsed_value} for path {test_path}. "
"Must be list or null."
)
with pytest.raises(TypeError, match=msg):
json_normalize([test_input], record_path=[test_path])
def test_meta_non_iterable(self):
# GH 31507
data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]"""
result = json_normalize(json.loads(data), record_path=["data"], meta=["id"])
expected = DataFrame(
{"one": [1], "two": [2], "id": np.array([99], dtype=object)}
)
tm.assert_frame_equal(result, expected)
def test_generator(self, state_data):
# GH35923 Fix pd.json_normalize to not skip the first element of a
# generator input
def generator_data():
yield from state_data[0]["counties"]
result = json_normalize(generator_data())
expected = DataFrame(state_data[0]["counties"])
tm.assert_frame_equal(result, expected)
def test_top_column_with_leading_underscore(self):
# GH 49861
data = {"_id": {"a1": 10, "l2": {"l3": 0}}, "gg": 4}
result = json_normalize(data, sep="_")
expected = DataFrame([[4, 10, 0]], columns=["gg", "_id_a1", "_id_l2_l3"])
tm.assert_frame_equal(result, expected)
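# nested_to_record is the low-level helper used by json_normalize: it flattens
# nested dicts into a single level with dot-separated keys, up to max_level.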
class TestNestedToRecord:
def test_flat_stays_flat(self):
recs = [{"flat1": 1, "flat2": 2}, {"flat3": 3, "flat2": 4}]
result = nested_to_record(recs)
expected = recs
assert result == expected
def test_one_level_deep_flattens(self):
data = {"flat1": 1, "dict1": {"c": 1, "d": 2}}
result = nested_to_record(data)
expected = {"dict1.c": 1, "dict1.d": 2, "flat1": 1}
assert result == expected
def test_nested_flattens(self):
data = {
"flat1": 1,
"dict1": {"c": 1, "d": 2},
"nested": {"e": {"c": 1, "d": 2}, "d": 2},
}
result = nested_to_record(data)
expected = {
"dict1.c": 1,
"dict1.d": 2,
"flat1": 1,
"nested.d": 2,
"nested.e.c": 1,
"nested.e.d": 2,
}
assert result == expected
def test_json_normalize_errors(self, missing_metadata):
# GH14583:
# If meta keys are not always present a new option to set
# errors='ignore' has been implemented
msg = (
"Key 'name' not found. To replace missing values of "
"'name' with np.nan, pass in errors='ignore'"
)
with pytest.raises(KeyError, match=msg):
json_normalize(
data=missing_metadata,
record_path="addresses",
meta="name",
errors="raise",
)
def test_missing_meta(self, missing_metadata):
# GH25468
# If metadata is nullable with errors set to ignore, the null values
# should be numpy.nan values
result = json_normalize(
data=missing_metadata, record_path="addresses", meta="name", errors="ignore"
)
ex_data = [
[9562, "Morris St.", "Massillon", "OH", 44646, "Alice"],
[8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan],
]
columns = ["number", "street", "city", "state", "zip", "name"]
expected = DataFrame(ex_data, columns=columns)
tm.assert_frame_equal(result, expected)
def test_missing_nested_meta(self):
# GH44312
# If errors="ignore" and nested metadata is null, we should return nan
data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]}
result = json_normalize(
data,
record_path="value",
meta=["meta", ["nested_meta", "leaf"]],
errors="ignore",
)
ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]]
columns = ["rec", "meta", "nested_meta.leaf"]
expected = DataFrame(ex_data, columns=columns).astype(
{"nested_meta.leaf": object}
)
tm.assert_frame_equal(result, expected)
# If errors="raise" and nested metadata is null, we should raise with the
# key of the first missing level
with pytest.raises(KeyError, match="'leaf' not found"):
json_normalize(
data,
record_path="value",
meta=["meta", ["nested_meta", "leaf"]],
errors="raise",
)
def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata):
# GH41876
# Ensure errors='raise' works as intended even when a record_path of length
# greater than one is passed in
msg = (
"Key 'name' not found. To replace missing values of "
"'name' with np.nan, pass in errors='ignore'"
)
with pytest.raises(KeyError, match=msg):
json_normalize(
data=missing_metadata,
record_path=["previous_residences", "cities"],
meta="name",
errors="raise",
)
def test_missing_meta_multilevel_record_path_errors_ignore(self, missing_metadata):
# GH41876
# Ensure errors='ignore' works as intended even when a record_path of length
# greater than one is passed in
result = json_normalize(
data=missing_metadata,
record_path=["previous_residences", "cities"],
meta="name",
errors="ignore",
)
ex_data = [
["Foo York City", "Alice"],
["Barmingham", np.nan],
]
columns = ["city_name", "name"]
expected = DataFrame(ex_data, columns=columns)
tm.assert_frame_equal(result, expected)
def test_donot_drop_nonevalues(self):
# GH21356
data = [
{"info": None, "author_name": {"first": "Smith", "last_name": "Appleseed"}},
{
"info": {"created_at": "11/08/1993", "last_updated": "26/05/2012"},
"author_name": {"first": "Jane", "last_name": "Doe"},
},
]
result = nested_to_record(data)
expected = [
{
"info": None,
"author_name.first": "Smith",
"author_name.last_name": "Appleseed",
},
{
"author_name.first": "Jane",
"author_name.last_name": "Doe",
"info.created_at": "11/08/1993",
"info.last_updated": "26/05/2012",
},
]
assert result == expected
def test_nonetype_top_level_bottom_level(self):
# GH21158: If inner level json has a key with a null value
# make sure it does not do a new_d.pop twice and except
data = {
"id": None,
"location": {
"country": {
"state": {
"id": None,
"town.info": {
"id": None,
"region": None,
"x": 49.151580810546875,
"y": -33.148521423339844,
"z": 27.572303771972656,
},
}
}
},
}
result = nested_to_record(data)
expected = {
"id": None,
"location.country.state.id": None,
"location.country.state.town.info.id": None,
"location.country.state.town.info.region": None,
"location.country.state.town.info.x": 49.151580810546875,
"location.country.state.town.info.y": -33.148521423339844,
"location.country.state.town.info.z": 27.572303771972656,
}
assert result == expected
def test_nonetype_multiple_levels(self):
# GH21158: If inner level json has a key with a null value
# make sure it does not do a new_d.pop twice and except
data = {
"id": None,
"location": {
"id": None,
"country": {
"id": None,
"state": {
"id": None,
"town.info": {
"region": None,
"x": 49.151580810546875,
"y": -33.148521423339844,
"z": 27.572303771972656,
},
},
},
},
}
result = nested_to_record(data)
expected = {
"id": None,
"location.id": None,
"location.country.id": None,
"location.country.state.id": None,
"location.country.state.town.info.region": None,
"location.country.state.town.info.x": 49.151580810546875,
"location.country.state.town.info.y": -33.148521423339844,
"location.country.state.town.info.z": 27.572303771972656,
}
assert result == expected
@pytest.mark.parametrize(
"max_level, expected",
[
(
None,
[
{
"CreatedBy.Name": "User001",
"Lookup.TextField": "Some text",
"Lookup.UserField.Id": "ID001",
"Lookup.UserField.Name": "Name001",
"Image.a": "b",
}
],
),
(
0,
[
{
"CreatedBy": {"Name": "User001"},
"Lookup": {
"TextField": "Some text",
"UserField": {"Id": "ID001", "Name": "Name001"},
},
"Image": {"a": "b"},
}
],
),
(
1,
[
{
"CreatedBy.Name": "User001",
"Lookup.TextField": "Some text",
"Lookup.UserField": {"Id": "ID001", "Name": "Name001"},
"Image.a": "b",
}
],
),
],
)
def test_with_max_level(self, max_level, expected, max_level_test_input_data):
# GH23843: Enhanced JSON normalize
output = nested_to_record(max_level_test_input_data, max_level=max_level)
assert output == expected
def test_with_large_max_level(self):
# GH23843: Enhanced JSON normalize
max_level = 100
input_data = [
{
"CreatedBy": {
"user": {
"name": {"firstname": "Leo", "LastName": "Thomson"},
"family_tree": {
"father": {
"name": "Father001",
"father": {
"Name": "Father002",
"father": {
"name": "Father003",
"father": {"Name": "Father004"},
},
},
}
},
}
}
}
]
expected = [
{
"CreatedBy.user.name.firstname": "Leo",
"CreatedBy.user.name.LastName": "Thomson",
"CreatedBy.user.family_tree.father.name": "Father001",
"CreatedBy.user.family_tree.father.father.Name": "Father002",
"CreatedBy.user.family_tree.father.father.father.name": "Father003",
"CreatedBy.user.family_tree.father.father.father.father.Name": "Father004", # noqa: E501
}
]
output = nested_to_record(input_data, max_level=max_level)
assert output == expected
def test_series_non_zero_index(self):
# GH 19020
data = {
0: {"id": 1, "name": "Foo", "elements": {"a": 1}},
1: {"id": 2, "name": "Bar", "elements": {"b": 2}},
2: {"id": 3, "name": "Baz", "elements": {"c": 3}},
}
s = Series(data)
s.index = [1, 2, 3]
result = json_normalize(s)
expected = DataFrame(
{
"id": [1, 2, 3],
"name": ["Foo", "Bar", "Baz"],
"elements.a": [1.0, np.nan, np.nan],
"elements.b": [np.nan, 2.0, np.nan],
"elements.c": [np.nan, np.nan, 3.0],
}
)
tm.assert_frame_equal(result, expected)

File diff suppressed because it is too large

View File

@ -0,0 +1,543 @@
from collections.abc import Iterator
from io import StringIO
from pathlib import Path
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
read_json,
)
import pandas._testing as tm
from pandas.io.json._json import JsonReader
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.fixture
def lines_json_df():
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
return df.to_json(lines=True, orient="records")
@pytest.fixture(params=["ujson", "pyarrow"])
def engine(request):
if request.param == "pyarrow":
pytest.importorskip("pyarrow.json")
return request.param
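# Line-delimited (JSON Lines) round-trips, run against both the ujson and
# pyarrow engines; pyarrow only accepts a file path and does not support
# chunksize/nrows, so those combinations are xfailed below.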
def test_read_jsonl():
# GH9180
result = read_json(StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n'), lines=True)
expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
def test_read_jsonl_engine_pyarrow(datapath, engine):
result = read_json(
datapath("io", "json", "data", "line_delimited.json"),
lines=True,
engine=engine,
)
expected = DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]})
tm.assert_frame_equal(result, expected)
def test_read_datetime(request, engine):
# GH33787
if engine == "pyarrow":
# GH 48893
reason = "Pyarrow only supports a file path as an input and line delimited json"
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
df = DataFrame(
[([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")],
columns=["accounts", "date", "name"],
)
json_line = df.to_json(lines=True, orient="records")
result = read_json(StringIO(json_line), engine=engine)
expected = DataFrame(
[[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]],
columns=["accounts", "date", "name"],
)
tm.assert_frame_equal(result, expected)
def test_read_jsonl_unicode_chars():
# GH15132: non-ascii unicode characters
# \u201d == RIGHT DOUBLE QUOTATION MARK
# simulate file handle
json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
json = StringIO(json)
result = read_json(json, lines=True)
expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
# simulate string
json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
result = read_json(StringIO(json), lines=True)
expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
def test_to_jsonl():
# GH9180
df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
result = df.to_json(orient="records", lines=True)
expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
assert result == expected
df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
result = df.to_json(orient="records", lines=True)
expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
assert result == expected
tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)
# GH15096: escaped characters in columns and data
df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
result = df.to_json(orient="records", lines=True)
expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
assert result == expected
tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)
def test_to_jsonl_count_new_lines():
# GH36888
df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
actual_new_lines_count = df.to_json(orient="records", lines=True).count("\n")
expected_new_lines_count = 2
assert actual_new_lines_count == expected_new_lines_count
@pytest.mark.parametrize("chunksize", [1, 1.0])
def test_readjson_chunks(request, lines_json_df, chunksize, engine):
# Basic test that read_json(chunks=True) gives the same result as
# read_json(chunks=False)
# GH17048: memory usage when lines=True
if engine == "pyarrow":
# GH 48893
reason = (
"Pyarrow only supports a file path as an input and line delimited json"
"and doesn't support chunksize parameter."
)
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
unchunked = read_json(StringIO(lines_json_df), lines=True)
with read_json(
StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine
) as reader:
chunked = pd.concat(reader)
tm.assert_frame_equal(chunked, unchunked)
def test_readjson_chunksize_requires_lines(lines_json_df, engine):
msg = "chunksize can only be passed if lines=True"
with pytest.raises(ValueError, match=msg):
with read_json(
StringIO(lines_json_df), lines=False, chunksize=2, engine=engine
) as _:
pass
def test_readjson_chunks_series(request, engine):
if engine == "pyarrow":
# GH 48893
reason = (
"Pyarrow only supports a file path as an input and line delimited json"
"and doesn't support chunksize parameter."
)
request.applymarker(pytest.mark.xfail(reason=reason))
# Test reading line-format JSON to Series with chunksize param
s = pd.Series({"A": 1, "B": 2})
strio = StringIO(s.to_json(lines=True, orient="records"))
unchunked = read_json(strio, lines=True, typ="Series", engine=engine)
strio = StringIO(s.to_json(lines=True, orient="records"))
with read_json(
strio, lines=True, typ="Series", chunksize=1, engine=engine
) as reader:
chunked = pd.concat(reader)
tm.assert_series_equal(chunked, unchunked)
def test_readjson_each_chunk(request, lines_json_df, engine):
if engine == "pyarrow":
# GH 48893
reason = (
"Pyarrow only supports a file path as an input and line delimited json"
"and doesn't support chunksize parameter."
)
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
# Other tests check that the final result of read_json(chunksize=True)
# is correct. This checks the intermediate chunks.
with read_json(
StringIO(lines_json_df), lines=True, chunksize=2, engine=engine
) as reader:
chunks = list(reader)
assert chunks[0].shape == (2, 2)
assert chunks[1].shape == (1, 2)
def test_readjson_chunks_from_file(request, engine):
if engine == "pyarrow":
# GH 48893
reason = (
"Pyarrow only supports a file path as an input and line delimited json"
"and doesn't support chunksize parameter."
)
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
with tm.ensure_clean("test.json") as path:
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df.to_json(path, lines=True, orient="records")
with read_json(path, lines=True, chunksize=1, engine=engine) as reader:
chunked = pd.concat(reader)
unchunked = read_json(path, lines=True, engine=engine)
tm.assert_frame_equal(unchunked, chunked)
@pytest.mark.parametrize("chunksize", [None, 1])
def test_readjson_chunks_closes(chunksize):
with tm.ensure_clean("test.json") as path:
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df.to_json(path, lines=True, orient="records")
reader = JsonReader(
path,
orient=None,
typ="frame",
dtype=True,
convert_axes=True,
convert_dates=True,
keep_default_dates=True,
precise_float=False,
date_unit=None,
encoding=None,
lines=True,
chunksize=chunksize,
compression=None,
nrows=None,
)
with reader:
reader.read()
assert (
reader.handles.handle.closed
), f"didn't close stream with chunksize = {chunksize}"
@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"])
def test_readjson_invalid_chunksize(lines_json_df, chunksize, engine):
msg = r"'chunksize' must be an integer >=1"
with pytest.raises(ValueError, match=msg):
with read_json(
StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine
) as _:
pass
@pytest.mark.parametrize("chunksize", [None, 1, 2])
def test_readjson_chunks_multiple_empty_lines(chunksize):
j = """
{"A":1,"B":4}
{"A":2,"B":5}
{"A":3,"B":6}
"""
orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
test = read_json(StringIO(j), lines=True, chunksize=chunksize)
if chunksize is not None:
with test:
test = pd.concat(test)
tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}")
def test_readjson_unicode(request, monkeypatch, engine):
if engine == "pyarrow":
# GH 48893
reason = (
"Pyarrow only supports a file path as an input and line delimited json"
"and doesn't support chunksize parameter."
)
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
with tm.ensure_clean("test.json") as path:
monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949")
with open(path, "w", encoding="utf-8") as f:
f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}')
result = read_json(path, engine=engine)
expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("nrows", [1, 2])
def test_readjson_nrows(nrows, engine):
# GH 33916
# Test reading line-format JSON with the nrows param
jsonl = """{"a": 1, "b": 2}
{"a": 3, "b": 4}
{"a": 5, "b": 6}
{"a": 7, "b": 8}"""
result = read_json(StringIO(jsonl), lines=True, nrows=nrows)
expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("nrows,chunksize", [(2, 2), (4, 2)])
def test_readjson_nrows_chunks(request, nrows, chunksize, engine):
# GH 33916
# Test reading line-format JSON with the nrows and chunksize params
if engine == "pyarrow":
# GH 48893
reason = (
"Pyarrow only supports a file path as an input and line delimited json"
"and doesn't support chunksize parameter."
)
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
jsonl = """{"a": 1, "b": 2}
{"a": 3, "b": 4}
{"a": 5, "b": 6}
{"a": 7, "b": 8}"""
if engine != "pyarrow":
with read_json(
StringIO(jsonl), lines=True, nrows=nrows, chunksize=chunksize, engine=engine
) as reader:
chunked = pd.concat(reader)
else:
with read_json(
jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine
) as reader:
chunked = pd.concat(reader)
expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows]
tm.assert_frame_equal(chunked, expected)
def test_readjson_nrows_requires_lines(engine):
# GH 33916
# Test ValueError raised if nrows is set without setting lines in read_json
jsonl = """{"a": 1, "b": 2}
{"a": 3, "b": 4}
{"a": 5, "b": 6}
{"a": 7, "b": 8}"""
msg = "nrows can only be passed if lines=True"
with pytest.raises(ValueError, match=msg):
read_json(jsonl, lines=False, nrows=2, engine=engine)
def test_readjson_lines_chunks_fileurl(request, datapath, engine):
# GH 27135
# Test reading line-format JSON from file url
if engine == "pyarrow":
# GH 48893
reason = (
"Pyarrow only supports a file path as an input and line delimited json"
"and doesn't support chunksize parameter."
)
request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError))
df_list_expected = [
DataFrame([[1, 2]], columns=["a", "b"], index=[0]),
DataFrame([[3, 4]], columns=["a", "b"], index=[1]),
DataFrame([[5, 6]], columns=["a", "b"], index=[2]),
]
os_path = datapath("io", "json", "data", "line_delimited.json")
file_url = Path(os_path).as_uri()
with read_json(file_url, lines=True, chunksize=1, engine=engine) as url_reader:
for index, chunk in enumerate(url_reader):
tm.assert_frame_equal(chunk, df_list_expected[index])
def test_chunksize_is_incremental():
# See https://github.com/pandas-dev/pandas/issues/34548
jsonl = (
"""{"a": 1, "b": 2}
{"a": 3, "b": 4}
{"a": 5, "b": 6}
{"a": 7, "b": 8}\n"""
* 1000
)
class MyReader:
def __init__(self, contents) -> None:
self.read_count = 0
self.stringio = StringIO(contents)
def read(self, *args):
self.read_count += 1
return self.stringio.read(*args)
def __iter__(self) -> Iterator:
self.read_count += 1
return iter(self.stringio)
reader = MyReader(jsonl)
assert len(list(read_json(reader, lines=True, chunksize=100))) > 1
assert reader.read_count > 10
@pytest.mark.parametrize("orient_", ["split", "index", "table"])
def test_to_json_append_orient(orient_):
# GH 35849
# Test ValueError when orient is not 'records'
df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
msg = (
r"mode='a' \(append\) is only supported when "
"lines is True and orient is 'records'"
)
with pytest.raises(ValueError, match=msg):
df.to_json(mode="a", orient=orient_)
def test_to_json_append_lines():
# GH 35849
# Test ValueError when lines is not True
df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
msg = (
r"mode='a' \(append\) is only supported when "
"lines is True and orient is 'records'"
)
with pytest.raises(ValueError, match=msg):
df.to_json(mode="a", lines=False, orient="records")
@pytest.mark.parametrize("mode_", ["r", "x"])
def test_to_json_append_mode(mode_):
# GH 35849
# Test ValueError when mode is not supported option
df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
msg = (
f"mode={mode_} is not a valid option."
"Only 'w' and 'a' are currently supported."
)
with pytest.raises(ValueError, match=msg):
df.to_json(mode=mode_, lines=False, orient="records")
def test_to_json_append_output_consistent_columns():
# GH 35849
# Testing that resulting output reads in as expected.
# Testing same columns, new rows
df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})
expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]})
with tm.ensure_clean("test.json") as path:
# Save dataframes to the same file
df1.to_json(path, lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")
# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
def test_to_json_append_output_inconsistent_columns():
# GH 35849
# Testing that resulting output reads in as expected.
# Testing one new column, one old column, new rows
df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})
expected = DataFrame(
{
"col1": [1, 2, None, None],
"col2": ["a", "b", "e", "f"],
"col3": [np.nan, np.nan, "!", "#"],
}
)
with tm.ensure_clean("test.json") as path:
# Save dataframes to the same file
df1.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")
# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
def test_to_json_append_output_different_columns():
# GH 35849
# Testing that resulting output reads in as expected.
# Testing same, differing and new columns
df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})
df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})
df4 = DataFrame({"col4": [True, False]})
expected = DataFrame(
{
"col1": [1, 2, 3, 4, None, None, None, None],
"col2": ["a", "b", "c", "d", "e", "f", np.nan, np.nan],
"col3": [np.nan, np.nan, np.nan, np.nan, "!", "#", np.nan, np.nan],
"col4": [None, None, None, None, None, None, True, False],
}
).astype({"col4": "float"})
with tm.ensure_clean("test.json") as path:
# Save dataframes to the same file
df1.to_json(path, mode="a", lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")
df4.to_json(path, mode="a", lines=True, orient="records")
# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
def test_to_json_append_output_different_columns_reordered():
# GH 35849
# Testing that resulting output reads in as expected.
# Testing specific result column order.
df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]})
df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]})
df4 = DataFrame({"col4": [True, False]})
# df4, df3, df2, df1 (in that order)
expected = DataFrame(
{
"col4": [True, False, None, None, None, None, None, None],
"col2": [np.nan, np.nan, "e", "f", "c", "d", "a", "b"],
"col3": [np.nan, np.nan, "!", "#", np.nan, np.nan, np.nan, np.nan],
"col1": [None, None, None, None, 3, 4, 1, 2],
}
).astype({"col4": "float"})
with tm.ensure_clean("test.json") as path:
# Save dataframes to the same file
df4.to_json(path, mode="a", lines=True, orient="records")
df3.to_json(path, mode="a", lines=True, orient="records")
df2.to_json(path, mode="a", lines=True, orient="records")
df1.to_json(path, mode="a", lines=True, orient="records")
# Read path file
result = read_json(path, lines=True)
tm.assert_frame_equal(result, expected)
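
A minimal sketch of the append workflow these GH 35849 tests exercise; the file name is hypothetical and this is not part of the test module:

import pandas as pd

df1 = pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
df2 = pd.DataFrame({"col1": [3, 4], "col2": ["c", "d"]})

# mode="a" is only valid together with lines=True and orient="records"
df1.to_json("events.json", lines=True, orient="records")
df2.to_json("events.json", mode="a", lines=True, orient="records")

combined = pd.read_json("events.json", lines=True)  # four rows, columns col1/col2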

File diff suppressed because it is too large


@@ -0,0 +1,382 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import numpy as np
import pytest
from pandas._libs import parsers as libparsers
from pandas.errors import DtypeWarning
from pandas import (
DataFrame,
concat,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.mark.parametrize("index_col", [0, "index"])
def test_read_chunksize_with_index(all_parsers, index_col):
parser = all_parsers
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
expected = DataFrame(
[
["foo", 2, 3, 4, 5],
["bar", 7, 8, 9, 10],
["baz", 12, 13, 14, 15],
["qux", 12, 13, 14, 15],
["foo2", 12, 13, 14, 15],
["bar2", 12, 13, 14, 15],
],
columns=["index", "A", "B", "C", "D"],
)
expected = expected.set_index("index")
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with parser.read_csv(StringIO(data), index_col=0, chunksize=2) as reader:
list(reader)
return
with parser.read_csv(StringIO(data), index_col=0, chunksize=2) as reader:
chunks = list(reader)
tm.assert_frame_equal(chunks[0], expected[:2])
tm.assert_frame_equal(chunks[1], expected[2:4])
tm.assert_frame_equal(chunks[2], expected[4:])
@pytest.mark.parametrize("chunksize", [1.3, "foo", 0])
def test_read_chunksize_bad(all_parsers, chunksize):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
msg = r"'chunksize' must be an integer >=1"
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with parser.read_csv(StringIO(data), chunksize=chunksize) as _:
pass
@pytest.mark.parametrize("chunksize", [2, 8])
def test_read_chunksize_and_nrows(all_parsers, chunksize):
# see gh-15755
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0, "nrows": 5}
if parser.engine == "pyarrow":
msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
return
expected = parser.read_csv(StringIO(data), **kwargs)
with parser.read_csv(StringIO(data), chunksize=chunksize, **kwargs) as reader:
tm.assert_frame_equal(concat(reader), expected)
def test_read_chunksize_and_nrows_changing_size(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0, "nrows": 5}
if parser.engine == "pyarrow":
msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
return
expected = parser.read_csv(StringIO(data), **kwargs)
with parser.read_csv(StringIO(data), chunksize=8, **kwargs) as reader:
tm.assert_frame_equal(reader.get_chunk(size=2), expected.iloc[:2])
tm.assert_frame_equal(reader.get_chunk(size=4), expected.iloc[2:5])
with pytest.raises(StopIteration, match=""):
reader.get_chunk(size=3)
def test_get_chunk_passed_chunksize(all_parsers):
parser = all_parsers
data = """A,B,C
1,2,3
4,5,6
7,8,9
1,2,3"""
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with parser.read_csv(StringIO(data), chunksize=2) as reader:
reader.get_chunk()
return
with parser.read_csv(StringIO(data), chunksize=2) as reader:
result = reader.get_chunk()
expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}])
def test_read_chunksize_compat(all_parsers, kwargs):
# see gh-12185
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with parser.read_csv(StringIO(data), chunksize=2, **kwargs) as reader:
concat(reader)
return
with parser.read_csv(StringIO(data), chunksize=2, **kwargs) as reader:
via_reader = concat(reader)
tm.assert_frame_equal(via_reader, result)
def test_read_chunksize_jagged_names(all_parsers):
# see gh-23509
parser = all_parsers
data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)])
expected = DataFrame([[0] + [np.nan] * 9] * 7 + [[0] * 10])
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with parser.read_csv(
StringIO(data), names=range(10), chunksize=4
) as reader:
concat(reader)
return
with parser.read_csv(StringIO(data), names=range(10), chunksize=4) as reader:
result = concat(reader)
tm.assert_frame_equal(result, expected)
def test_chunk_begins_with_newline_whitespace(all_parsers):
# see gh-10022
parser = all_parsers
data = "\n hello\nworld\n"
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame([" hello", "world"])
tm.assert_frame_equal(result, expected)
@pytest.mark.slow
def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch):
# mainly an issue with the C parser
heuristic = 2**3
parser = all_parsers
integers = [str(i) for i in range(heuristic - 1)]
data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers)
# Coercions should work without warnings.
with monkeypatch.context() as m:
m.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", heuristic)
result = parser.read_csv(StringIO(data))
assert type(result.a[0]) is np.float64
assert result.a.dtype == float
def test_warn_if_chunks_have_mismatched_type(all_parsers, using_infer_string):
warning_type = None
parser = all_parsers
size = 10000
# see gh-3866: if chunks are different types and can't
# be coerced using numerical types, then issue warning.
if parser.engine == "c" and parser.low_memory:
warning_type = DtypeWarning
# Use larger size to hit warning path
size = 499999
integers = [str(i) for i in range(size)]
data = "a\n" + "\n".join(integers + ["a", "b"] + integers)
buf = StringIO(data)
if parser.engine == "pyarrow":
df = parser.read_csv(
buf,
)
else:
df = parser.read_csv_check_warnings(
warning_type,
r"Columns \(0\) have mixed types. "
"Specify dtype option on import or set low_memory=False.",
buf,
)
if parser.engine == "c" and parser.low_memory:
assert df.a.dtype == object
elif using_infer_string:
assert df.a.dtype == "str"
else:
assert df.a.dtype == object
@pytest.mark.parametrize("iterator", [True, False])
def test_empty_with_nrows_chunksize(all_parsers, iterator):
# see gh-9535
parser = all_parsers
expected = DataFrame(columns=["foo", "bar"])
nrows = 10
data = StringIO("foo,bar\n")
if parser.engine == "pyarrow":
msg = (
"The '(nrows|chunksize)' option is not supported with the 'pyarrow' engine"
)
with pytest.raises(ValueError, match=msg):
if iterator:
with parser.read_csv(data, chunksize=nrows) as reader:
next(iter(reader))
else:
parser.read_csv(data, nrows=nrows)
return
if iterator:
with parser.read_csv(data, chunksize=nrows) as reader:
result = next(iter(reader))
else:
result = parser.read_csv(data, nrows=nrows)
tm.assert_frame_equal(result, expected)
def test_read_csv_memory_growth_chunksize(all_parsers):
# see gh-24805
#
# Let's just make sure that we don't crash
# as we iteratively process all chunks.
parser = all_parsers
with tm.ensure_clean() as path:
with open(path, "w", encoding="utf-8") as f:
for i in range(1000):
f.write(str(i) + "\n")
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with parser.read_csv(path, chunksize=20) as result:
for _ in result:
pass
return
with parser.read_csv(path, chunksize=20) as result:
for _ in result:
pass
def test_chunksize_with_usecols_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
data = """1,2,3,4
5,6,7,8
9,10,11
"""
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
names=["a", "b"],
chunksize=2,
usecols=[0, 1],
header=None,
)
return
result_chunks = parser.read_csv(
StringIO(data),
names=["a", "b"],
chunksize=2,
usecols=[0, 1],
header=None,
)
expected_frames = [
DataFrame({"a": [1, 5], "b": [2, 6]}),
DataFrame({"a": [9], "b": [10]}, index=[2]),
]
for i, result in enumerate(result_chunks):
tm.assert_frame_equal(result, expected_frames[i])
def test_chunksize_second_block_shorter(all_parsers):
# GH#21211
parser = all_parsers
data = """a,b,c,d
1,2,3,4
5,6,7,8
9,10,11
"""
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), chunksize=2)
return
result_chunks = parser.read_csv(StringIO(data), chunksize=2)
expected_frames = [
DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}),
DataFrame({"a": [9], "b": [10], "c": [11], "d": [np.nan]}, index=[2]),
]
for i, result in enumerate(result_chunks):
tm.assert_frame_equal(result, expected_frames[i])
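
A short sketch of the chunked-reading API covered above, using inline data; not part of the test file:

from io import StringIO
import pandas as pd

data = "a,b\n1,2\n3,4\n5,6\n7,8\n"

# The reader returned for chunksize is an iterator and a context manager.
with pd.read_csv(StringIO(data), chunksize=2) as reader:
    chunks = list(reader)  # two DataFrames of two rows each

# get_chunk() overrides the configured chunk size per call.
with pd.read_csv(StringIO(data), chunksize=2) as reader:
    first_three = reader.get_chunk(3)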


@@ -0,0 +1,983 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from datetime import datetime
from inspect import signature
from io import StringIO
import os
from pathlib import Path
import sys
import numpy as np
import pytest
from pandas._config import using_string_dtype
from pandas.compat import HAS_PYARROW
from pandas.errors import (
EmptyDataError,
ParserError,
ParserWarning,
)
from pandas import (
DataFrame,
Index,
Timestamp,
compat,
)
import pandas._testing as tm
from pandas.io.parsers import TextFileReader
from pandas.io.parsers.c_parser_wrapper import CParserWrapper
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
def test_override_set_noconvert_columns():
# see gh-17351
#
# Usecols needs to be sorted in _set_noconvert_columns based
# on the test_usecols_with_parse_dates test from test_usecols.py
class MyTextFileReader(TextFileReader):
def __init__(self) -> None:
self._currow = 0
self.squeeze = False
class MyCParserWrapper(CParserWrapper):
def _set_noconvert_columns(self):
if self.usecols_dtype == "integer":
# self.usecols is a set, which is documented as unordered
# but in practice, a CPython set of integers is sorted.
# In other implementations this assumption does not hold.
# The following code simulates a different order, which
# before GH 17351 would cause the wrong columns to be
# converted via the parse_dates parameter
self.usecols = list(self.usecols)
self.usecols.reverse()
return CParserWrapper._set_noconvert_columns(self)
data = """a,b,c,d,e
0,1,2014-01-01,09:00,4
0,1,2014-01-02,10:00,4"""
parse_dates = [[1, 2]]
cols = {
"a": [0, 0],
"c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],
}
expected = DataFrame(cols, columns=["c_d", "a"])
parser = MyTextFileReader()
parser.options = {
"usecols": [0, 2, 3],
"parse_dates": parse_dates,
"delimiter": ",",
}
parser.engine = "c"
parser._engine = MyCParserWrapper(StringIO(data), **parser.options)
result = parser.read()
tm.assert_frame_equal(result, expected)
def test_read_csv_local(all_parsers, csv1):
prefix = "file:///" if compat.is_platform_windows() else "file://"
parser = all_parsers
fname = prefix + str(os.path.abspath(csv1))
result = parser.read_csv(fname, index_col=0, parse_dates=True)
# TODO: make unit check more specific
if parser.engine == "pyarrow":
result.index = result.index.as_unit("ns")
expected = DataFrame(
[
[0.980269, 3.685731, -0.364216805298, -1.159738],
[1.047916, -0.041232, -0.16181208307, 0.212549],
[0.498581, 0.731168, -0.537677223318, 1.346270],
[1.120202, 1.567621, 0.00364077397681, 0.675253],
[-0.487094, 0.571455, -1.6116394093, 0.103469],
[0.836649, 0.246462, 0.588542635376, 1.062782],
[-0.157161, 1.340307, 1.1957779562, -1.097007],
],
columns=["A", "B", "C", "D"],
index=Index(
[
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
datetime(2000, 1, 6),
datetime(2000, 1, 7),
datetime(2000, 1, 10),
datetime(2000, 1, 11),
],
name="index",
),
)
tm.assert_frame_equal(result, expected)
def test_1000_sep(all_parsers):
parser = all_parsers
data = """A|B|C
1|2,334|5
10|13|10.
"""
expected = DataFrame({"A": [1, 10], "B": [2334, 13], "C": [5, 10.0]})
if parser.engine == "pyarrow":
msg = "The 'thousands' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), sep="|", thousands=",")
return
result = parser.read_csv(StringIO(data), sep="|", thousands=",")
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: Found non-unique column index
def test_unnamed_columns(all_parsers):
data = """A,B,C,,
1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
parser = all_parsers
expected = DataFrame(
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]],
dtype=np.int64,
columns=["A", "B", "C", "Unnamed: 3", "Unnamed: 4"],
)
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
def test_csv_mixed_type(all_parsers):
data = """A,B,C
a,1,2
b,3,4
c,4,5
"""
parser = all_parsers
expected = DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]})
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
def test_read_csv_low_memory_no_rows_with_index(all_parsers):
# see gh-21141
parser = all_parsers
if not parser.low_memory:
pytest.skip("This is a low-memory specific test")
data = """A,B,C
1,1,1,2
2,2,3,4
3,3,4,5
"""
if parser.engine == "pyarrow":
msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0)
return
result = parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0)
expected = DataFrame(columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
def test_read_csv_dataframe(all_parsers, csv1):
parser = all_parsers
result = parser.read_csv(csv1, index_col=0, parse_dates=True)
# TODO: make unit check more specific
if parser.engine == "pyarrow":
result.index = result.index.as_unit("ns")
expected = DataFrame(
[
[0.980269, 3.685731, -0.364216805298, -1.159738],
[1.047916, -0.041232, -0.16181208307, 0.212549],
[0.498581, 0.731168, -0.537677223318, 1.346270],
[1.120202, 1.567621, 0.00364077397681, 0.675253],
[-0.487094, 0.571455, -1.6116394093, 0.103469],
[0.836649, 0.246462, 0.588542635376, 1.062782],
[-0.157161, 1.340307, 1.1957779562, -1.097007],
],
columns=["A", "B", "C", "D"],
index=Index(
[
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
datetime(2000, 1, 6),
datetime(2000, 1, 7),
datetime(2000, 1, 10),
datetime(2000, 1, 11),
],
name="index",
),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("nrows", [3, 3.0])
def test_read_nrows(all_parsers, nrows):
# see gh-10476
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
expected = DataFrame(
[["foo", 2, 3, 4, 5], ["bar", 7, 8, 9, 10], ["baz", 12, 13, 14, 15]],
columns=["index", "A", "B", "C", "D"],
)
parser = all_parsers
if parser.engine == "pyarrow":
msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), nrows=nrows)
return
result = parser.read_csv(StringIO(data), nrows=nrows)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("nrows", [1.2, "foo", -1])
def test_read_nrows_bad(all_parsers, nrows):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
msg = r"'nrows' must be an integer >=0"
parser = all_parsers
if parser.engine == "pyarrow":
msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), nrows=nrows)
def test_nrows_skipfooter_errors(all_parsers):
msg = "'skipfooter' not supported with 'nrows'"
data = "a\n1\n2\n3\n4\n5\n6"
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), skipfooter=1, nrows=5)
@skip_pyarrow
def test_missing_trailing_delimiters(all_parsers):
parser = all_parsers
data = """A,B,C,D
1,2,3,4
1,3,3,
1,4,5"""
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[[1, 2, 3, 4], [1, 3, 3, np.nan], [1, 4, 5, np.nan]],
columns=["A", "B", "C", "D"],
)
tm.assert_frame_equal(result, expected)
def test_skip_initial_space(all_parsers):
data = (
'"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, '
"1.00361, 1.12551, 330.65659, 0355626618.16711, 73.48821, "
"314.11625, 1917.09447, 179.71425, 80.000, 240.000, -350, "
"70.06056, 344.98370, 1, 1, -0.689265, -0.692787, "
"0.212036, 14.7674, 41.605, -9999.0, -9999.0, "
"-9999.0, -9999.0, -9999.0, -9999.0, 000, 012, 128"
)
parser = all_parsers
if parser.engine == "pyarrow":
msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
names=list(range(33)),
header=None,
na_values=["-9999.0"],
skipinitialspace=True,
)
return
result = parser.read_csv(
StringIO(data),
names=list(range(33)),
header=None,
na_values=["-9999.0"],
skipinitialspace=True,
)
expected = DataFrame(
[
[
"09-Apr-2012",
"01:10:18.300",
2456026.548822908,
12849,
1.00361,
1.12551,
330.65659,
355626618.16711,
73.48821,
314.11625,
1917.09447,
179.71425,
80.0,
240.0,
-350,
70.06056,
344.9837,
1,
1,
-0.689265,
-0.692787,
0.212036,
14.7674,
41.605,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
np.nan,
0,
12,
128,
]
]
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_trailing_delimiters(all_parsers):
# see gh-2442
data = """A,B,C
1,2,3,
4,5,6,
7,8,9,"""
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=False)
expected = DataFrame({"A": [1, 4, 7], "B": [2, 5, 8], "C": [3, 6, 9]})
tm.assert_frame_equal(result, expected)
def test_escapechar(all_parsers):
# https://stackoverflow.com/questions/13824840/feature-request-for-
# pandas-read-csv
data = '''SEARCH_TERM,ACTUAL_URL
"bra tv board","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
"tv p\xc3\xa5 hjul","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"
"SLAGBORD, \\"Bergslagen\\", IKEA:s 1700-tals series","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"'''
parser = all_parsers
result = parser.read_csv(
StringIO(data), escapechar="\\", quotechar='"', encoding="utf-8"
)
assert result["SEARCH_TERM"][2] == 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals series'
tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))
def test_ignore_leading_whitespace(all_parsers):
# see gh-3374, gh-6607
parser = all_parsers
data = " a b c\n 1 2 3\n 4 5 6\n 7 8 9"
if parser.engine == "pyarrow":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), sep=r"\s+")
return
result = parser.read_csv(StringIO(data), sep=r"\s+")
expected = DataFrame({"a": [1, 4, 7], "b": [2, 5, 8], "c": [3, 6, 9]})
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]])
def test_uneven_lines_with_usecols(all_parsers, usecols):
# see gh-12203
parser = all_parsers
data = r"""a,b,c
0,1,2
3,4,5,6,7
8,9,10"""
if usecols is None:
# Make sure that an error is still raised
# when the "usecols" parameter is not provided.
msg = r"Expected \d+ fields in line \d+, saw \d+"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data))
else:
expected = DataFrame({"a": [0, 3, 8], "b": [1, 4, 9]})
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize(
"data,kwargs,expected",
[
# First, check to see that the response of parser when faced with no
# provided columns raises the correct error, with or without usecols.
("", {}, None),
("", {"usecols": ["X"]}, None),
(
",,",
{"names": ["Dummy", "X", "Dummy_2"], "usecols": ["X"]},
DataFrame(columns=["X"], index=[0], dtype=np.float64),
),
(
"",
{"names": ["Dummy", "X", "Dummy_2"], "usecols": ["X"]},
DataFrame(columns=["X"]),
),
],
)
def test_read_empty_with_usecols(all_parsers, data, kwargs, expected):
# see gh-12493
parser = all_parsers
if expected is None:
msg = "No columns to parse from file"
with pytest.raises(EmptyDataError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,expected",
[
# gh-8661, gh-8679: this should ignore six lines, including
# lines with trailing whitespace and blank lines.
(
{
"header": None,
"delim_whitespace": True,
"skiprows": [0, 1, 2, 3, 5, 6],
"skip_blank_lines": True,
},
DataFrame([[1.0, 2.0, 4.0], [5.1, np.nan, 10.0]]),
),
# gh-8983: test skipping set of rows after a row with trailing spaces.
(
{
"delim_whitespace": True,
"skiprows": [1, 2, 3, 5, 6],
"skip_blank_lines": True,
},
DataFrame({"A": [1.0, 5.1], "B": [2.0, np.nan], "C": [4.0, 10]}),
),
],
)
def test_trailing_spaces(all_parsers, kwargs, expected):
data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n" # noqa: E501
parser = all_parsers
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
if parser.engine == "pyarrow":
msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
return
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
tm.assert_frame_equal(result, expected)
def test_raise_on_sep_with_delim_whitespace(all_parsers):
# see gh-6607
data = "a b c\n1 2 3"
parser = all_parsers
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
with pytest.raises(ValueError, match="you can only specify one"):
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True)
def test_read_filepath_or_buffer(all_parsers):
# see gh-43366
parser = all_parsers
with pytest.raises(TypeError, match="Expected file path name or file-like"):
parser.read_csv(filepath_or_buffer=b"input")
@pytest.mark.parametrize("delim_whitespace", [True, False])
def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
# see gh-9710
parser = all_parsers
data = """\
MyColumn
a
b
a
b\n"""
expected = DataFrame({"MyColumn": list("abab")})
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
if parser.engine == "pyarrow":
msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(
StringIO(data),
skipinitialspace=True,
delim_whitespace=delim_whitespace,
)
return
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(
StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"sep,skip_blank_lines,exp_data",
[
(",", True, [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0], [-70.0, 0.4, 1.0]]),
(r"\s+", True, [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0], [-70.0, 0.4, 1.0]]),
(
",",
False,
[
[1.0, 2.0, 4.0],
[np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan],
[5.0, np.nan, 10.0],
[np.nan, np.nan, np.nan],
[-70.0, 0.4, 1.0],
],
),
],
)
def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):
parser = all_parsers
data = """\
A,B,C
1,2.,4.
5.,NaN,10.0
-70,.4,1
"""
if sep == r"\s+":
data = data.replace(",", " ")
if parser.engine == "pyarrow":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines
)
return
result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines)
expected = DataFrame(exp_data, columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_whitespace_lines(all_parsers):
parser = all_parsers
data = """
\t \t\t
\t
A,B,C
\t 1,2.,4.
5.,NaN,10.0
"""
expected = DataFrame([[1, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"])
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data,expected",
[
(
""" A B C D
a 1 2 3 4
b 1 2 3 4
c 1 2 3 4
""",
DataFrame(
[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
columns=["A", "B", "C", "D"],
index=["a", "b", "c"],
),
),
(
" a b c\n1 2 3 \n4 5 6\n 7 8 9",
DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]),
),
],
)
def test_whitespace_regex_separator(all_parsers, data, expected):
# see gh-6607
parser = all_parsers
if parser.engine == "pyarrow":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), sep=r"\s+")
return
result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)
def test_sub_character(all_parsers, csv_dir_path):
# see gh-16893
filename = os.path.join(csv_dir_path, "sub_char.csv")
expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"])
parser = all_parsers
result = parser.read_csv(filename)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("filename", ["sé-es-vé.csv", "ru-sй.csv", "中文文件名.csv"])
def test_filename_with_special_chars(all_parsers, filename):
# see gh-15086.
parser = all_parsers
df = DataFrame({"a": [1, 2, 3]})
with tm.ensure_clean(filename) as path:
df.to_csv(path, index=False)
result = parser.read_csv(path)
tm.assert_frame_equal(result, df)
def test_read_table_same_signature_as_read_csv(all_parsers):
# GH-34976
parser = all_parsers
table_sign = signature(parser.read_table)
csv_sign = signature(parser.read_csv)
assert table_sign.parameters.keys() == csv_sign.parameters.keys()
assert table_sign.return_annotation == csv_sign.return_annotation
for key, csv_param in csv_sign.parameters.items():
table_param = table_sign.parameters[key]
if key == "sep":
assert csv_param.default == ","
assert table_param.default == "\t"
assert table_param.annotation == csv_param.annotation
assert table_param.kind == csv_param.kind
continue
assert table_param == csv_param
def test_read_table_equivalency_to_read_csv(all_parsers):
# see gh-21948
# As of 0.25.0, read_table is undeprecated
parser = all_parsers
data = "a\tb\n1\t2\n3\t4"
expected = parser.read_csv(StringIO(data), sep="\t")
result = parser.read_table(StringIO(data))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("read_func", ["read_csv", "read_table"])
def test_read_csv_and_table_sys_setprofile(all_parsers, read_func):
# GH#41069
parser = all_parsers
data = "a b\n0 1"
sys.setprofile(lambda *a, **k: None)
result = getattr(parser, read_func)(StringIO(data))
sys.setprofile(None)
expected = DataFrame({"a b": ["0 1"]})
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_first_row_bom(all_parsers):
# see gh-26545
parser = all_parsers
data = '''\ufeff"Head1"\t"Head2"\t"Head3"'''
result = parser.read_csv(StringIO(data), delimiter="\t")
expected = DataFrame(columns=["Head1", "Head2", "Head3"])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_first_row_bom_unquoted(all_parsers):
# see gh-36343
parser = all_parsers
data = """\ufeffHead1\tHead2\tHead3"""
result = parser.read_csv(StringIO(data), delimiter="\t")
expected = DataFrame(columns=["Head1", "Head2", "Head3"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("nrows", range(1, 6))
def test_blank_lines_between_header_and_data_rows(all_parsers, nrows):
# GH 28071
ref = DataFrame(
[[np.nan, np.nan], [np.nan, np.nan], [1, 2], [np.nan, np.nan], [3, 4]],
columns=list("ab"),
)
csv = "\nheader\n\na,b\n\n\n1,2\n\n3,4"
parser = all_parsers
if parser.engine == "pyarrow":
msg = "The 'nrows' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(csv), header=3, nrows=nrows, skip_blank_lines=False
)
return
df = parser.read_csv(StringIO(csv), header=3, nrows=nrows, skip_blank_lines=False)
tm.assert_frame_equal(df, ref[:nrows])
@skip_pyarrow
def test_no_header_two_extra_columns(all_parsers):
# GH 26218
column_names = ["one", "two", "three"]
ref = DataFrame([["foo", "bar", "baz"]], columns=column_names)
stream = StringIO("foo,bar,baz,bam,blah")
parser = all_parsers
df = parser.read_csv_check_warnings(
ParserWarning,
"Length of header or names does not match length of data. "
"This leads to a loss of data with index_col=False.",
stream,
header=None,
names=column_names,
index_col=False,
)
tm.assert_frame_equal(df, ref)
def test_read_csv_names_not_accepting_sets(all_parsers):
# GH 34946
data = """\
1,2,3
4,5,6\n"""
parser = all_parsers
with pytest.raises(ValueError, match="Names should be an ordered collection."):
parser.read_csv(StringIO(data), names=set("QAZ"))
def test_read_table_delim_whitespace_default_sep(all_parsers):
# GH: 35958
f = StringIO("a b c\n1 -2 -3\n4 5 6")
parser = all_parsers
depr_msg = "The 'delim_whitespace' keyword in pd.read_table is deprecated"
if parser.engine == "pyarrow":
msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_table(f, delim_whitespace=True)
return
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_table(f, delim_whitespace=True)
expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("delimiter", [",", "\t"])
def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter):
# GH: 35958
f = StringIO("a b c\n1 -2 -3\n4 5 6")
parser = all_parsers
msg = (
"Specified a delimiter with both sep and "
"delim_whitespace=True; you can only specify one."
)
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, delim_whitespace=True, sep=delimiter)
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, delim_whitespace=True, delimiter=delimiter)
def test_read_csv_delimiter_and_sep_no_default(all_parsers):
# GH#39823
f = StringIO("a,b\n1,2")
parser = all_parsers
msg = "Specified a sep and a delimiter; you can only specify one."
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, sep=" ", delimiter=".")
@pytest.mark.parametrize("kwargs", [{"delimiter": "\n"}, {"sep": "\n"}])
def test_read_csv_line_break_as_separator(kwargs, all_parsers):
# GH#43528
parser = all_parsers
data = """a,b,c
1,2,3
"""
msg = (
r"Specified \\n as separator or delimiter. This forces the python engine "
r"which does not accept a line terminator. Hence it is not allowed to use "
r"the line terminator as separator."
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
@pytest.mark.parametrize("delimiter", [",", "\t"])
def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
# GH: 35958
f = StringIO("a b c\n1 -2 -3\n4 5 6")
parser = all_parsers
msg = (
"Specified a delimiter with both sep and "
"delim_whitespace=True; you can only specify one."
)
depr_msg = "The 'delim_whitespace' keyword in pd.read_table is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True, sep=delimiter)
with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
@skip_pyarrow
def test_dict_keys_as_names(all_parsers):
# GH: 36928
data = "1,2"
keys = {"a": int, "b": int}.keys()
parser = all_parsers
result = parser.read_csv(StringIO(data), names=keys)
expected = DataFrame({"a": [1], "b": [2]})
tm.assert_frame_equal(result, expected)
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
@xfail_pyarrow # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 0
def test_encoding_surrogatepass(all_parsers):
# GH39017
parser = all_parsers
content = b"\xed\xbd\xbf"
decoded = content.decode("utf-8", errors="surrogatepass")
expected = DataFrame({decoded: [decoded]}, index=[decoded * 2])
expected.index.name = decoded * 2
with tm.ensure_clean() as path:
Path(path).write_bytes(
content * 2 + b"," + content + b"\n" + content * 2 + b"," + content
)
df = parser.read_csv(path, encoding_errors="surrogatepass", index_col=0)
tm.assert_frame_equal(df, expected)
with pytest.raises(UnicodeDecodeError, match="'utf-8' codec can't decode byte"):
parser.read_csv(path)
def test_malformed_second_line(all_parsers):
# see GH14782
parser = all_parsers
data = "\na\nb\n"
result = parser.read_csv(StringIO(data), skip_blank_lines=False, header=1)
expected = DataFrame({"a": ["b"]})
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_short_single_line(all_parsers):
# GH 47566
parser = all_parsers
columns = ["a", "b", "c"]
data = "1,2"
result = parser.read_csv(StringIO(data), header=None, names=columns)
expected = DataFrame({"a": [1], "b": [2], "c": [np.nan]})
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: Length mismatch: Expected axis has 2 elements
def test_short_multi_line(all_parsers):
# GH 47566
parser = all_parsers
columns = ["a", "b", "c"]
data = "1,2\n1,2"
result = parser.read_csv(StringIO(data), header=None, names=columns)
expected = DataFrame({"a": [1, 1], "b": [2, 2], "c": [np.nan, np.nan]})
tm.assert_frame_equal(result, expected)
def test_read_seek(all_parsers):
# GH48646
parser = all_parsers
prefix = "### DATA\n"
content = "nkey,value\ntables,rectangular\n"
with tm.ensure_clean() as path:
Path(path).write_text(prefix + content, encoding="utf-8")
with open(path, encoding="utf-8") as file:
file.readline()
actual = parser.read_csv(file)
expected = parser.read_csv(StringIO(content))
tm.assert_frame_equal(actual, expected)
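
Two of the options exercised in the file above, shown on small hypothetical inputs (thousands separators and backslash-escaped quotes):

from io import StringIO
import pandas as pd

# thousands="," turns "2,334" into the integer 2334 (cf. test_1000_sep)
df = pd.read_csv(StringIO("A|B\n1|2,334\n10|13\n"), sep="|", thousands=",")

# escapechar="\\" lets \" appear inside a quoted field (cf. test_escapechar)
data = 'term,url\n"SLAGBORD, \\"Bergslagen\\"","http://example.com"\n'
df2 = pd.read_csv(StringIO(data), escapechar="\\", quotechar='"')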


@@ -0,0 +1,91 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
import csv
from io import StringIO
import pytest
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.parsers import TextParser
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
@xfail_pyarrow
def test_read_data_list(all_parsers):
parser = all_parsers
kwargs = {"index_col": 0}
data = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
data_list = [["A", "B", "C"], ["foo", "1", "2", "3"], ["bar", "4", "5", "6"]]
expected = parser.read_csv(StringIO(data), **kwargs)
with TextParser(data_list, chunksize=2, **kwargs) as parser:
result = parser.read()
tm.assert_frame_equal(result, expected)
def test_reader_list(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0}
lines = list(csv.reader(StringIO(data)))
with TextParser(lines, chunksize=2, **kwargs) as reader:
chunks = list(reader)
expected = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(chunks[0], expected[:2])
tm.assert_frame_equal(chunks[1], expected[2:4])
tm.assert_frame_equal(chunks[2], expected[4:])
def test_reader_list_skiprows(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0}
lines = list(csv.reader(StringIO(data)))
with TextParser(lines, chunksize=2, skiprows=[1], **kwargs) as reader:
chunks = list(reader)
expected = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(chunks[0], expected[1:3])
def test_read_csv_parse_simple_list(all_parsers):
parser = all_parsers
data = """foo
bar baz
qux foo
foo
bar"""
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame(["foo", "bar baz", "qux foo", "foo", "bar"])
tm.assert_frame_equal(result, expected)
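
A sketch of feeding pre-tokenized rows to TextParser as the tests above do; TextParser is an internal helper, so this mirrors the test usage rather than a public API guarantee:

from pandas.io.parsers import TextParser

rows = [["A", "B"], ["1", "2"], ["3", "4"]]  # first row becomes the header
with TextParser(rows, chunksize=2) as reader:
    frames = list(reader)  # list of DataFrame chunks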


@@ -0,0 +1,72 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import pytest
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.mark.parametrize(
"data,thousands,decimal",
[
(
"""A|B|C
1|2,334.01|5
10|13|10.
""",
",",
".",
),
(
"""A|B|C
1|2.334,01|5
10|13|10,
""",
".",
",",
),
],
)
def test_1000_sep_with_decimal(all_parsers, data, thousands, decimal):
parser = all_parsers
expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]})
if parser.engine == "pyarrow":
msg = "The 'thousands' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), sep="|", thousands=thousands, decimal=decimal
)
return
result = parser.read_csv(
StringIO(data), sep="|", thousands=thousands, decimal=decimal
)
tm.assert_frame_equal(result, expected)
def test_euro_decimal_format(all_parsers):
parser = all_parsers
data = """Id;Number1;Number2;Text1;Text2;Number3
1;1521,1541;187101,9543;ABC;poi;4,738797819
2;121,12;14897,76;DEF;uyt;0,377320872
3;878,158;108013,434;GHI;rez;2,735694704"""
result = parser.read_csv(StringIO(data), sep=";", decimal=",")
expected = DataFrame(
[
[1, 1521.1541, 187101.9543, "ABC", "poi", 4.738797819],
[2, 121.12, 14897.76, "DEF", "uyt", 0.377320872],
[3, 878.158, 108013.434, "GHI", "rez", 2.735694704],
],
columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"],
)
tm.assert_frame_equal(result, expected)
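
A compact sketch of the European number format handled above, assuming inline data:

from io import StringIO
import pandas as pd

data = "Id;Amount\n1;1.234,56\n2;7.890,12\n"
df = pd.read_csv(StringIO(data), sep=";", thousands=".", decimal=",")
# df["Amount"] is parsed as [1234.56, 7890.12]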


@@ -0,0 +1,478 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import (
BytesIO,
StringIO,
)
import os
import platform
from urllib.error import URLError
import uuid
import numpy as np
import pytest
from pandas.errors import (
EmptyDataError,
ParserError,
)
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.network
@pytest.mark.single_cpu
def test_url(all_parsers, csv_dir_path, httpserver):
parser = all_parsers
kwargs = {"sep": "\t"}
local_path = os.path.join(csv_dir_path, "salaries.csv")
with open(local_path, encoding="utf-8") as f:
httpserver.serve_content(content=f.read())
url_result = parser.read_csv(httpserver.url, **kwargs)
local_result = parser.read_csv(local_path, **kwargs)
tm.assert_frame_equal(url_result, local_result)
@pytest.mark.slow
def test_local_file(all_parsers, csv_dir_path):
parser = all_parsers
kwargs = {"sep": "\t"}
local_path = os.path.join(csv_dir_path, "salaries.csv")
local_result = parser.read_csv(local_path, **kwargs)
url = "file://localhost/" + local_path
try:
url_result = parser.read_csv(url, **kwargs)
tm.assert_frame_equal(url_result, local_result)
except URLError:
# Fails on some systems.
pytest.skip("Failing on: " + " ".join(platform.uname()))
@xfail_pyarrow # AssertionError: DataFrame.index are different
def test_path_path_lib(all_parsers):
parser = all_parsers
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
result = tm.round_trip_pathlib(df.to_csv, lambda p: parser.read_csv(p, index_col=0))
tm.assert_frame_equal(df, result)
@xfail_pyarrow # AssertionError: DataFrame.index are different
def test_path_local_path(all_parsers):
parser = all_parsers
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
index=Index([f"i-{i}" for i in range(30)]),
)
result = tm.round_trip_localpath(
df.to_csv, lambda p: parser.read_csv(p, index_col=0)
)
tm.assert_frame_equal(df, result)
def test_nonexistent_path(all_parsers):
# gh-2428: pls no segfault
# gh-14086: raise more helpful FileNotFoundError
# GH#29233 "File foo" instead of "File b'foo'"
parser = all_parsers
path = f"{uuid.uuid4()}.csv"
msg = r"\[Errno 2\]"
with pytest.raises(FileNotFoundError, match=msg) as e:
parser.read_csv(path)
assert path == e.value.filename
@td.skip_if_windows # os.chmod does not work in windows
def test_no_permission(all_parsers):
# GH 23784
parser = all_parsers
msg = r"\[Errno 13\]"
with tm.ensure_clean() as path:
os.chmod(path, 0) # make file unreadable
# verify that this process cannot open the file (not running as sudo)
try:
with open(path, encoding="utf-8"):
pass
pytest.skip("Running as sudo.")
except PermissionError:
pass
with pytest.raises(PermissionError, match=msg) as e:
parser.read_csv(path)
assert path == e.value.filename
@pytest.mark.parametrize(
"data,kwargs,expected,msg",
[
# gh-10728: WHITESPACE_LINE
(
"a,b,c\n4,5,6\n ",
{},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# gh-10548: EAT_LINE_COMMENT
(
"a,b,c\n4,5,6\n#comment",
{"comment": "#"},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# EAT_CRNL_NOP
(
"a,b,c\n4,5,6\n\r",
{},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# EAT_COMMENT
(
"a,b,c\n4,5,6#comment",
{"comment": "#"},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# SKIP_LINE
(
"a,b,c\n4,5,6\nskipme",
{"skiprows": [2]},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# EAT_LINE_COMMENT
(
"a,b,c\n4,5,6\n#comment",
{"comment": "#", "skip_blank_lines": False},
DataFrame([[4, 5, 6]], columns=["a", "b", "c"]),
None,
),
# IN_FIELD
(
"a,b,c\n4,5,6\n ",
{"skip_blank_lines": False},
DataFrame([["4", 5, 6], [" ", None, None]], columns=["a", "b", "c"]),
None,
),
# EAT_CRNL
(
"a,b,c\n4,5,6\n\r",
{"skip_blank_lines": False},
DataFrame([[4, 5, 6], [None, None, None]], columns=["a", "b", "c"]),
None,
),
# ESCAPED_CHAR
(
"a,b,c\n4,5,6\n\\",
{"escapechar": "\\"},
None,
"(EOF following escape character)|(unexpected end of data)",
),
# ESCAPE_IN_QUOTED_FIELD
(
'a,b,c\n4,5,6\n"\\',
{"escapechar": "\\"},
None,
"(EOF inside string starting at row 2)|(unexpected end of data)",
),
# IN_QUOTED_FIELD
(
'a,b,c\n4,5,6\n"',
{"escapechar": "\\"},
None,
"(EOF inside string starting at row 2)|(unexpected end of data)",
),
],
ids=[
"whitespace-line",
"eat-line-comment",
"eat-crnl-nop",
"eat-comment",
"skip-line",
"eat-line-comment",
"in-field",
"eat-crnl",
"escaped-char",
"escape-in-quoted-field",
"in-quoted-field",
],
)
def test_eof_states(all_parsers, data, kwargs, expected, msg, request):
# see gh-10728, gh-10548
parser = all_parsers
if parser.engine == "pyarrow" and "comment" in kwargs:
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
return
if parser.engine == "pyarrow" and "\r" not in data:
# pandas.errors.ParserError: CSV parse error: Expected 3 columns, got 1:
# ValueError: skiprows argument must be an integer when using engine='pyarrow'
# AssertionError: Regex pattern did not match.
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
if expected is None:
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
def test_temporary_file(all_parsers):
# see gh-13398
parser = all_parsers
data = "0 0"
with tm.ensure_clean(mode="w+", return_filelike=True) as new_file:
new_file.write(data)
new_file.flush()
new_file.seek(0)
if parser.engine == "pyarrow":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(new_file, sep=r"\s+", header=None)
return
result = parser.read_csv(new_file, sep=r"\s+", header=None)
expected = DataFrame([[0, 0]])
tm.assert_frame_equal(result, expected)
def test_internal_eof_byte(all_parsers):
# see gh-5500
parser = all_parsers
data = "a,b\n1\x1a,2"
expected = DataFrame([["1\x1a", 2]], columns=["a", "b"])
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
def test_internal_eof_byte_to_file(all_parsers):
# see gh-16559
parser = all_parsers
data = b'c1,c2\r\n"test \x1a test", test\r\n'
expected = DataFrame([["test \x1a test", " test"]], columns=["c1", "c2"])
path = f"__{uuid.uuid4()}__.csv"
with tm.ensure_clean(path) as path:
with open(path, "wb") as f:
f.write(data)
result = parser.read_csv(path)
tm.assert_frame_equal(result, expected)
def test_file_handle_string_io(all_parsers):
# gh-14418
#
# Don't close user provided file handles.
parser = all_parsers
data = "a,b\n1,2"
fh = StringIO(data)
parser.read_csv(fh)
assert not fh.closed
def test_file_handles_with_open(all_parsers, csv1):
# gh-14418
#
# Don't close user provided file handles.
parser = all_parsers
for mode in ["r", "rb"]:
with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f:
parser.read_csv(f)
assert not f.closed
def test_invalid_file_buffer_class(all_parsers):
# see gh-15337
class InvalidBuffer:
pass
parser = all_parsers
msg = "Invalid file path or buffer object type"
with pytest.raises(ValueError, match=msg):
parser.read_csv(InvalidBuffer())
def test_invalid_file_buffer_mock(all_parsers):
# see gh-15337
parser = all_parsers
msg = "Invalid file path or buffer object type"
class Foo:
pass
with pytest.raises(ValueError, match=msg):
parser.read_csv(Foo())
def test_valid_file_buffer_seems_invalid(all_parsers):
# gh-16135: we want to ensure that "tell" and "seek"
# aren't actually being used when we call `read_csv`
#
# Thus, while the object may look "invalid" (these
# methods are attributes of the `StringIO` class),
# it is still a valid file-object for our purposes.
class NoSeekTellBuffer(StringIO):
def tell(self):
raise AttributeError("No tell method")
def seek(self, pos, whence=0):
raise AttributeError("No seek method")
data = "a\n1"
parser = all_parsers
expected = DataFrame({"a": [1]})
result = parser.read_csv(NoSeekTellBuffer(data))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("io_class", [StringIO, BytesIO])
@pytest.mark.parametrize("encoding", [None, "utf-8"])
def test_read_csv_file_handle(all_parsers, io_class, encoding):
"""
Test whether read_csv does not close user-provided file handles.
GH 36980
"""
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
content = "a,b\n1,2"
handle = io_class(content.encode("utf-8") if io_class == BytesIO else content)
tm.assert_frame_equal(parser.read_csv(handle, encoding=encoding), expected)
assert not handle.closed
def test_memory_map_compression(all_parsers, compression):
"""
Support memory map for compressed files.
GH 37621
"""
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
with tm.ensure_clean() as path:
expected.to_csv(path, index=False, compression=compression)
if parser.engine == "pyarrow":
msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(path, memory_map=True, compression=compression)
return
result = parser.read_csv(path, memory_map=True, compression=compression)
tm.assert_frame_equal(
result,
expected,
)
def test_context_manager(all_parsers, datapath):
# make sure that opened files are closed
parser = all_parsers
path = datapath("io", "data", "csv", "iris.csv")
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(path, chunksize=1)
return
reader = parser.read_csv(path, chunksize=1)
assert not reader.handles.handle.closed
try:
with reader:
next(reader)
assert False
except AssertionError:
assert reader.handles.handle.closed
def test_context_manageri_user_provided(all_parsers, datapath):
# make sure that user-provided handles are not closed
parser = all_parsers
with open(datapath("io", "data", "csv", "iris.csv"), encoding="utf-8") as path:
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(path, chunksize=1)
return
reader = parser.read_csv(path, chunksize=1)
assert not reader.handles.handle.closed
try:
with reader:
next(reader)
assert False
except AssertionError:
assert not reader.handles.handle.closed
@skip_pyarrow # ParserError: Empty CSV file
def test_file_descriptor_leak(all_parsers, using_copy_on_write):
# GH 31488
parser = all_parsers
with tm.ensure_clean() as path:
with pytest.raises(EmptyDataError, match="No columns to parse from file"):
parser.read_csv(path)
def test_memory_map(all_parsers, csv_dir_path):
mmap_file = os.path.join(csv_dir_path, "test_mmap.csv")
parser = all_parsers
expected = DataFrame(
{"a": [1, 2, 3], "b": ["one", "two", "three"], "c": ["I", "II", "III"]}
)
if parser.engine == "pyarrow":
msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(mmap_file, memory_map=True)
return
result = parser.read_csv(mmap_file, memory_map=True)
tm.assert_frame_equal(result, expected)
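
A sketch of the handle-ownership behaviour tested above: read_csv leaves user-provided buffers open, and memory_map applies to paths it opens itself (the path below is hypothetical):

from io import StringIO
import pandas as pd

buf = StringIO("a,b\n1,2\n")
df = pd.read_csv(buf)
assert not buf.closed  # pandas does not close handles it did not open

# For an on-disk file, memory mapping can replace buffered reads:
# pd.read_csv("data.csv", memory_map=True)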


@@ -0,0 +1,79 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import numpy as np
import pytest
from pandas.compat import is_platform_linux
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@skip_pyarrow # ParserError: CSV parse error: Empty CSV file or block
def test_float_parser(all_parsers):
# see gh-9565
parser = all_parsers
data = "45e-1,4.5,45.,inf,-inf"
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame([[float(s) for s in data.split(",")]])
tm.assert_frame_equal(result, expected)
def test_scientific_no_exponent(all_parsers_all_precisions):
# see gh-12215
df = DataFrame.from_dict({"w": ["2e"], "x": ["3E"], "y": ["42e"], "z": ["632E"]})
data = df.to_csv(index=False)
parser, precision = all_parsers_all_precisions
df_roundtrip = parser.read_csv(StringIO(data), float_precision=precision)
tm.assert_frame_equal(df_roundtrip, df)
@pytest.mark.parametrize(
"neg_exp",
[
-617,
-100000,
pytest.param(-99999999999999999, marks=pytest.mark.skip_ubsan),
],
)
def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
# GH#38753
parser, precision = all_parsers_all_precisions
data = f"data\n10E{neg_exp}"
result = parser.read_csv(StringIO(data), float_precision=precision)
expected = DataFrame({"data": [0.0]})
tm.assert_frame_equal(result, expected)
@pytest.mark.skip_ubsan
@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
# GH#38753
parser, precision = all_parsers_all_precisions
data = f"data\n10E{exp}"
result = parser.read_csv(StringIO(data), float_precision=precision)
if precision == "round_trip":
if exp == 999999999999999999 and is_platform_linux():
mark = pytest.mark.xfail(reason="GH38794, on Linux gives object result")
request.applymarker(mark)
value = np.inf if exp > 0 else 0.0
expected = DataFrame({"data": [value]})
else:
expected = DataFrame({"data": [f"10E{exp}"]})
tm.assert_frame_equal(result, expected)
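
A minimal sketch of the float_precision option exercised above, with inline data:

from io import StringIO
import pandas as pd

data = "x\n0.123456789012345678\n"
fast = pd.read_csv(StringIO(data))  # default C-engine float parser
exact = pd.read_csv(StringIO(data), float_precision="round_trip")
# "round_trip" selects the slower converter whose result round-trips to the input text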


@@ -0,0 +1,304 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from datetime import datetime
from io import StringIO
import os
import pytest
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize(
"data,kwargs,expected",
[
(
"""foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
""",
{"index_col": 0, "names": ["index", "A", "B", "C", "D"]},
DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
index=Index(["foo", "bar", "baz", "qux", "foo2", "bar2"], name="index"),
columns=["A", "B", "C", "D"],
),
),
(
"""foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
""",
{"index_col": [0, 1], "names": ["index1", "index2", "A", "B", "C", "D"]},
DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
index=MultiIndex.from_tuples(
[
("foo", "one"),
("foo", "two"),
("foo", "three"),
("bar", "one"),
("bar", "two"),
],
names=["index1", "index2"],
),
columns=["A", "B", "C", "D"],
),
),
],
)
def test_pass_names_with_index(all_parsers, data, kwargs, expected):
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]])
def test_multi_index_no_level_names(
request, all_parsers, index_col, using_infer_string
):
data = """index1,index2,A,B,C,D
foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
"""
headless_data = "\n".join(data.split("\n")[1:])
names = ["A", "B", "C", "D"]
parser = all_parsers
result = parser.read_csv(
StringIO(headless_data), index_col=index_col, header=None, names=names
)
expected = parser.read_csv(StringIO(data), index_col=index_col)
# No index names in headless data.
expected.index.names = [None] * 2
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_multi_index_no_level_names_implicit(all_parsers):
parser = all_parsers
data = """A,B,C,D
foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
"""
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
columns=["A", "B", "C", "D"],
index=MultiIndex.from_tuples(
[
("foo", "one"),
("foo", "two"),
("foo", "three"),
("bar", "one"),
("bar", "two"),
]
),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"data,expected,header",
[
("a,b", DataFrame(columns=["a", "b"]), [0]),
(
"a,b\nc,d",
DataFrame(columns=MultiIndex.from_tuples([("a", "c"), ("b", "d")])),
[0, 1],
),
],
)
@pytest.mark.parametrize("round_trip", [True, False])
def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip):
# see gh-14545
parser = all_parsers
data = expected.to_csv(index=False) if round_trip else data
result = parser.read_csv(StringIO(data), header=header)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # AssertionError: DataFrame.columns are different
def test_no_unnamed_index(all_parsers):
parser = all_parsers
data = """ id c0 c1 c2
0 1 0 a b
1 2 0 c d
2 2 2 e f
"""
result = parser.read_csv(StringIO(data), sep=" ")
expected = DataFrame(
[[0, 1, 0, "a", "b"], [1, 2, 0, "c", "d"], [2, 2, 2, "e", "f"]],
columns=["Unnamed: 0", "id", "c0", "c1", "c2"],
)
tm.assert_frame_equal(result, expected)
def test_read_duplicate_index_explicit(all_parsers):
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo,12,13,14,15
bar,12,13,14,15
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=0)
expected = DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
columns=["A", "B", "C", "D"],
index=Index(["foo", "bar", "baz", "qux", "foo", "bar"], name="index"),
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_read_duplicate_index_implicit(all_parsers):
data = """A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo,12,13,14,15
bar,12,13,14,15
"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[
[2, 3, 4, 5],
[7, 8, 9, 10],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
[12, 13, 14, 15],
],
columns=["A", "B", "C", "D"],
index=Index(["foo", "bar", "baz", "qux", "foo", "bar"]),
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_read_csv_no_index_name(all_parsers, csv_dir_path):
parser = all_parsers
csv2 = os.path.join(csv_dir_path, "test2.csv")
result = parser.read_csv(csv2, index_col=0, parse_dates=True)
expected = DataFrame(
[
[0.980269, 3.685731, -0.364216805298, -1.159738, "foo"],
[1.047916, -0.041232, -0.16181208307, 0.212549, "bar"],
[0.498581, 0.731168, -0.537677223318, 1.346270, "baz"],
[1.120202, 1.567621, 0.00364077397681, 0.675253, "qux"],
[-0.487094, 0.571455, -1.6116394093, 0.103469, "foo2"],
],
columns=["A", "B", "C", "D", "E"],
index=Index(
[
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
datetime(2000, 1, 6),
datetime(2000, 1, 7),
]
),
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow
def test_empty_with_index(all_parsers):
# see gh-10184
data = "x,y"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=0)
expected = DataFrame(columns=["y"], index=Index([], name="x"))
tm.assert_frame_equal(result, expected)
# CSV parse error: Empty CSV file or block: cannot infer number of columns
@skip_pyarrow
def test_empty_with_multi_index(all_parsers):
# see gh-10467
data = "x,y,z"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=["x", "y"])
expected = DataFrame(
columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["x", "y"])
)
tm.assert_frame_equal(result, expected)
# CSV parse error: Empty CSV file or block: cannot infer number of columns
@skip_pyarrow
def test_empty_with_reversed_multi_index(all_parsers):
data = "x,y,z"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=[1, 0])
expected = DataFrame(
columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["y", "x"])
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,78 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
DataFrame,
option_context,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
@xfail_pyarrow # AssertionError: DataFrame.index are different
@pytest.mark.parametrize("na_filter", [True, False])
def test_inf_parsing(all_parsers, na_filter):
parser = all_parsers
data = """\
,A
a,inf
b,-inf
c,+Inf
d,-Inf
e,INF
f,-INF
g,+INf
h,-INf
i,inF
j,-inF"""
expected = DataFrame(
{"A": [float("inf"), float("-inf")] * 5},
index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"],
)
result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # AssertionError: DataFrame.index are different
@pytest.mark.parametrize("na_filter", [True, False])
def test_infinity_parsing(all_parsers, na_filter):
parser = all_parsers
data = """\
,A
a,Infinity
b,-Infinity
c,+Infinity
"""
expected = DataFrame(
{"A": [float("infinity"), float("-infinity"), float("+infinity")]},
index=["a", "b", "c"],
)
result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter)
tm.assert_frame_equal(result, expected)
def test_read_csv_with_use_inf_as_na(all_parsers):
# https://github.com/pandas-dev/pandas/issues/35493
parser = all_parsers
data = "1.0\nNaN\n3.0"
msg = "use_inf_as_na option is deprecated"
warn = FutureWarning
if parser.engine == "pyarrow":
warn = (FutureWarning, DeprecationWarning)
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
with option_context("use_inf_as_na", True):
result = parser.read_csv(StringIO(data), header=None)
expected = DataFrame([1.0, np.nan, 3.0])
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,231 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
def test_int_conversion(all_parsers):
data = """A,B
1.0,1
2.0,2
3.0,3
"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["A", "B"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data,kwargs,expected",
[
(
"A,B\nTrue,1\nFalse,2\nTrue,3",
{},
DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]),
),
(
"A,B\nYES,1\nno,2\nyes,3\nNo,3\nYes,3",
{"true_values": ["yes", "Yes", "YES"], "false_values": ["no", "NO", "No"]},
DataFrame(
[[True, 1], [False, 2], [True, 3], [False, 3], [True, 3]],
columns=["A", "B"],
),
),
(
"A,B\nTRUE,1\nFALSE,2\nTRUE,3",
{},
DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]),
),
(
"A,B\nfoo,bar\nbar,foo",
{"true_values": ["foo"], "false_values": ["bar"]},
DataFrame([[True, False], [False, True]], columns=["A", "B"]),
),
],
)
def test_parse_bool(all_parsers, data, kwargs, expected):
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
def test_parse_integers_above_fp_precision(all_parsers):
data = """Numbers
17007000002000191
17007000002000191
17007000002000191
17007000002000191
17007000002000192
17007000002000192
17007000002000192
17007000002000192
17007000002000192
17007000002000194"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame(
{
"Numbers": [
17007000002000191,
17007000002000191,
17007000002000191,
17007000002000191,
17007000002000192,
17007000002000192,
17007000002000192,
17007000002000192,
17007000002000192,
17007000002000194,
]
}
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("sep", [" ", r"\s+"])
def test_integer_overflow_bug(all_parsers, sep):
# see gh-2601
data = "65248E10 11\n55555E55 22\n"
parser = all_parsers
if parser.engine == "pyarrow" and sep != " ":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=None, sep=sep)
return
result = parser.read_csv(StringIO(data), header=None, sep=sep)
expected = DataFrame([[6.5248e14, 11], [5.5555e59, 22]])
tm.assert_frame_equal(result, expected)
def test_int64_min_issues(all_parsers):
# see gh-2599
parser = all_parsers
data = "A,B\n0,0\n0,"
result = parser.read_csv(StringIO(data))
expected = DataFrame({"A": [0, 0], "B": [0, np.nan]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("conv", [None, np.int64, np.uint64])
def test_int64_overflow(all_parsers, conv, request):
data = """ID
00013007854817840016671868
00013007854817840016749251
00013007854817840016754630
00013007854817840016781876
00013007854817840017028824
00013007854817840017963235
00013007854817840018860166"""
parser = all_parsers
if conv is None:
# 13007854817840016671868 > UINT64_MAX, so this
# will overflow and return object as the dtype.
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(reason="parses to float64")
request.applymarker(mark)
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[
"00013007854817840016671868",
"00013007854817840016749251",
"00013007854817840016754630",
"00013007854817840016781876",
"00013007854817840017028824",
"00013007854817840017963235",
"00013007854817840018860166",
],
columns=["ID"],
)
tm.assert_frame_equal(result, expected)
else:
# 13007854817840016671868 > UINT64_MAX, so attempts
# to cast to either int64 or uint64 will result in
# an OverflowError being raised.
msg = "|".join(
[
"Python int too large to convert to C long",
"long too big to convert",
"int too big to convert",
]
)
err = OverflowError
if parser.engine == "pyarrow":
err = ValueError
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(err, match=msg):
parser.read_csv(StringIO(data), converters={"ID": conv})
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
)
def test_int64_uint64_range(all_parsers, val):
    # These numbers fall right at the int64/uint64 boundaries,
    # so they can still be parsed as integers rather than strings.
parser = all_parsers
result = parser.read_csv(StringIO(str(val)), header=None)
expected = DataFrame([val])
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
def test_outside_int64_uint64_range(all_parsers, val):
# These numbers fall just outside the int64-uint64
# range, so they should be parsed as string.
parser = all_parsers
result = parser.read_csv(StringIO(str(val)), header=None)
expected = DataFrame([str(val)])
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # gets float64 dtype instead of object
@pytest.mark.parametrize("exp_data", [[str(-1), str(2**63)], [str(2**63), str(-1)]])
def test_numeric_range_too_wide(all_parsers, exp_data):
# No numerical dtype can hold both negative and uint64
# values, so they should be cast as string.
parser = all_parsers
data = "\n".join(exp_data)
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), header=None)
tm.assert_frame_equal(result, expected)
def test_integer_precision(all_parsers):
# Gh 7072
s = """1,1;0;0;0;1;1;3844;3844;3844;1;1;1;1;1;1;0;0;1;1;0;0,,,4321583677327450765
5,1;0;0;0;1;1;843;843;843;1;1;1;1;1;1;0;0;1;1;0;0,64.0,;,4321113141090630389"""
parser = all_parsers
result = parser.read_csv(StringIO(s), header=None)[4]
expected = Series([4321583677327450765, 4321113141090630389], name=4)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,134 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import pytest
from pandas import (
DataFrame,
concat,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
def test_iterator(all_parsers):
# see gh-6607
data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
parser = all_parsers
kwargs = {"index_col": 0}
expected = parser.read_csv(StringIO(data), **kwargs)
if parser.engine == "pyarrow":
msg = "The 'iterator' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), iterator=True, **kwargs)
return
with parser.read_csv(StringIO(data), iterator=True, **kwargs) as reader:
first_chunk = reader.read(3)
tm.assert_frame_equal(first_chunk, expected[:3])
last_chunk = reader.read(5)
tm.assert_frame_equal(last_chunk, expected[3:])
def test_iterator2(all_parsers):
parser = all_parsers
data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
if parser.engine == "pyarrow":
msg = "The 'iterator' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), iterator=True)
return
with parser.read_csv(StringIO(data), iterator=True) as reader:
result = list(reader)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(result[0], expected)
def test_iterator_stop_on_chunksize(all_parsers):
# gh-3967: stopping iteration when chunksize is specified
parser = all_parsers
data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), chunksize=1)
return
with parser.read_csv(StringIO(data), chunksize=1) as reader:
result = list(reader)
assert len(result) == 3
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(concat(result), expected)
@pytest.mark.parametrize(
"kwargs", [{"iterator": True, "chunksize": 1}, {"iterator": True}, {"chunksize": 1}]
)
def test_iterator_skipfooter_errors(all_parsers, kwargs):
msg = "'skipfooter' not supported for iteration"
parser = all_parsers
data = "a\n1\n2"
if parser.engine == "pyarrow":
msg = (
"The '(chunksize|iterator)' option is not supported with the "
"'pyarrow' engine"
)
with pytest.raises(ValueError, match=msg):
with parser.read_csv(StringIO(data), skipfooter=1, **kwargs) as _:
pass
def test_iteration_open_handle(all_parsers):
parser = all_parsers
kwargs = {"header": None}
with tm.ensure_clean() as path:
with open(path, "w", encoding="utf-8") as f:
f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
with open(path, encoding="utf-8") as f:
for line in f:
if "CCC" in line:
break
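            # read_csv continues from the handle's current position, i.e. from
            # the line after "CCC".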
result = parser.read_csv(f, **kwargs)
expected = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,320 @@
"""
Tests that work on the Python, C and PyArrow engines but do not have a
specific classification into the other test modules.
"""
import codecs
import csv
from io import StringIO
import os
from pathlib import Path
import numpy as np
import pytest
from pandas.compat import PY311
from pandas.errors import (
EmptyDataError,
ParserError,
ParserWarning,
)
from pandas import DataFrame
import pandas._testing as tm
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
def test_empty_decimal_marker(all_parsers):
data = """A|B|C
1|2,334|5
10|13|10.
"""
# Parsers support only length-1 decimals
msg = "Only length-1 decimal markers supported"
parser = all_parsers
if parser.engine == "pyarrow":
msg = (
"only single character unicode strings can be "
"converted to Py_UCS4, got length 0"
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), decimal="")
def test_bad_stream_exception(all_parsers, csv_dir_path):
# see gh-13652
#
# This test validates that both the Python engine and C engine will
# raise UnicodeDecodeError instead of C engine raising ParserError
# and swallowing the exception that caused read to fail.
path = os.path.join(csv_dir_path, "sauron.SHIFT_JIS.csv")
codec = codecs.lookup("utf-8")
utf8 = codecs.lookup("utf-8")
parser = all_parsers
msg = "'utf-8' codec can't decode byte"
# Stream must be binary UTF8.
with open(path, "rb") as handle, codecs.StreamRecoder(
handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter
) as stream:
with pytest.raises(UnicodeDecodeError, match=msg):
parser.read_csv(stream)
def test_malformed(all_parsers):
# see gh-6607
parser = all_parsers
data = """ignore
A,B,C
1,2,3 # comment
1,2,3,4,5
2,3,4
"""
msg = "Expected 3 fields in line 4, saw 5"
err = ParserError
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
err = ValueError
with pytest.raises(err, match=msg):
parser.read_csv(StringIO(data), header=1, comment="#")
@pytest.mark.parametrize("nrows", [5, 3, None])
def test_malformed_chunks(all_parsers, nrows):
data = """ignore
A,B,C
skip
1,2,3
3,5,10 # comment
1,2,3,4,5
2,3,4
"""
parser = all_parsers
if parser.engine == "pyarrow":
msg = "The 'iterator' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
header=1,
comment="#",
iterator=True,
chunksize=1,
skiprows=[2],
)
return
msg = "Expected 3 fields in line 6, saw 5"
with parser.read_csv(
StringIO(data), header=1, comment="#", iterator=True, chunksize=1, skiprows=[2]
) as reader:
with pytest.raises(ParserError, match=msg):
reader.read(nrows)
@xfail_pyarrow # does not raise
def test_catch_too_many_names(all_parsers):
# see gh-5156
data = """\
1,2,3
4,,6
7,8,9
10,11,12\n"""
parser = all_parsers
msg = (
"Too many columns specified: expected 4 and found 3"
if parser.engine == "c"
else "Number of passed names did not match "
"number of header fields in the file"
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
def test_raise_on_no_columns(all_parsers, nrows):
parser = all_parsers
data = "\n" * nrows
msg = "No columns to parse from file"
with pytest.raises(EmptyDataError, match=msg):
parser.read_csv(StringIO(data))
def test_unexpected_keyword_parameter_exception(all_parsers):
# GH-34976
parser = all_parsers
msg = "{}\\(\\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg.format("read_csv")):
parser.read_csv("foo.csv", foo=1)
with pytest.raises(TypeError, match=msg.format("read_table")):
parser.read_table("foo.tsv", foo=1)
def test_suppress_error_output(all_parsers):
# see gh-15925
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
expected = DataFrame({"a": [1, 4]})
result = parser.read_csv(StringIO(data), on_bad_lines="skip")
tm.assert_frame_equal(result, expected)
def test_error_bad_lines(all_parsers):
# see gh-15925
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
msg = "Expected 1 fields in line 3, saw 3"
if parser.engine == "pyarrow":
# "CSV parse error: Expected 1 columns, got 3: 1,2,3"
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), on_bad_lines="error")
def test_warn_bad_lines(all_parsers):
# see gh-15925
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
expected = DataFrame({"a": [1, 4]})
match_msg = "Skipping line"
expected_warning = ParserWarning
if parser.engine == "pyarrow":
match_msg = "Expected 1 columns, but found 3: 1,2,3"
expected_warning = (ParserWarning, DeprecationWarning)
with tm.assert_produces_warning(
expected_warning, match=match_msg, check_stacklevel=False
):
result = parser.read_csv(StringIO(data), on_bad_lines="warn")
tm.assert_frame_equal(result, expected)
def test_read_csv_wrong_num_columns(all_parsers):
    # The header row declares fewer columns than some of the data rows contain.
data = """A,B,C,D,E,F
1,2,3,4,5,6
6,7,8,9,10,11,12
11,12,13,14,15,16
"""
parser = all_parsers
msg = "Expected 6 fields in line 3, saw 7"
if parser.engine == "pyarrow":
# Expected 6 columns, got 7: 6,7,8,9,10,11,12
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data))
def test_null_byte_char(request, all_parsers):
# see gh-2741
data = "\x00,foo"
names = ["a", "b"]
parser = all_parsers
if parser.engine == "c" or (parser.engine == "python" and PY311):
if parser.engine == "python" and PY311:
request.applymarker(
pytest.mark.xfail(
reason="In Python 3.11, this is read as an empty character not null"
)
)
expected = DataFrame([[np.nan, "foo"]], columns=names)
out = parser.read_csv(StringIO(data), names=names)
tm.assert_frame_equal(out, expected)
else:
if parser.engine == "pyarrow":
# CSV parse error: Empty CSV file or block: "
# cannot infer number of columns"
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
else:
msg = "NULL byte detected"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), names=names)
@pytest.mark.filterwarnings("always::ResourceWarning")
def test_open_file(request, all_parsers):
# GH 39024
parser = all_parsers
msg = "Could not determine delimiter"
err = csv.Error
if parser.engine == "c":
msg = "the 'c' engine does not support sep=None with delim_whitespace=False"
err = ValueError
elif parser.engine == "pyarrow":
msg = (
"the 'pyarrow' engine does not support sep=None with delim_whitespace=False"
)
err = ValueError
with tm.ensure_clean() as path:
file = Path(path)
file.write_bytes(b"\xe4\na\n1")
with tm.assert_produces_warning(None):
# should not trigger a ResourceWarning
with pytest.raises(err, match=msg):
parser.read_csv(file, sep=None, encoding_errors="replace")
def test_invalid_on_bad_line(all_parsers):
parser = all_parsers
data = "a\n1\n1,2,3\n4\n5,6,7"
with pytest.raises(ValueError, match="Argument abc is invalid for on_bad_lines"):
parser.read_csv(StringIO(data), on_bad_lines="abc")
def test_bad_header_uniform_error(all_parsers):
parser = all_parsers
data = "+++123456789...\ncol1,col2,col3,col4\n1,2,3,4\n"
msg = "Expected 2 fields in line 2, saw 4"
if parser.engine == "c":
msg = (
"Could not construct index. Requested to use 1 "
"number of columns, but 3 left to parse."
)
elif parser.engine == "pyarrow":
# "CSV parse error: Expected 1 columns, got 4: col1,col2,col3,col4"
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), index_col=0, on_bad_lines="error")
def test_on_bad_lines_warn_correct_formatting(all_parsers):
# see gh-15925
parser = all_parsers
data = """1,2
a,b
a,b,c
a,b,d
a,b
"""
expected = DataFrame({"1": "a", "2": ["b"] * 2})
match_msg = "Skipping line"
expected_warning = ParserWarning
if parser.engine == "pyarrow":
match_msg = "Expected 2 columns, but found 3: a,b,c"
expected_warning = (ParserWarning, DeprecationWarning)
with tm.assert_produces_warning(
expected_warning, match=match_msg, check_stacklevel=False
):
result = parser.read_csv(StringIO(data), on_bad_lines="warn")
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,81 @@
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
import pytest
import pandas._testing as tm
depr_msg = "The 'verbose' keyword in pd.read_csv is deprecated"
def test_verbose_read(all_parsers, capsys):
parser = all_parsers
data = """a,b,c,d
one,1,2,3
one,1,2,3
,1,2,3
one,1,2,3
,1,2,3
,1,2,3
one,1,2,3
two,1,2,3"""
if parser.engine == "pyarrow":
msg = "The 'verbose' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data), verbose=True)
return
# Engines are verbose in different ways.
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data), verbose=True)
captured = capsys.readouterr()
if parser.engine == "c":
assert "Tokenization took:" in captured.out
assert "Parser memory cleanup took:" in captured.out
else: # Python engine
assert captured.out == "Filled 3 NA values in column a\n"
def test_verbose_read2(all_parsers, capsys):
parser = all_parsers
data = """a,b,c,d
one,1,2,3
two,1,2,3
three,1,2,3
four,1,2,3
five,1,2,3
,1,2,3
seven,1,2,3
eight,1,2,3"""
if parser.engine == "pyarrow":
msg = "The 'verbose' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data), verbose=True, index_col=0)
return
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data), verbose=True, index_col=0)
captured = capsys.readouterr()
# Engines are verbose in different ways.
if parser.engine == "c":
assert "Tokenization took:" in captured.out
assert "Parser memory cleanup took:" in captured.out
else: # Python engine
assert captured.out == "Filled 1 NA values in column a\n"

View File

@ -0,0 +1,337 @@
from __future__ import annotations
import os
import pytest
from pandas.compat import HAS_PYARROW
from pandas.compat._optional import VERSIONS
from pandas import (
read_csv,
read_table,
)
import pandas._testing as tm
class BaseParser:
engine: str | None = None
low_memory = True
float_precision_choices: list[str | None] = []
def update_kwargs(self, kwargs):
kwargs = kwargs.copy()
kwargs.update({"engine": self.engine, "low_memory": self.low_memory})
return kwargs
def read_csv(self, *args, **kwargs):
kwargs = self.update_kwargs(kwargs)
return read_csv(*args, **kwargs)
def read_csv_check_warnings(
self,
warn_type: type[Warning],
warn_msg: str,
*args,
raise_on_extra_warnings=True,
check_stacklevel: bool = True,
**kwargs,
):
# We need to check the stacklevel here instead of in the tests
# since this is where read_csv is called and where the warning
# should point to.
kwargs = self.update_kwargs(kwargs)
with tm.assert_produces_warning(
warn_type,
match=warn_msg,
raise_on_extra_warnings=raise_on_extra_warnings,
check_stacklevel=check_stacklevel,
):
return read_csv(*args, **kwargs)
def read_table(self, *args, **kwargs):
kwargs = self.update_kwargs(kwargs)
return read_table(*args, **kwargs)
def read_table_check_warnings(
self,
warn_type: type[Warning],
warn_msg: str,
*args,
raise_on_extra_warnings=True,
**kwargs,
):
# We need to check the stacklevel here instead of in the tests
# since this is where read_table is called and where the warning
# should point to.
kwargs = self.update_kwargs(kwargs)
with tm.assert_produces_warning(
warn_type, match=warn_msg, raise_on_extra_warnings=raise_on_extra_warnings
):
return read_table(*args, **kwargs)
class CParser(BaseParser):
engine = "c"
float_precision_choices = [None, "high", "round_trip"]
class CParserHighMemory(CParser):
low_memory = False
class CParserLowMemory(CParser):
low_memory = True
class PythonParser(BaseParser):
engine = "python"
float_precision_choices = [None]
class PyArrowParser(BaseParser):
engine = "pyarrow"
float_precision_choices = [None]
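# For example (illustrative): CParserLowMemory().read_csv(buf, dtype=str) ends up
# calling read_csv(buf, dtype=str, engine="c", low_memory=True), because
# update_kwargs merges the class's pinned engine/low_memory into the keyword
# arguments supplied by the test.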
@pytest.fixture
def csv_dir_path(datapath):
"""
The directory path to the data files needed for parser tests.
"""
return datapath("io", "parser", "data")
@pytest.fixture
def csv1(datapath):
"""
The path to the data file "test1.csv" needed for parser tests.
"""
return os.path.join(datapath("io", "data", "csv"), "test1.csv")
_cParserHighMemory = CParserHighMemory
_cParserLowMemory = CParserLowMemory
_pythonParser = PythonParser
_pyarrowParser = PyArrowParser
_py_parsers_only = [_pythonParser]
_c_parsers_only = [_cParserHighMemory, _cParserLowMemory]
_pyarrow_parsers_only = [
pytest.param(
_pyarrowParser,
marks=[
pytest.mark.single_cpu,
pytest.mark.skipif(not HAS_PYARROW, reason="pyarrow is not installed"),
],
)
]
_all_parsers = [*_c_parsers_only, *_py_parsers_only, *_pyarrow_parsers_only]
_py_parser_ids = ["python"]
_c_parser_ids = ["c_high", "c_low"]
_pyarrow_parsers_ids = ["pyarrow"]
_all_parser_ids = [*_c_parser_ids, *_py_parser_ids, *_pyarrow_parsers_ids]
@pytest.fixture(params=_all_parsers, ids=_all_parser_ids)
def all_parsers(request):
"""
    Fixture for all of the CSV parsers.
"""
parser = request.param()
if parser.engine == "pyarrow":
pytest.importorskip("pyarrow", VERSIONS["pyarrow"])
        # Try finding a way to disable threads altogether
# for more stable CI runs
import pyarrow
pyarrow.set_cpu_count(1)
return parser
@pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids)
def c_parser_only(request):
"""
    Fixture for all of the CSV parsers using the C engine.
"""
return request.param()
@pytest.fixture(params=_py_parsers_only, ids=_py_parser_ids)
def python_parser_only(request):
"""
    Fixture for all of the CSV parsers using the Python engine.
"""
return request.param()
@pytest.fixture(params=_pyarrow_parsers_only, ids=_pyarrow_parsers_ids)
def pyarrow_parser_only(request):
"""
    Fixture for all of the CSV parsers using the PyArrow engine.
"""
return request.param()
def _get_all_parser_float_precision_combinations():
"""
Return all allowable parser and float precision
combinations and corresponding ids.
"""
params = []
ids = []
for parser, parser_id in zip(_all_parsers, _all_parser_ids):
if hasattr(parser, "values"):
# Wrapped in pytest.param, get the actual parser back
parser = parser.values[0]
for precision in parser.float_precision_choices:
# Re-wrap in pytest.param for pyarrow
mark = (
[
pytest.mark.single_cpu,
pytest.mark.skipif(
not HAS_PYARROW, reason="pyarrow is not installed"
),
]
if parser.engine == "pyarrow"
else ()
)
param = pytest.param((parser(), precision), marks=mark)
params.append(param)
ids.append(f"{parser_id}-{precision}")
return {"params": params, "ids": ids}
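# With the parsers defined above this produces ids such as "c_high-None",
# "c_high-high", "c_high-round_trip", "c_low-None", ..., "python-None" and
# "pyarrow-None"; the pyarrow entry keeps its single_cpu/skipif marks through
# pytest.param.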
@pytest.fixture(
params=_get_all_parser_float_precision_combinations()["params"],
ids=_get_all_parser_float_precision_combinations()["ids"],
)
def all_parsers_all_precisions(request):
"""
Fixture for all allowable combinations of parser
and float precision
"""
return request.param
_utf_values = [8, 16, 32]
_encoding_seps = ["", "-", "_"]
_encoding_prefixes = ["utf", "UTF"]
_encoding_fmts = [
f"{prefix}{sep}{{0}}" for sep in _encoding_seps for prefix in _encoding_prefixes
]
@pytest.fixture(params=_utf_values)
def utf_value(request):
"""
Fixture for all possible integer values for a UTF encoding.
"""
return request.param
@pytest.fixture(params=_encoding_fmts)
def encoding_fmt(request):
"""
Fixture for all possible string formats of a UTF encoding.
"""
return request.param
@pytest.fixture(
params=[
("-1,0", -1.0),
("-1,2e0", -1.2),
("-1e0", -1.0),
("+1e0", 1.0),
("+1e+0", 1.0),
("+1e-1", 0.1),
("+,1e1", 1.0),
("+1,e0", 1.0),
("-,1e1", -1.0),
("-1,e0", -1.0),
("0,1", 0.1),
("1,", 1.0),
(",1", 0.1),
("-,1", -0.1),
("1_,", 1.0),
("1_234,56", 1234.56),
("1_234,56e0", 1234.56),
# negative cases; must not parse as float
("_", "_"),
("-_", "-_"),
("-_1", "-_1"),
("-_1e0", "-_1e0"),
("_1", "_1"),
("_1,", "_1,"),
("_1,_", "_1,_"),
("_1e0", "_1e0"),
("1,2e_1", "1,2e_1"),
("1,2e1_0", "1,2e1_0"),
("1,_2", "1,_2"),
(",1__2", ",1__2"),
(",1e", ",1e"),
("-,1e", "-,1e"),
("1_000,000_000", "1_000,000_000"),
("1,e1_2", "1,e1_2"),
("e11,2", "e11,2"),
("1e11,2", "1e11,2"),
("1,2,2", "1,2,2"),
("1,2_1", "1,2_1"),
("1,2e-10e1", "1,2e-10e1"),
("--1,2", "--1,2"),
("1a_2,1", "1a_2,1"),
("1,2E-1", 0.12),
("1,2E1", 12.0),
]
)
def numeric_decimal(request):
"""
    Fixture for numeric formats which should be recognized, using "," as the
    decimal separator and "_" as the thousands separator. The first entry of each
    tuple is the text to read; the second is the expected result (a string result
    marks text that must not parse as a float).
"""
return request.param
@pytest.fixture
def pyarrow_xfail(request):
"""
Fixture that xfails a test if the engine is pyarrow.
    Use if the failure is due to unsupported keywords or inconsistent results.
"""
if "all_parsers" in request.fixturenames:
parser = request.getfixturevalue("all_parsers")
elif "all_parsers_all_precisions" in request.fixturenames:
# Return value is tuple of (engine, precision)
parser = request.getfixturevalue("all_parsers_all_precisions")[0]
else:
return
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.applymarker(mark)
@pytest.fixture
def pyarrow_skip(request):
"""
Fixture that skips a test if the engine is pyarrow.
    Use if the failure is due to a parsing failure from pyarrow.csv.read_csv.
"""
if "all_parsers" in request.fixturenames:
parser = request.getfixturevalue("all_parsers")
elif "all_parsers_all_precisions" in request.fixturenames:
# Return value is tuple of (engine, precision)
parser = request.getfixturevalue("all_parsers_all_precisions")[0]
else:
return
if parser.engine == "pyarrow":
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")

View File

@ -0,0 +1,334 @@
"""
Tests dtype specification during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import os
import numpy as np
import pytest
from pandas._libs import parsers as libparsers
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas as pd
from pandas import (
Categorical,
DataFrame,
Timestamp,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
@pytest.mark.parametrize(
"dtype",
[
"category",
CategoricalDtype(),
{"a": "category", "b": "category", "c": CategoricalDtype()},
],
)
def test_categorical_dtype(all_parsers, dtype):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,a,3.4
1,a,3.4
2,b,4.5"""
expected = DataFrame(
{
"a": Categorical(["1", "1", "2"]),
"b": Categorical(["a", "a", "b"]),
"c": Categorical(["3.4", "3.4", "4.5"]),
}
)
actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)
@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}])
def test_categorical_dtype_single(all_parsers, dtype, request):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,a,3.4
1,a,3.4
2,b,4.5"""
expected = DataFrame(
{"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]}
)
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(
strict=False,
reason="Flaky test sometimes gives object dtype instead of Categorical",
)
request.applymarker(mark)
actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
def test_categorical_dtype_unsorted(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,b,3.4
1,b,3.4
2,a,4.5"""
expected = DataFrame(
{
"a": Categorical(["1", "1", "2"]),
"b": Categorical(["b", "b", "a"]),
"c": Categorical(["3.4", "3.4", "4.5"]),
}
)
actual = parser.read_csv(StringIO(data), dtype="category")
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
def test_categorical_dtype_missing(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b,c
1,b,3.4
1,nan,3.4
2,a,4.5"""
expected = DataFrame(
{
"a": Categorical(["1", "1", "2"]),
"b": Categorical(["b", np.nan, "a"]),
"c": Categorical(["3.4", "3.4", "4.5"]),
}
)
actual = parser.read_csv(StringIO(data), dtype="category")
tm.assert_frame_equal(actual, expected)
@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
@pytest.mark.slow
def test_categorical_dtype_high_cardinality_numeric(all_parsers, monkeypatch):
# see gh-18186
# was an issue with C parser, due to DEFAULT_BUFFER_HEURISTIC
parser = all_parsers
heuristic = 2**5
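    # Shrink the C parser's DEFAULT_BUFFER_HEURISTIC (patched below) so that even
    # this small 33-row input exercises the chunked-read path behind gh-18186.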
data = np.sort([str(i) for i in range(heuristic + 1)])
expected = DataFrame({"a": Categorical(data, ordered=True)})
with monkeypatch.context() as m:
m.setattr(libparsers, "DEFAULT_BUFFER_HEURISTIC", heuristic)
actual = parser.read_csv(StringIO("a\n" + "\n".join(data)), dtype="category")
actual["a"] = actual["a"].cat.reorder_categories(
np.sort(actual.a.cat.categories), ordered=True
)
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_utf16(all_parsers, csv_dir_path):
# see gh-10153
pth = os.path.join(csv_dir_path, "utf16_ex.txt")
parser = all_parsers
encoding = "utf-16"
sep = "\t"
expected = parser.read_csv(pth, sep=sep, encoding=encoding)
expected = expected.apply(Categorical)
actual = parser.read_csv(pth, sep=sep, encoding=encoding, dtype="category")
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_chunksize_infer_categories(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
expecteds = [
DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}),
DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]),
]
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dtype={"b": "category"}, chunksize=2)
return
with parser.read_csv(
StringIO(data), dtype={"b": "category"}, chunksize=2
) as actuals:
for actual, expected in zip(actuals, expecteds):
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
# see gh-10153
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
cats = ["a", "b", "c"]
expecteds = [
DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}),
DataFrame(
{"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)},
index=[2, 3],
),
]
dtype = CategoricalDtype(cats)
if parser.engine == "pyarrow":
msg = "The 'chunksize' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2)
return
with parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) as actuals:
for actual, expected in zip(actuals, expecteds):
tm.assert_frame_equal(actual, expected)
def test_categorical_dtype_latin1(all_parsers, csv_dir_path):
# see gh-10153
pth = os.path.join(csv_dir_path, "unicode_series.csv")
parser = all_parsers
encoding = "latin-1"
expected = parser.read_csv(pth, header=None, encoding=encoding)
expected[1] = Categorical(expected[1])
actual = parser.read_csv(pth, header=None, encoding=encoding, dtype={1: "category"})
tm.assert_frame_equal(actual, expected)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize(
"categories",
[["a", "b", "c"], ["a", "c", "b"], ["a", "b", "c", "d"], ["c", "b", "a"]],
)
def test_categorical_category_dtype(all_parsers, categories, ordered):
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
expected = DataFrame(
{
"a": [1, 1, 1, 2],
"b": Categorical(
["a", "b", "b", "c"], categories=categories, ordered=ordered
),
}
)
dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)}
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_category_dtype_unsorted(all_parsers):
parser = all_parsers
data = """a,b
1,a
1,b
1,b
2,c"""
dtype = CategoricalDtype(["c", "b", "a"])
expected = DataFrame(
{
"a": [1, 1, 1, 2],
"b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"]),
}
)
result = parser.read_csv(StringIO(data), dtype={"b": dtype})
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_numeric(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype([1, 2, 3])}
data = "b\n1\n1\n2\n3"
expected = DataFrame({"b": Categorical([1, 1, 2, 3])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_datetime(all_parsers):
parser = all_parsers
dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None)
dtype = {"b": CategoricalDtype(dti)}
data = "b\n2017-01-01\n2018-01-01\n2019-01-01"
expected = DataFrame({"b": Categorical(dtype["b"].categories)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_timestamp(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype([Timestamp("2014")])}
data = "b\n2014-01-01\n2014-01-01"
expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_coerces_timedelta(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype(pd.to_timedelta(["1h", "2h", "3h"]))}
data = "b\n1h\n2h\n3h"
expected = DataFrame({"b": Categorical(dtype["b"].categories)})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data",
[
"b\nTrue\nFalse\nNA\nFalse",
"b\ntrue\nfalse\nNA\nfalse",
"b\nTRUE\nFALSE\nNA\nFALSE",
"b\nTrue\nFalse\nNA\nFALSE",
],
)
def test_categorical_dtype_coerces_boolean(all_parsers, data):
# see gh-20498
parser = all_parsers
dtype = {"b": CategoricalDtype([False, True])}
expected = DataFrame({"b": Categorical([True, False, None, False])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)
def test_categorical_unexpected_categories(all_parsers):
parser = all_parsers
dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])}
data = "b\nd\na\nc\nd" # Unexpected c
expected = DataFrame({"b": Categorical(list("dacd"), dtype=dtype["b"])})
result = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,644 @@
"""
Tests dtype specification during parsing
for all of the parsers defined in parsers.py
"""
from collections import defaultdict
from io import StringIO
import numpy as np
import pytest
from pandas.errors import ParserWarning
import pandas as pd
from pandas import (
DataFrame,
Timestamp,
)
import pandas._testing as tm
from pandas.core.arrays import IntegerArray
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
@pytest.mark.parametrize("dtype", [str, object])
@pytest.mark.parametrize("check_orig", [True, False])
@pytest.mark.usefixtures("pyarrow_xfail")
def test_dtype_all_columns(all_parsers, dtype, check_orig, using_infer_string):
# see gh-3795, gh-6607
parser = all_parsers
df = DataFrame(
np.random.default_rng(2).random((5, 2)).round(4),
columns=list("AB"),
index=["1A", "1B", "1C", "1D", "1E"],
)
with tm.ensure_clean("__passing_str_as_dtype__.csv") as path:
df.to_csv(path)
result = parser.read_csv(path, dtype=dtype, index_col=0)
if check_orig:
expected = df.copy()
result = result.astype(float)
elif using_infer_string and dtype is str:
expected = df.astype(str)
else:
expected = df.astype(str).astype(object)
tm.assert_frame_equal(result, expected)
@pytest.mark.usefixtures("pyarrow_xfail")
def test_dtype_per_column(all_parsers):
parser = all_parsers
data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""
expected = DataFrame(
[[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"]
)
expected["one"] = expected["one"].astype(np.float64)
result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str})
tm.assert_frame_equal(result, expected)
def test_invalid_dtype_per_column(all_parsers):
parser = all_parsers
data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""
with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"):
parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"})
def test_raise_on_passed_int_dtype_with_nas(all_parsers):
# see gh-2631
parser = all_parsers
data = """YEAR, DOY, a
2001,106380451,10
2001,,11
2001,106380451,67"""
if parser.engine == "c":
msg = "Integer column has NA values"
elif parser.engine == "pyarrow":
msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
else:
msg = "Unable to convert column DOY"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True)
def test_dtype_with_converters(all_parsers):
parser = all_parsers
data = """a,b
1.1,2.2
1.2,2.3"""
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)}
)
return
    # Dtype spec is ignored if a converter is specified.
result = parser.read_csv_check_warnings(
ParserWarning,
"Both a converter and dtype were specified for column a "
"- only the converter will be used.",
StringIO(data),
dtype={"a": "i8"},
converters={"a": lambda x: str(x)},
)
expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"])
)
def test_numeric_dtype(all_parsers, dtype):
data = "0\n1"
parser = all_parsers
expected = DataFrame([0, 1], dtype=dtype)
result = parser.read_csv(StringIO(data), header=None, dtype=dtype)
tm.assert_frame_equal(expected, result)
@pytest.mark.usefixtures("pyarrow_xfail")
def test_boolean_dtype(all_parsers):
parser = all_parsers
data = "\n".join(
[
"a",
"True",
"TRUE",
"true",
"1",
"1.0",
"False",
"FALSE",
"false",
"0",
"0.0",
"NaN",
"nan",
"NA",
"null",
"NULL",
]
)
result = parser.read_csv(StringIO(data), dtype="boolean")
expected = DataFrame(
{
"a": pd.array(
[
True,
True,
True,
True,
True,
False,
False,
False,
False,
False,
None,
None,
None,
None,
None,
],
dtype="boolean",
)
}
)
tm.assert_frame_equal(result, expected)
@pytest.mark.usefixtures("pyarrow_xfail")
def test_delimiter_with_usecols_and_parse_dates(all_parsers):
# GH#35873
result = all_parsers.read_csv(
StringIO('"dump","-9,1","-9,1",20101010'),
engine="python",
names=["col", "col1", "col2", "col3"],
usecols=["col1", "col2", "col3"],
parse_dates=["col3"],
decimal=",",
)
expected = DataFrame(
{"col1": [-9.1], "col2": [-9.1], "col3": [Timestamp("2010-10-10")]}
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("thousands", ["_", None])
def test_decimal_and_exponential(
request, python_parser_only, numeric_decimal, thousands
):
# GH#31920
decimal_number_check(request, python_parser_only, numeric_decimal, thousands, None)
@pytest.mark.parametrize("thousands", ["_", None])
@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"])
def test_1000_sep_decimal_float_precision(
request, c_parser_only, numeric_decimal, float_precision, thousands
):
    # Test decimal and thousands separator handling across the
    # 'float_precision' options of the C parser.
decimal_number_check(
request, c_parser_only, numeric_decimal, thousands, float_precision
)
text, value = numeric_decimal
text = " " + text + " "
if isinstance(value, str): # the negative cases (parse as text)
value = " " + value + " "
decimal_number_check(
request, c_parser_only, (text, value), thousands, float_precision
)
def decimal_number_check(request, parser, numeric_decimal, thousands, float_precision):
# GH#31920
value = numeric_decimal[0]
if thousands is None and value in ("1_,", "1_234,56", "1_234,56e0"):
request.applymarker(
pytest.mark.xfail(reason=f"thousands={thousands} and sep is in {value}")
)
df = parser.read_csv(
StringIO(value),
float_precision=float_precision,
sep="|",
thousands=thousands,
decimal=",",
header=None,
)
val = df.iloc[0, 0]
assert val == numeric_decimal[1]
@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"])
def test_skip_whitespace(c_parser_only, float_precision):
DATA = """id\tnum\t
1\t1.2 \t
1\t 2.1\t
2\t 1\t
2\t 1.2 \t
"""
df = c_parser_only.read_csv(
StringIO(DATA),
float_precision=float_precision,
sep="\t",
header=0,
dtype={1: np.float64},
)
tm.assert_series_equal(df.iloc[:, 1], pd.Series([1.2, 2.1, 1.0, 1.2], name="num"))
@pytest.mark.usefixtures("pyarrow_xfail")
def test_true_values_cast_to_bool(all_parsers):
# GH#34655
text = """a,b
yes,xxx
no,yyy
1,zzz
0,aaa
"""
parser = all_parsers
result = parser.read_csv(
StringIO(text),
true_values=["yes"],
false_values=["no"],
dtype={"a": "boolean"},
)
expected = DataFrame(
{"a": [True, False, True, False], "b": ["xxx", "yyy", "zzz", "aaa"]}
)
expected["a"] = expected["a"].astype("boolean")
tm.assert_frame_equal(result, expected)
@pytest.mark.usefixtures("pyarrow_xfail")
@pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)])
def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value):
# GH#35211
parser = all_parsers
data = """a,a\n1,1"""
dtype_dict = {"a": str, **dtypes}
# GH#42462
dtype_dict_copy = dtype_dict.copy()
result = parser.read_csv(StringIO(data), dtype=dtype_dict)
expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
assert dtype_dict == dtype_dict_copy, "dtype dict changed"
tm.assert_frame_equal(result, expected)
@pytest.mark.usefixtures("pyarrow_xfail")
def test_dtype_mangle_dup_cols_single_dtype(all_parsers):
# GH#42022
parser = all_parsers
data = """a,a\n1,1"""
result = parser.read_csv(StringIO(data), dtype=str)
expected = DataFrame({"a": ["1"], "a.1": ["1"]})
tm.assert_frame_equal(result, expected)
@pytest.mark.usefixtures("pyarrow_xfail")
def test_dtype_multi_index(all_parsers):
# GH 42446
parser = all_parsers
data = "A,B,B\nX,Y,Z\n1,2,3"
result = parser.read_csv(
StringIO(data),
header=list(range(2)),
dtype={
("A", "X"): np.int32,
("B", "Y"): np.int32,
("B", "Z"): np.float32,
},
)
expected = DataFrame(
{
("A", "X"): np.int32([1]),
("B", "Y"): np.int32([2]),
("B", "Z"): np.float32([3]),
}
)
tm.assert_frame_equal(result, expected)
def test_nullable_int_dtype(all_parsers, any_int_ea_dtype):
# GH 25472
parser = all_parsers
dtype = any_int_ea_dtype
data = """a,b,c
,3,5
1,,6
2,4,"""
expected = DataFrame(
{
"a": pd.array([pd.NA, 1, 2], dtype=dtype),
"b": pd.array([3, pd.NA, 4], dtype=dtype),
"c": pd.array([5, 6, pd.NA], dtype=dtype),
}
)
actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)
@pytest.mark.usefixtures("pyarrow_xfail")
@pytest.mark.parametrize("default", ["float", "float64"])
def test_dtypes_defaultdict(all_parsers, default):
# GH#41574
data = """a,b
1,2
"""
dtype = defaultdict(lambda: default, a="int64")
parser = all_parsers
result = parser.read_csv(StringIO(data), dtype=dtype)
expected = DataFrame({"a": [1], "b": 2.0})
tm.assert_frame_equal(result, expected)
@pytest.mark.usefixtures("pyarrow_xfail")
def test_dtypes_defaultdict_mangle_dup_cols(all_parsers):
# GH#41574
data = """a,b,a,b,b.1
1,2,3,4,5
"""
dtype = defaultdict(lambda: "float64", a="int64")
dtype["b.1"] = "int64"
parser = all_parsers
result = parser.read_csv(StringIO(data), dtype=dtype)
expected = DataFrame({"a": [1], "b": [2.0], "a.1": [3], "b.2": [4.0], "b.1": [5]})
tm.assert_frame_equal(result, expected)
@pytest.mark.usefixtures("pyarrow_xfail")
def test_dtypes_defaultdict_invalid(all_parsers):
# GH#41574
data = """a,b
1,2
"""
dtype = defaultdict(lambda: "invalid_dtype", a="int64")
parser = all_parsers
with pytest.raises(TypeError, match="not understood"):
parser.read_csv(StringIO(data), dtype=dtype)
def test_dtype_backend(all_parsers):
# GH#36712
parser = all_parsers
data = """a,b,c,d,e,f,g,h,i,j
1,2.5,True,a,,,,,12-31-2019,
3,4.5,False,b,6,7.5,True,a,12-31-2019,
"""
result = parser.read_csv(
StringIO(data), dtype_backend="numpy_nullable", parse_dates=["i"]
)
expected = DataFrame(
{
"a": pd.Series([1, 3], dtype="Int64"),
"b": pd.Series([2.5, 4.5], dtype="Float64"),
"c": pd.Series([True, False], dtype="boolean"),
"d": pd.Series(["a", "b"], dtype="string"),
"e": pd.Series([pd.NA, 6], dtype="Int64"),
"f": pd.Series([pd.NA, 7.5], dtype="Float64"),
"g": pd.Series([pd.NA, True], dtype="boolean"),
"h": pd.Series([pd.NA, "a"], dtype="string"),
"i": pd.Series([Timestamp("2019-12-31")] * 2),
"j": pd.Series([pd.NA, pd.NA], dtype="Int64"),
}
)
tm.assert_frame_equal(result, expected)
def test_dtype_backend_and_dtype(all_parsers):
# GH#36712
parser = all_parsers
data = """a,b
1,2.5
,
"""
result = parser.read_csv(
StringIO(data), dtype_backend="numpy_nullable", dtype="float64"
)
expected = DataFrame({"a": [1.0, np.nan], "b": [2.5, np.nan]})
tm.assert_frame_equal(result, expected)
def test_dtype_backend_string(all_parsers, string_storage):
# GH#36712
with pd.option_context("mode.string_storage", string_storage):
parser = all_parsers
data = """a,b
a,x
b,
"""
result = parser.read_csv(StringIO(data), dtype_backend="numpy_nullable")
expected = DataFrame(
{
"a": pd.array(["a", "b"], dtype=pd.StringDtype(string_storage)),
"b": pd.array(["x", pd.NA], dtype=pd.StringDtype(string_storage)),
},
)
tm.assert_frame_equal(result, expected)
def test_dtype_backend_ea_dtype_specified(all_parsers):
# GH#491496
data = """a,b
1,2
"""
parser = all_parsers
result = parser.read_csv(
StringIO(data), dtype="Int64", dtype_backend="numpy_nullable"
)
expected = DataFrame({"a": [1], "b": 2}, dtype="Int64")
tm.assert_frame_equal(result, expected)
def test_dtype_backend_pyarrow(all_parsers, request):
# GH#36712
pa = pytest.importorskip("pyarrow")
parser = all_parsers
data = """a,b,c,d,e,f,g,h,i,j
1,2.5,True,a,,,,,12-31-2019,
3,4.5,False,b,6,7.5,True,a,12-31-2019,
"""
result = parser.read_csv(StringIO(data), dtype_backend="pyarrow", parse_dates=["i"])
expected = DataFrame(
{
"a": pd.Series([1, 3], dtype="int64[pyarrow]"),
"b": pd.Series([2.5, 4.5], dtype="float64[pyarrow]"),
"c": pd.Series([True, False], dtype="bool[pyarrow]"),
"d": pd.Series(["a", "b"], dtype=pd.ArrowDtype(pa.string())),
"e": pd.Series([pd.NA, 6], dtype="int64[pyarrow]"),
"f": pd.Series([pd.NA, 7.5], dtype="float64[pyarrow]"),
"g": pd.Series([pd.NA, True], dtype="bool[pyarrow]"),
"h": pd.Series(
[pd.NA, "a"],
dtype=pd.ArrowDtype(pa.string()),
),
"i": pd.Series([Timestamp("2019-12-31")] * 2),
"j": pd.Series([pd.NA, pd.NA], dtype="null[pyarrow]"),
}
)
tm.assert_frame_equal(result, expected)
# pyarrow engine failing:
# https://github.com/pandas-dev/pandas/issues/56136
@pytest.mark.usefixtures("pyarrow_xfail")
def test_ea_int_avoid_overflow(all_parsers):
# GH#32134
parser = all_parsers
data = """a,b
1,1
,1
1582218195625938945,1
"""
result = parser.read_csv(StringIO(data), dtype={"a": "Int64"})
expected = DataFrame(
{
"a": IntegerArray(
np.array([1, 1, 1582218195625938945]), np.array([False, True, False])
),
"b": 1,
}
)
tm.assert_frame_equal(result, expected)
def test_string_inference(all_parsers):
# GH#54430
dtype = pd.StringDtype(na_value=np.nan)
data = """a,b
x,1
y,2
,3"""
parser = all_parsers
with pd.option_context("future.infer_string", True):
result = parser.read_csv(StringIO(data))
expected = DataFrame(
{"a": pd.Series(["x", "y", None], dtype=dtype), "b": [1, 2, 3]},
columns=pd.Index(["a", "b"], dtype=dtype),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("dtype", ["O", object, "object", np.object_, str, np.str_])
def test_string_inference_object_dtype(all_parsers, dtype, using_infer_string):
# GH#56047
data = """a,b
x,a
y,a
z,a"""
parser = all_parsers
with pd.option_context("future.infer_string", True):
result = parser.read_csv(StringIO(data), dtype=dtype)
expected_dtype = pd.StringDtype(na_value=np.nan) if dtype is str else object
expected = DataFrame(
{
"a": pd.Series(["x", "y", "z"], dtype=expected_dtype),
"b": pd.Series(["a", "a", "a"], dtype=expected_dtype),
},
columns=pd.Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
)
tm.assert_frame_equal(result, expected)
with pd.option_context("future.infer_string", True):
result = parser.read_csv(StringIO(data), dtype={"a": dtype})
expected = DataFrame(
{
"a": pd.Series(["x", "y", "z"], dtype=expected_dtype),
"b": pd.Series(["a", "a", "a"], dtype=pd.StringDtype(na_value=np.nan)),
},
columns=pd.Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_accurate_parsing_of_large_integers(all_parsers):
# GH#52505
data = """SYMBOL,MOMENT,ID,ID_DEAL
AAPL,20230301181139587,1925036343869802844,
AAPL,20230301181139587,2023552585717889863,2023552585717263358
NVDA,20230301181139587,2023552585717889863,2023552585717263359
AMC,20230301181139587,2023552585717889863,2023552585717263360
AMZN,20230301181139587,2023552585717889759,2023552585717263360
MSFT,20230301181139587,2023552585717889863,2023552585717263361
NVDA,20230301181139587,2023552585717889827,2023552585717263361"""
orders = all_parsers.read_csv(StringIO(data), dtype={"ID_DEAL": pd.Int64Dtype()})
assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263358, "ID_DEAL"]) == 1
assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263359, "ID_DEAL"]) == 1
assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263360, "ID_DEAL"]) == 2
assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263361, "ID_DEAL"]) == 2
def test_dtypes_with_usecols(all_parsers):
# GH#54868
parser = all_parsers
data = """a,b,c
1,2,3
4,5,6"""
result = parser.read_csv(StringIO(data), usecols=["a", "c"], dtype={"a": object})
if parser.engine == "pyarrow":
values = [1, 4]
else:
values = ["1", "4"]
expected = DataFrame({"a": pd.Series(values, dtype=object), "c": [3, 6]})
tm.assert_frame_equal(result, expected)
def test_index_col_with_dtype_no_rangeindex(all_parsers):
data = StringIO("345.5,519.5,0\n519.5,726.5,1")
result = all_parsers.read_csv(
data,
header=None,
names=["start", "stop", "bin_id"],
dtype={"start": np.float32, "stop": np.float32, "bin_id": np.uint32},
index_col="bin_id",
).index
expected = pd.Index([0, 1], dtype=np.uint32, name="bin_id")
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,181 @@
"""
Tests dtype specification during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Index,
MultiIndex,
Series,
concat,
)
import pandas._testing as tm
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_dtype_all_columns_empty(all_parsers):
# see gh-12048
parser = all_parsers
result = parser.read_csv(StringIO("A,B"), dtype=str)
expected = DataFrame({"A": [], "B": []}, dtype=str)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two"
result = parser.read_csv(StringIO(data), dtype={"one": "u1"})
expected = DataFrame(
{"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)},
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_index_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two"
result = parser.read_csv(
StringIO(data), index_col=["one"], dtype={"one": "u1", 1: "f"}
)
expected = DataFrame(
{"two": np.empty(0, dtype="f")}, index=Index([], dtype="u1", name="one")
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_multi_index_pass_dtype(all_parsers):
parser = all_parsers
data = "one,two,three"
result = parser.read_csv(
StringIO(data), index_col=["one", "two"], dtype={"one": "u1", 1: "f8"}
)
exp_idx = MultiIndex.from_arrays(
[np.empty(0, dtype="u1"), np.empty(0, dtype=np.float64)],
names=["one", "two"],
)
expected = DataFrame({"three": np.empty(0, dtype=object)}, index=exp_idx)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
parser = all_parsers
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"})
expected = DataFrame(
{"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")},
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
parser = all_parsers
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"})
expected = DataFrame(
{"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")},
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers):
# see gh-9424
parser = all_parsers
expected = concat(
[Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")],
axis=1,
)
data = "one,one"
result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"})
tm.assert_frame_equal(result, expected)
def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers):
# see gh-9424
parser = all_parsers
expected = concat(
[Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")],
axis=1,
)
expected.index = expected.index.astype(object)
with pytest.raises(ValueError, match="Duplicate names"):
data = ""
parser.read_csv(StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"})
@pytest.mark.parametrize(
"dtype,expected",
[
(np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)),
(
"category",
DataFrame({"a": Categorical([]), "b": Categorical([])}),
),
(
{"a": "category", "b": "category"},
DataFrame({"a": Categorical([]), "b": Categorical([])}),
),
("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")),
(
"timedelta64[ns]",
DataFrame(
{
"a": Series([], dtype="timedelta64[ns]"),
"b": Series([], dtype="timedelta64[ns]"),
},
),
),
(
{"a": np.int64, "b": np.int32},
DataFrame(
{"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)},
),
),
(
{0: np.int64, 1: np.int32},
DataFrame(
{"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)},
),
),
(
{"a": np.int64, 1: np.int32},
DataFrame(
{"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)},
),
),
],
)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_dtype(all_parsers, dtype, expected):
# see gh-14712
parser = all_parsers
data = "a,b"
result = parser.read_csv(StringIO(data), header=0, dtype=dtype)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,647 @@
"""
Tests that apply specifically to the CParser. Unless specifically stated
as a CParser-specific issue, the goal is to eventually move as many of
these tests as possible out of this module once the Python parser can accept
further arguments when parsing.
"""
from decimal import Decimal
from io import (
BytesIO,
StringIO,
TextIOWrapper,
)
import mmap
import os
import tarfile
import numpy as np
import pytest
from pandas.compat.numpy import np_version_gte1p24
from pandas.errors import (
ParserError,
ParserWarning,
)
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
concat,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"malformed",
["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"],
ids=["words pointer", "stream pointer", "lines pointer"],
)
def test_buffer_overflow(c_parser_only, malformed):
# see gh-9205: test certain malformed input files that cause
# buffer overflows in tokenizer.c
msg = "Buffer overflow caught - possible malformed input file."
parser = c_parser_only
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(malformed))
def test_delim_whitespace_custom_terminator(c_parser_only):
# See gh-12912
data = "a b c~1 2 3~4 5 6~7 8 9"
parser = c_parser_only
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
df = parser.read_csv(StringIO(data), lineterminator="~", delim_whitespace=True)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"])
tm.assert_frame_equal(df, expected)
def test_dtype_and_names_error(c_parser_only):
# see gh-8833: passing both dtype and names
# resulting in an error reporting issue
parser = c_parser_only
data = """
1.0 1
2.0 2
3.0 3
"""
# base cases
result = parser.read_csv(StringIO(data), sep=r"\s+", header=None)
expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]])
tm.assert_frame_equal(result, expected)
result = parser.read_csv(StringIO(data), sep=r"\s+", header=None, names=["a", "b"])
expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
# fallback casting
result = parser.read_csv(
StringIO(data), sep=r"\s+", header=None, names=["a", "b"], dtype={"a": np.int32}
)
expected = DataFrame([[1, 1], [2, 2], [3, 3]], columns=["a", "b"])
expected["a"] = expected["a"].astype(np.int32)
tm.assert_frame_equal(result, expected)
data = """
1.0 1
nan 2
3.0 3
"""
# fallback casting, but not castable
warning = RuntimeWarning if np_version_gte1p24 else None
with pytest.raises(ValueError, match="cannot safely convert"):
with tm.assert_produces_warning(warning, check_stacklevel=False):
parser.read_csv(
StringIO(data),
sep=r"\s+",
header=None,
names=["a", "b"],
dtype={"a": np.int32},
)
@pytest.mark.parametrize(
"match,kwargs",
[
# For each of these cases, all of the dtypes are valid, just unsupported.
(
(
"the dtype datetime64 is not supported for parsing, "
"pass this column using parse_dates instead"
),
{"dtype": {"A": "datetime64", "B": "float64"}},
),
(
(
"the dtype datetime64 is not supported for parsing, "
"pass this column using parse_dates instead"
),
{"dtype": {"A": "datetime64", "B": "float64"}, "parse_dates": ["B"]},
),
(
"the dtype timedelta64 is not supported for parsing",
{"dtype": {"A": "timedelta64", "B": "float64"}},
),
(
f"the dtype {tm.ENDIAN}U8 is not supported for parsing",
{"dtype": {"A": "U8"}},
),
],
ids=["dt64-0", "dt64-1", "td64", f"{tm.ENDIAN}U8"],
)
def test_unsupported_dtype(c_parser_only, match, kwargs):
parser = c_parser_only
df = DataFrame(
np.random.default_rng(2).random((5, 2)),
columns=list("AB"),
index=["1A", "1B", "1C", "1D", "1E"],
)
with tm.ensure_clean("__unsupported_dtype__.csv") as path:
df.to_csv(path)
with pytest.raises(TypeError, match=match):
parser.read_csv(path, index_col=0, **kwargs)
@td.skip_if_32bit
@pytest.mark.slow
# test numbers between 1 and 2
@pytest.mark.parametrize("num", np.linspace(1.0, 2.0, num=21))
def test_precise_conversion(c_parser_only, num):
parser = c_parser_only
normal_errors = []
precise_errors = []
def error(val: float, actual_val: Decimal) -> Decimal:
return abs(Decimal(f"{val:.100}") - actual_val)
# 25 decimal digits of precision
text = f"a\n{num:.25}"
normal_val = float(
parser.read_csv(StringIO(text), float_precision="legacy")["a"][0]
)
precise_val = float(parser.read_csv(StringIO(text), float_precision="high")["a"][0])
roundtrip_val = float(
parser.read_csv(StringIO(text), float_precision="round_trip")["a"][0]
)
actual_val = Decimal(text[2:])
normal_errors.append(error(normal_val, actual_val))
precise_errors.append(error(precise_val, actual_val))
# round-trip should match float()
assert roundtrip_val == float(text[2:])
assert sum(precise_errors) <= sum(normal_errors)
assert max(precise_errors) <= max(normal_errors)
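# Standalone sketch (assumes the default C engine): how the float_precision
# options compare in practice. "round_trip" defers to Python's own float
# conversion, so it reproduces the literal exactly, while "legacy" uses the
# older, lower-precision converter and may differ in the last bits.
from io import StringIO
import pandas as pd

text = "a\n1.2345678901234567\n"
rt_val = pd.read_csv(StringIO(text), float_precision="round_trip")["a"][0]
assert rt_val == float("1.2345678901234567")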
def test_usecols_dtypes(c_parser_only, using_infer_string):
parser = c_parser_only
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
result = parser.read_csv(
StringIO(data),
usecols=(0, 1, 2),
names=("a", "b", "c"),
header=None,
converters={"a": str},
dtype={"b": int, "c": float},
)
result2 = parser.read_csv(
StringIO(data),
usecols=(0, 2),
names=("a", "b", "c"),
header=None,
converters={"a": str},
dtype={"b": int, "c": float},
)
if using_infer_string:
assert (result.dtypes == ["string", int, float]).all()
assert (result2.dtypes == ["string", float]).all()
else:
assert (result.dtypes == [object, int, float]).all()
assert (result2.dtypes == [object, float]).all()
def test_disable_bool_parsing(c_parser_only):
# see gh-2090
parser = c_parser_only
data = """A,B,C
Yes,No,Yes
No,Yes,Yes
Yes,,Yes
No,No,No"""
result = parser.read_csv(StringIO(data), dtype=object)
assert (result.dtypes == object).all()
result = parser.read_csv(StringIO(data), dtype=object, na_filter=False)
assert result["B"][2] == ""
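# Standalone sketch of the na_filter behaviour exercised above: by default
# an empty field becomes NaN, while na_filter=False disables NA detection
# so the empty string survives unchanged.
from io import StringIO
import pandas as pd

data = "A,B\nYes,\n"
assert pd.isna(pd.read_csv(StringIO(data))["B"][0])
assert pd.read_csv(StringIO(data), na_filter=False)["B"][0] == ""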
def test_custom_lineterminator(c_parser_only):
parser = c_parser_only
data = "a,b,c~1,2,3~4,5,6"
result = parser.read_csv(StringIO(data), lineterminator="~")
expected = parser.read_csv(StringIO(data.replace("~", "\n")))
tm.assert_frame_equal(result, expected)
def test_parse_ragged_csv(c_parser_only):
parser = c_parser_only
data = """1,2,3
1,2,3,4
1,2,3,4,5
1,2
1,2,3,4"""
nice_data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
result = parser.read_csv(
StringIO(data), header=None, names=["a", "b", "c", "d", "e"]
)
expected = parser.read_csv(
StringIO(nice_data), header=None, names=["a", "b", "c", "d", "e"]
)
tm.assert_frame_equal(result, expected)
# too many columns; this would cause a segfault if not handled carefully
data = "1,2\n3,4,5"
result = parser.read_csv(StringIO(data), header=None, names=range(50))
expected = parser.read_csv(StringIO(data), header=None, names=range(3)).reindex(
columns=range(50)
)
tm.assert_frame_equal(result, expected)
def test_tokenize_CR_with_quoting(c_parser_only):
# see gh-3453
parser = c_parser_only
data = ' a,b,c\r"a,b","e,d","f,f"'
result = parser.read_csv(StringIO(data), header=None)
expected = parser.read_csv(StringIO(data.replace("\r", "\n")), header=None)
tm.assert_frame_equal(result, expected)
result = parser.read_csv(StringIO(data))
expected = parser.read_csv(StringIO(data.replace("\r", "\n")))
tm.assert_frame_equal(result, expected)
@pytest.mark.slow
@pytest.mark.parametrize("count", [3 * 2**n for n in range(6)])
def test_grow_boundary_at_cap(c_parser_only, count):
# See gh-12494
#
# Cause of error was that the C parser
# was not increasing the buffer size when
# the desired space would fill the buffer
# to capacity, which would later cause a
# buffer overflow error when checking the
# EOF terminator of the CSV stream.
# 3 * 2^n commas were observed to break the parser
parser = c_parser_only
with StringIO("," * count) as s:
expected = DataFrame(columns=[f"Unnamed: {i}" for i in range(count + 1)])
df = parser.read_csv(s)
tm.assert_frame_equal(df, expected)
@pytest.mark.slow
@pytest.mark.parametrize("encoding", [None, "utf-8"])
def test_parse_trim_buffers(c_parser_only, encoding):
# This test is part of a bugfix for gh-13703. It attempts to
# stress the system memory allocator, to cause it to move the
# stream buffer and either let the OS reclaim the region, or let
# other memory requests of the parser otherwise modify the contents
# of the memory space where it was formerly located.
# This test is designed to cause a `segfault` with unpatched
# `tokenizer.c`. Sometimes the test fails on `segfault`, other
# times it fails due to memory corruption, which causes the
# loaded DataFrame to differ from the expected one.
# Also force 'utf-8' encoding, so that `_string_convert` would take
# a different execution branch.
parser = c_parser_only
# Generate a large mixed-type CSV file on-the-fly (one record is
# approx 1.5KiB).
record_ = (
"""9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,Z"""
"""ZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,"""
"""ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,9"""
"""99,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,"""
"""9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9."""
"""99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999."""
"""99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZ"""
"""ZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZ"""
"""ZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,Z"""
"""ZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,"""
"""9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,"""
"""999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,"""
""",,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999"""
""",9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9."""
"""999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,"""
""",9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-Z"""
"""ZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ"""
""",999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99"""
""",,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-"""
"""9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9"""
""".99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,"""
""",,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9."""
"""99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZ"""
"""ZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ"""
"""-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZ"""
"""ZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ"""
""",9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99"""
""",99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9"""
""".99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,"""
)
# Set the number of lines so that a call to `parser_trim_buffers`
# is triggered: after a couple of full chunks are consumed a
# relatively small 'residual' chunk would cause reallocation
# within the parser.
chunksize, n_lines = 128, 2 * 128 + 15
csv_data = "\n".join([record_] * n_lines) + "\n"
# We will use StringIO to load the CSV from this text buffer.
# pd.read_csv() will iterate over the file in chunks and will
# finally read a residual chunk of really small size.
# Generate the expected output: manually create the dataframe
# by splitting by comma and repeating the `n_lines` times.
row = tuple(val_ if val_ else np.nan for val_ in record_.split(","))
expected = DataFrame(
[row for _ in range(n_lines)], dtype=object, columns=None, index=None
)
# Iterate over the CSV file in chunks of `chunksize` lines
with parser.read_csv(
StringIO(csv_data),
header=None,
dtype=object,
chunksize=chunksize,
encoding=encoding,
) as chunks_:
result = concat(chunks_, axis=0, ignore_index=True)
# Check for data corruption if there was no segfault
tm.assert_frame_equal(result, expected)
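# Standalone sketch of the chunked-read pattern used above: passing
# chunksize makes read_csv return a TextFileReader that works as a context
# manager and yields DataFrame chunks, which concat re-assembles.
from io import StringIO
import pandas as pd

csv_text = "a,b\n" + "1,2\n" * 10
with pd.read_csv(StringIO(csv_text), chunksize=4) as reader:
    out = pd.concat(reader, ignore_index=True)
assert len(out) == 10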
def test_internal_null_byte(c_parser_only):
# see gh-14012
#
# The null byte ('\x00') should not be used as a
# true line terminator, escape character, or comment
# character, only as a placeholder to indicate that
# none was specified.
#
# This test should be moved to test_common.py ONLY when
# Python's csv class supports parsing '\x00'.
parser = c_parser_only
names = ["a", "b", "c"]
data = "1,2,3\n4,\x00,6\n7,8,9"
expected = DataFrame([[1, 2.0, 3], [4, np.nan, 6], [7, 8, 9]], columns=names)
result = parser.read_csv(StringIO(data), names=names)
tm.assert_frame_equal(result, expected)
def test_read_nrows_large(c_parser_only):
# gh-7626 - read in only nrows of data for large inputs (>262144 bytes)
parser = c_parser_only
header_narrow = "\t".join(["COL_HEADER_" + str(i) for i in range(10)]) + "\n"
data_narrow = "\t".join(["somedatasomedatasomedata1" for _ in range(10)]) + "\n"
header_wide = "\t".join(["COL_HEADER_" + str(i) for i in range(15)]) + "\n"
data_wide = "\t".join(["somedatasomedatasomedata2" for _ in range(15)]) + "\n"
test_input = header_narrow + data_narrow * 1050 + header_wide + data_wide * 2
df = parser.read_csv(StringIO(test_input), sep="\t", nrows=1010)
assert df.size == 1010 * 10
def test_float_precision_round_trip_with_text(c_parser_only):
# see gh-15140
parser = c_parser_only
df = parser.read_csv(StringIO("a"), header=None, float_precision="round_trip")
tm.assert_frame_equal(df, DataFrame({0: ["a"]}))
def test_large_difference_in_columns(c_parser_only):
# see gh-14125
parser = c_parser_only
count = 10000
large_row = ("X," * count)[:-1] + "\n"
normal_row = "XXXXXX XXXXXX,111111111111111\n"
test_input = (large_row + normal_row * 6)[:-1]
result = parser.read_csv(StringIO(test_input), header=None, usecols=[0])
rows = test_input.split("\n")
expected = DataFrame([row.split(",")[0] for row in rows])
tm.assert_frame_equal(result, expected)
def test_data_after_quote(c_parser_only):
# see gh-15910
parser = c_parser_only
data = 'a\n1\n"b"a'
result = parser.read_csv(StringIO(data))
expected = DataFrame({"a": ["1", "ba"]})
tm.assert_frame_equal(result, expected)
def test_comment_whitespace_delimited(c_parser_only):
parser = c_parser_only
test_input = """\
1 2
2 2 3
3 2 3 # 3 fields
4 2 3# 3 fields
5 2 # 2 fields
6 2# 2 fields
7 # 1 field, NaN
8# 1 field, NaN
9 2 3 # skipped line
# comment"""
with tm.assert_produces_warning(
ParserWarning, match="Skipping line", check_stacklevel=False
):
df = parser.read_csv(
StringIO(test_input),
comment="#",
header=None,
delimiter="\\s+",
skiprows=0,
on_bad_lines="warn",
)
expected = DataFrame([[1, 2], [5, 2], [6, 2], [7, np.nan], [8, np.nan]])
tm.assert_frame_equal(df, expected)
def test_file_like_no_next(c_parser_only):
# gh-16530: the file-like need not have a "next" or "__next__"
# attribute despite having an "__iter__" attribute.
#
# NOTE: This is only true for the C engine, not the Python engine.
class NoNextBuffer(StringIO):
def __next__(self):
raise AttributeError("No next method")
next = __next__
parser = c_parser_only
data = "a\n1"
expected = DataFrame({"a": [1]})
result = parser.read_csv(NoNextBuffer(data))
tm.assert_frame_equal(result, expected)
def test_buffer_rd_bytes_bad_unicode(c_parser_only):
# see gh-22748
t = BytesIO(b"\xB0")
t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape")
msg = "'utf-8' codec can't encode character"
with pytest.raises(UnicodeError, match=msg):
c_parser_only.read_csv(t, encoding="UTF-8")
@pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"])
def test_read_tarfile(c_parser_only, csv_dir_path, tar_suffix):
# see gh-16530
#
# Unfortunately, Python's CSV library can't handle
# tarfile objects (expects string, not bytes when
# iterating through a file-like).
parser = c_parser_only
tar_path = os.path.join(csv_dir_path, "tar_csv" + tar_suffix)
with tarfile.open(tar_path, "r") as tar:
data_file = tar.extractfile("tar_data.csv")
out = parser.read_csv(data_file)
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(out, expected)
def test_chunk_whitespace_on_boundary(c_parser_only):
# see gh-9735: this issue is C parser-specific (bug when
# parsing whitespace and characters at chunk boundary)
#
# This test case has a field too large for the Python parser / CSV library.
parser = c_parser_only
chunk1 = "a" * (1024 * 256 - 2) + "\na"
chunk2 = "\n a"
result = parser.read_csv(StringIO(chunk1 + chunk2), header=None)
expected = DataFrame(["a" * (1024 * 256 - 2), "a", " a"])
tm.assert_frame_equal(result, expected)
def test_file_handles_mmap(c_parser_only, csv1):
# gh-14418
#
# Don't close user provided file handles.
parser = c_parser_only
with open(csv1, encoding="utf-8") as f:
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
parser.read_csv(m)
assert not m.closed
def test_file_binary_mode(c_parser_only):
# see gh-23779
parser = c_parser_only
expected = DataFrame([[1, 2, 3], [4, 5, 6]])
with tm.ensure_clean() as path:
with open(path, "w", encoding="utf-8") as f:
f.write("1,2,3\n4,5,6")
with open(path, "rb") as f:
result = parser.read_csv(f, header=None)
tm.assert_frame_equal(result, expected)
def test_unix_style_breaks(c_parser_only):
# GH 11020
parser = c_parser_only
with tm.ensure_clean() as path:
with open(path, "w", newline="\n", encoding="utf-8") as f:
f.write("blah\n\ncol_1,col_2,col_3\n\n")
result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c")
expected = DataFrame(columns=["col_1", "col_2", "col_3"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("float_precision", [None, "legacy", "high", "round_trip"])
@pytest.mark.parametrize(
"data,thousands,decimal",
[
(
"""A|B|C
1|2,334.01|5
10|13|10.
""",
",",
".",
),
(
"""A|B|C
1|2.334,01|5
10|13|10,
""",
".",
",",
),
],
)
def test_1000_sep_with_decimal(
c_parser_only, data, thousands, decimal, float_precision
):
parser = c_parser_only
expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]})
result = parser.read_csv(
StringIO(data),
sep="|",
thousands=thousands,
decimal=decimal,
float_precision=float_precision,
)
tm.assert_frame_equal(result, expected)
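# Standalone sketch of the European number format parsed above: with
# thousands="." and decimal="," the token "2.334,01" is read as 2334.01.
from io import StringIO
import pandas as pd

df = pd.read_csv(StringIO("A|B\n1|2.334,01\n"), sep="|", thousands=".", decimal=",")
assert abs(df["B"][0] - 2334.01) < 1e-6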
def test_float_precision_options(c_parser_only):
# GH 17154, 36228
parser = c_parser_only
s = "foo\n243.164\n"
df = parser.read_csv(StringIO(s))
df2 = parser.read_csv(StringIO(s), float_precision="high")
tm.assert_frame_equal(df, df2)
df3 = parser.read_csv(StringIO(s), float_precision="legacy")
assert not df.iloc[0, 0] == df3.iloc[0, 0]
msg = "Unrecognized float_precision option: junk"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(s), float_precision="junk")

View File

@ -0,0 +1,227 @@
"""
Tests that comments are properly handled during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas import DataFrame
import pandas._testing as tm
@pytest.mark.parametrize("na_values", [None, ["NaN"]])
def test_comment(all_parsers, na_values):
parser = all_parsers
data = """A,B,C
1,2.,4.#hello world
5.,NaN,10.0
"""
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", na_values=na_values)
return
result = parser.read_csv(StringIO(data), comment="#", na_values=na_values)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"read_kwargs", [{}, {"lineterminator": "*"}, {"delim_whitespace": True}]
)
def test_line_comment(all_parsers, read_kwargs, request):
parser = all_parsers
data = """# empty
A,B,C
1,2.,4.#hello world
#ignore this line
5.,NaN,10.0
"""
warn = None
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
if read_kwargs.get("delim_whitespace"):
data = data.replace(",", " ")
warn = FutureWarning
elif read_kwargs.get("lineterminator"):
data = data.replace("\n", read_kwargs.get("lineterminator"))
read_kwargs["comment"] = "#"
if parser.engine == "pyarrow":
if "lineterminator" in read_kwargs:
msg = (
"The 'lineterminator' option is not supported with the 'pyarrow' engine"
)
else:
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
warn, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data), **read_kwargs)
return
elif parser.engine == "python" and read_kwargs.get("lineterminator"):
msg = r"Custom line terminators not supported in python parser \(yet\)"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
warn, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data), **read_kwargs)
return
with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False):
result = parser.read_csv(StringIO(data), **read_kwargs)
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
tm.assert_frame_equal(result, expected)
def test_comment_skiprows(all_parsers):
parser = all_parsers
data = """# empty
random line
# second empty line
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
# This should ignore the first four lines (including comments).
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", skiprows=4)
return
result = parser.read_csv(StringIO(data), comment="#", skiprows=4)
tm.assert_frame_equal(result, expected)
def test_comment_header(all_parsers):
parser = all_parsers
data = """# empty
# second empty line
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
# Header should begin at the second non-comment line.
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", header=1)
return
result = parser.read_csv(StringIO(data), comment="#", header=1)
tm.assert_frame_equal(result, expected)
def test_comment_skiprows_header(all_parsers):
parser = all_parsers
data = """# empty
# second empty line
# third empty line
X,Y,Z
1,2,3
A,B,C
1,2.,4.
5.,NaN,10.0
"""
# skiprows=4 skips the first 4 lines (including comments), and header=1
# then makes the second of the remaining lines ("A,B,C") the header,
# so the data rows start on the following line.
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
return
result = parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"])
def test_custom_comment_char(all_parsers, comment_char):
parser = all_parsers
data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo"
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data.replace("#", comment_char)), comment=comment_char
)
return
result = parser.read_csv(
StringIO(data.replace("#", comment_char)), comment=comment_char
)
expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("header", ["infer", None])
def test_comment_first_line(all_parsers, header):
# see gh-4623
parser = all_parsers
data = "# notes\na,b,c\n# more notes\n1,2,3"
if header is None:
expected = DataFrame({0: ["a", "1"], 1: ["b", "2"], 2: ["c", "3"]})
else:
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", header=header)
return
result = parser.read_csv(StringIO(data), comment="#", header=header)
tm.assert_frame_equal(result, expected)
def test_comment_char_in_default_value(all_parsers, request):
# GH#34002
if all_parsers.engine == "c":
reason = "see gh-34002: works on the python engine but not the c engine"
# NA value containing comment char is interpreted as comment
request.applymarker(pytest.mark.xfail(reason=reason, raises=AssertionError))
parser = all_parsers
data = (
"# this is a comment\n"
"col1,col2,col3,col4\n"
"1,2,3,4#inline comment\n"
"4,5#,6,10\n"
"7,8,#N/A,11\n"
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
return
result = parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
expected = DataFrame(
{
"col1": [1, 4, 7],
"col2": [2, 5, 8],
"col3": [3.0, np.nan, np.nan],
"col4": [4.0, np.nan, 11.0],
}
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,211 @@
"""
Tests compressed data parsing functionality for all
of the parsers defined in parsers.py
"""
import os
from pathlib import Path
import tarfile
import zipfile
import pytest
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.fixture(params=[True, False])
def buffer(request):
return request.param
@pytest.fixture
def parser_and_data(all_parsers, csv1):
parser = all_parsers
with open(csv1, "rb") as f:
data = f.read()
expected = parser.read_csv(csv1)
return parser, data, expected
@pytest.mark.parametrize("compression", ["zip", "infer", "zip2"])
def test_zip(parser_and_data, compression):
parser, data, expected = parser_and_data
with tm.ensure_clean("test_file.zip") as path:
with zipfile.ZipFile(path, mode="w") as tmp:
tmp.writestr("test_file", data)
if compression == "zip2":
with open(path, "rb") as f:
result = parser.read_csv(f, compression="zip")
else:
result = parser.read_csv(path, compression=compression)
tm.assert_frame_equal(result, expected)
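# Standalone sketch of the compression round trip behind these tests (the
# temporary file name is illustrative): to_csv infers gzip from the ".gz"
# suffix and read_csv(compression="infer") does the same when reading back.
import os
import tempfile

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "example.csv.gz")
    df.to_csv(path, index=False)
    roundtrip = pd.read_csv(path, compression="infer")
pd.testing.assert_frame_equal(roundtrip, df)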
@pytest.mark.parametrize("compression", ["zip", "infer"])
def test_zip_error_multiple_files(parser_and_data, compression):
parser, data, expected = parser_and_data
with tm.ensure_clean("combined_zip.zip") as path:
inner_file_names = ["test_file", "second_file"]
with zipfile.ZipFile(path, mode="w") as tmp:
for file_name in inner_file_names:
tmp.writestr(file_name, data)
with pytest.raises(ValueError, match="Multiple files"):
parser.read_csv(path, compression=compression)
def test_zip_error_no_files(parser_and_data):
parser, _, _ = parser_and_data
with tm.ensure_clean() as path:
with zipfile.ZipFile(path, mode="w"):
pass
with pytest.raises(ValueError, match="Zero files"):
parser.read_csv(path, compression="zip")
def test_zip_error_invalid_zip(parser_and_data):
parser, _, _ = parser_and_data
with tm.ensure_clean() as path:
with open(path, "rb") as f:
with pytest.raises(zipfile.BadZipFile, match="File is not a zip file"):
parser.read_csv(f, compression="zip")
@pytest.mark.parametrize("filename", [None, "test.{ext}"])
def test_compression(
request,
parser_and_data,
compression_only,
buffer,
filename,
compression_to_extension,
):
parser, data, expected = parser_and_data
compress_type = compression_only
ext = compression_to_extension[compress_type]
filename = filename if filename is None else filename.format(ext=ext)
if filename and buffer:
request.applymarker(
pytest.mark.xfail(
reason="Cannot deduce compression from buffer of compressed data."
)
)
with tm.ensure_clean(filename=filename) as path:
tm.write_to_compressed(compress_type, path, data)
compression = "infer" if filename else compress_type
if buffer:
with open(path, "rb") as f:
result = parser.read_csv(f, compression=compression)
else:
result = parser.read_csv(path, compression=compression)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("ext", [None, "gz", "bz2"])
def test_infer_compression(all_parsers, csv1, buffer, ext):
# see gh-9770
parser = all_parsers
kwargs = {"index_col": 0, "parse_dates": True}
expected = parser.read_csv(csv1, **kwargs)
kwargs["compression"] = "infer"
if buffer:
with open(csv1, encoding="utf-8") as f:
result = parser.read_csv(f, **kwargs)
else:
ext = "." + ext if ext else ""
result = parser.read_csv(csv1 + ext, **kwargs)
tm.assert_frame_equal(result, expected)
def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding_fmt):
# see gh-18071, gh-24130
parser = all_parsers
encoding = encoding_fmt.format(utf_value)
path = os.path.join(csv_dir_path, f"utf{utf_value}_ex_small.zip")
result = parser.read_csv(path, encoding=encoding, compression="zip", sep="\t")
expected = DataFrame(
{
"Country": ["Venezuela", "Venezuela"],
"Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."],
}
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("invalid_compression", ["sfark", "bz3", "zipper"])
def test_invalid_compression(all_parsers, invalid_compression):
parser = all_parsers
compress_kwargs = {"compression": invalid_compression}
msg = f"Unrecognized compression type: {invalid_compression}"
with pytest.raises(ValueError, match=msg):
parser.read_csv("test_file.zip", **compress_kwargs)
def test_compression_tar_archive(all_parsers, csv_dir_path):
parser = all_parsers
path = os.path.join(csv_dir_path, "tar_csv.tar.gz")
df = parser.read_csv(path)
assert list(df.columns) == ["a"]
def test_ignore_compression_extension(all_parsers):
parser = all_parsers
df = DataFrame({"a": [0, 1]})
with tm.ensure_clean("test.csv") as path_csv:
with tm.ensure_clean("test.csv.zip") as path_zip:
# make sure to create an uncompressed file with a zip extension
df.to_csv(path_csv, index=False)
Path(path_zip).write_text(
Path(path_csv).read_text(encoding="utf-8"), encoding="utf-8"
)
tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)
def test_writes_tar_gz(all_parsers):
parser = all_parsers
data = DataFrame(
{
"Country": ["Venezuela", "Venezuela"],
"Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."],
}
)
with tm.ensure_clean("test.tar.gz") as tar_path:
data.to_csv(tar_path, index=False)
# test that read_csv infers .tar.gz as gzip:
tm.assert_frame_equal(parser.read_csv(tar_path), data)
# test that file is indeed gzipped:
with tarfile.open(tar_path, "r:gz") as tar:
result = parser.read_csv(
tar.extractfile(tar.getnames()[0]), compression="infer"
)
tm.assert_frame_equal(result, data)

View File

@ -0,0 +1,36 @@
import numpy as np
import pytest
from pandas.errors import DtypeWarning
import pandas._testing as tm
from pandas.core.arrays import ArrowExtensionArray
from pandas.io.parsers.c_parser_wrapper import _concatenate_chunks
def test_concatenate_chunks_pyarrow():
# GH#51876
pa = pytest.importorskip("pyarrow")
chunks = [
{0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
{0: ArrowExtensionArray(pa.array([1, 2]))},
]
result = _concatenate_chunks(chunks)
expected = ArrowExtensionArray(pa.array([1.5, 2.5, 1.0, 2.0]))
tm.assert_extension_array_equal(result[0], expected)
def test_concatenate_chunks_pyarrow_strings():
# GH#51876
pa = pytest.importorskip("pyarrow")
chunks = [
{0: ArrowExtensionArray(pa.array([1.5, 2.5]))},
{0: ArrowExtensionArray(pa.array(["a", "b"]))},
]
with tm.assert_produces_warning(DtypeWarning, match="have mixed types"):
result = _concatenate_chunks(chunks)
expected = np.concatenate(
[np.array([1.5, 2.5], dtype=object), np.array(["a", "b"])]
)
tm.assert_numpy_array_equal(result[0], expected)

View File

@ -0,0 +1,263 @@
"""
Tests column conversion functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
from dateutil.parser import parse
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
def test_converters_type_must_be_dict(all_parsers):
parser = all_parsers
data = """index,A,B,C,D
foo,2,3,4,5
"""
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), converters=0)
return
with pytest.raises(TypeError, match="Type converters.+"):
parser.read_csv(StringIO(data), converters=0)
@pytest.mark.parametrize("column", [3, "D"])
@pytest.mark.parametrize(
"converter", [parse, lambda x: int(x.split("/")[2])] # Produce integer.
)
def test_converters(all_parsers, column, converter):
parser = all_parsers
data = """A,B,C,D
a,1,2,01/01/2009
b,3,4,01/02/2009
c,4,5,01/03/2009
"""
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), converters={column: converter})
return
result = parser.read_csv(StringIO(data), converters={column: converter})
expected = parser.read_csv(StringIO(data))
expected["D"] = expected["D"].map(converter)
tm.assert_frame_equal(result, expected)
def test_converters_no_implicit_conv(all_parsers):
# see gh-2184
parser = all_parsers
data = """000102,1.2,A\n001245,2,B"""
converters = {0: lambda x: x.strip()}
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=None, converters=converters)
return
result = parser.read_csv(StringIO(data), header=None, converters=converters)
# Column 0 should not be cast to numeric and should remain as object.
expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])
tm.assert_frame_equal(result, expected)
def test_converters_euro_decimal_format(all_parsers):
# see gh-583
converters = {}
parser = all_parsers
data = """Id;Number1;Number2;Text1;Text2;Number3
1;1521,1541;187101,9543;ABC;poi;4,7387
2;121,12;14897,76;DEF;uyt;0,3773
3;878,158;108013,434;GHI;rez;2,7356"""
converters["Number1"] = converters["Number2"] = converters[
"Number3"
] = lambda x: float(x.replace(",", "."))
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), sep=";", converters=converters)
return
result = parser.read_csv(StringIO(data), sep=";", converters=converters)
expected = DataFrame(
[
[1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387],
[2, 121.12, 14897.76, "DEF", "uyt", 0.3773],
[3, 878.158, 108013.434, "GHI", "rez", 2.7356],
],
columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"],
)
tm.assert_frame_equal(result, expected)
def test_converters_corner_with_nans(all_parsers):
parser = all_parsers
data = """id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2"""
# Example converters.
def convert_days(x):
x = x.strip()
if not x:
return np.nan
is_plus = x.endswith("+")
if is_plus:
x = int(x[:-1]) + 1
else:
x = int(x)
return x
def convert_days_sentinel(x):
x = x.strip()
if not x:
return np.nan
is_plus = x.endswith("+")
if is_plus:
x = int(x[:-1]) + 1
else:
x = int(x)
return x
def convert_score(x):
x = x.strip()
if not x:
return np.nan
if x.find("-") > 0:
val_min, val_max = map(int, x.split("-"))
val = 0.5 * (val_min + val_max)
else:
val = float(x)
return val
results = []
for day_converter in [convert_days, convert_days_sentinel]:
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
converters={"score": convert_score, "days": day_converter},
na_values=["", None],
)
continue
result = parser.read_csv(
StringIO(data),
converters={"score": convert_score, "days": day_converter},
na_values=["", None],
)
assert pd.isna(result["days"][1])
results.append(result)
if parser.engine != "pyarrow":
tm.assert_frame_equal(results[0], results[1])
@pytest.mark.parametrize("conv_f", [lambda x: x, str])
def test_converter_index_col_bug(all_parsers, conv_f):
# see gh-1835 , GH#40589
parser = all_parsers
data = "A;B\n1;2\n3;4"
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
)
return
rs = parser.read_csv(
StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
)
xp = DataFrame({"B": [2, 4]}, index=Index(["1", "3"], name="A"))
tm.assert_frame_equal(rs, xp)
def test_converter_identity_object(all_parsers):
# GH#40589
parser = all_parsers
data = "A,B\n1,2\n3,4"
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), converters={"A": lambda x: x})
return
rs = parser.read_csv(StringIO(data), converters={"A": lambda x: x})
xp = DataFrame({"A": ["1", "3"], "B": [2, 4]})
tm.assert_frame_equal(rs, xp)
def test_converter_multi_index(all_parsers):
# GH 42446
parser = all_parsers
data = "A,B,B\nX,Y,Z\n1,2,3"
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
header=list(range(2)),
converters={
("A", "X"): np.int32,
("B", "Y"): np.int32,
("B", "Z"): np.float32,
},
)
return
result = parser.read_csv(
StringIO(data),
header=list(range(2)),
converters={
("A", "X"): np.int32,
("B", "Y"): np.int32,
("B", "Z"): np.float32,
},
)
expected = DataFrame(
{
("A", "X"): np.int32([1]),
("B", "Y"): np.int32([2]),
("B", "Z"): np.float32([3]),
}
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,195 @@
"""
Tests that dialects are properly handled during parsing
for all of the parsers defined in parsers.py
"""
import csv
from io import StringIO
import pytest
from pandas.errors import ParserWarning
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.fixture
def custom_dialect():
dialect_name = "weird"
dialect_kwargs = {
"doublequote": False,
"escapechar": "~",
"delimiter": ":",
"skipinitialspace": False,
"quotechar": "`",
"quoting": 3,
}
return dialect_name, dialect_kwargs
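# Standalone sketch (the dialect name "colon" is illustrative): how a
# registered csv dialect reaches read_csv. Register it with the stdlib csv
# module, then pass its name via the dialect keyword.
import csv
from io import StringIO

import pandas as pd

csv.register_dialect("colon", delimiter=":")
try:
    df = pd.read_csv(StringIO("a:b\n1:2\n"), dialect="colon")
finally:
    csv.unregister_dialect("colon")
assert list(df.columns) == ["a", "b"]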
def test_dialect(all_parsers):
parser = all_parsers
data = """\
label1,label2,label3
index1,"a,c,e
index2,b,d,f
"""
dia = csv.excel()
dia.quoting = csv.QUOTE_NONE
if parser.engine == "pyarrow":
msg = "The 'dialect' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dialect=dia)
return
df = parser.read_csv(StringIO(data), dialect=dia)
data = """\
label1,label2,label3
index1,a,c,e
index2,b,d,f
"""
exp = parser.read_csv(StringIO(data))
exp.replace("a", '"a', inplace=True)
tm.assert_frame_equal(df, exp)
def test_dialect_str(all_parsers):
dialect_name = "mydialect"
parser = all_parsers
data = """\
fruit:vegetable
apple:broccoli
pear:tomato
"""
exp = DataFrame({"fruit": ["apple", "pear"], "vegetable": ["broccoli", "tomato"]})
with tm.with_csv_dialect(dialect_name, delimiter=":"):
if parser.engine == "pyarrow":
msg = "The 'dialect' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dialect=dialect_name)
return
df = parser.read_csv(StringIO(data), dialect=dialect_name)
tm.assert_frame_equal(df, exp)
def test_invalid_dialect(all_parsers):
class InvalidDialect:
pass
data = "a\n1"
parser = all_parsers
msg = "Invalid dialect"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), dialect=InvalidDialect)
@pytest.mark.parametrize(
"arg",
[None, "doublequote", "escapechar", "skipinitialspace", "quotechar", "quoting"],
)
@pytest.mark.parametrize("value", ["dialect", "default", "other"])
def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect, arg, value):
# see gh-23761.
dialect_name, dialect_kwargs = custom_dialect
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
data = "a:b\n1:2"
warning_klass = None
kwds = {}
# arg=None tests when we pass in the dialect without any other arguments.
if arg is not None:
if value == "dialect": # No conflict --> no warning.
kwds[arg] = dialect_kwargs[arg]
elif value == "default": # Default --> no warning.
from pandas.io.parsers.base_parser import parser_defaults
kwds[arg] = parser_defaults[arg]
else: # Non-default + conflict with dialect --> warning.
warning_klass = ParserWarning
kwds[arg] = "blah"
with tm.with_csv_dialect(dialect_name, **dialect_kwargs):
if parser.engine == "pyarrow":
msg = "The 'dialect' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv_check_warnings(
# No warning bc we raise
None,
"Conflicting values for",
StringIO(data),
dialect=dialect_name,
**kwds,
)
return
result = parser.read_csv_check_warnings(
warning_klass,
"Conflicting values for",
StringIO(data),
dialect=dialect_name,
**kwds,
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,warning_klass",
[
({"sep": ","}, None), # sep is default --> sep_override=True
({"sep": "."}, ParserWarning), # sep isn't default --> sep_override=False
({"delimiter": ":"}, None), # No conflict
({"delimiter": None}, None), # Default arguments --> sep_override=True
({"delimiter": ","}, ParserWarning), # Conflict
({"delimiter": "."}, ParserWarning), # Conflict
],
ids=[
"sep-override-true",
"sep-override-false",
"delimiter-no-conflict",
"delimiter-default-arg",
"delimiter-conflict",
"delimiter-conflict2",
],
)
def test_dialect_conflict_delimiter(all_parsers, custom_dialect, kwargs, warning_klass):
# see gh-23761.
dialect_name, dialect_kwargs = custom_dialect
parser = all_parsers
expected = DataFrame({"a": [1], "b": [2]})
data = "a:b\n1:2"
with tm.with_csv_dialect(dialect_name, **dialect_kwargs):
if parser.engine == "pyarrow":
msg = "The 'dialect' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv_check_warnings(
# no warning bc we raise
None,
"Conflicting values for 'delimiter'",
StringIO(data),
dialect=dialect_name,
**kwargs,
)
return
result = parser.read_csv_check_warnings(
warning_klass,
"Conflicting values for 'delimiter'",
StringIO(data),
dialect=dialect_name,
**kwargs,
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,337 @@
"""
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import (
BytesIO,
TextIOWrapper,
)
import os
import tempfile
import uuid
import numpy as np
import pytest
from pandas import (
DataFrame,
read_csv,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
def test_bytes_io_input(all_parsers):
encoding = "cp1255"
parser = all_parsers
data = BytesIO("שלום:1234\n562:123".encode(encoding))
result = parser.read_csv(data, sep=":", encoding=encoding)
expected = DataFrame([[562, 123]], columns=["שלום", "1234"])
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_read_csv_unicode(all_parsers):
parser = all_parsers
data = BytesIO("\u0141aski, Jan;1".encode())
result = parser.read_csv(data, sep=";", encoding="utf-8", header=None)
expected = DataFrame([["\u0141aski, Jan", 1]])
tm.assert_frame_equal(result, expected)
@skip_pyarrow
@pytest.mark.parametrize("sep", [",", "\t"])
@pytest.mark.parametrize("encoding", ["utf-16", "utf-16le", "utf-16be"])
def test_utf16_bom_skiprows(all_parsers, sep, encoding):
# see gh-2298
parser = all_parsers
data = """skip this
skip this too
A,B,C
1,2,3
4,5,6""".replace(
",", sep
)
path = f"__{uuid.uuid4()}__.csv"
kwargs = {"sep": sep, "skiprows": 2}
utf8 = "utf-8"
with tm.ensure_clean(path) as path:
bytes_data = data.encode(encoding)
with open(path, "wb") as f:
f.write(bytes_data)
with TextIOWrapper(BytesIO(data.encode(utf8)), encoding=utf8) as bytes_buffer:
result = parser.read_csv(path, encoding=encoding, **kwargs)
expected = parser.read_csv(bytes_buffer, encoding=utf8, **kwargs)
tm.assert_frame_equal(result, expected)
def test_utf16_example(all_parsers, csv_dir_path):
path = os.path.join(csv_dir_path, "utf16_ex.txt")
parser = all_parsers
result = parser.read_csv(path, encoding="utf-16", sep="\t")
assert len(result) == 50
def test_unicode_encoding(all_parsers, csv_dir_path):
path = os.path.join(csv_dir_path, "unicode_series.csv")
parser = all_parsers
result = parser.read_csv(path, header=None, encoding="latin-1")
result = result.set_index(0)
got = result[1][1632]
expected = "\xc1 k\xf6ldum klaka (Cold Fever) (1994)"
assert got == expected
@pytest.mark.parametrize(
"data,kwargs,expected",
[
# Basic test
("a\n1", {}, DataFrame({"a": [1]})),
# "Regular" quoting
('"a"\n1', {"quotechar": '"'}, DataFrame({"a": [1]})),
# Test in a data row instead of header
("b\n1", {"names": ["a"]}, DataFrame({"a": ["b", "1"]})),
# Test in empty data row with skipping
("\n1", {"names": ["a"], "skip_blank_lines": True}, DataFrame({"a": [1]})),
# Test in empty data row without skipping
(
"\n1",
{"names": ["a"], "skip_blank_lines": False},
DataFrame({"a": [np.nan, 1]}),
),
],
)
def test_utf8_bom(all_parsers, data, kwargs, expected, request):
# see gh-4793
parser = all_parsers
bom = "\ufeff"
utf8 = "utf-8"
def _encode_data_with_bom(_data):
bom_data = (bom + _data).encode(utf8)
return BytesIO(bom_data)
if (
parser.engine == "pyarrow"
and data == "\n1"
and kwargs.get("skip_blank_lines", True)
):
# CSV parse error: Empty CSV file or block: cannot infer number of columns
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
result = parser.read_csv(_encode_data_with_bom(data), encoding=utf8, **kwargs)
tm.assert_frame_equal(result, expected)
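# Standalone sketch of the BOM handling asserted above: a UTF-8 byte-order
# mark in front of the header is stripped, so the first column is "a"
# rather than "\ufeffa".
from io import BytesIO
import pandas as pd

df = pd.read_csv(BytesIO("\ufeffa\n1".encode("utf-8")), encoding="utf-8")
assert list(df.columns) == ["a"]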
def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt):
# see gh-13549
expected = DataFrame({"mb_num": [4.8], "multibyte": ["test"]})
parser = all_parsers
encoding = encoding_fmt.format(utf_value)
data = "mb_num,multibyte\n4.8,test".encode(encoding)
result = parser.read_csv(BytesIO(data), encoding=encoding)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"file_path,encoding",
[
(("io", "data", "csv", "test1.csv"), "utf-8"),
(("io", "parser", "data", "unicode_series.csv"), "latin-1"),
(("io", "parser", "data", "sauron.SHIFT_JIS.csv"), "shiftjis"),
],
)
def test_binary_mode_file_buffers(all_parsers, file_path, encoding, datapath):
# gh-23779: Python csv engine shouldn't error on files opened in binary.
# gh-31575: Python csv engine shouldn't error on files opened in raw binary.
parser = all_parsers
fpath = datapath(*file_path)
expected = parser.read_csv(fpath, encoding=encoding)
with open(fpath, encoding=encoding) as fa:
result = parser.read_csv(fa)
assert not fa.closed
tm.assert_frame_equal(expected, result)
with open(fpath, mode="rb") as fb:
result = parser.read_csv(fb, encoding=encoding)
assert not fb.closed
tm.assert_frame_equal(expected, result)
with open(fpath, mode="rb", buffering=0) as fb:
result = parser.read_csv(fb, encoding=encoding)
assert not fb.closed
tm.assert_frame_equal(expected, result)
@pytest.mark.parametrize("pass_encoding", [True, False])
def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding):
# see gh-24130
parser = all_parsers
encoding = encoding_fmt.format(utf_value)
if parser.engine == "pyarrow" and pass_encoding is True and utf_value in [16, 32]:
# FIXME: this is bad!
pytest.skip("These cases freeze")
expected = DataFrame({"foo": ["bar"]})
with tm.ensure_clean(mode="w+", encoding=encoding, return_filelike=True) as f:
f.write("foo\nbar")
f.seek(0)
result = parser.read_csv(f, encoding=encoding if pass_encoding else None)
tm.assert_frame_equal(result, expected)
def test_encoding_named_temp_file(all_parsers):
# see gh-31819
parser = all_parsers
encoding = "shift-jis"
title = "てすと"
data = "こむ"
expected = DataFrame({title: [data]})
with tempfile.NamedTemporaryFile() as f:
f.write(f"{title}\n{data}".encode(encoding))
f.seek(0)
result = parser.read_csv(f, encoding=encoding)
tm.assert_frame_equal(result, expected)
assert not f.closed
@pytest.mark.parametrize(
"encoding", ["utf-8", "utf-16", "utf-16-be", "utf-16-le", "utf-32"]
)
def test_parse_encoded_special_characters(encoding):
# GH16218 Verify parsing of data with encoded special characters
# Data contains a Unicode 'FULLWIDTH COLON' (U+FF1A) at position (0,"a")
data = "a\tb\n：foo\t0\nbar\t1\nbaz\t2"  # noqa: RUF001
encoded_data = BytesIO(data.encode(encoding))
result = read_csv(encoded_data, delimiter="\t", encoding=encoding)
expected = DataFrame(
data=[["：foo", 0], ["bar", 1], ["baz", 2]],  # noqa: RUF001
columns=["a", "b"],
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("encoding", ["utf-8", None, "utf-16", "cp1255", "latin-1"])
def test_encoding_memory_map(all_parsers, encoding):
# GH40986
parser = all_parsers
expected = DataFrame(
{
"name": ["Raphael", "Donatello", "Miguel Angel", "Leonardo"],
"mask": ["red", "purple", "orange", "blue"],
"weapon": ["sai", "bo staff", "nunchunk", "katana"],
}
)
with tm.ensure_clean() as file:
expected.to_csv(file, index=False, encoding=encoding)
if parser.engine == "pyarrow":
msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(file, encoding=encoding, memory_map=True)
return
df = parser.read_csv(file, encoding=encoding, memory_map=True)
tm.assert_frame_equal(df, expected)
def test_chunk_splits_multibyte_char(all_parsers):
"""
Chunk splits a multibyte character with memory_map=True
GH 43540
"""
parser = all_parsers
# DEFAULT_CHUNKSIZE = 262144, defined in parsers.pyx
df = DataFrame(data=["a" * 127] * 2048)
# Put the two-byte utf-8 encoded character "ą" at the end of the chunk
# utf-8 encoding of "ą" is b'\xc4\x85'
df.iloc[2047] = "a" * 127 + "ą"
with tm.ensure_clean("bug-gh43540.csv") as fname:
df.to_csv(fname, index=False, header=False, encoding="utf-8")
if parser.engine == "pyarrow":
msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(fname, header=None, memory_map=True)
return
dfr = parser.read_csv(fname, header=None, memory_map=True)
tm.assert_frame_equal(dfr, df)
def test_readcsv_memmap_utf8(all_parsers):
"""
GH 43787
Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
"""
lines = []
line_length = 128
start_char = " "
end_char = "\U00010080"
# This for loop creates a list of 128-char strings
# consisting of consecutive Unicode chars
for lnum in range(ord(start_char), ord(end_char), line_length):
line = "".join([chr(c) for c in range(lnum, lnum + 0x80)]) + "\n"
try:
line.encode("utf-8")
except UnicodeEncodeError:
continue
lines.append(line)
parser = all_parsers
df = DataFrame(lines)
with tm.ensure_clean("utf8test.csv") as fname:
df.to_csv(fname, index=False, header=False, encoding="utf-8")
if parser.engine == "pyarrow":
msg = "The 'memory_map' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(fname, header=None, memory_map=True, encoding="utf-8")
return
dfr = parser.read_csv(fname, header=None, memory_map=True, encoding="utf-8")
tm.assert_frame_equal(df, dfr)
@pytest.mark.usefixtures("pyarrow_xfail")
@pytest.mark.parametrize("mode", ["w+b", "w+t"])
def test_not_readable(all_parsers, mode):
# GH43439
parser = all_parsers
content = b"abcd"
if "t" in mode:
content = "abcd"
with tempfile.SpooledTemporaryFile(mode=mode, encoding="utf-8") as handle:
handle.write(content)
handle.seek(0)
df = parser.read_csv(handle)
expected = DataFrame([], columns=["abcd"])
tm.assert_frame_equal(df, expected)

View File

@ -0,0 +1,733 @@
"""
Tests that the file header is properly handled or inferred
during parsing for all of the parsers defined in parsers.py
"""
from collections import namedtuple
from io import StringIO
import numpy as np
import pytest
from pandas.errors import ParserError
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@xfail_pyarrow # TypeError: an integer is required
def test_read_with_bad_header(all_parsers):
parser = all_parsers
msg = r"but only \d+ lines in file"
with pytest.raises(ValueError, match=msg):
s = StringIO(",,")
parser.read_csv(s, header=[10])
def test_negative_header(all_parsers):
# see gh-27779
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
with pytest.raises(
ValueError,
match="Passing negative integer to header is invalid. "
"For no header, use header=None instead",
):
parser.read_csv(StringIO(data), header=-1)
@pytest.mark.parametrize("header", [([-1, 2, 4]), ([-5, 0])])
def test_negative_multi_index_header(all_parsers, header):
# see gh-27779
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
with pytest.raises(
ValueError, match="cannot specify multi-index header with negative integers"
):
parser.read_csv(StringIO(data), header=header)
@pytest.mark.parametrize("header", [True, False])
def test_bool_header_arg(all_parsers, header):
# see gh-6114
parser = all_parsers
data = """\
MyColumn
a
b
a
b"""
msg = "Passing a bool to header is invalid"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), header=header)
@xfail_pyarrow # AssertionError: DataFrame are different
def test_header_with_index_col(all_parsers):
parser = all_parsers
data = """foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
names = ["A", "B", "C"]
result = parser.read_csv(StringIO(data), names=names)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(result, expected)
def test_header_not_first_line(all_parsers):
parser = all_parsers
data = """got,to,ignore,this,line
got,to,ignore,this,line
index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
"""
data2 = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
"""
result = parser.read_csv(StringIO(data), header=2, index_col=0)
expected = parser.read_csv(StringIO(data2), header=0, index_col=0)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_header_multi_index(all_parsers):
parser = all_parsers
data = """\
C0,,C_l0_g0,C_l0_g1,C_l0_g2
C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
"""
result = parser.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[0, 1])
data_gen_f = lambda r, c: f"R{r}C{c}"
data = [[data_gen_f(r, c) for c in range(3)] for r in range(5)]
index = MultiIndex.from_arrays(
[[f"R_l0_g{i}" for i in range(5)], [f"R_l1_g{i}" for i in range(5)]],
names=["R0", "R1"],
)
columns = MultiIndex.from_arrays(
[
[f"C_l0_g{i}" for i in range(3)],
[f"C_l1_g{i}" for i in range(3)],
[f"C_l2_g{i}" for i in range(3)],
[f"C_l3_g{i}" for i in range(3)],
],
names=["C0", "C1", "C2", "C3"],
)
expected = DataFrame(data, columns=columns, index=index)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,msg",
[
(
{"index_col": ["foo", "bar"]},
(
"index_col must only contain "
"row numbers when specifying "
"a multi-index header"
),
),
(
{"index_col": [0, 1], "names": ["foo", "bar"]},
("cannot specify names when specifying a multi-index header"),
),
(
{"index_col": [0, 1], "usecols": ["foo", "bar"]},
("cannot specify usecols when specifying a multi-index header"),
),
],
)
def test_header_multi_index_invalid(all_parsers, kwargs, msg):
data = """\
C0,,C_l0_g0,C_l0_g1,C_l0_g2
C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
"""
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=[0, 1, 2, 3], **kwargs)
_TestTuple = namedtuple("_TestTuple", ["first", "second"])
@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"kwargs",
[
{"header": [0, 1]},
{
"skiprows": 3,
"names": [
("a", "q"),
("a", "r"),
("a", "s"),
("b", "t"),
("c", "u"),
("c", "v"),
],
},
{
"skiprows": 3,
"names": [
_TestTuple("a", "q"),
_TestTuple("a", "r"),
_TestTuple("a", "s"),
_TestTuple("b", "t"),
_TestTuple("c", "u"),
_TestTuple("c", "v"),
],
},
],
)
def test_header_multi_index_common_format1(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")]
),
)
data = """,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
tm.assert_frame_equal(result, expected)
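# Illustrative sketch (not part of the original suite): the same "common
# format" parsed through the top-level pandas API instead of the all_parsers
# fixture -- the two header rows become the column MultiIndex and the leading
# unnamed column becomes the index when index_col=0.
def test_header_multi_index_common_format_sketch():
    from pandas import read_csv

    data = ",a,a\n,q,r\none,1,2\ntwo,3,4"
    result = read_csv(StringIO(data), header=[0, 1], index_col=0)
    expected = DataFrame(
        [[1, 2], [3, 4]],
        index=["one", "two"],
        columns=MultiIndex.from_tuples([("a", "q"), ("a", "r")]),
    )
    tm.assert_frame_equal(result, expected)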
@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"kwargs",
[
{"header": [0, 1]},
{
"skiprows": 2,
"names": [
("a", "q"),
("a", "r"),
("a", "s"),
("b", "t"),
("c", "u"),
("c", "v"),
],
},
{
"skiprows": 2,
"names": [
_TestTuple("a", "q"),
_TestTuple("a", "r"),
_TestTuple("a", "s"),
_TestTuple("b", "t"),
_TestTuple("c", "u"),
_TestTuple("c", "v"),
],
},
],
)
def test_header_multi_index_common_format2(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")]
),
)
data = """,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"kwargs",
[
{"header": [0, 1]},
{
"skiprows": 2,
"names": [
("a", "q"),
("a", "r"),
("a", "s"),
("b", "t"),
("c", "u"),
("c", "v"),
],
},
{
"skiprows": 2,
"names": [
_TestTuple("a", "q"),
_TestTuple("a", "r"),
_TestTuple("a", "s"),
_TestTuple("b", "t"),
_TestTuple("c", "u"),
_TestTuple("c", "v"),
],
},
],
)
def test_header_multi_index_common_format3(all_parsers, kwargs):
parser = all_parsers
expected = DataFrame(
[[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
index=["one", "two"],
columns=MultiIndex.from_tuples(
[("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")]
),
)
expected = expected.reset_index(drop=True)
data = """a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), index_col=None, **kwargs)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_header_multi_index_common_format_malformed1(all_parsers):
parser = all_parsers
expected = DataFrame(
np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"),
index=Index([1, 7]),
columns=MultiIndex(
levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]],
codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]],
names=["a", "q"],
),
)
data = """a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0)
tm.assert_frame_equal(expected, result)
@xfail_pyarrow # TypeError: an integer is required
def test_header_multi_index_common_format_malformed2(all_parsers):
parser = all_parsers
expected = DataFrame(
np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"),
index=Index([1, 7]),
columns=MultiIndex(
levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]],
codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]],
names=[None, "q"],
),
)
data = """,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0)
tm.assert_frame_equal(expected, result)
@xfail_pyarrow # TypeError: an integer is required
def test_header_multi_index_common_format_malformed3(all_parsers):
parser = all_parsers
expected = DataFrame(
np.array([[3, 4, 5, 6], [9, 10, 11, 12]], dtype="int64"),
index=MultiIndex(levels=[[1, 7], [2, 8]], codes=[[0, 1], [0, 1]]),
columns=MultiIndex(
levels=[["a", "b", "c"], ["s", "t", "u", "v"]],
codes=[[0, 1, 2, 2], [0, 1, 2, 3]],
names=[None, "q"],
),
)
data = """,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1])
tm.assert_frame_equal(expected, result)
@xfail_pyarrow # TypeError: an integer is required
def test_header_multi_index_blank_line(all_parsers):
# GH 40442
parser = all_parsers
data = [[None, None], [1, 2], [3, 4]]
columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")])
expected = DataFrame(data, columns=columns)
data = "a,b\nA,B\n,\n1,2\n3,4"
result = parser.read_csv(StringIO(data), header=[0, 1])
tm.assert_frame_equal(expected, result)
@pytest.mark.parametrize(
"data,header", [("1,2,3\n4,5,6", None), ("foo,bar,baz\n1,2,3\n4,5,6", 0)]
)
def test_header_names_backward_compat(all_parsers, data, header, request):
# see gh-2539
parser = all_parsers
if parser.engine == "pyarrow" and header is not None:
mark = pytest.mark.xfail(reason="DataFrame.columns are different")
request.applymarker(mark)
expected = parser.read_csv(StringIO("1,2,3\n4,5,6"), names=["a", "b", "c"])
result = parser.read_csv(StringIO(data), names=["a", "b", "c"], header=header)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block: cannot infer
@pytest.mark.parametrize("kwargs", [{}, {"index_col": False}])
def test_read_only_header_no_rows(all_parsers, kwargs):
# See gh-7773
parser = all_parsers
expected = DataFrame(columns=["a", "b", "c"])
result = parser.read_csv(StringIO("a,b,c"), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,names",
[
({}, [0, 1, 2, 3, 4]),
(
{"names": ["foo", "bar", "baz", "quux", "panda"]},
["foo", "bar", "baz", "quux", "panda"],
),
],
)
def test_no_header(all_parsers, kwargs, names):
parser = all_parsers
data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
expected = DataFrame(
[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], columns=names
)
result = parser.read_csv(StringIO(data), header=None, **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("header", [["a", "b"], "string_header"])
def test_non_int_header(all_parsers, header):
# see gh-16338
msg = "header must be integer or list of integers"
data = """1,2\n3,4"""
parser = all_parsers
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=header)
@xfail_pyarrow # TypeError: an integer is required
def test_singleton_header(all_parsers):
# see gh-7757
data = """a,b,c\n0,1,2\n1,2,3"""
parser = all_parsers
expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]})
result = parser.read_csv(StringIO(data), header=[0])
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"data,expected",
[
(
"A,A,A,B\none,one,one,two\n0,40,34,0.1",
DataFrame(
[[0, 40, 34, 0.1]],
columns=MultiIndex.from_tuples(
[("A", "one"), ("A", "one.1"), ("A", "one.2"), ("B", "two")]
),
),
),
(
"A,A,A,B\none,one,one.1,two\n0,40,34,0.1",
DataFrame(
[[0, 40, 34, 0.1]],
columns=MultiIndex.from_tuples(
[("A", "one"), ("A", "one.1"), ("A", "one.1.1"), ("B", "two")]
),
),
),
(
"A,A,A,B,B\none,one,one.1,two,two\n0,40,34,0.1,0.1",
DataFrame(
[[0, 40, 34, 0.1, 0.1]],
columns=MultiIndex.from_tuples(
[
("A", "one"),
("A", "one.1"),
("A", "one.1.1"),
("B", "two"),
("B", "two.1"),
]
),
),
),
],
)
def test_mangles_multi_index(all_parsers, data, expected):
# see gh-18062
parser = all_parsers
result = parser.read_csv(StringIO(data), header=[0, 1])
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize("index_col", [None, [0]])
@pytest.mark.parametrize(
"columns", [None, (["", "Unnamed"]), (["Unnamed", ""]), (["Unnamed", "NotUnnamed"])]
)
def test_multi_index_unnamed(all_parsers, index_col, columns):
# see gh-23687
#
# When specifying a multi-index header, make sure that
# we don't error just because one of the rows in our header
# has ALL column names containing the string "Unnamed". The
# correct condition to check is whether the row contains
# ALL columns that did not have names (and instead were given
# placeholder ones).
parser = all_parsers
header = [0, 1]
if index_col is None:
data = ",".join(columns or ["", ""]) + "\n0,1\n2,3\n4,5\n"
else:
data = ",".join([""] + (columns or ["", ""])) + "\n,0,1\n0,2,3\n1,4,5\n"
result = parser.read_csv(StringIO(data), header=header, index_col=index_col)
exp_columns = []
if columns is None:
columns = ["", "", ""]
for i, col in enumerate(columns):
if not col: # Unnamed.
col = f"Unnamed: {i if index_col is None else i + 1}_level_0"
exp_columns.append(col)
columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"]))
expected = DataFrame([[2, 3], [4, 5]], columns=columns)
tm.assert_frame_equal(result, expected)
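# Illustrative sketch (not part of the original suite): the placeholder naming
# described in the comment above, shown through the top-level pandas API -- a
# blank header cell is replaced by an "Unnamed: {position}_level_{level}"
# placeholder rather than invalidating the whole header row.
def test_multi_index_unnamed_placeholder_sketch():
    from pandas import read_csv

    data = ",Unnamed\n0,1\n2,3\n4,5\n"
    result = read_csv(StringIO(data), header=[0, 1])
    assert result.columns.get_level_values(0)[0] == "Unnamed: 0_level_0"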
@skip_pyarrow # CSV parse error: Expected 2 columns, got 3
def test_names_longer_than_header_but_equal_with_data_rows(all_parsers):
# GH#38453
parser = all_parsers
data = """a, b
1,2,3
5,6,4
"""
result = parser.read_csv(StringIO(data), header=0, names=["A", "B", "C"])
expected = DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 4]})
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_read_csv_multiindex_columns(all_parsers):
# GH#6051
parser = all_parsers
s1 = "Male, Male, Male, Female, Female\nR, R, L, R, R\n.86, .67, .88, .78, .81"
s2 = (
"Male, Male, Male, Female, Female\n"
"R, R, L, R, R\n"
".86, .67, .88, .78, .81\n"
".86, .67, .88, .78, .82"
)
mi = MultiIndex.from_tuples(
[
("Male", "R"),
(" Male", " R"),
(" Male", " L"),
(" Female", " R"),
(" Female", " R.1"),
]
)
expected = DataFrame(
[[0.86, 0.67, 0.88, 0.78, 0.81], [0.86, 0.67, 0.88, 0.78, 0.82]], columns=mi
)
df1 = parser.read_csv(StringIO(s1), header=[0, 1])
tm.assert_frame_equal(df1, expected.iloc[:1])
df2 = parser.read_csv(StringIO(s2), header=[0, 1])
tm.assert_frame_equal(df2, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_read_csv_multi_header_length_check(all_parsers):
# GH#43102
parser = all_parsers
case = """row11,row12,row13
row21,row22, row23
row31,row32
"""
with pytest.raises(
ParserError, match="Header rows must have an equal number of columns."
):
parser.read_csv(StringIO(case), header=[0, 2])
@skip_pyarrow # CSV parse error: Expected 3 columns, got 2
def test_header_none_and_implicit_index(all_parsers):
# GH#22144
parser = all_parsers
data = "x,1,5\ny,2\nz,3\n"
result = parser.read_csv(StringIO(data), names=["a", "b"], header=None)
expected = DataFrame(
{"a": [1, 2, 3], "b": [5, np.nan, np.nan]}, index=["x", "y", "z"]
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # regex mismatch "CSV parse error: Expected 2 columns, got "
def test_header_none_and_implicit_index_in_second_row(all_parsers):
# GH#22144
parser = all_parsers
data = "x,1\ny,2,5\nz,3\n"
with pytest.raises(ParserError, match="Expected 2 fields in line 2, saw 3"):
parser.read_csv(StringIO(data), names=["a", "b"], header=None)
def test_header_none_and_on_bad_lines_skip(all_parsers):
# GH#22144
parser = all_parsers
data = "x,1\ny,2,5\nz,3\n"
result = parser.read_csv(
StringIO(data), names=["a", "b"], header=None, on_bad_lines="skip"
)
expected = DataFrame({"a": ["x", "z"], "b": [1, 3]})
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_header_missing_rows(all_parsers):
# GH#47400
parser = all_parsers
data = """a,b
1,2
"""
msg = r"Passed header=\[0,1,2\], len of 3, but only 2 lines in file"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=[0, 1, 2])
# ValueError: The 'delim_whitespace' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
def test_header_multiple_whitespaces(all_parsers):
# GH#54931
parser = all_parsers
data = """aa bb(1,1) cc(1,1)
0 2 3.5"""
result = parser.read_csv(StringIO(data), sep=r"\s+")
expected = DataFrame({"aa": [0], "bb(1,1)": 2, "cc(1,1)": 3.5})
tm.assert_frame_equal(result, expected)
# ValueError: The 'delim_whitespace' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
def test_header_delim_whitespace(all_parsers):
# GH#54918
parser = all_parsers
data = """a,b
1,2
3,4
"""
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(StringIO(data), delim_whitespace=True)
expected = DataFrame({"a,b": ["1,2", "3,4"]})
tm.assert_frame_equal(result, expected)
def test_usecols_no_header_pyarrow(pyarrow_parser_only):
parser = pyarrow_parser_only
data = """
a,i,x
b,j,y
"""
result = parser.read_csv(
StringIO(data),
header=None,
usecols=[0, 1],
dtype="string[pyarrow]",
dtype_backend="pyarrow",
engine="pyarrow",
)
expected = DataFrame([["a", "i"], ["b", "j"]], dtype="string[pyarrow]")
tm.assert_frame_equal(result, expected)


@ -0,0 +1,376 @@
"""
Tests that the specified index column (a.k.a "index_col")
is properly handled or inferred during parsing for all of
the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize("with_header", [True, False])
def test_index_col_named(all_parsers, with_header):
parser = all_parsers
no_header = """\
KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000
KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000
KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000
KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000
KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000"""
header = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n"
if with_header:
data = header + no_header
result = parser.read_csv(StringIO(data), index_col="ID")
expected = parser.read_csv(StringIO(data), header=0).set_index("ID")
tm.assert_frame_equal(result, expected)
else:
data = no_header
msg = "Index ID invalid"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), index_col="ID")
def test_index_col_named2(all_parsers):
parser = all_parsers
data = """\
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
"""
expected = DataFrame(
{"a": [1, 5, 9], "b": [2, 6, 10], "c": [3, 7, 11], "d": [4, 8, 12]},
index=Index(["hello", "world", "foo"], name="message"),
)
names = ["a", "b", "c", "d", "message"]
result = parser.read_csv(StringIO(data), names=names, index_col=["message"])
tm.assert_frame_equal(result, expected)
def test_index_col_is_true(all_parsers):
# see gh-9798
data = "a,b\n1,2"
parser = all_parsers
msg = "The value of index_col couldn't be 'True'"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), index_col=True)
@skip_pyarrow # CSV parse error: Expected 3 columns, got 4
def test_infer_index_col(all_parsers):
data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
index=["foo", "bar", "baz"],
columns=["A", "B", "C"],
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"index_col,kwargs",
[
(None, {"columns": ["x", "y", "z"]}),
(False, {"columns": ["x", "y", "z"]}),
(0, {"columns": ["y", "z"], "index": Index([], name="x")}),
(1, {"columns": ["x", "z"], "index": Index([], name="y")}),
("x", {"columns": ["y", "z"], "index": Index([], name="x")}),
("y", {"columns": ["x", "z"], "index": Index([], name="y")}),
(
[0, 1],
{
"columns": ["z"],
"index": MultiIndex.from_arrays([[]] * 2, names=["x", "y"]),
},
),
(
["x", "y"],
{
"columns": ["z"],
"index": MultiIndex.from_arrays([[]] * 2, names=["x", "y"]),
},
),
(
[1, 0],
{
"columns": ["z"],
"index": MultiIndex.from_arrays([[]] * 2, names=["y", "x"]),
},
),
(
["y", "x"],
{
"columns": ["z"],
"index": MultiIndex.from_arrays([[]] * 2, names=["y", "x"]),
},
),
],
)
def test_index_col_empty_data(all_parsers, index_col, kwargs):
data = "x,y,z"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=index_col)
expected = DataFrame(**kwargs)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_empty_with_index_col_false(all_parsers):
# see gh-10413
data = "x,y"
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=False)
expected = DataFrame(columns=["x", "y"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"index_names",
[
["", ""],
["foo", ""],
["", "bar"],
["foo", "bar"],
["NotReallyUnnamed", "Unnamed: 0"],
],
)
def test_multi_index_naming(all_parsers, index_names, request):
parser = all_parsers
if parser.engine == "pyarrow" and "" in index_names:
mark = pytest.mark.xfail(reason="One case raises, others are wrong")
request.applymarker(mark)
# We don't want empty index names being replaced with "Unnamed: 0"
data = ",".join(index_names + ["col\na,c,1\na,d,2\nb,c,3\nb,d,4"])
result = parser.read_csv(StringIO(data), index_col=[0, 1])
expected = DataFrame(
{"col": [1, 2, 3, 4]}, index=MultiIndex.from_product([["a", "b"], ["c", "d"]])
)
expected.index.names = [name if name else None for name in index_names]
tm.assert_frame_equal(result, expected)
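# Illustrative sketch (not part of the original suite): when the header does
# provide names for the index columns, read_csv keeps them on the resulting
# MultiIndex instead of substituting placeholders.
def test_multi_index_named_levels_sketch():
    from pandas import read_csv

    data = "foo,bar,col\na,c,1\na,d,2\nb,c,3\nb,d,4"
    result = read_csv(StringIO(data), index_col=[0, 1])
    assert list(result.index.names) == ["foo", "bar"]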
@xfail_pyarrow # ValueError: Found non-unique column index
def test_multi_index_naming_not_all_at_beginning(all_parsers):
parser = all_parsers
data = ",Unnamed: 2,\na,c,1\na,d,2\nb,c,3\nb,d,4"
result = parser.read_csv(StringIO(data), index_col=[0, 2])
expected = DataFrame(
{"Unnamed: 2": ["c", "d", "c", "d"]},
index=MultiIndex(
levels=[["a", "b"], [1, 2, 3, 4]], codes=[[0, 0, 1, 1], [0, 1, 2, 3]]
),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: Found non-unique column index
def test_no_multi_index_level_names_empty(all_parsers):
# GH 10984
parser = all_parsers
midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)])
expected = DataFrame(
np.random.default_rng(2).standard_normal((3, 3)),
index=midx,
columns=["x", "y", "z"],
)
with tm.ensure_clean() as path:
expected.to_csv(path)
result = parser.read_csv(path, index_col=[0, 1, 2])
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_header_with_index_col(all_parsers):
# GH 33476
parser = all_parsers
data = """
I11,A,A
I12,B,B
I2,1,3
"""
midx = MultiIndex.from_tuples([("A", "B"), ("A", "B.1")], names=["I11", "I12"])
idx = Index(["I2"])
expected = DataFrame([[1, 3]], index=idx, columns=midx)
result = parser.read_csv(StringIO(data), index_col=0, header=[0, 1])
tm.assert_frame_equal(result, expected)
col_idx = Index(["A", "A.1"])
idx = Index(["I12", "I2"], name="I11")
expected = DataFrame([["B", "B"], ["1", "3"]], index=idx, columns=col_idx)
result = parser.read_csv(StringIO(data), index_col="I11", header=0)
tm.assert_frame_equal(result, expected)
@pytest.mark.slow
def test_index_col_large_csv(all_parsers, monkeypatch):
# https://github.com/pandas-dev/pandas/issues/37094
parser = all_parsers
ARR_LEN = 100
df = DataFrame(
{
"a": range(ARR_LEN + 1),
"b": np.random.default_rng(2).standard_normal(ARR_LEN + 1),
}
)
with tm.ensure_clean() as path:
df.to_csv(path, index=False)
with monkeypatch.context() as m:
m.setattr("pandas.core.algorithms._MINIMUM_COMP_ARR_LEN", ARR_LEN)
result = parser.read_csv(path, index_col=[0])
tm.assert_frame_equal(result, df.set_index("a"))
@xfail_pyarrow # TypeError: an integer is required
def test_index_col_multiindex_columns_no_data(all_parsers):
# GH#38292
parser = all_parsers
result = parser.read_csv(
StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1], index_col=0
)
expected = DataFrame(
[],
index=Index([]),
columns=MultiIndex.from_arrays(
[["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"]
),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_index_col_header_no_data(all_parsers):
# GH#38292
parser = all_parsers
result = parser.read_csv(StringIO("a0,a1,a2\n"), header=[0], index_col=0)
expected = DataFrame(
[],
columns=["a1", "a2"],
index=Index([], name="a0"),
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_multiindex_columns_no_data(all_parsers):
# GH#38292
parser = all_parsers
result = parser.read_csv(StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1])
expected = DataFrame(
[], columns=MultiIndex.from_arrays([["a0", "a1", "a2"], ["b0", "b1", "b2"]])
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_multiindex_columns_index_col_with_data(all_parsers):
# GH#38292
parser = all_parsers
result = parser.read_csv(
StringIO("a0,a1,a2\nb0,b1,b2\ndata,data,data"), header=[0, 1], index_col=0
)
expected = DataFrame(
[["data", "data"]],
columns=MultiIndex.from_arrays(
[["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"]
),
index=Index(["data"]),
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Empty CSV file or block
def test_infer_types_boolean_sum(all_parsers):
# GH#44079
parser = all_parsers
result = parser.read_csv(
StringIO("0,1"),
names=["a", "b"],
index_col=["a"],
dtype={"a": "UInt8"},
)
expected = DataFrame(
data={
"a": [
0,
],
"b": [1],
}
).set_index("a")
# Not checking index type now, because the C parser will return an
# index column of dtype 'object', and the Python parser will return an
# index column of dtype 'int64'.
tm.assert_frame_equal(result, expected, check_index_type=False)
@pytest.mark.parametrize("dtype, val", [(object, "01"), ("int64", 1)])
def test_specify_dtype_for_index_col(all_parsers, dtype, val, request):
# GH#9435
data = "a,b\n01,2"
parser = all_parsers
if dtype == object and parser.engine == "pyarrow":
request.applymarker(
pytest.mark.xfail(reason="Cannot disable type-inference for pyarrow engine")
)
result = parser.read_csv(StringIO(data), index_col="a", dtype={"a": dtype})
expected = DataFrame({"b": [2]}, index=Index([val], name="a", dtype=dtype))
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # TypeError: an integer is required
def test_multiindex_columns_not_leading_index_col(all_parsers):
# GH#38549
parser = all_parsers
data = """a,b,c,d
e,f,g,h
x,y,1,2
"""
result = parser.read_csv(
StringIO(data),
header=[0, 1],
index_col=1,
)
cols = MultiIndex.from_tuples(
[("a", "e"), ("c", "g"), ("d", "h")], names=["b", "f"]
)
expected = DataFrame([["x", 1, 2]], columns=cols, index=["y"])
tm.assert_frame_equal(result, expected)


@ -0,0 +1,182 @@
"""
Tests that duplicate columns are handled appropriately when parsed by the
CSV engine. In general, the expected result is that they are either thoroughly
de-duplicated (if mangling requested) or ignored otherwise.
"""
from io import StringIO
import pytest
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@xfail_pyarrow # ValueError: Found non-unique column index
def test_basic(all_parsers):
parser = all_parsers
data = "a,a,b,b,b\n1,2,3,4,5"
result = parser.read_csv(StringIO(data), sep=",")
expected = DataFrame([[1, 2, 3, 4, 5]], columns=["a", "a.1", "b", "b.1", "b.2"])
tm.assert_frame_equal(result, expected)
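# Illustrative sketch (not part of the original suite): the same de-duplication
# through the top-level pandas API -- repeated column names get ".1", ".2", ...
# suffixes in order of appearance.
def test_mangle_dupe_cols_sketch():
    from pandas import read_csv

    result = read_csv(StringIO("a,a,b,b,b\n1,2,3,4,5"))
    assert list(result.columns) == ["a", "a.1", "b", "b.1", "b.2"]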
@xfail_pyarrow # ValueError: Found non-unique column index
def test_basic_names(all_parsers):
# See gh-7160
parser = all_parsers
data = "a,b,a\n0,1,2\n3,4,5"
expected = DataFrame([[0, 1, 2], [3, 4, 5]], columns=["a", "b", "a.1"])
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
def test_basic_names_raise(all_parsers):
# See gh-7160
parser = all_parsers
data = "0,1,2\n3,4,5"
with pytest.raises(ValueError, match="Duplicate names"):
parser.read_csv(StringIO(data), names=["a", "b", "a"])
@xfail_pyarrow # ValueError: Found non-unique column index
@pytest.mark.parametrize(
"data,expected",
[
("a,a,a.1\n1,2,3", DataFrame([[1, 2, 3]], columns=["a", "a.2", "a.1"])),
(
"a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6",
DataFrame(
[[1, 2, 3, 4, 5, 6]],
columns=["a", "a.2", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"],
),
),
(
"a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7",
DataFrame(
[[1, 2, 3, 4, 5, 6, 7]],
columns=["a", "a.4", "a.3", "a.1", "a.2", "a.5", "a.6"],
),
),
],
)
def test_thorough_mangle_columns(all_parsers, data, expected):
# see gh-17060
parser = all_parsers
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data,names,expected",
[
(
"a,b,b\n1,2,3",
["a.1", "a.1", "a.1.1"],
DataFrame(
[["a", "b", "b"], ["1", "2", "3"]], columns=["a.1", "a.1.1", "a.1.1.1"]
),
),
(
"a,b,c,d,e,f\n1,2,3,4,5,6",
["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"],
DataFrame(
[["a", "b", "c", "d", "e", "f"], ["1", "2", "3", "4", "5", "6"]],
columns=["a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1", "a.1.1.1.1.1"],
),
),
(
"a,b,c,d,e,f,g\n1,2,3,4,5,6,7",
["a", "a", "a.3", "a.1", "a.2", "a", "a"],
DataFrame(
[
["a", "b", "c", "d", "e", "f", "g"],
["1", "2", "3", "4", "5", "6", "7"],
],
columns=["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"],
),
),
],
)
def test_thorough_mangle_names(all_parsers, data, names, expected):
# see gh-17095
parser = all_parsers
with pytest.raises(ValueError, match="Duplicate names"):
parser.read_csv(StringIO(data), names=names)
@xfail_pyarrow # AssertionError: DataFrame.columns are different
def test_mangled_unnamed_placeholders(all_parsers):
# xref gh-13017
orig_key = "0"
parser = all_parsers
orig_value = [1, 2, 3]
df = DataFrame({orig_key: orig_value})
# This test recursively updates `df`.
for i in range(3):
expected = DataFrame(columns=Index([], dtype="str"))
for j in range(i + 1):
col_name = "Unnamed: 0" + f".{1*j}" * min(j, 1)
expected.insert(loc=0, column=col_name, value=[0, 1, 2])
expected[orig_key] = orig_value
df = parser.read_csv(StringIO(df.to_csv()))
tm.assert_frame_equal(df, expected)
@xfail_pyarrow # ValueError: Found non-unique column index
def test_mangle_dupe_cols_already_exists(all_parsers):
# GH#14704
parser = all_parsers
data = "a,a,a.1,a,a.3,a.1,a.1.1\n1,2,3,4,5,6,7"
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[[1, 2, 3, 4, 5, 6, 7]],
columns=["a", "a.2", "a.1", "a.4", "a.3", "a.1.2", "a.1.1"],
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: Found non-unique column index
def test_mangle_dupe_cols_already_exists_unnamed_col(all_parsers):
# GH#14704
parser = all_parsers
data = ",Unnamed: 0,,Unnamed: 2\n1,2,3,4"
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[[1, 2, 3, 4]],
columns=["Unnamed: 0.1", "Unnamed: 0", "Unnamed: 2.1", "Unnamed: 2"],
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecol, engine", [([0, 1, 1], "python"), ([0, 1, 1], "c")])
def test_mangle_cols_names(all_parsers, usecol, engine):
# GH 11823
parser = all_parsers
data = "1,2,3"
names = ["A", "A", "B"]
with pytest.raises(ValueError, match="Duplicate names"):
parser.read_csv(StringIO(data), names=names, usecols=usecol, engine=engine)


@ -0,0 +1,157 @@
"""
Tests multithreading behaviour for reading and
parsing files for each parser defined in parsers.py
"""
from contextlib import ExitStack
from io import BytesIO
from multiprocessing.pool import ThreadPool
import numpy as np
import pytest
import pandas as pd
from pandas import DataFrame
import pandas._testing as tm
from pandas.util.version import Version
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
# We'll probably always skip these for pyarrow
# Maybe we'll add our own tests for pyarrow too
pytestmark = [
pytest.mark.single_cpu,
pytest.mark.slow,
]
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_multi_thread_string_io_read_csv(all_parsers, request):
# see gh-11786
parser = all_parsers
if parser.engine == "pyarrow":
pa = pytest.importorskip("pyarrow")
if Version(pa.__version__) < Version("16.0"):
request.applymarker(
pytest.mark.xfail(reason="# ValueError: Found non-unique column index")
)
max_row_range = 100
num_files = 10
bytes_to_df = (
"\n".join([f"{i:d},{i:d},{i:d}" for i in range(max_row_range)]).encode()
for _ in range(num_files)
)
# Read all files in many threads.
with ExitStack() as stack:
files = [stack.enter_context(BytesIO(b)) for b in bytes_to_df]
pool = stack.enter_context(ThreadPool(8))
results = pool.map(parser.read_csv, files)
first_result = results[0]
for result in results:
tm.assert_frame_equal(first_result, result)
def _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks):
"""
Generate a DataFrame via multi-thread.
Parameters
----------
parser : BaseParser
The parser object to use for reading the data.
path : str
The location of the CSV file to read.
num_rows : int
The number of rows to read per task.
num_tasks : int
The number of tasks to use for reading this DataFrame.
Returns
-------
df : DataFrame
"""
def reader(arg):
"""
Create a reader for part of the CSV.
Parameters
----------
arg : tuple
A tuple of the following:
* start : int
The row at which to start parsing the CSV.
* nrows : int
The number of rows to read.
Returns
-------
df : DataFrame
"""
start, nrows = arg
if not start:
return parser.read_csv(
path, index_col=0, header=0, nrows=nrows, parse_dates=["date"]
)
return parser.read_csv(
path,
index_col=0,
header=None,
skiprows=int(start) + 1,
nrows=nrows,
parse_dates=[9],
)
tasks = [
(num_rows * i // num_tasks, num_rows // num_tasks) for i in range(num_tasks)
]
with ThreadPool(processes=num_tasks) as pool:
results = pool.map(reader, tasks)
header = results[0].columns
for r in results[1:]:
r.columns = header
final_dataframe = pd.concat(results)
return final_dataframe
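# Illustrative sketch (not part of the original suite): for the values used in
# the test below (num_rows=48, num_tasks=4) the task list built above works out
# to four equal slices, each reading 12 rows after skipping the rows already
# covered by earlier workers.
def test_multi_thread_task_split_sketch():
    num_rows, num_tasks = 48, 4
    tasks = [
        (num_rows * i // num_tasks, num_rows // num_tasks) for i in range(num_tasks)
    ]
    assert tasks == [(0, 12), (12, 12), (24, 12), (36, 12)]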
@xfail_pyarrow # ValueError: The 'nrows' option is not supported
def test_multi_thread_path_multipart_read_csv(all_parsers):
# see gh-11786
num_tasks = 4
num_rows = 48
parser = all_parsers
file_name = "__thread_pool_reader__.csv"
df = DataFrame(
{
"a": np.random.default_rng(2).random(num_rows),
"b": np.random.default_rng(2).random(num_rows),
"c": np.random.default_rng(2).random(num_rows),
"d": np.random.default_rng(2).random(num_rows),
"e": np.random.default_rng(2).random(num_rows),
"foo": ["foo"] * num_rows,
"bar": ["bar"] * num_rows,
"baz": ["baz"] * num_rows,
"date": pd.date_range("20000101 09:00:00", periods=num_rows, freq="s"),
"int": np.arange(num_rows, dtype="int64"),
}
)
with tm.ensure_clean(file_name) as path:
df.to_csv(path)
final_dataframe = _generate_multi_thread_dataframe(
parser, path, num_rows, num_tasks
)
tm.assert_frame_equal(df, final_dataframe)


@ -0,0 +1,780 @@
"""
Tests that NA values are properly handled during
parsing for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas._libs.parsers import STR_NA_VALUES
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
def test_string_nas(all_parsers):
parser = all_parsers
data = """A,B,C
a,b,c
d,,f
,g,h
"""
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[["a", "b", "c"], ["d", np.nan, "f"], [np.nan, "g", "h"]],
columns=["A", "B", "C"],
)
if parser.engine == "pyarrow":
expected.loc[2, "A"] = None
expected.loc[1, "B"] = None
tm.assert_frame_equal(result, expected)
def test_detect_string_na(all_parsers):
parser = all_parsers
data = """A,B
foo,bar
NA,baz
NaN,nan
"""
expected = DataFrame(
[["foo", "bar"], [np.nan, "baz"], [np.nan, np.nan]], columns=["A", "B"]
)
if parser.engine == "pyarrow":
expected.loc[[1, 2], "A"] = None
expected.loc[2, "B"] = None
result = parser.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"na_values",
[
["-999.0", "-999"],
[-999, -999.0],
[-999.0, -999],
["-999.0"],
["-999"],
[-999.0],
[-999],
],
)
@pytest.mark.parametrize(
"data",
[
"""A,B
-999,1.2
2,-999
3,4.5
""",
"""A,B
-999,1.200
2,-999.000
3,4.500
""",
],
)
def test_non_string_na_values(all_parsers, data, na_values, request):
# see gh-3611: with an odd float format, we can't match
# the string "999.0" exactly but still need float matching
parser = all_parsers
expected = DataFrame([[np.nan, 1.2], [2.0, np.nan], [3.0, 4.5]], columns=["A", "B"])
if parser.engine == "pyarrow" and not all(isinstance(x, str) for x in na_values):
msg = "The 'pyarrow' engine requires all na_values to be strings"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), na_values=na_values)
return
elif parser.engine == "pyarrow" and "-999.000" in data:
# because the pyarrow engine does not include the float-ified version
# of "-999" -> -999, it does not match the entry with the trailing
# zeros, so "-999.000" is not treated as null.
mark = pytest.mark.xfail(
reason="pyarrow engined does not recognize equivalent floats"
)
request.applymarker(mark)
result = parser.read_csv(StringIO(data), na_values=na_values)
tm.assert_frame_equal(result, expected)
def test_default_na_values(all_parsers):
_NA_VALUES = {
"-1.#IND",
"1.#QNAN",
"1.#IND",
"-1.#QNAN",
"#N/A",
"N/A",
"n/a",
"NA",
"<NA>",
"#NA",
"NULL",
"null",
"NaN",
"nan",
"-NaN",
"-nan",
"#N/A N/A",
"",
"None",
}
assert _NA_VALUES == STR_NA_VALUES
parser = all_parsers
nv = len(_NA_VALUES)
def f(i, v):
if i == 0:
buf = ""
elif i > 0:
buf = "".join([","] * i)
buf = f"{buf}{v}"
if i < nv - 1:
joined = "".join([","] * (nv - i - 1))
buf = f"{buf}{joined}"
return buf
data = StringIO("\n".join([f(i, v) for i, v in enumerate(_NA_VALUES)]))
expected = DataFrame(np.nan, columns=range(nv), index=range(nv))
result = parser.read_csv(data, header=None)
tm.assert_frame_equal(result, expected)
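# Illustrative sketch (not part of the original suite): the f(i, v) helper above
# puts the i-th NA sentinel in the i-th field of row i and pads the remaining
# fields with commas, so each sentinel appears exactly once and the parsed frame
# is an all-NaN square. Shown here with three hypothetical sentinels.
def test_default_na_values_row_layout_sketch():
    sentinels = ["NA", "NULL", "NaN"]
    nv = len(sentinels)
    rows = [
        ",".join(v if j == i else "" for j in range(nv))
        for i, v in enumerate(sentinels)
    ]
    assert rows == ["NA,,", ",NULL,", ",,NaN"]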
@pytest.mark.parametrize("na_values", ["baz", ["baz"]])
def test_custom_na_values(all_parsers, na_values):
parser = all_parsers
data = """A,B,C
ignore,this,row
1,NA,3
-1.#IND,5,baz
7,8,NaN
"""
expected = DataFrame(
[[1.0, np.nan, 3], [np.nan, 5, np.nan], [7, 8, np.nan]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "skiprows argument must be an integer when using engine='pyarrow'"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), na_values=na_values, skiprows=[1])
return
result = parser.read_csv(StringIO(data), na_values=na_values, skiprows=[1])
tm.assert_frame_equal(result, expected)
def test_bool_na_values(all_parsers):
data = """A,B,C
True,False,True
NA,True,False
False,NA,True"""
parser = all_parsers
result = parser.read_csv(StringIO(data))
expected = DataFrame(
{
"A": np.array([True, np.nan, False], dtype=object),
"B": np.array([False, True, np.nan], dtype=object),
"C": [True, False, True],
}
)
if parser.engine == "pyarrow":
expected.loc[1, "A"] = None
expected.loc[2, "B"] = None
tm.assert_frame_equal(result, expected)
def test_na_value_dict(all_parsers):
data = """A,B,C
foo,bar,NA
bar,foo,foo
foo,bar,NA
bar,foo,foo"""
parser = all_parsers
if parser.engine == "pyarrow":
msg = "pyarrow engine doesn't support passing a dict for na_values"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), na_values={"A": ["foo"], "B": ["bar"]})
return
df = parser.read_csv(StringIO(data), na_values={"A": ["foo"], "B": ["bar"]})
expected = DataFrame(
{
"A": [np.nan, "bar", np.nan, "bar"],
"B": [np.nan, "foo", np.nan, "foo"],
"C": [np.nan, "foo", np.nan, "foo"],
}
)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"index_col,expected",
[
(
[0],
DataFrame({"b": [np.nan], "c": [1], "d": [5]}, index=Index([0], name="a")),
),
(
[0, 2],
DataFrame(
{"b": [np.nan], "d": [5]},
index=MultiIndex.from_tuples([(0, 1)], names=["a", "c"]),
),
),
(
["a", "c"],
DataFrame(
{"b": [np.nan], "d": [5]},
index=MultiIndex.from_tuples([(0, 1)], names=["a", "c"]),
),
),
],
)
def test_na_value_dict_multi_index(all_parsers, index_col, expected):
data = """\
a,b,c,d
0,NA,1,5
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), na_values=set(), index_col=index_col)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,expected",
[
(
{},
DataFrame(
{
"A": ["a", "b", np.nan, "d", "e", np.nan, "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", np.nan, "five", np.nan, "seven"],
}
),
),
(
{"na_values": {"A": [], "C": []}, "keep_default_na": False},
DataFrame(
{
"A": ["a", "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", "nan", "five", "", "seven"],
}
),
),
(
{"na_values": ["a"], "keep_default_na": False},
DataFrame(
{
"A": [np.nan, "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", "nan", "five", "", "seven"],
}
),
),
(
{"na_values": {"A": [], "C": []}},
DataFrame(
{
"A": ["a", "b", np.nan, "d", "e", np.nan, "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["one", "two", "three", np.nan, "five", np.nan, "seven"],
}
),
),
],
)
def test_na_values_keep_default(
all_parsers, kwargs, expected, request, using_infer_string
):
data = """\
A,B,C
a,1,one
b,2,two
,3,three
d,4,nan
e,5,five
nan,6,
g,7,seven
"""
parser = all_parsers
if parser.engine == "pyarrow":
if "na_values" in kwargs and isinstance(kwargs["na_values"], dict):
msg = "The pyarrow engine doesn't support passing a dict for na_values"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
return
if not using_infer_string or "na_values" in kwargs:
mark = pytest.mark.xfail()
request.applymarker(mark)
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
def test_no_na_values_no_keep_default(all_parsers):
# see gh-4318: passing na_values=None and
# keep_default_na=False yields 'None' as a na_value
data = """\
A,B,C
a,1,None
b,2,two
,3,None
d,4,nan
e,5,five
nan,6,
g,7,seven
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), keep_default_na=False)
expected = DataFrame(
{
"A": ["a", "b", "", "d", "e", "nan", "g"],
"B": [1, 2, 3, 4, 5, 6, 7],
"C": ["None", "two", "None", "nan", "five", "", "seven"],
}
)
tm.assert_frame_equal(result, expected)
def test_no_keep_default_na_dict_na_values(all_parsers):
# see gh-19227
data = "a,b\n,2"
parser = all_parsers
if parser.engine == "pyarrow":
msg = "The pyarrow engine doesn't support passing a dict for na_values"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), na_values={"b": ["2"]}, keep_default_na=False
)
return
result = parser.read_csv(
StringIO(data), na_values={"b": ["2"]}, keep_default_na=False
)
expected = DataFrame({"a": [""], "b": [np.nan]})
tm.assert_frame_equal(result, expected)
def test_no_keep_default_na_dict_na_scalar_values(all_parsers):
# see gh-19227
#
# Scalar values shouldn't cause the parsing to crash or fail.
data = "a,b\n1,2"
parser = all_parsers
if parser.engine == "pyarrow":
msg = "The pyarrow engine doesn't support passing a dict for na_values"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), na_values={"b": 2}, keep_default_na=False)
return
df = parser.read_csv(StringIO(data), na_values={"b": 2}, keep_default_na=False)
expected = DataFrame({"a": [1], "b": [np.nan]})
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("col_zero_na_values", [113125, "113125"])
def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_values):
# see gh-19227
data = """\
113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008
729639,"qwer","",asdfkj,466.681,,252.373
"""
parser = all_parsers
expected = DataFrame(
{
0: [np.nan, 729639.0],
1: [np.nan, "qwer"],
2: ["/blaha", np.nan],
3: ["kjsdkj", "asdfkj"],
4: [412.166, 466.681],
5: ["225.874", ""],
6: [np.nan, 252.373],
}
)
if parser.engine == "pyarrow":
msg = "The pyarrow engine doesn't support passing a dict for na_values"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
header=None,
keep_default_na=False,
na_values={2: "", 6: "214.008", 1: "blah", 0: col_zero_na_values},
)
return
result = parser.read_csv(
StringIO(data),
header=None,
keep_default_na=False,
na_values={2: "", 6: "214.008", 1: "blah", 0: col_zero_na_values},
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"na_filter,row_data",
[
(True, [[1, "A"], [np.nan, np.nan], [3, "C"]]),
(False, [["1", "A"], ["nan", "B"], ["3", "C"]]),
],
)
def test_na_values_na_filter_override(
request, all_parsers, na_filter, row_data, using_infer_string
):
parser = all_parsers
if parser.engine == "pyarrow":
# mismatched dtypes in both cases, FutureWarning in the True case
if not (using_infer_string and na_filter):
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.applymarker(mark)
data = """\
A,B
1,A
nan,B
3,C
"""
result = parser.read_csv(StringIO(data), na_values=["B"], na_filter=na_filter)
expected = DataFrame(row_data, columns=["A", "B"])
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Expected 8 columns, got 5:
def test_na_trailing_columns(all_parsers):
parser = all_parsers
data = """Date,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax
2012-03-14,USD,AAPL,BUY,1000
2012-05-12,USD,SBUX,SELL,500"""
# Trailing columns should be all NaN.
result = parser.read_csv(StringIO(data))
expected = DataFrame(
[
["2012-03-14", "USD", "AAPL", "BUY", 1000, np.nan, np.nan, np.nan],
["2012-05-12", "USD", "SBUX", "SELL", 500, np.nan, np.nan, np.nan],
],
columns=[
"Date",
"Currency",
"Symbol",
"Type",
"Units",
"UnitPrice",
"Cost",
"Tax",
],
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"na_values,row_data",
[
(1, [[np.nan, 2.0], [2.0, np.nan]]),
({"a": 2, "b": 1}, [[1.0, 2.0], [np.nan, np.nan]]),
],
)
def test_na_values_scalar(all_parsers, na_values, row_data):
# see gh-12224
parser = all_parsers
names = ["a", "b"]
data = "1,2\n2,1"
if parser.engine == "pyarrow" and isinstance(na_values, dict):
if isinstance(na_values, dict):
err = ValueError
msg = "The pyarrow engine doesn't support passing a dict for na_values"
else:
err = TypeError
msg = "The 'pyarrow' engine requires all na_values to be strings"
with pytest.raises(err, match=msg):
parser.read_csv(StringIO(data), names=names, na_values=na_values)
return
elif parser.engine == "pyarrow":
msg = "The 'pyarrow' engine requires all na_values to be strings"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), names=names, na_values=na_values)
return
result = parser.read_csv(StringIO(data), names=names, na_values=na_values)
expected = DataFrame(row_data, columns=names)
tm.assert_frame_equal(result, expected)
def test_na_values_dict_aliasing(all_parsers):
parser = all_parsers
na_values = {"a": 2, "b": 1}
na_values_copy = na_values.copy()
names = ["a", "b"]
data = "1,2\n2,1"
expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
if parser.engine == "pyarrow":
msg = "The pyarrow engine doesn't support passing a dict for na_values"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), names=names, na_values=na_values)
return
result = parser.read_csv(StringIO(data), names=names, na_values=na_values)
tm.assert_frame_equal(result, expected)
tm.assert_dict_equal(na_values, na_values_copy)
def test_na_values_dict_col_index(all_parsers):
# see gh-14203
data = "a\nfoo\n1"
parser = all_parsers
na_values = {0: "foo"}
if parser.engine == "pyarrow":
msg = "The pyarrow engine doesn't support passing a dict for na_values"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), na_values=na_values)
return
result = parser.read_csv(StringIO(data), na_values=na_values)
expected = DataFrame({"a": [np.nan, 1]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data,kwargs,expected",
[
(
str(2**63) + "\n" + str(2**63 + 1),
{"na_values": [2**63]},
DataFrame([str(2**63), str(2**63 + 1)]),
),
(str(2**63) + ",1" + "\n,2", {}, DataFrame([[str(2**63), 1], ["", 2]])),
(str(2**63) + "\n1", {"na_values": [2**63]}, DataFrame([np.nan, 1])),
],
)
def test_na_values_uint64(all_parsers, data, kwargs, expected, request):
# see gh-14983
parser = all_parsers
if parser.engine == "pyarrow" and "na_values" in kwargs:
msg = "The 'pyarrow' engine requires all na_values to be strings"
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), header=None, **kwargs)
return
elif parser.engine == "pyarrow":
mark = pytest.mark.xfail(reason="Returns float64 instead of object")
request.applymarker(mark)
result = parser.read_csv(StringIO(data), header=None, **kwargs)
tm.assert_frame_equal(result, expected)
def test_empty_na_values_no_default_with_index(all_parsers):
# see gh-15835
data = "a,1\nb,2"
parser = all_parsers
expected = DataFrame({"1": [2]}, index=Index(["b"], name="a"))
result = parser.read_csv(StringIO(data), index_col=0, keep_default_na=False)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"na_filter,index_data", [(False, ["", "5"]), (True, [np.nan, 5.0])]
)
def test_no_na_filter_on_index(all_parsers, na_filter, index_data, request):
# see gh-5239
#
# Don't parse NA-values in index unless na_filter=True
parser = all_parsers
data = "a,b,c\n1,,3\n4,5,6"
if parser.engine == "pyarrow" and na_filter is False:
mark = pytest.mark.xfail(reason="mismatched index result")
request.applymarker(mark)
expected = DataFrame({"a": [1, 4], "c": [3, 6]}, index=Index(index_data, name="b"))
result = parser.read_csv(StringIO(data), index_col=[1], na_filter=na_filter)
tm.assert_frame_equal(result, expected)
def test_inf_na_values_with_int_index(all_parsers):
# see gh-17128
parser = all_parsers
data = "idx,col1,col2\n1,3,4\n2,inf,-inf"
# Don't fail with OverflowError with inf's and integer index column.
out = parser.read_csv(StringIO(data), index_col=[0], na_values=["inf", "-inf"])
expected = DataFrame(
{"col1": [3, np.nan], "col2": [4, np.nan]}, index=Index([1, 2], name="idx")
)
tm.assert_frame_equal(out, expected)
@xfail_pyarrow # mismatched shape
@pytest.mark.parametrize("na_filter", [True, False])
def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
# see gh-20377
parser = all_parsers
data = "a,b,c\n1,,3\n4,5,6"
# na_filter=True --> missing value becomes NaN.
# na_filter=False --> missing value remains empty string.
empty = np.nan if na_filter else ""
expected = DataFrame({"a": ["1", "4"], "b": [empty, "5"], "c": ["3", "6"]})
result = parser.read_csv(StringIO(data), na_filter=na_filter, dtype=str)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # mismatched exception message
@pytest.mark.parametrize(
"data, na_values",
[
("false,1\n,1\ntrue", None),
("false,1\nnull,1\ntrue", None),
("false,1\nnan,1\ntrue", None),
("false,1\nfoo,1\ntrue", "foo"),
("false,1\nfoo,1\ntrue", ["foo"]),
("false,1\nfoo,1\ntrue", {"a": "foo"}),
],
)
def test_cast_NA_to_bool_raises_error(all_parsers, data, na_values):
parser = all_parsers
msg = "|".join(
[
"Bool column has NA values in column [0a]",
"cannot safely convert passed user dtype of "
"bool for object dtyped data in column 0",
]
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
header=None,
names=["a", "b"],
dtype={"a": "bool"},
na_values=na_values,
)
# TODO: this test isn't about the na_values keyword, it is about the empty entries
# being returned with NaN entries, whereas the pyarrow engine returns "nan"
@xfail_pyarrow # mismatched shapes
def test_str_nan_dropped(all_parsers):
# see gh-21131
parser = all_parsers
data = """File: small.csv,,
10010010233,0123,654
foo,,bar
01001000155,4530,898"""
result = parser.read_csv(
StringIO(data),
header=None,
names=["col1", "col2", "col3"],
dtype={"col1": str, "col2": str, "col3": str},
).dropna()
expected = DataFrame(
{
"col1": ["10010010233", "01001000155"],
"col2": ["0123", "4530"],
"col3": ["654", "898"],
},
index=[1, 3],
)
tm.assert_frame_equal(result, expected)
def test_nan_multi_index(all_parsers):
# GH 42446
parser = all_parsers
data = "A,B,B\nX,Y,Z\n1,2,inf"
if parser.engine == "pyarrow":
msg = "The pyarrow engine doesn't support passing a dict for na_values"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), header=list(range(2)), na_values={("B", "Z"): "inf"}
)
return
result = parser.read_csv(
StringIO(data), header=list(range(2)), na_values={("B", "Z"): "inf"}
)
expected = DataFrame(
{
("A", "X"): [1],
("B", "Y"): [2],
("B", "Z"): [np.nan],
}
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # Failed: DID NOT RAISE <class 'ValueError'>; it casts the NaN to False
def test_bool_and_nan_to_bool(all_parsers):
# GH#42808
parser = all_parsers
data = """0
NaN
True
False
"""
with pytest.raises(ValueError, match="NA values"):
parser.read_csv(StringIO(data), dtype="bool")
def test_bool_and_nan_to_int(all_parsers):
# GH#42808
parser = all_parsers
data = """0
NaN
True
False
"""
with pytest.raises(ValueError, match="convert|NoneType"):
parser.read_csv(StringIO(data), dtype="int")
def test_bool_and_nan_to_float(all_parsers):
# GH#42808
parser = all_parsers
data = """0
NaN
True
False
"""
result = parser.read_csv(StringIO(data), dtype="float")
expected = DataFrame.from_dict({"0": [np.nan, 1.0, 0.0]})
tm.assert_frame_equal(result, expected)


@ -0,0 +1,327 @@
"""
Tests parsers' ability to read and parse non-local files,
which hence require a network connection to be read.
"""
from io import BytesIO
import logging
import re
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.feather_format import read_feather
from pandas.io.parsers import read_csv
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.mark.network
@pytest.mark.single_cpu
@pytest.mark.parametrize("mode", ["explicit", "infer"])
@pytest.mark.parametrize("engine", ["python", "c"])
def test_compressed_urls(
httpserver,
datapath,
salaries_table,
mode,
engine,
compression_only,
compression_to_extension,
):
# test reading compressed urls with various engines and
# extension inference
if compression_only == "tar":
pytest.skip("TODO: Add tar salaraies.csv to pandas/io/parsers/data")
extension = compression_to_extension[compression_only]
with open(datapath("io", "parser", "data", "salaries.csv" + extension), "rb") as f:
httpserver.serve_content(content=f.read())
url = httpserver.url + "/salaries.csv" + extension
if mode != "explicit":
compression_only = mode
url_table = read_csv(url, sep="\t", compression=compression_only, engine=engine)
tm.assert_frame_equal(url_table, salaries_table)
@pytest.mark.network
@pytest.mark.single_cpu
def test_url_encoding_csv(httpserver, datapath):
"""
read_csv should honor the requested encoding for URLs.
GH 10424
"""
with open(datapath("io", "parser", "data", "unicode_series.csv"), "rb") as f:
httpserver.serve_content(content=f.read())
df = read_csv(httpserver.url, encoding="latin-1", header=None)
assert df.loc[15, 1] == "Á köldum klaka (Cold Fever) (1994)"
@pytest.fixture
def tips_df(datapath):
"""DataFrame with the tips dataset."""
return read_csv(datapath("io", "data", "csv", "tips.csv"))
@pytest.mark.single_cpu
@pytest.mark.usefixtures("s3_resource")
@td.skip_if_not_us_locale()
class TestS3:
def test_parse_public_s3_bucket(self, s3_public_bucket_with_data, tips_df, s3so):
# more of an integration test due to the not-public contents portion
# can probably mock this though.
pytest.importorskip("s3fs")
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
df = read_csv(
f"s3://{s3_public_bucket_with_data.name}/tips.csv" + ext,
compression=comp,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_parse_private_s3_bucket(self, s3_private_bucket_with_data, tips_df, s3so):
# Read public file from bucket with not-public contents
pytest.importorskip("s3fs")
df = read_csv(
f"s3://{s3_private_bucket_with_data.name}/tips.csv", storage_options=s3so
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_parse_public_s3n_bucket(self, s3_public_bucket_with_data, tips_df, s3so):
# Read from AWS s3 as "s3n" URL
df = read_csv(
f"s3n://{s3_public_bucket_with_data.name}/tips.csv",
nrows=10,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3a_bucket(self, s3_public_bucket_with_data, tips_df, s3so):
# Read from AWS s3 as "s3a" URL
df = read_csv(
f"s3a://{s3_public_bucket_with_data.name}/tips.csv",
nrows=10,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3_bucket_nrows(
self, s3_public_bucket_with_data, tips_df, s3so
):
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
df = read_csv(
f"s3://{s3_public_bucket_with_data.name}/tips.csv" + ext,
nrows=10,
compression=comp,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_parse_public_s3_bucket_chunked(
self, s3_public_bucket_with_data, tips_df, s3so
):
# Read with a chunksize
chunksize = 5
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
with read_csv(
f"s3://{s3_public_bucket_with_data.name}/tips.csv" + ext,
chunksize=chunksize,
compression=comp,
storage_options=s3so,
) as df_reader:
assert df_reader.chunksize == chunksize
for i_chunk in [0, 1, 2]:
# Read a couple of chunks and make sure we see them
# properly.
df = df_reader.get_chunk()
assert isinstance(df, DataFrame)
assert not df.empty
true_df = tips_df.iloc[
chunksize * i_chunk : chunksize * (i_chunk + 1)
]
tm.assert_frame_equal(true_df, df)
def test_parse_public_s3_bucket_chunked_python(
self, s3_public_bucket_with_data, tips_df, s3so
):
# Read with a chunksize using the Python parser
chunksize = 5
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
with read_csv(
f"s3://{s3_public_bucket_with_data.name}/tips.csv" + ext,
chunksize=chunksize,
compression=comp,
engine="python",
storage_options=s3so,
) as df_reader:
assert df_reader.chunksize == chunksize
for i_chunk in [0, 1, 2]:
# Read a couple of chunks and make sure we see them properly.
df = df_reader.get_chunk()
assert isinstance(df, DataFrame)
assert not df.empty
true_df = tips_df.iloc[
chunksize * i_chunk : chunksize * (i_chunk + 1)
]
tm.assert_frame_equal(true_df, df)
def test_parse_public_s3_bucket_python(
self, s3_public_bucket_with_data, tips_df, s3so
):
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
df = read_csv(
f"s3://{s3_public_bucket_with_data.name}/tips.csv" + ext,
engine="python",
compression=comp,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_infer_s3_compression(self, s3_public_bucket_with_data, tips_df, s3so):
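        # With compression="infer", the codec is deduced from the key's extension,
        # so the plain, gzip and bz2 objects should all round-trip to the same frame.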
for ext in ["", ".gz", ".bz2"]:
df = read_csv(
f"s3://{s3_public_bucket_with_data.name}/tips.csv" + ext,
engine="python",
compression="infer",
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(df, tips_df)
def test_parse_public_s3_bucket_nrows_python(
self, s3_public_bucket_with_data, tips_df, s3so
):
for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]:
df = read_csv(
f"s3://{s3_public_bucket_with_data.name}/tips.csv" + ext,
engine="python",
nrows=10,
compression=comp,
storage_options=s3so,
)
assert isinstance(df, DataFrame)
assert not df.empty
tm.assert_frame_equal(tips_df.iloc[:10], df)
def test_read_s3_fails(self, s3so):
msg = "The specified bucket does not exist"
with pytest.raises(OSError, match=msg):
read_csv("s3://nyqpug/asdf.csv", storage_options=s3so)
def test_read_s3_fails_private(self, s3_private_bucket, s3so):
msg = "The specified bucket does not exist"
# Receive a permission error when trying to read a private bucket.
# It's irrelevant here that this isn't actually a table.
with pytest.raises(OSError, match=msg):
read_csv(f"s3://{s3_private_bucket.name}/file.csv")
@pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False)
def test_write_s3_csv_fails(self, tips_df, s3so):
# GH 32486
# Attempting to write to an invalid S3 path should raise
import botocore
# GH 34087
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
# Catch a ClientError since AWS Service Errors are defined dynamically
error = (FileNotFoundError, botocore.exceptions.ClientError)
with pytest.raises(error, match="The specified bucket does not exist"):
tips_df.to_csv(
"s3://an_s3_bucket_data_doesnt_exit/not_real.csv", storage_options=s3so
)
@pytest.mark.xfail(reason="GH#39155 s3fs upgrade", strict=False)
def test_write_s3_parquet_fails(self, tips_df, s3so):
# GH 27679
# Attempting to write to an invalid S3 path should raise
pytest.importorskip("pyarrow")
import botocore
# GH 34087
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html
# Catch a ClientError since AWS Service Errors are defined dynamically
error = (FileNotFoundError, botocore.exceptions.ClientError)
with pytest.raises(error, match="The specified bucket does not exist"):
tips_df.to_parquet(
"s3://an_s3_bucket_data_doesnt_exit/not_real.parquet",
storage_options=s3so,
)
@pytest.mark.single_cpu
def test_read_csv_handles_boto_s3_object(
self, s3_public_bucket_with_data, tips_file
):
# see gh-16135
s3_object = s3_public_bucket_with_data.Object("tips.csv")
with BytesIO(s3_object.get()["Body"].read()) as buffer:
result = read_csv(buffer, encoding="utf8")
assert isinstance(result, DataFrame)
assert not result.empty
expected = read_csv(tips_file)
tm.assert_frame_equal(result, expected)
@pytest.mark.single_cpu
def test_read_csv_chunked_download(self, s3_public_bucket, caplog, s3so):
# 8 MB, S3FS uses 5MB chunks
df = DataFrame(np.zeros((100000, 4)), columns=list("abcd"))
with BytesIO(df.to_csv().encode("utf-8")) as buf:
s3_public_bucket.put_object(Key="large-file.csv", Body=buf)
uri = f"{s3_public_bucket.name}/large-file.csv"
match_re = re.compile(rf"^Fetch: {uri}, 0-(?P<stop>\d+)$")
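        # s3fs logs each ranged GET as "Fetch: <key>, <start>-<stop>"; capture the
        # stop offset to verify that far less than the full 8 MB file was fetched.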
with caplog.at_level(logging.DEBUG, logger="s3fs"):
read_csv(
f"s3://{uri}",
nrows=5,
storage_options=s3so,
)
for log in caplog.messages:
if match := re.match(match_re, log):
# Less than 8 MB
assert int(match.group("stop")) < 8000000
def test_read_s3_with_hash_in_key(self, s3_public_bucket_with_data, tips_df, s3so):
# GH 25945
result = read_csv(
f"s3://{s3_public_bucket_with_data.name}/tips#1.csv", storage_options=s3so
)
tm.assert_frame_equal(tips_df, result)
def test_read_feather_s3_file_path(
self, s3_public_bucket_with_data, feather_file, s3so
):
# GH 29055
pytest.importorskip("pyarrow")
expected = read_feather(feather_file)
res = read_feather(
f"s3://{s3_public_bucket_with_data.name}/simple_dataset.feather",
storage_options=s3so,
)
tm.assert_frame_equal(expected, res)

File diff suppressed because it is too large

View File

@ -0,0 +1,566 @@
"""
Tests that apply specifically to the Python parser. Unless a test is
explicitly noted as covering a Python-specific issue, the goal is to
eventually move these tests out of this module once the C parser can accept
the relevant arguments.
"""
from __future__ import annotations
import csv
from io import (
BytesIO,
StringIO,
TextIOWrapper,
)
from typing import TYPE_CHECKING
import numpy as np
import pytest
from pandas.errors import (
ParserError,
ParserWarning,
)
from pandas import (
DataFrame,
Index,
MultiIndex,
)
import pandas._testing as tm
if TYPE_CHECKING:
from collections.abc import Iterator
def test_default_separator(python_parser_only):
# see gh-17333
#
# csv.Sniffer in Python treats "o" as separator.
data = "aob\n1o2\n3o4"
parser = python_parser_only
expected = DataFrame({"a": [1, 3], "b": [2, 4]})
result = parser.read_csv(StringIO(data), sep=None)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("skipfooter", ["foo", 1.5, True])
def test_invalid_skipfooter_non_int(python_parser_only, skipfooter):
# see gh-15925 (comment)
data = "a\n1\n2"
parser = python_parser_only
msg = "skipfooter must be an integer"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)
def test_invalid_skipfooter_negative(python_parser_only):
# see gh-15925 (comment)
data = "a\n1\n2"
parser = python_parser_only
msg = "skipfooter cannot be negative"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), skipfooter=-1)
@pytest.mark.parametrize("kwargs", [{"sep": None}, {"delimiter": "|"}])
def test_sniff_delimiter(python_parser_only, kwargs):
data = """index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
parser = python_parser_only
result = parser.read_csv(StringIO(data), index_col=0, **kwargs)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"],
index=Index(["foo", "bar", "baz"], name="index"),
)
tm.assert_frame_equal(result, expected)
def test_sniff_delimiter_comment(python_parser_only):
data = """# comment line
index|A|B|C
# comment line
foo|1|2|3 # ignore | this
bar|4|5|6
baz|7|8|9
"""
parser = python_parser_only
result = parser.read_csv(StringIO(data), index_col=0, sep=None, comment="#")
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"],
index=Index(["foo", "bar", "baz"], name="index"),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("encoding", [None, "utf-8"])
def test_sniff_delimiter_encoding(python_parser_only, encoding):
parser = python_parser_only
data = """ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
if encoding is not None:
data = data.encode(encoding)
data = BytesIO(data)
data = TextIOWrapper(data, encoding=encoding)
else:
data = StringIO(data)
result = parser.read_csv(data, index_col=0, sep=None, skiprows=2, encoding=encoding)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=["A", "B", "C"],
index=Index(["foo", "bar", "baz"], name="index"),
)
tm.assert_frame_equal(result, expected)
def test_single_line(python_parser_only):
# see gh-6607: sniff separator
parser = python_parser_only
result = parser.read_csv(StringIO("1,2"), names=["a", "b"], header=None, sep=None)
expected = DataFrame({"a": [1], "b": [2]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("kwargs", [{"skipfooter": 2}, {"nrows": 3}])
def test_skipfooter(python_parser_only, kwargs):
# see gh-6607
data = """A,B,C
1,2,3
4,5,6
7,8,9
want to skip this
also also skip this
"""
parser = python_parser_only
result = parser.read_csv(StringIO(data), **kwargs)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["A", "B", "C"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"compression,klass", [("gzip", "GzipFile"), ("bz2", "BZ2File")]
)
def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
# see gh-6607
parser = python_parser_only
with open(csv1, "rb") as f:
data = f.read()
data = data.replace(b",", b"::")
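    # Swap in a two-character delimiter so the Python engine's regex-separator
    # path is exercised against gzip/bz2-compressed input.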
expected = parser.read_csv(csv1)
module = pytest.importorskip(compression)
klass = getattr(module, klass)
with tm.ensure_clean() as path:
with klass(path, mode="wb") as tmp:
tmp.write(data)
result = parser.read_csv(path, sep="::", compression=compression)
tm.assert_frame_equal(result, expected)
def test_read_csv_buglet_4x_multi_index(python_parser_only):
# see gh-6607
data = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
parser = python_parser_only
expected = DataFrame(
[
[-0.5109, -2.3358, -0.4645, 0.05076, 0.3640],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
[-0.6662, -0.5243, -0.3580, 0.89145, 2.5838],
],
columns=["A", "B", "C", "D", "E"],
index=MultiIndex.from_tuples(
[("a", "b", 10.0032, 5), ("a", "q", 20, 4), ("x", "q", 30, 3)],
names=["one", "two", "three", "four"],
),
)
result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)
def test_read_csv_buglet_4x_multi_index2(python_parser_only):
# see gh-6893
data = " A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9"
parser = python_parser_only
expected = DataFrame.from_records(
[(1, 3, 7, 0, 3, 6), (3, 1, 4, 1, 5, 9)],
columns=list("abcABC"),
index=list("abc"),
)
result = parser.read_csv(StringIO(data), sep=r"\s+")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("add_footer", [True, False])
def test_skipfooter_with_decimal(python_parser_only, add_footer):
# see gh-6971
data = "1#2\n3#4"
parser = python_parser_only
expected = DataFrame({"a": [1.2, 3.4]})
if add_footer:
# The stray footer line should not mess with the
# casting of the first two lines if we skip it.
kwargs = {"skipfooter": 1}
data += "\nFooter"
else:
kwargs = {}
result = parser.read_csv(StringIO(data), names=["a"], decimal="#", **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"sep", ["::", "#####", "!!!", "123", "#1!c5", "%!c!d", "@@#4:2", "_!pd#_"]
)
@pytest.mark.parametrize(
"encoding", ["utf-16", "utf-16-be", "utf-16-le", "utf-32", "cp037"]
)
def test_encoding_non_utf8_multichar_sep(python_parser_only, sep, encoding):
# see gh-3404
expected = DataFrame({"a": [1], "b": [2]})
parser = python_parser_only
data = "1" + sep + "2"
encoded_data = data.encode(encoding)
result = parser.read_csv(
BytesIO(encoded_data), sep=sep, names=["a", "b"], encoding=encoding
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE])
def test_multi_char_sep_quotes(python_parser_only, quoting):
# see gh-13374
kwargs = {"sep": ",,"}
parser = python_parser_only
data = 'a,,b\n1,,a\n2,,"2,,b"'
if quoting == csv.QUOTE_NONE:
msg = "Expected 2 fields in line 3, saw 3"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting, **kwargs)
else:
msg = "ignored when a multi-char delimiter is used"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting, **kwargs)
def test_none_delimiter(python_parser_only):
# see gh-13374 and gh-17465
parser = python_parser_only
data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9"
expected = DataFrame({"a": [0, 7], "b": [1, 8], "c": [2, 9]})
# We expect the third line in the data to be
# skipped because it is malformed, but we do
# not expect any errors to occur.
with tm.assert_produces_warning(
ParserWarning, match="Skipping line 3", check_stacklevel=False
):
result = parser.read_csv(
StringIO(data), header=0, sep=None, on_bad_lines="warn"
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("data", ['a\n1\n"b"a', 'a,b,c\ncat,foo,bar\ndog,foo,"baz'])
@pytest.mark.parametrize("skipfooter", [0, 1])
def test_skipfooter_bad_row(python_parser_only, data, skipfooter):
# see gh-13879 and gh-15910
parser = python_parser_only
if skipfooter:
msg = "parsing errors in the skipped footer rows"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)
else:
msg = "unexpected end of data|expected after"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)
def test_malformed_skipfooter(python_parser_only):
parser = python_parser_only
data = """ignore
A,B,C
1,2,3 # comment
1,2,3,4,5
2,3,4
footer
"""
msg = "Expected 3 fields in line 4, saw 5"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
def test_python_engine_file_no_next(python_parser_only):
parser = python_parser_only
class NoNextBuffer:
def __init__(self, csv_data) -> None:
self.data = csv_data
def __iter__(self) -> Iterator:
return self.data.__iter__()
def read(self):
return self.data
def readline(self):
return self.data
parser.read_csv(NoNextBuffer("a\n1"))
@pytest.mark.parametrize("bad_line_func", [lambda x: ["2", "3"], lambda x: x[:2]])
def test_on_bad_lines_callable(python_parser_only, bad_line_func):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
result = parser.read_csv(bad_sio, on_bad_lines=bad_line_func)
expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
tm.assert_frame_equal(result, expected)
def test_on_bad_lines_callable_write_to_external_list(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
lst = []
def bad_line_func(bad_line: list[str]) -> list[str]:
lst.append(bad_line)
return ["2", "3"]
result = parser.read_csv(bad_sio, on_bad_lines=bad_line_func)
expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
tm.assert_frame_equal(result, expected)
assert lst == [["2", "3", "4", "5", "6"]]
@pytest.mark.parametrize("bad_line_func", [lambda x: ["foo", "bar"], lambda x: x[:2]])
@pytest.mark.parametrize("sep", [",", "111"])
def test_on_bad_lines_callable_iterator_true(python_parser_only, bad_line_func, sep):
# GH 5686
# iterator=True has a separate code path than iterator=False
parser = python_parser_only
data = f"""
0{sep}1
hi{sep}there
foo{sep}bar{sep}baz
good{sep}bye
"""
bad_sio = StringIO(data)
result_iter = parser.read_csv(
bad_sio, on_bad_lines=bad_line_func, chunksize=1, iterator=True, sep=sep
)
expecteds = [
{"0": "hi", "1": "there"},
{"0": "foo", "1": "bar"},
{"0": "good", "1": "bye"},
]
for i, (result, expected) in enumerate(zip(result_iter, expecteds)):
expected = DataFrame(expected, index=range(i, i + 1))
tm.assert_frame_equal(result, expected)
def test_on_bad_lines_callable_dont_swallow_errors(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
msg = "This function is buggy."
def bad_line_func(bad_line):
raise ValueError(msg)
with pytest.raises(ValueError, match=msg):
parser.read_csv(bad_sio, on_bad_lines=bad_line_func)
def test_on_bad_lines_callable_not_expected_length(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
result = parser.read_csv_check_warnings(
ParserWarning, "Length of header or names", bad_sio, on_bad_lines=lambda x: x
)
expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
tm.assert_frame_equal(result, expected)
def test_on_bad_lines_callable_returns_none(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2
2,3,4,5,6
3,4
"""
bad_sio = StringIO(data)
result = parser.read_csv(bad_sio, on_bad_lines=lambda x: None)
expected = DataFrame({"a": [1, 3], "b": [2, 4]})
tm.assert_frame_equal(result, expected)
def test_on_bad_lines_index_col_inferred(python_parser_only):
# GH 5686
parser = python_parser_only
data = """a,b
1,2,3
4,5,6
"""
bad_sio = StringIO(data)
result = parser.read_csv(bad_sio, on_bad_lines=lambda x: ["99", "99"])
expected = DataFrame({"a": [2, 5], "b": [3, 6]}, index=[1, 4])
tm.assert_frame_equal(result, expected)
def test_index_col_false_and_header_none(python_parser_only):
# GH#46955
parser = python_parser_only
data = """
0.5,0.03
0.1,0.2,0.3,2
"""
result = parser.read_csv_check_warnings(
ParserWarning,
"Length of header",
StringIO(data),
sep=",",
header=None,
index_col=False,
)
expected = DataFrame({0: [0.5, 0.1], 1: [0.03, 0.2]})
tm.assert_frame_equal(result, expected)
def test_header_int_do_not_infer_multiindex_names_on_different_line(python_parser_only):
# GH#46569
parser = python_parser_only
data = StringIO("a\na,b\nc,d,e\nf,g,h")
result = parser.read_csv_check_warnings(
ParserWarning, "Length of header", data, engine="python", index_col=False
)
expected = DataFrame({"a": ["a", "c", "f"]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"dtype", [{"a": object}, {"a": str, "b": np.int64, "c": np.int64}]
)
def test_no_thousand_convert_with_dot_for_non_numeric_cols(python_parser_only, dtype):
# GH#50270
parser = python_parser_only
data = """\
a;b;c
0000.7995;16.000;0
3.03.001.00514;0;4.000
4923.600.041;23.000;131"""
result = parser.read_csv(
StringIO(data),
sep=";",
dtype=dtype,
thousands=".",
)
expected = DataFrame(
{
"a": ["0000.7995", "3.03.001.00514", "4923.600.041"],
"b": [16000, 0, 23000],
"c": [0, 4000, 131],
}
)
if dtype["a"] == object:
expected["a"] = expected["a"].astype(object)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"dtype,expected",
[
(
{"a": str, "b": np.float64, "c": np.int64},
DataFrame(
{
"b": [16000.1, 0, 23000],
"c": [0, 4001, 131],
}
),
),
(
str,
DataFrame(
{
"b": ["16,000.1", "0", "23,000"],
"c": ["0", "4,001", "131"],
}
),
),
],
)
def test_no_thousand_convert_for_non_numeric_cols(python_parser_only, dtype, expected):
# GH#50270
parser = python_parser_only
data = """a;b;c
0000,7995;16,000.1;0
3,03,001,00514;0;4,001
4923,600,041;23,000;131
"""
result = parser.read_csv(
StringIO(data),
sep=";",
dtype=dtype,
thousands=",",
)
expected.insert(0, "a", ["0000,7995", "3,03,001,00514", "4923,600,041"])
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,183 @@
"""
Tests that quoting specifications are properly handled
during parsing for all of the parsers defined in parsers.py
"""
import csv
from io import StringIO
import pytest
from pandas.compat import PY311
from pandas.errors import ParserError
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
@pytest.mark.parametrize(
"kwargs,msg",
[
({"quotechar": "foo"}, '"quotechar" must be a(n)? 1-character string'),
(
{"quotechar": None, "quoting": csv.QUOTE_MINIMAL},
"quotechar must be set if quoting enabled",
),
({"quotechar": 2}, '"quotechar" must be string( or None)?, not int'),
],
)
@skip_pyarrow # ParserError: CSV parse error: Empty CSV file or block
def test_bad_quote_char(all_parsers, kwargs, msg):
data = "1,2,3"
parser = all_parsers
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
@pytest.mark.parametrize(
"quoting,msg",
[
("foo", '"quoting" must be an integer|Argument'),
(10, 'bad "quoting" value'), # quoting must be in the range [0, 3]
],
)
@xfail_pyarrow # ValueError: The 'quoting' option is not supported
def test_bad_quoting(all_parsers, quoting, msg):
data = "1,2,3"
parser = all_parsers
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting)
def test_quote_char_basic(all_parsers):
parser = all_parsers
data = 'a,b,c\n1,2,"cat"'
expected = DataFrame([[1, 2, "cat"]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data), quotechar='"')
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quote_char", ["~", "*", "%", "$", "@", "P"])
def test_quote_char_various(all_parsers, quote_char):
parser = all_parsers
expected = DataFrame([[1, 2, "cat"]], columns=["a", "b", "c"])
data = 'a,b,c\n1,2,"cat"'
new_data = data.replace('"', quote_char)
result = parser.read_csv(StringIO(new_data), quotechar=quote_char)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: The 'quoting' option is not supported
@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE])
@pytest.mark.parametrize("quote_char", ["", None])
def test_null_quote_char(all_parsers, quoting, quote_char):
kwargs = {"quotechar": quote_char, "quoting": quoting}
data = "a,b,c\n1,2,3"
parser = all_parsers
if quoting != csv.QUOTE_NONE:
# Sanity checking.
msg = (
'"quotechar" must be a 1-character string'
if PY311 and all_parsers.engine == "python" and quote_char == ""
else "quotechar must be set if quoting enabled"
)
with pytest.raises(TypeError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
elif not (PY311 and all_parsers.engine == "python"):
        # Python 3.11+ doesn't support null/blank quote chars in its csv parser
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"kwargs,exp_data",
[
({}, [[1, 2, "foo"]]), # Test default.
# QUOTE_MINIMAL only applies to CSV writing, so no effect on reading.
({"quotechar": '"', "quoting": csv.QUOTE_MINIMAL}, [[1, 2, "foo"]]),
# QUOTE_MINIMAL only applies to CSV writing, so no effect on reading.
({"quotechar": '"', "quoting": csv.QUOTE_ALL}, [[1, 2, "foo"]]),
# QUOTE_NONE tells the reader to do no special handling
# of quote characters and leave them alone.
({"quotechar": '"', "quoting": csv.QUOTE_NONE}, [[1, 2, '"foo"']]),
# QUOTE_NONNUMERIC tells the reader to cast
# all non-quoted fields to float
({"quotechar": '"', "quoting": csv.QUOTE_NONNUMERIC}, [[1.0, 2.0, "foo"]]),
],
)
@xfail_pyarrow # ValueError: The 'quoting' option is not supported
def test_quoting_various(all_parsers, kwargs, exp_data):
data = '1,2,"foo"'
parser = all_parsers
columns = ["a", "b", "c"]
result = parser.read_csv(StringIO(data), names=columns, **kwargs)
expected = DataFrame(exp_data, columns=columns)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"doublequote,exp_data", [(True, [[3, '4 " 5']]), (False, [[3, '4 " 5"']])]
)
def test_double_quote(all_parsers, doublequote, exp_data, request):
parser = all_parsers
data = 'a,b\n3,"4 "" 5"'
if parser.engine == "pyarrow" and not doublequote:
mark = pytest.mark.xfail(reason="Mismatched result")
request.applymarker(mark)
result = parser.read_csv(StringIO(data), quotechar='"', doublequote=doublequote)
expected = DataFrame(exp_data, columns=["a", "b"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("quotechar", ['"', "\u0001"])
def test_quotechar_unicode(all_parsers, quotechar):
# see gh-14477
data = "a\n1"
parser = all_parsers
expected = DataFrame({"a": [1]})
result = parser.read_csv(StringIO(data), quotechar=quotechar)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("balanced", [True, False])
def test_unbalanced_quoting(all_parsers, balanced, request):
# see gh-22789.
parser = all_parsers
data = 'a,b,c\n1,2,"3'
if parser.engine == "pyarrow" and not balanced:
mark = pytest.mark.xfail(reason="Mismatched result")
request.applymarker(mark)
if balanced:
# Re-balance the quoting and read in without errors.
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
result = parser.read_csv(StringIO(data + '"'))
tm.assert_frame_equal(result, expected)
else:
msg = (
"EOF inside string starting at row 1"
if parser.engine == "c"
else "unexpected end of data"
)
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data))

File diff suppressed because it is too large

View File

@ -0,0 +1,334 @@
"""
Tests that skipped rows are properly handled during
parsing for all of the parsers defined in parsers.py
"""
from datetime import datetime
from io import StringIO
import numpy as np
import pytest
from pandas.errors import EmptyDataError
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
@pytest.mark.parametrize("skiprows", [list(range(6)), 6])
def test_skip_rows_bug(all_parsers, skiprows):
# see gh-505
parser = all_parsers
text = """#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
result = parser.read_csv(
StringIO(text), skiprows=skiprows, header=None, index_col=0, parse_dates=True
)
index = Index(
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0
)
expected = DataFrame(
np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
def test_deep_skip_rows(all_parsers):
# see gh-4382
parser = all_parsers
data = "a,b,c\n" + "\n".join(
[",".join([str(i), str(i + 1), str(i + 2)]) for i in range(10)]
)
condensed_data = "a,b,c\n" + "\n".join(
[",".join([str(i), str(i + 1), str(i + 2)]) for i in [0, 1, 2, 3, 4, 6, 8, 9]]
)
result = parser.read_csv(StringIO(data), skiprows=[6, 8])
condensed_result = parser.read_csv(StringIO(condensed_data))
tm.assert_frame_equal(result, condensed_result)
@xfail_pyarrow # AssertionError: DataFrame are different
def test_skip_rows_blank(all_parsers):
# see gh-9832
parser = all_parsers
text = """#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
data = parser.read_csv(
StringIO(text), skiprows=6, header=None, index_col=0, parse_dates=True
)
index = Index(
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0
)
expected = DataFrame(
np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index
)
tm.assert_frame_equal(data, expected)
@pytest.mark.parametrize(
"data,kwargs,expected",
[
(
"""id,text,num_lines
1,"line 11
line 12",2
2,"line 21
line 22",2
3,"line 31",1""",
{"skiprows": [1]},
DataFrame(
[[2, "line 21\nline 22", 2], [3, "line 31", 1]],
columns=["id", "text", "num_lines"],
),
),
(
"a,b,c\n~a\n b~,~e\n d~,~f\n f~\n1,2,~12\n 13\n 14~",
{"quotechar": "~", "skiprows": [2]},
DataFrame([["a\n b", "e\n d", "f\n f"]], columns=["a", "b", "c"]),
),
(
(
"Text,url\n~example\n "
"sentence\n one~,url1\n~"
"example\n sentence\n two~,url2\n~"
"example\n sentence\n three~,url3"
),
{"quotechar": "~", "skiprows": [1, 3]},
DataFrame([["example\n sentence\n two", "url2"]], columns=["Text", "url"]),
),
],
)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
def test_skip_row_with_newline(all_parsers, data, kwargs, expected):
# see gh-12775 and gh-10911
parser = all_parsers
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
def test_skip_row_with_quote(all_parsers):
# see gh-12775 and gh-10911
parser = all_parsers
data = """id,text,num_lines
1,"line '11' line 12",2
2,"line '21' line 22",2
3,"line '31' line 32",1"""
exp_data = [[2, "line '21' line 22", 2], [3, "line '31' line 32", 1]]
expected = DataFrame(exp_data, columns=["id", "text", "num_lines"])
result = parser.read_csv(StringIO(data), skiprows=[1])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"data,exp_data",
[
(
"""id,text,num_lines
1,"line \n'11' line 12",2
2,"line \n'21' line 22",2
3,"line \n'31' line 32",1""",
[[2, "line \n'21' line 22", 2], [3, "line \n'31' line 32", 1]],
),
(
"""id,text,num_lines
1,"line '11\n' line 12",2
2,"line '21\n' line 22",2
3,"line '31\n' line 32",1""",
[[2, "line '21\n' line 22", 2], [3, "line '31\n' line 32", 1]],
),
(
"""id,text,num_lines
1,"line '11\n' \r\tline 12",2
2,"line '21\n' \r\tline 22",2
3,"line '31\n' \r\tline 32",1""",
[[2, "line '21\n' \r\tline 22", 2], [3, "line '31\n' \r\tline 32", 1]],
),
],
)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data):
# see gh-12775 and gh-10911
parser = all_parsers
result = parser.read_csv(StringIO(data), skiprows=[1])
expected = DataFrame(exp_data, columns=["id", "text", "num_lines"])
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: The 'delim_whitespace' option is not supported
@pytest.mark.parametrize(
"lineterminator", ["\n", "\r\n", "\r"] # "LF" # "CRLF" # "CR"
)
def test_skiprows_lineterminator(all_parsers, lineterminator, request):
# see gh-9079
parser = all_parsers
data = "\n".join(
[
"SMOSMANIA ThetaProbe-ML2X ",
"2007/01/01 01:00 0.2140 U M ",
"2007/01/01 02:00 0.2141 M O ",
"2007/01/01 04:00 0.2142 D M ",
]
)
expected = DataFrame(
[
["2007/01/01", "01:00", 0.2140, "U", "M"],
["2007/01/01", "02:00", 0.2141, "M", "O"],
["2007/01/01", "04:00", 0.2142, "D", "M"],
],
columns=["date", "time", "var", "flag", "oflag"],
)
if parser.engine == "python" and lineterminator == "\r":
        mark = pytest.mark.xfail(
            reason="'CR' is not respected by the Python parser yet"
        )
request.applymarker(mark)
data = data.replace("\n", lineterminator)
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(
StringIO(data),
skiprows=1,
delim_whitespace=True,
names=["date", "time", "var", "flag", "oflag"],
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # AssertionError: DataFrame are different
def test_skiprows_infield_quote(all_parsers):
# see gh-14459
parser = all_parsers
data = 'a"\nb"\na\n1'
expected = DataFrame({"a": [1]})
result = parser.read_csv(StringIO(data), skiprows=2)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
@pytest.mark.parametrize(
"kwargs,expected",
[
({}, DataFrame({"1": [3, 5]})),
({"header": 0, "names": ["foo"]}, DataFrame({"foo": [3, 5]})),
],
)
def test_skip_rows_callable(all_parsers, kwargs, expected):
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
result = parser.read_csv(StringIO(data), skiprows=lambda x: x % 2 == 0, **kwargs)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
def test_skip_rows_callable_not_in(all_parsers):
parser = all_parsers
data = "0,a\n1,b\n2,c\n3,d\n4,e"
expected = DataFrame([[1, "b"], [3, "d"]])
result = parser.read_csv(
StringIO(data), header=None, skiprows=lambda x: x not in [1, 3]
)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
def test_skip_rows_skip_all(all_parsers):
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
msg = "No columns to parse from file"
with pytest.raises(EmptyDataError, match=msg):
parser.read_csv(StringIO(data), skiprows=lambda x: True)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
def test_skip_rows_bad_callable(all_parsers):
msg = "by zero"
parser = all_parsers
data = "a\n1\n2\n3\n4\n5"
with pytest.raises(ZeroDivisionError, match=msg):
parser.read_csv(StringIO(data), skiprows=lambda x: 1 / 0)
@xfail_pyarrow # ValueError: skiprows argument must be an integer
def test_skip_rows_and_n_rows(all_parsers):
# GH#44021
data = """a,b
1,a
2,b
3,c
4,d
5,e
6,f
7,g
8,h
"""
parser = all_parsers
result = parser.read_csv(StringIO(data), nrows=5, skiprows=[2, 4, 6])
expected = DataFrame({"a": [1, 3, 5, 7, 8], "b": ["a", "c", "e", "g", "h"]})
tm.assert_frame_equal(result, expected)
@xfail_pyarrow
def test_skip_rows_with_chunks(all_parsers):
# GH 55677
data = """col_a
10
20
30
40
50
60
70
80
90
100
"""
parser = all_parsers
reader = parser.read_csv(
        StringIO(data), skiprows=lambda x: x in [1, 4, 5], chunksize=4
)
df1 = next(reader)
df2 = next(reader)
tm.assert_frame_equal(df1, DataFrame({"col_a": [20, 30, 60, 70]}))
tm.assert_frame_equal(df2, DataFrame({"col_a": [80, 90, 100]}, index=[4, 5, 6]))

View File

@ -0,0 +1,342 @@
"""
Tests the TextReader class in parsers.pyx, which
is integral to the C engine in parsers.py
"""
from io import (
BytesIO,
StringIO,
)
import numpy as np
import pytest
import pandas._libs.parsers as parser
from pandas._libs.parsers import TextReader
from pandas.errors import ParserWarning
from pandas import DataFrame
import pandas._testing as tm
from pandas.io.parsers import (
TextFileReader,
read_csv,
)
from pandas.io.parsers.c_parser_wrapper import ensure_dtype_objs
class TestTextReader:
@pytest.fixture
def csv_path(self, datapath):
return datapath("io", "data", "csv", "test1.csv")
def test_file_handle(self, csv_path):
with open(csv_path, "rb") as f:
reader = TextReader(f)
reader.read()
def test_file_handle_mmap(self, csv_path):
# this was never using memory_map=True
with open(csv_path, "rb") as f:
reader = TextReader(f, header=None)
reader.read()
def test_StringIO(self, csv_path):
with open(csv_path, "rb") as f:
text = f.read()
src = BytesIO(text)
reader = TextReader(src, header=None)
reader.read()
def test_string_factorize(self):
# should this be optional?
data = "a\nb\na\nb\na"
reader = TextReader(StringIO(data), header=None)
result = reader.read()
assert len(set(map(id, result[0]))) == 2
def test_skipinitialspace(self):
data = "a, b\na, b\na, b\na, b"
reader = TextReader(StringIO(data), skipinitialspace=True, header=None)
result = reader.read()
tm.assert_numpy_array_equal(
result[0], np.array(["a", "a", "a", "a"], dtype=np.object_)
)
tm.assert_numpy_array_equal(
result[1], np.array(["b", "b", "b", "b"], dtype=np.object_)
)
def test_parse_booleans(self):
data = "True\nFalse\nTrue\nTrue"
reader = TextReader(StringIO(data), header=None)
result = reader.read()
assert result[0].dtype == np.bool_
def test_delimit_whitespace(self):
data = 'a b\na\t\t "b"\n"a"\t \t b'
reader = TextReader(StringIO(data), delim_whitespace=True, header=None)
result = reader.read()
tm.assert_numpy_array_equal(
result[0], np.array(["a", "a", "a"], dtype=np.object_)
)
tm.assert_numpy_array_equal(
result[1], np.array(["b", "b", "b"], dtype=np.object_)
)
def test_embedded_newline(self):
data = 'a\n"hello\nthere"\nthis'
reader = TextReader(StringIO(data), header=None)
result = reader.read()
expected = np.array(["a", "hello\nthere", "this"], dtype=np.object_)
tm.assert_numpy_array_equal(result[0], expected)
def test_euro_decimal(self):
data = "12345,67\n345,678"
reader = TextReader(StringIO(data), delimiter=":", decimal=",", header=None)
result = reader.read()
expected = np.array([12345.67, 345.678])
tm.assert_almost_equal(result[0], expected)
def test_integer_thousands(self):
data = "123,456\n12,500"
reader = TextReader(StringIO(data), delimiter=":", thousands=",", header=None)
result = reader.read()
expected = np.array([123456, 12500], dtype=np.int64)
tm.assert_almost_equal(result[0], expected)
def test_integer_thousands_alt(self):
data = "123.456\n12.500"
reader = TextFileReader(
StringIO(data), delimiter=":", thousands=".", header=None
)
result = reader.read()
expected = DataFrame([123456, 12500])
tm.assert_frame_equal(result, expected)
def test_skip_bad_lines(self):
# too many lines, see #2430 for why
data = "a:b:c\nd:e:f\ng:h:i\nj:k:l:m\nl:m:n\no:p:q:r"
reader = TextReader(StringIO(data), delimiter=":", header=None)
msg = r"Error tokenizing data\. C error: Expected 3 fields in line 4, saw 4"
with pytest.raises(parser.ParserError, match=msg):
reader.read()
reader = TextReader(
StringIO(data), delimiter=":", header=None, on_bad_lines=2 # Skip
)
result = reader.read()
expected = {
0: np.array(["a", "d", "g", "l"], dtype=object),
1: np.array(["b", "e", "h", "m"], dtype=object),
2: np.array(["c", "f", "i", "n"], dtype=object),
}
assert_array_dicts_equal(result, expected)
with tm.assert_produces_warning(ParserWarning, match="Skipping line"):
reader = TextReader(
StringIO(data), delimiter=":", header=None, on_bad_lines=1 # Warn
)
reader.read()
def test_header_not_enough_lines(self):
data = "skip this\nskip this\na,b,c\n1,2,3\n4,5,6"
reader = TextReader(StringIO(data), delimiter=",", header=2)
header = reader.header
expected = [["a", "b", "c"]]
assert header == expected
recs = reader.read()
expected = {
0: np.array([1, 4], dtype=np.int64),
1: np.array([2, 5], dtype=np.int64),
2: np.array([3, 6], dtype=np.int64),
}
assert_array_dicts_equal(recs, expected)
def test_escapechar(self):
data = '\\"hello world"\n\\"hello world"\n\\"hello world"'
reader = TextReader(StringIO(data), delimiter=",", header=None, escapechar="\\")
result = reader.read()
expected = {0: np.array(['"hello world"'] * 3, dtype=object)}
assert_array_dicts_equal(result, expected)
def test_eof_has_eol(self):
# handling of new line at EOF
pass
def test_na_substitution(self):
pass
def test_numpy_string_dtype(self):
data = """\
a,1
aa,2
aaa,3
aaaa,4
aaaaa,5"""
def _make_reader(**kwds):
if "dtype" in kwds:
kwds["dtype"] = ensure_dtype_objs(kwds["dtype"])
return TextReader(StringIO(data), delimiter=",", header=None, **kwds)
reader = _make_reader(dtype="S5,i4")
result = reader.read()
assert result[0].dtype == "S5"
ex_values = np.array(["a", "aa", "aaa", "aaaa", "aaaaa"], dtype="S5")
assert (result[0] == ex_values).all()
assert result[1].dtype == "i4"
reader = _make_reader(dtype="S4")
result = reader.read()
assert result[0].dtype == "S4"
ex_values = np.array(["a", "aa", "aaa", "aaaa", "aaaa"], dtype="S4")
assert (result[0] == ex_values).all()
assert result[1].dtype == "S4"
def test_pass_dtype(self):
data = """\
one,two
1,a
2,b
3,c
4,d"""
def _make_reader(**kwds):
if "dtype" in kwds:
kwds["dtype"] = ensure_dtype_objs(kwds["dtype"])
return TextReader(StringIO(data), delimiter=",", **kwds)
reader = _make_reader(dtype={"one": "u1", 1: "S1"})
result = reader.read()
assert result[0].dtype == "u1"
assert result[1].dtype == "S1"
reader = _make_reader(dtype={"one": np.uint8, 1: object})
result = reader.read()
assert result[0].dtype == "u1"
assert result[1].dtype == "O"
reader = _make_reader(dtype={"one": np.dtype("u1"), 1: np.dtype("O")})
result = reader.read()
assert result[0].dtype == "u1"
assert result[1].dtype == "O"
def test_usecols(self):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
def _make_reader(**kwds):
return TextReader(StringIO(data), delimiter=",", **kwds)
reader = _make_reader(usecols=(1, 2))
result = reader.read()
exp = _make_reader().read()
assert len(result) == 2
assert (result[1] == exp[1]).all()
assert (result[2] == exp[2]).all()
@pytest.mark.parametrize(
"text, kwargs",
[
("a,b,c\r1,2,3\r4,5,6\r7,8,9\r10,11,12", {"delimiter": ","}),
(
"a b c\r1 2 3\r4 5 6\r7 8 9\r10 11 12",
{"delim_whitespace": True},
),
("a,b,c\r1,2,3\r4,5,6\r,88,9\r10,11,12", {"delimiter": ","}),
(
(
"A,B,C,D,E,F,G,H,I,J,K,L,M,N,O\r"
"AAAAA,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0\r"
",BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0"
),
{"delimiter": ","},
),
("A B C\r 2 3\r4 5 6", {"delim_whitespace": True}),
("A B C\r2 3\r4 5 6", {"delim_whitespace": True}),
],
)
def test_cr_delimited(self, text, kwargs):
nice_text = text.replace("\r", "\r\n")
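        # Bare-CR line endings should tokenize exactly like their CRLF-normalized
        # counterpart.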
result = TextReader(StringIO(text), **kwargs).read()
expected = TextReader(StringIO(nice_text), **kwargs).read()
assert_array_dicts_equal(result, expected)
def test_empty_field_eof(self):
data = "a,b,c\n1,2,3\n4,,"
result = TextReader(StringIO(data), delimiter=",").read()
expected = {
0: np.array([1, 4], dtype=np.int64),
1: np.array(["2", ""], dtype=object),
2: np.array(["3", ""], dtype=object),
}
assert_array_dicts_equal(result, expected)
@pytest.mark.parametrize("repeat", range(10))
def test_empty_field_eof_mem_access_bug(self, repeat):
# GH5664
a = DataFrame([["b"], [np.nan]], columns=["a"], index=["a", "c"])
b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1])
c = DataFrame(
[
[1, 2, 3, 4],
[6, np.nan, np.nan, np.nan],
[8, 9, 10, 11],
[13, 14, np.nan, np.nan],
],
columns=list("abcd"),
index=[0, 5, 7, 12],
)
df = read_csv(StringIO("a,b\nc\n"), skiprows=0, names=["a"], engine="c")
tm.assert_frame_equal(df, a)
df = read_csv(
StringIO("1,1,1,1,0\n" * 2 + "\n" * 2), names=list("abcd"), engine="c"
)
tm.assert_frame_equal(df, b)
df = read_csv(
StringIO("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14"),
names=list("abcd"),
engine="c",
)
tm.assert_frame_equal(df, c)
def test_empty_csv_input(self):
# GH14867
with read_csv(
StringIO(), chunksize=20, header=None, names=["a", "b", "c"]
) as df:
assert isinstance(df, TextFileReader)
def assert_array_dicts_equal(left, right):
for k, v in left.items():
tm.assert_numpy_array_equal(np.asarray(v), np.asarray(right[k]))

View File

@ -0,0 +1,226 @@
"""
Tests that features that are currently unsupported in
either the Python or C parser are actually enforced
and are clearly communicated to the user.
Ultimately, the goal is to remove test cases from this
test suite as new feature support is added to the parsers.
"""
from io import StringIO
import os
from pathlib import Path
import pytest
from pandas.errors import ParserError
import pandas._testing as tm
from pandas.io.parsers import read_csv
import pandas.io.parsers.readers as parsers
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val)
def python_engine(request):
return request.param
class TestUnsupportedFeatures:
def test_mangle_dupe_cols_false(self):
# see gh-12935
data = "a b c\n1 2 3"
for engine in ("c", "python"):
with pytest.raises(TypeError, match="unexpected keyword"):
read_csv(StringIO(data), engine=engine, mangle_dupe_cols=True)
def test_c_engine(self):
# see gh-6607
data = "a b c\n1 2 3"
msg = "does not support"
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
# specify C engine with unsupported options (raise)
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
read_csv(StringIO(data), engine="c", sep=None, delim_whitespace=False)
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine="c", sep=r"\s")
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine="c", sep="\t", quotechar=chr(128))
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine="c", skipfooter=1)
# specify C-unsupported options without python-unsupported options
with tm.assert_produces_warning((parsers.ParserWarning, FutureWarning)):
read_csv(StringIO(data), sep=None, delim_whitespace=False)
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), sep=r"\s")
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), sep="\t", quotechar=chr(128))
with tm.assert_produces_warning(parsers.ParserWarning):
read_csv(StringIO(data), skipfooter=1)
text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""
msg = "Error tokenizing data"
with pytest.raises(ParserError, match=msg):
read_csv(StringIO(text), sep="\\s+")
with pytest.raises(ParserError, match=msg):
read_csv(StringIO(text), engine="c", sep="\\s+")
msg = "Only length-1 thousands markers supported"
data = """A|B|C
1|2,334|5
10|13|10.
"""
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), thousands=",,")
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), thousands="")
msg = "Only length-1 line terminators supported"
data = "a,b,c~~1,2,3~~4,5,6"
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), lineterminator="~~")
def test_python_engine(self, python_engine):
from pandas.io.parsers.readers import _python_unsupported as py_unsupported
data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
for default in py_unsupported:
msg = (
f"The {repr(default)} option is not "
f"supported with the {repr(python_engine)} engine"
)
kwargs = {default: object()}
with pytest.raises(ValueError, match=msg):
read_csv(StringIO(data), engine=python_engine, **kwargs)
def test_python_engine_file_no_iter(self, python_engine):
# see gh-16530
class NoNextBuffer:
def __init__(self, csv_data) -> None:
self.data = csv_data
def __next__(self):
return self.data.__next__()
def read(self):
return self.data
def readline(self):
return self.data
data = "a\n1"
msg = "'NoNextBuffer' object is not iterable|argument 1 must be an iterator"
with pytest.raises(TypeError, match=msg):
read_csv(NoNextBuffer(data), engine=python_engine)
def test_pyarrow_engine(self):
from pandas.io.parsers.readers import _pyarrow_unsupported as pa_unsupported
data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
for default in pa_unsupported:
msg = (
f"The {repr(default)} option is not "
f"supported with the 'pyarrow' engine"
)
kwargs = {default: object()}
default_needs_bool = {"warn_bad_lines", "error_bad_lines"}
if default == "dialect":
kwargs[default] = "excel" # test a random dialect
elif default in default_needs_bool:
kwargs[default] = True
elif default == "on_bad_lines":
kwargs[default] = "warn"
warn = None
depr_msg = None
if "delim_whitespace" in kwargs:
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
warn = FutureWarning
if "verbose" in kwargs:
depr_msg = "The 'verbose' keyword in pd.read_csv is deprecated"
warn = FutureWarning
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(warn, match=depr_msg):
read_csv(StringIO(data), engine="pyarrow", **kwargs)
def test_on_bad_lines_callable_python_or_pyarrow(self, all_parsers):
# GH 5686
# GH 54643
sio = StringIO("a,b\n1,2")
bad_lines_func = lambda x: x
parser = all_parsers
if all_parsers.engine not in ["python", "pyarrow"]:
msg = (
"on_bad_line can only be a callable "
"function if engine='python' or 'pyarrow'"
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(sio, on_bad_lines=bad_lines_func)
else:
parser.read_csv(sio, on_bad_lines=bad_lines_func)
def test_close_file_handle_on_invalid_usecols(all_parsers):
# GH 45384
parser = all_parsers
error = ValueError
if parser.engine == "pyarrow":
# Raises pyarrow.lib.ArrowKeyError
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
with tm.ensure_clean("test.csv") as fname:
Path(fname).write_text("col1,col2\na,b\n1,2", encoding="utf-8")
with tm.assert_produces_warning(False):
with pytest.raises(error, match="col3"):
parser.read_csv(fname, usecols=["col1", "col2", "col3"])
# unlink fails on windows if file handles still point to it
os.unlink(fname)
def test_invalid_file_inputs(request, all_parsers):
# GH#45957
parser = all_parsers
if parser.engine == "python":
request.applymarker(
pytest.mark.xfail(reason=f"{parser.engine} engine supports lists.")
)
with pytest.raises(ValueError, match="Invalid"):
parser.read_csv([])
def test_invalid_dtype_backend(all_parsers):
parser = all_parsers
msg = (
"dtype_backend numpy is invalid, only 'numpy_nullable' and "
"'pyarrow' are allowed."
)
with pytest.raises(ValueError, match=msg):
parser.read_csv("test", dtype_backend="numpy")

View File

@ -0,0 +1,102 @@
import numpy as np
import pytest
from pandas._libs.parsers import (
_maybe_upcast,
na_values,
)
import pandas as pd
from pandas import NA
import pandas._testing as tm
from pandas.core.arrays import (
ArrowStringArray,
BooleanArray,
FloatingArray,
IntegerArray,
StringArray,
)
def test_maybe_upcast(any_real_numpy_dtype):
# GH#36712
dtype = np.dtype(any_real_numpy_dtype)
na_value = na_values[dtype]
arr = np.array([1, 2, na_value], dtype=dtype)
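    # The parser marks missing entries with a per-dtype sentinel (na_values[dtype]);
    # _maybe_upcast is expected to convert those sentinels into the nullable
    # array's mask.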
result = _maybe_upcast(arr, use_dtype_backend=True)
expected_mask = np.array([False, False, True])
if issubclass(dtype.type, np.integer):
expected = IntegerArray(arr, mask=expected_mask)
else:
expected = FloatingArray(arr, mask=expected_mask)
tm.assert_extension_array_equal(result, expected)
def test_maybe_upcast_no_na(any_real_numpy_dtype):
# GH#36712
arr = np.array([1, 2, 3], dtype=any_real_numpy_dtype)
result = _maybe_upcast(arr, use_dtype_backend=True)
expected_mask = np.array([False, False, False])
if issubclass(np.dtype(any_real_numpy_dtype).type, np.integer):
expected = IntegerArray(arr, mask=expected_mask)
else:
expected = FloatingArray(arr, mask=expected_mask)
tm.assert_extension_array_equal(result, expected)
def test_maybe_upcaste_bool():
# GH#36712
dtype = np.bool_
na_value = na_values[dtype]
arr = np.array([True, False, na_value], dtype="uint8").view(dtype)
result = _maybe_upcast(arr, use_dtype_backend=True)
expected_mask = np.array([False, False, True])
expected = BooleanArray(arr, mask=expected_mask)
tm.assert_extension_array_equal(result, expected)
def test_maybe_upcaste_bool_no_nan():
# GH#36712
dtype = np.bool_
arr = np.array([True, False, False], dtype="uint8").view(dtype)
result = _maybe_upcast(arr, use_dtype_backend=True)
expected_mask = np.array([False, False, False])
expected = BooleanArray(arr, mask=expected_mask)
tm.assert_extension_array_equal(result, expected)
def test_maybe_upcaste_all_nan():
# GH#36712
dtype = np.int64
na_value = na_values[dtype]
arr = np.array([na_value, na_value], dtype=dtype)
result = _maybe_upcast(arr, use_dtype_backend=True)
expected_mask = np.array([True, True])
expected = IntegerArray(arr, mask=expected_mask)
tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize("val", [na_values[np.object_], "c"])
def test_maybe_upcast_object(val, string_storage):
# GH#36712
pa = pytest.importorskip("pyarrow")
with pd.option_context("mode.string_storage", string_storage):
arr = np.array(["a", "b", val], dtype=np.object_)
result = _maybe_upcast(arr, use_dtype_backend=True)
if string_storage == "python":
exp_val = "c" if val == "c" else NA
expected = StringArray(np.array(["a", "b", exp_val], dtype=np.object_))
else:
exp_val = "c" if val == "c" else None
expected = ArrowStringArray(pa.array(["a", "b", exp_val]))
tm.assert_extension_array_equal(result, expected)

View File

@ -0,0 +1,194 @@
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import pytest
from pandas import (
DataFrame,
Index,
Timestamp,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
_msg_pyarrow_requires_names = (
"The pyarrow engine does not allow 'usecols' to be integer column "
"positions. Pass a list of string column names instead."
)
@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]])
def test_usecols_with_parse_dates(all_parsers, usecols):
# see gh-9755
data = """a,b,c,d,e
0,1,2014-01-01,09:00,4
0,1,2014-01-02,10:00,4"""
parser = all_parsers
parse_dates = [[1, 2]]
depr_msg = (
"Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated"
)
cols = {
"a": [0, 0],
"c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],
}
expected = DataFrame(cols, columns=["c_d", "a"])
if parser.engine == "pyarrow":
with pytest.raises(ValueError, match=_msg_pyarrow_requires_names):
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(
StringIO(data), usecols=usecols, parse_dates=parse_dates
)
return
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(
StringIO(data), usecols=usecols, parse_dates=parse_dates
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # pyarrow.lib.ArrowKeyError: Column 'fdate' in include_columns
def test_usecols_with_parse_dates2(all_parsers):
# see gh-13604
parser = all_parsers
data = """2008-02-07 09:40,1032.43
2008-02-07 09:50,1042.54
2008-02-07 10:00,1051.65"""
names = ["date", "values"]
usecols = names[:]
parse_dates = [0]
index = Index(
[
Timestamp("2008-02-07 09:40"),
Timestamp("2008-02-07 09:50"),
Timestamp("2008-02-07 10:00"),
],
name="date",
)
cols = {"values": [1032.43, 1042.54, 1051.65]}
expected = DataFrame(cols, index=index)
result = parser.read_csv(
StringIO(data),
parse_dates=parse_dates,
index_col=0,
usecols=usecols,
header=None,
names=names,
)
tm.assert_frame_equal(result, expected)
def test_usecols_with_parse_dates3(all_parsers):
# see gh-14792
parser = all_parsers
data = """a,b,c,d,e,f,g,h,i,j
2016/09/21,1,1,2,3,4,5,6,7,8"""
usecols = list("abcdefghij")
parse_dates = [0]
cols = {
"a": Timestamp("2016-09-21").as_unit("ns"),
"b": [1],
"c": [1],
"d": [2],
"e": [3],
"f": [4],
"g": [5],
"h": [6],
"i": [7],
"j": [8],
}
expected = DataFrame(cols, columns=usecols)
result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates)
tm.assert_frame_equal(result, expected)
def test_usecols_with_parse_dates4(all_parsers):
data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"
usecols = list("abcdefghij")
parse_dates = [[0, 1]]
parser = all_parsers
cols = {
"a_b": "2016/09/21 1",
"c": [1],
"d": [2],
"e": [3],
"f": [4],
"g": [5],
"h": [6],
"i": [7],
"j": [8],
}
expected = DataFrame(cols, columns=["a_b"] + list("cdefghij"))
depr_msg = (
"Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated"
)
with tm.assert_produces_warning(
(FutureWarning, DeprecationWarning), match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(
StringIO(data),
usecols=usecols,
parse_dates=parse_dates,
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]])
@pytest.mark.parametrize(
"names",
[
list("abcde"), # Names span all columns in original data.
list("acd"), # Names span only the selected columns.
],
)
def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names, request):
# see gh-9755
s = """0,1,2014-01-01,09:00,4
0,1,2014-01-02,10:00,4"""
parse_dates = [[1, 2]]
parser = all_parsers
if parser.engine == "pyarrow" and not (len(names) == 3 and usecols[0] == 0):
mark = pytest.mark.xfail(
reason="Length mismatch in some cases, UserWarning in other"
)
request.applymarker(mark)
cols = {
"a": [0, 0],
"c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],
}
expected = DataFrame(cols, columns=["c_d", "a"])
depr_msg = (
"Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated"
)
with tm.assert_produces_warning(
(FutureWarning, DeprecationWarning), match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(
StringIO(s), names=names, parse_dates=parse_dates, usecols=usecols
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,96 @@
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import pytest
from pandas import DataFrame
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
def test_usecols_with_unicode_strings(all_parsers):
# see gh-13219
data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"AAA": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002,
},
"BBB": {0: 8, 1: 2, 2: 7},
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"])
tm.assert_frame_equal(result, expected)
def test_usecols_with_single_byte_unicode_strings(all_parsers):
# see gh-13219
data = """A,B,C,D
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"A": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002,
},
"B": {0: 8, 1: 2, 2: 7},
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=["A", "B"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]])
def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
_msg_validate_usecols_arg = (
"'usecols' must either be list-like "
"of all strings, all unicode, all "
"integers or a callable."
)
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols=usecols)
@pytest.mark.parametrize("usecols", [["あああ", "いい"], ["あああ", "いい"]])
def test_usecols_with_multi_byte_characters(all_parsers, usecols):
data = """あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
exp_data = {
"あああ": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002,
},
"いい": {0: 8, 1: 2, 2: 7},
}
expected = DataFrame(exp_data)
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,563 @@
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO
import numpy as np
import pytest
from pandas.errors import ParserError
from pandas import (
DataFrame,
Index,
array,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
_msg_validate_usecols_arg = (
"'usecols' must either be list-like "
"of all strings, all unicode, all "
"integers or a callable."
)
_msg_validate_usecols_names = (
"Usecols do not match columns, columns expected but not found: {0}"
)
_msg_pyarrow_requires_names = (
"The pyarrow engine does not allow 'usecols' to be integer column "
"positions. Pass a list of string column names instead."
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
pytestmark = pytest.mark.filterwarnings(
"ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning"
)
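# A minimal sketch of the pyarrow restriction described in the message above
# (assumes the optional pyarrow dependency is installed; data is a placeholder):
#
#     import pandas as pd
#     from io import StringIO
#
#     data = "a,b,c\n1,2,3"
#     pd.read_csv(StringIO(data), usecols=["a", "c"], engine="pyarrow")  # accepted
#     pd.read_csv(StringIO(data), usecols=[0, 2], engine="pyarrow")      # ValueError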
def test_raise_on_mixed_dtype_usecols(all_parsers):
# See gh-12678
data = """a,b,c
1000,2000,3000
4000,5000,6000
"""
usecols = [0, "b", 2]
parser = all_parsers
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols=usecols)
@pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")])
def test_usecols(all_parsers, usecols, request):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
if parser.engine == "pyarrow" and isinstance(usecols[0], int):
with pytest.raises(ValueError, match=_msg_pyarrow_requires_names):
parser.read_csv(StringIO(data), usecols=usecols)
return
result = parser.read_csv(StringIO(data), usecols=usecols)
expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"])
tm.assert_frame_equal(result, expected)
def test_usecols_with_names(all_parsers):
data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
names = ["foo", "bar"]
if parser.engine == "pyarrow":
with pytest.raises(ValueError, match=_msg_pyarrow_requires_names):
parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0)
return
result = parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0)
expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=names)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"names,usecols", [(["b", "c"], [1, 2]), (["a", "b", "c"], ["b", "c"])]
)
def test_usecols_relative_to_names(all_parsers, names, usecols):
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
if parser.engine == "pyarrow" and not isinstance(usecols[0], int):
# ArrowKeyError: Column 'fb' in include_columns does not exist
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
result = parser.read_csv(StringIO(data), names=names, header=None, usecols=usecols)
expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"])
tm.assert_frame_equal(result, expected)
def test_usecols_relative_to_names2(all_parsers):
# see gh-5766
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
result = parser.read_csv(
StringIO(data), names=["a", "b"], header=None, usecols=[0, 1]
)
expected = DataFrame([[1, 2], [4, 5], [7, 8], [10, 11]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
# regex mismatch: "Length mismatch: Expected axis has 1 elements"
@xfail_pyarrow
def test_usecols_name_length_conflict(all_parsers):
data = """\
1,2,3
4,5,6
7,8,9
10,11,12"""
parser = all_parsers
msg = "Number of passed names did not match number of header fields in the file"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), names=["a", "b"], header=None, usecols=[1])
def test_usecols_single_string(all_parsers):
# see gh-20558
parser = all_parsers
data = """foo, bar, baz
1000, 2000, 3000
4000, 5000, 6000"""
with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
parser.read_csv(StringIO(data), usecols="foo")
@skip_pyarrow # CSV parse error in one case, AttributeError in another
@pytest.mark.parametrize(
"data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"]
)
def test_usecols_index_col_false(all_parsers, data):
# see gh-9082
parser = all_parsers
usecols = ["a", "c", "d"]
expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]})
result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("index_col", ["b", 0])
@pytest.mark.parametrize("usecols", [["b", "c"], [1, 2]])
def test_usecols_index_col_conflict(all_parsers, usecols, index_col, request):
# see gh-4201: test that index_col as integer reflects usecols
parser = all_parsers
data = "a,b,c,d\nA,a,1,one\nB,b,2,two"
if parser.engine == "pyarrow" and isinstance(usecols[0], int):
with pytest.raises(ValueError, match=_msg_pyarrow_requires_names):
parser.read_csv(StringIO(data), usecols=usecols, index_col=index_col)
return
expected = DataFrame({"c": [1, 2]}, index=Index(["a", "b"], name="b"))
result = parser.read_csv(StringIO(data), usecols=usecols, index_col=index_col)
tm.assert_frame_equal(result, expected)
def test_usecols_index_col_conflict2(all_parsers):
# see gh-4201: test that index_col as integer reflects usecols
parser = all_parsers
data = "a,b,c,d\nA,a,1,one\nB,b,2,two"
expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")})
expected = expected.set_index(["b", "c"])
result = parser.read_csv(
StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"]
)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Expected 3 columns, got 4
def test_usecols_implicit_index_col(all_parsers):
# see gh-2654
parser = all_parsers
data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10"
result = parser.read_csv(StringIO(data), usecols=["a", "b"])
expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
def test_usecols_index_col_middle(all_parsers):
# GH#9098
parser = all_parsers
data = """a,b,c,d
1,2,3,4
"""
result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="c")
expected = DataFrame({"b": [2], "d": [4]}, index=Index([3], name="c"))
tm.assert_frame_equal(result, expected)
def test_usecols_index_col_end(all_parsers):
# GH#9098
parser = all_parsers
data = """a,b,c,d
1,2,3,4
"""
result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"], index_col="d")
expected = DataFrame({"b": [2], "c": [3]}, index=Index([4], name="d"))
tm.assert_frame_equal(result, expected)
def test_usecols_regex_sep(all_parsers):
# see gh-2733
parser = all_parsers
data = "a b c\n4 apple bat 5.7\n8 orange cow 10"
if parser.engine == "pyarrow":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b"))
return
result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b"))
expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
def test_usecols_with_whitespace(all_parsers):
parser = all_parsers
data = "a b c\n4 apple bat 5.7\n8 orange cow 10"
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
if parser.engine == "pyarrow":
msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(
StringIO(data), delim_whitespace=True, usecols=("a", "b")
)
return
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(
StringIO(data), delim_whitespace=True, usecols=("a", "b")
)
expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"usecols,expected",
[
# Column selection by index.
([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])),
# Column selection by name.
(
["0", "1"],
DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"]),
),
],
)
def test_usecols_with_integer_like_header(all_parsers, usecols, expected, request):
parser = all_parsers
data = """2,0,1
1000,2000,3000
4000,5000,6000"""
if parser.engine == "pyarrow" and isinstance(usecols[0], int):
with pytest.raises(ValueError, match=_msg_pyarrow_requires_names):
parser.read_csv(StringIO(data), usecols=usecols)
return
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
@xfail_pyarrow # mismatched shape
def test_empty_usecols(all_parsers):
data = "a,b,c\n1,2,3\n4,5,6"
expected = DataFrame(columns=Index([]))
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=set())
tm.assert_frame_equal(result, expected)
def test_np_array_usecols(all_parsers):
# see gh-12546
parser = all_parsers
data = "a,b,c\n1,2,3"
usecols = np.array(["a", "b"])
expected = DataFrame([[1, 2]], columns=usecols)
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"usecols,expected",
[
(
lambda x: x.upper() in ["AAA", "BBB", "DDD"],
DataFrame(
{
"AaA": {
0: 0.056674972999999997,
1: 2.6132309819999997,
2: 3.5689350380000002,
},
"bBb": {0: 8, 1: 2, 2: 7},
"ddd": {0: "a", 1: "b", 2: "a"},
}
),
),
(lambda x: False, DataFrame(columns=Index([]))),
],
)
def test_callable_usecols(all_parsers, usecols, expected):
# see gh-14154
data = """AaA,bBb,CCC,ddd
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
parser = all_parsers
if parser.engine == "pyarrow":
msg = "The pyarrow engine does not allow 'usecols' to be a callable"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), usecols=usecols)
return
result = parser.read_csv(StringIO(data), usecols=usecols)
tm.assert_frame_equal(result, expected)
# ArrowKeyError: Column 'fa' in include_columns does not exist in CSV file
@skip_pyarrow
@pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]])
def test_incomplete_first_row(all_parsers, usecols):
# see gh-6710
data = "1,2\n1,2,3"
parser = all_parsers
names = ["a", "b", "c"]
expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]})
result = parser.read_csv(StringIO(data), names=names, usecols=usecols)
tm.assert_frame_equal(result, expected)
@skip_pyarrow # CSV parse error: Expected 3 columns, got 4
@pytest.mark.parametrize(
"data,usecols,kwargs,expected",
[
# see gh-8985
(
"19,29,39\n" * 2 + "10,20,30,40",
[0, 1, 2],
{"header": None},
DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]),
),
# see gh-9549
(
("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"),
["A", "B", "C"],
{},
DataFrame(
{
"A": [1, 3, 1, 1, 1, 5],
"B": [2, 4, 2, 2, 2, 6],
"C": [3, 5, 4, 3, 3, 7],
}
),
),
],
)
def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected):
# see gh-8985
parser = all_parsers
result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"usecols,kwargs,expected,msg",
[
(
["a", "b", "c", "d"],
{},
DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}),
None,
),
(
["a", "b", "c", "f"],
{},
None,
_msg_validate_usecols_names.format(r"\['f'\]"),
),
(["a", "b", "f"], {}, None, _msg_validate_usecols_names.format(r"\['f'\]")),
(
["a", "b", "f", "g"],
{},
None,
_msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]"),
),
# see gh-14671
(
None,
{"header": 0, "names": ["A", "B", "C", "D"]},
DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7], "D": [4, 8]}),
None,
),
(
["A", "B", "C", "f"],
{"header": 0, "names": ["A", "B", "C", "D"]},
None,
_msg_validate_usecols_names.format(r"\['f'\]"),
),
(
["A", "B", "f"],
{"names": ["A", "B", "C", "D"]},
None,
_msg_validate_usecols_names.format(r"\['f'\]"),
),
],
)
def test_raises_on_usecols_names_mismatch(
all_parsers, usecols, kwargs, expected, msg, request
):
data = "a,b,c,d\n1,2,3,4\n5,6,7,8"
kwargs.update(usecols=usecols)
parser = all_parsers
if parser.engine == "pyarrow" and not (
usecols is not None and expected is not None
):
# everything but the first case
# ArrowKeyError: Column 'f' in include_columns does not exist in CSV file
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
if expected is None:
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **kwargs)
else:
result = parser.read_csv(StringIO(data), **kwargs)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]])
def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols, request):
data = "a,b,c,d\n1,2,3,4\n5,6,7,8"
names = ["A", "B", "C", "D"]
parser = all_parsers
if parser.engine == "pyarrow":
if isinstance(usecols[0], int):
with pytest.raises(ValueError, match=_msg_pyarrow_requires_names):
parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols)
return
# "pyarrow.lib.ArrowKeyError: Column 'A' in include_columns does not exist"
pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols)
expected = DataFrame({"A": [1, 5], "C": [3, 7]})
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("names", [None, ["a", "b"]])
def test_usecols_indices_out_of_bounds(all_parsers, names):
# GH#25623 & GH 41130; enforced in 2.0
parser = all_parsers
data = """
a,b
1,2
"""
err = ParserError
msg = "Defining usecols with out-of-bounds"
if parser.engine == "pyarrow":
err = ValueError
msg = _msg_pyarrow_requires_names
with pytest.raises(err, match=msg):
parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
def test_usecols_additional_columns(all_parsers):
# GH#46997
parser = all_parsers
usecols = lambda header: header.strip() in ["a", "b", "c"]
if parser.engine == "pyarrow":
msg = "The pyarrow engine does not allow 'usecols' to be a callable"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO("a,b\nx,y,z"), index_col=False, usecols=usecols)
return
result = parser.read_csv(StringIO("a,b\nx,y,z"), index_col=False, usecols=usecols)
expected = DataFrame({"a": ["x"], "b": "y"})
tm.assert_frame_equal(result, expected)
def test_usecols_additional_columns_integer_columns(all_parsers):
# GH#46997
parser = all_parsers
usecols = lambda header: header.strip() in ["0", "1"]
if parser.engine == "pyarrow":
msg = "The pyarrow engine does not allow 'usecols' to be a callable"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO("0,1\nx,y,z"), index_col=False, usecols=usecols)
return
result = parser.read_csv(StringIO("0,1\nx,y,z"), index_col=False, usecols=usecols)
expected = DataFrame({"0": ["x"], "1": "y"})
tm.assert_frame_equal(result, expected)
def test_usecols_dtype(all_parsers):
parser = all_parsers
data = """
col1,col2,col3
a,1,x
b,2,y
"""
result = parser.read_csv(
StringIO(data),
usecols=["col1", "col2"],
dtype={"col1": "string", "col2": "uint8", "col3": "string"},
)
expected = DataFrame(
{"col1": array(["a", "b"]), "col2": np.array([1, 2], dtype="uint8")}
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,50 @@
from collections.abc import Generator
from contextlib import contextmanager
import pathlib
import tempfile
import pytest
from pandas.io.pytables import HDFStore
tables = pytest.importorskip("tables")
# set these parameters so we don't have file sharing
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1
def safe_close(store):
try:
if store is not None:
store.close()
except OSError:
pass
# contextmanager to ensure the file cleanup
@contextmanager
def ensure_clean_store(
path, mode="a", complevel=None, complib=None, fletcher32=False
) -> Generator[HDFStore, None, None]:
with tempfile.TemporaryDirectory() as tmpdirname:
tmp_path = pathlib.Path(tmpdirname, path)
with HDFStore(
tmp_path,
mode=mode,
complevel=complevel,
complib=complib,
fletcher32=fletcher32,
) as store:
yield store
def _maybe_remove(store, key):
"""
For tests using tables, try removing the table to be sure there is
no content from previous tests using the same table name.
"""
try:
store.remove(key)
except (ValueError, KeyError):
pass
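# A minimal usage sketch of the helpers above (the key and DataFrame are
# placeholders):
#
#     import pandas as pd
#
#     with ensure_clean_store("example.h5") as store:
#         _maybe_remove(store, "df")                       # drop any leftover table
#         store.append("df", pd.DataFrame({"a": [1, 2]}))  # write in table format
#         result = store.select("df")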

View File

@ -0,0 +1,9 @@
import uuid
import pytest
@pytest.fixture
def setup_path():
"""Fixture for setup path"""
return f"tmp.__{uuid.uuid4()}__.h5"

File diff suppressed because it is too large

View File

@ -0,0 +1,214 @@
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Series,
_testing as tm,
concat,
read_hdf,
)
from pandas.tests.io.pytables.common import (
_maybe_remove,
ensure_clean_store,
)
pytestmark = [pytest.mark.single_cpu]
def test_categorical(setup_path):
with ensure_clean_store(setup_path) as store:
# Basic
_maybe_remove(store, "s")
s = Series(
Categorical(
["a", "b", "b", "a", "a", "c"],
categories=["a", "b", "c", "d"],
ordered=False,
)
)
store.append("s", s, format="table")
result = store.select("s")
tm.assert_series_equal(s, result)
_maybe_remove(store, "s_ordered")
s = Series(
Categorical(
["a", "b", "b", "a", "a", "c"],
categories=["a", "b", "c", "d"],
ordered=True,
)
)
store.append("s_ordered", s, format="table")
result = store.select("s_ordered")
tm.assert_series_equal(s, result)
_maybe_remove(store, "df")
df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
store.append("df", df, format="table")
result = store.select("df")
tm.assert_frame_equal(result, df)
# Dtypes
_maybe_remove(store, "si")
s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")
store.append("si", s)
result = store.select("si")
tm.assert_series_equal(result, s)
_maybe_remove(store, "si2")
s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")
store.append("si2", s)
result = store.select("si2")
tm.assert_series_equal(result, s)
# Multiple
_maybe_remove(store, "df2")
df2 = df.copy()
df2["s2"] = Series(list("abcdefg")).astype("category")
store.append("df2", df2)
result = store.select("df2")
tm.assert_frame_equal(result, df2)
# Make sure the metadata is OK
info = store.info()
assert "/df2 " in info
# df2._mgr.blocks[0] and df2._mgr.blocks[2] are Categorical
assert "/df2/meta/values_block_0/meta" in info
assert "/df2/meta/values_block_2/meta" in info
# unordered
_maybe_remove(store, "s2")
s = Series(
Categorical(
["a", "b", "b", "a", "a", "c"],
categories=["a", "b", "c", "d"],
ordered=False,
)
)
store.append("s2", s, format="table")
result = store.select("s2")
tm.assert_series_equal(result, s)
# Query
_maybe_remove(store, "df3")
store.append("df3", df, data_columns=["s"])
expected = df[df.s.isin(["b", "c"])]
result = store.select("df3", where=['s in ["b","c"]'])
tm.assert_frame_equal(result, expected)
expected = df[df.s.isin(["b", "c"])]
result = store.select("df3", where=['s = ["b","c"]'])
tm.assert_frame_equal(result, expected)
expected = df[df.s.isin(["d"])]
result = store.select("df3", where=['s in ["d"]'])
tm.assert_frame_equal(result, expected)
expected = df[df.s.isin(["f"])]
result = store.select("df3", where=['s in ["f"]'])
tm.assert_frame_equal(result, expected)
# Appending with same categories is ok
store.append("df3", df)
df = concat([df, df])
expected = df[df.s.isin(["b", "c"])]
result = store.select("df3", where=['s in ["b","c"]'])
tm.assert_frame_equal(result, expected)
# Appending must have the same categories
df3 = df.copy()
df3["s"] = df3["s"].cat.remove_unused_categories()
msg = "cannot append a categorical with different categories to the existing"
with pytest.raises(ValueError, match=msg):
store.append("df3", df3)
# Remove, and make sure the metadata is removed too (it's a recursive
# removal, so it should be).
result = store.select("df3/meta/s/meta")
assert result is not None
store.remove("df3")
with pytest.raises(
KeyError, match="'No object named df3/meta/s/meta in the file'"
):
store.select("df3/meta/s/meta")
def test_categorical_conversion(tmp_path, setup_path):
# GH13322
# Check that read_hdf with categorical columns doesn't return rows if
# the where criteria are not met.
obsids = ["ESP_012345_6789", "ESP_987654_3210"]
imgids = ["APF00006np", "APF0001imm"]
data = [4.3, 9.8]
# Test without categories
df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data})
# We are expecting an empty DataFrame matching types of df
expected = df.iloc[[], :]
path = tmp_path / setup_path
df.to_hdf(path, key="df", format="table", data_columns=True)
result = read_hdf(path, "df", where="obsids=B")
tm.assert_frame_equal(result, expected)
# Test with categories
df.obsids = df.obsids.astype("category")
df.imgids = df.imgids.astype("category")
# We are expecting an empty DataFrame matching types of df
expected = df.iloc[[], :]
path = tmp_path / setup_path
df.to_hdf(path, key="df", format="table", data_columns=True)
result = read_hdf(path, "df", where="obsids=B")
tm.assert_frame_equal(result, expected)
def test_categorical_nan_only_columns(tmp_path, setup_path):
# GH18413
# Check that categorical columns containing only NaN values round-trip
# through to_hdf/read_hdf.
df = DataFrame(
{
"a": ["a", "b", "c", np.nan],
"b": [np.nan, np.nan, np.nan, np.nan],
"c": [1, 2, 3, 4],
"d": Series([None] * 4, dtype=object),
}
)
df["a"] = df.a.astype("category")
df["b"] = df.b.astype("category")
df["d"] = df.b.astype("category")
expected = df
path = tmp_path / setup_path
df.to_hdf(path, key="df", format="table", data_columns=True)
result = read_hdf(path, "df")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"where, df, expected",
[
('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})),
('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})),
],
)
def test_convert_value(
tmp_path, setup_path, where: str, df: DataFrame, expected: DataFrame
):
# GH39420
# Check that read_hdf with categorical columns can filter by where condition.
df.col = df.col.astype("category")
max_widths = {"col": 1}
categorical_values = sorted(df.col.unique())
expected.col = expected.col.astype("category")
expected.col = expected.col.cat.set_categories(categorical_values)
path = tmp_path / setup_path
df.to_hdf(path, key="df", format="table", min_itemsize=max_widths)
result = read_hdf(path, where=where)
tm.assert_frame_equal(result, expected)
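# Note on ``min_itemsize`` as used above: for table-format writes it pins the
# on-disk string width of a column instead of letting it be inferred from the
# first chunk written, e.g. (placeholder names):
#
#     df.to_hdf(path, key="df", format="table", min_itemsize={"col": 10})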

View File

@ -0,0 +1,75 @@
import pytest
import pandas as pd
import pandas._testing as tm
tables = pytest.importorskip("tables")
@pytest.fixture
def pytables_hdf5_file(tmp_path):
"""
Use PyTables to create a simple HDF5 file.
"""
table_schema = {
"c0": tables.Time64Col(pos=0),
"c1": tables.StringCol(5, pos=1),
"c2": tables.Int64Col(pos=2),
}
t0 = 1_561_105_000.0
testsamples = [
{"c0": t0, "c1": "aaaaa", "c2": 1},
{"c0": t0 + 1, "c1": "bbbbb", "c2": 2},
{"c0": t0 + 2, "c1": "ccccc", "c2": 10**5},
{"c0": t0 + 3, "c1": "ddddd", "c2": 4_294_967_295},
]
objname = "pandas_test_timeseries"
path = tmp_path / "written_with_pytables.h5"
with tables.open_file(path, mode="w") as f:
t = f.create_table("/", name=objname, description=table_schema)
for sample in testsamples:
for key, value in sample.items():
t.row[key] = value
t.row.append()
yield path, objname, pd.DataFrame(testsamples)
class TestReadPyTablesHDF5:
"""
A group of tests which covers reading HDF5 files written by plain PyTables
(not written by pandas).
Was introduced for regression-testing issue 11188.
"""
def test_read_complete(self, pytables_hdf5_file):
path, objname, df = pytables_hdf5_file
result = pd.read_hdf(path, key=objname)
expected = df
tm.assert_frame_equal(result, expected, check_index_type=True)
def test_read_with_start(self, pytables_hdf5_file):
path, objname, df = pytables_hdf5_file
# This is a regression test for pandas-dev/pandas/issues/11188
result = pd.read_hdf(path, key=objname, start=1)
expected = df[1:].reset_index(drop=True)
tm.assert_frame_equal(result, expected, check_index_type=True)
def test_read_with_stop(self, pytables_hdf5_file):
path, objname, df = pytables_hdf5_file
# This is a regression test for pandas-dev/pandas/issues/11188
result = pd.read_hdf(path, key=objname, stop=1)
expected = df[:1].reset_index(drop=True)
tm.assert_frame_equal(result, expected, check_index_type=True)
def test_read_with_startstop(self, pytables_hdf5_file):
path, objname, df = pytables_hdf5_file
# This is a regression test for pandas-dev/pandas/issues/11188
result = pd.read_hdf(path, key=objname, start=1, stop=2)
expected = df[1:2].reset_index(drop=True)
tm.assert_frame_equal(result, expected, check_index_type=True)
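# A minimal sketch of what this class exercises, using the fixture's file and
# object names: a file written by plain PyTables is read back through pandas,
# optionally with row bounds.
#
#     import pandas as pd
#
#     df = pd.read_hdf(
#         "written_with_pytables.h5", key="pandas_test_timeseries", start=1, stop=2
#     )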

View File

@ -0,0 +1,195 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
from pandas.tests.io.pytables.common import ensure_clean_store
from pandas.io.pytables import read_hdf
def test_complex_fixed(tmp_path, setup_path):
df = DataFrame(
np.random.default_rng(2).random((4, 5)).astype(np.complex64),
index=list("abcd"),
columns=list("ABCDE"),
)
path = tmp_path / setup_path
df.to_hdf(path, key="df")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)
df = DataFrame(
np.random.default_rng(2).random((4, 5)).astype(np.complex128),
index=list("abcd"),
columns=list("ABCDE"),
)
path = tmp_path / setup_path
df.to_hdf(path, key="df")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)
def test_complex_table(tmp_path, setup_path):
df = DataFrame(
np.random.default_rng(2).random((4, 5)).astype(np.complex64),
index=list("abcd"),
columns=list("ABCDE"),
)
path = tmp_path / setup_path
df.to_hdf(path, key="df", format="table")
reread = read_hdf(path, key="df")
tm.assert_frame_equal(df, reread)
df = DataFrame(
np.random.default_rng(2).random((4, 5)).astype(np.complex128),
index=list("abcd"),
columns=list("ABCDE"),
)
path = tmp_path / setup_path
df.to_hdf(path, key="df", format="table", mode="w")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)
def test_complex_mixed_fixed(tmp_path, setup_path):
complex64 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
)
complex128 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
)
df = DataFrame(
{
"A": [1, 2, 3, 4],
"B": ["a", "b", "c", "d"],
"C": complex64,
"D": complex128,
"E": [1.0, 2.0, 3.0, 4.0],
},
index=list("abcd"),
)
path = tmp_path / setup_path
df.to_hdf(path, key="df")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)
def test_complex_mixed_table(tmp_path, setup_path):
complex64 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
)
complex128 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
)
df = DataFrame(
{
"A": [1, 2, 3, 4],
"B": ["a", "b", "c", "d"],
"C": complex64,
"D": complex128,
"E": [1.0, 2.0, 3.0, 4.0],
},
index=list("abcd"),
)
with ensure_clean_store(setup_path) as store:
store.append("df", df, data_columns=["A", "B"])
result = store.select("df", where="A>2")
tm.assert_frame_equal(df.loc[df.A > 2], result)
path = tmp_path / setup_path
df.to_hdf(path, key="df", format="table")
reread = read_hdf(path, "df")
tm.assert_frame_equal(df, reread)
def test_complex_across_dimensions_fixed(tmp_path, setup_path):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list("abcd"))
df = DataFrame({"A": s, "B": s})
objs = [s, df]
comps = [tm.assert_series_equal, tm.assert_frame_equal]
for obj, comp in zip(objs, comps):
path = tmp_path / setup_path
obj.to_hdf(path, key="obj", format="fixed")
reread = read_hdf(path, "obj")
comp(obj, reread)
def test_complex_across_dimensions(tmp_path, setup_path):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list("abcd"))
df = DataFrame({"A": s, "B": s})
path = tmp_path / setup_path
df.to_hdf(path, key="obj", format="table")
reread = read_hdf(path, "obj")
tm.assert_frame_equal(df, reread)
def test_complex_indexing_error(setup_path):
complex128 = np.array(
[1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
)
df = DataFrame(
{"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128},
index=list("abcd"),
)
msg = (
"Columns containing complex values can be stored "
"but cannot be indexed when using table format. "
"Either use fixed format, set index=False, "
"or do not include the columns containing complex "
"values to data_columns when initializing the table."
)
with ensure_clean_store(setup_path) as store:
with pytest.raises(TypeError, match=msg):
store.append("df", df, data_columns=["C"])
def test_complex_series_error(tmp_path, setup_path):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list("abcd"))
msg = (
"Columns containing complex values can be stored "
"but cannot be indexed when using table format. "
"Either use fixed format, set index=False, "
"or do not include the columns containing complex "
"values to data_columns when initializing the table."
)
path = tmp_path / setup_path
with pytest.raises(TypeError, match=msg):
s.to_hdf(path, key="obj", format="t")
path = tmp_path / setup_path
s.to_hdf(path, key="obj", format="t", index=False)
reread = read_hdf(path, "obj")
tm.assert_series_equal(s, reread)
def test_complex_append(setup_path):
df = DataFrame(
{
"a": np.random.default_rng(2).standard_normal(100).astype(np.complex128),
"b": np.random.default_rng(2).standard_normal(100),
}
)
with ensure_clean_store(setup_path) as store:
store.append("df", df, data_columns=["b"])
store.append("df", df)
result = store.select("df")
tm.assert_frame_equal(pd.concat([df, df], axis=0), result)

View File

@ -0,0 +1,256 @@
import datetime
from io import BytesIO
import re
import numpy as np
import pytest
from pandas import (
CategoricalIndex,
DataFrame,
HDFStore,
Index,
MultiIndex,
_testing as tm,
date_range,
read_hdf,
)
from pandas.tests.io.pytables.common import ensure_clean_store
from pandas.io.pytables import (
Term,
_maybe_adjust_name,
)
pytestmark = [pytest.mark.single_cpu]
def test_pass_spec_to_storer(setup_path):
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)
with ensure_clean_store(setup_path) as store:
store.put("df", df)
msg = (
"cannot pass a column specification when reading a Fixed format "
"store. this store must be selected in its entirety"
)
with pytest.raises(TypeError, match=msg):
store.select("df", columns=["A"])
msg = (
"cannot pass a where specification when reading from a Fixed "
"format store. this store must be selected in its entirety"
)
with pytest.raises(TypeError, match=msg):
store.select("df", where=[("columns=A")])
def test_table_index_incompatible_dtypes(setup_path):
df1 = DataFrame({"a": [1, 2, 3]})
df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3))
with ensure_clean_store(setup_path) as store:
store.put("frame", df1, format="table")
msg = re.escape("incompatible kind in col [integer - datetime64[ns]]")
with pytest.raises(TypeError, match=msg):
store.put("frame", df2, format="table", append=True)
def test_unimplemented_dtypes_table_columns(setup_path):
with ensure_clean_store(setup_path) as store:
dtypes = [("date", datetime.date(2001, 1, 2))]
# currently not supported dtypes ####
for n, f in dtypes:
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)
df[n] = f
msg = re.escape(f"[{n}] is not implemented as a table column")
with pytest.raises(TypeError, match=msg):
store.append(f"df1_{n}", df)
# frame
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)
df["obj1"] = "foo"
df["obj2"] = "bar"
df["datetime1"] = datetime.date(2001, 1, 2)
df = df._consolidate()
with ensure_clean_store(setup_path) as store:
# this fails because we have a date in the object block......
msg = "|".join(
[
re.escape(
"Cannot serialize the column [datetime1]\nbecause its data "
"contents are not [string] but [date] object dtype"
),
re.escape("[date] is not implemented as a table column"),
]
)
with pytest.raises(TypeError, match=msg):
store.append("df_unimplemented", df)
def test_invalid_terms(tmp_path, setup_path):
with ensure_clean_store(setup_path) as store:
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
index=date_range("2000-01-01", periods=10, freq="B"),
)
df["string"] = "foo"
df.loc[df.index[0:4], "string"] = "bar"
store.put("df", df, format="table")
# some invalid terms
msg = re.escape("__init__() missing 1 required positional argument: 'where'")
with pytest.raises(TypeError, match=msg):
Term()
# more invalid
msg = re.escape(
"cannot process expression [df.index[3]], "
"[2000-01-06 00:00:00] is not a valid condition"
)
with pytest.raises(ValueError, match=msg):
store.select("df", "df.index[3]")
msg = "invalid syntax"
with pytest.raises(SyntaxError, match=msg):
store.select("df", "index>")
# from the docs
path = tmp_path / setup_path
dfq = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=list("ABCD"),
index=date_range("20130101", periods=10),
)
dfq.to_hdf(path, key="dfq", format="table", data_columns=True)
# check ok
read_hdf(path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']")
read_hdf(path, "dfq", where="A>0 or C>0")
# catch the invalid reference
path = tmp_path / setup_path
dfq = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=list("ABCD"),
index=date_range("20130101", periods=10),
)
dfq.to_hdf(path, key="dfq", format="table")
msg = (
r"The passed where expression: A>0 or C>0\n\s*"
r"contains an invalid variable reference\n\s*"
r"all of the variable references must be a reference to\n\s*"
r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*"
r"The currently defined references are: index,columns\n"
)
with pytest.raises(ValueError, match=msg):
read_hdf(path, "dfq", where="A>0 or C>0")
def test_append_with_diff_col_name_types_raises_value_error(setup_path):
df = DataFrame(np.random.default_rng(2).standard_normal((10, 1)))
df2 = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
df3 = DataFrame({(1, 2): np.random.default_rng(2).standard_normal(10)})
df4 = DataFrame({("1", 2): np.random.default_rng(2).standard_normal(10)})
df5 = DataFrame({("1", 2, object): np.random.default_rng(2).standard_normal(10)})
with ensure_clean_store(setup_path) as store:
name = "df_diff_valerror"
store.append(name, df)
for d in (df2, df3, df4, df5):
msg = re.escape(
"cannot match existing table structure for [0] on appending data"
)
with pytest.raises(ValueError, match=msg):
store.append(name, d)
def test_invalid_complib(setup_path):
df = DataFrame(
np.random.default_rng(2).random((4, 5)),
index=list("abcd"),
columns=list("ABCDE"),
)
with tm.ensure_clean(setup_path) as path:
msg = r"complib only supports \[.*\] compression."
with pytest.raises(ValueError, match=msg):
df.to_hdf(path, key="df", complib="foolib")
@pytest.mark.parametrize(
"idx",
[
date_range("2019", freq="D", periods=3, tz="UTC"),
CategoricalIndex(list("abc")),
],
)
def test_to_hdf_multiindex_extension_dtype(idx, tmp_path, setup_path):
# GH 7775
mi = MultiIndex.from_arrays([idx, idx])
df = DataFrame(0, index=mi, columns=["a"])
path = tmp_path / setup_path
with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):
df.to_hdf(path, key="df")
def test_unsupported_hdf_file_error(datapath):
# GH 9539
data_path = datapath("io", "data", "legacy_hdf/incompatible_dataset.h5")
message = (
r"Dataset\(s\) incompatible with Pandas data types, "
"not table, or no datasets found in HDF5 file."
)
with pytest.raises(ValueError, match=message):
read_hdf(data_path)
def test_read_hdf_errors(setup_path, tmp_path):
df = DataFrame(
np.random.default_rng(2).random((4, 5)),
index=list("abcd"),
columns=list("ABCDE"),
)
path = tmp_path / setup_path
msg = r"File [\S]* does not exist"
with pytest.raises(OSError, match=msg):
read_hdf(path, "key")
df.to_hdf(path, key="df")
store = HDFStore(path, mode="r")
store.close()
msg = "The HDFStore must be open for reading."
with pytest.raises(OSError, match=msg):
read_hdf(store, "df")
def test_read_hdf_generic_buffer_errors():
msg = "Support for generic buffers has not been implemented."
with pytest.raises(NotImplementedError, match=msg):
read_hdf(BytesIO(b""), "df")
@pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], "12", "123"])
def test_maybe_adjust_name_bad_version_raises(bad_version):
msg = "Version is incorrect, expected sequence of 3 integers"
with pytest.raises(ValueError, match=msg):
_maybe_adjust_name("values_block_0", version=bad_version)

View File

@ -0,0 +1,517 @@
import os
import numpy as np
import pytest
from pandas.compat import (
PY311,
is_ci_environment,
is_platform_linux,
is_platform_little_endian,
)
from pandas.errors import (
ClosedFileError,
PossibleDataLossError,
)
from pandas import (
DataFrame,
HDFStore,
Index,
Series,
_testing as tm,
date_range,
read_hdf,
)
from pandas.tests.io.pytables.common import (
_maybe_remove,
ensure_clean_store,
tables,
)
from pandas.io import pytables
from pandas.io.pytables import Term
pytestmark = [pytest.mark.single_cpu]
@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
def test_mode(setup_path, tmp_path, mode, using_infer_string):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
index=date_range("2000-01-01", periods=10, freq="B"),
)
msg = r"[\S]* does not exist"
path = tmp_path / setup_path
# constructor
if mode in ["r", "r+"]:
with pytest.raises(OSError, match=msg):
HDFStore(path, mode=mode)
else:
with HDFStore(path, mode=mode) as store:
assert store._handle.mode == mode
path = tmp_path / setup_path
# context
if mode in ["r", "r+"]:
with pytest.raises(OSError, match=msg):
with HDFStore(path, mode=mode) as store:
pass
else:
with HDFStore(path, mode=mode) as store:
assert store._handle.mode == mode
path = tmp_path / setup_path
# conv write
if mode in ["r", "r+"]:
with pytest.raises(OSError, match=msg):
df.to_hdf(path, key="df", mode=mode)
df.to_hdf(path, key="df", mode="w")
else:
df.to_hdf(path, key="df", mode=mode)
# conv read
if mode in ["w"]:
msg = (
"mode w is not allowed while performing a read. "
r"Allowed modes are r, r\+ and a."
)
with pytest.raises(ValueError, match=msg):
read_hdf(path, "df", mode=mode)
else:
result = read_hdf(path, "df", mode=mode)
if using_infer_string:
df.columns = df.columns.astype("str")
tm.assert_frame_equal(result, df)
def test_default_mode(tmp_path, setup_path, using_infer_string):
# read_hdf uses default mode
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
index=date_range("2000-01-01", periods=10, freq="B"),
)
path = tmp_path / setup_path
df.to_hdf(path, key="df", mode="w")
result = read_hdf(path, "df")
expected = df.copy()
if using_infer_string:
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)
def test_reopen_handle(tmp_path, setup_path):
path = tmp_path / setup_path
store = HDFStore(path, mode="a")
store["a"] = Series(
np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
)
msg = (
r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the "
"current file!"
)
# invalid mode change
with pytest.raises(PossibleDataLossError, match=msg):
store.open("w")
store.close()
assert not store.is_open
# truncation ok here
store.open("w")
assert store.is_open
assert len(store) == 0
store.close()
assert not store.is_open
store = HDFStore(path, mode="a")
store["a"] = Series(
np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
)
# reopen as read
store.open("r")
assert store.is_open
assert len(store) == 1
assert store._mode == "r"
store.close()
assert not store.is_open
# reopen as append
store.open("a")
assert store.is_open
assert len(store) == 1
assert store._mode == "a"
store.close()
assert not store.is_open
# reopen as append (again)
store.open("a")
assert store.is_open
assert len(store) == 1
assert store._mode == "a"
store.close()
assert not store.is_open
def test_open_args(setup_path, using_infer_string):
with tm.ensure_clean(setup_path) as path:
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)
# create an in memory store
store = HDFStore(
path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0
)
store["df"] = df
store.append("df2", df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(store["df"], expected)
tm.assert_frame_equal(store["df2"], expected)
store.close()
# the file should not have actually been written
assert not os.path.exists(path)
def test_flush(setup_path):
with ensure_clean_store(setup_path) as store:
store["a"] = Series(range(5))
store.flush()
store.flush(fsync=True)
def test_complibs_default_settings(tmp_path, setup_path, using_infer_string):
# GH15943
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)
# Set complevel and check if complib is automatically set to
# default value
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df", complevel=9)
result = read_hdf(tmpfile, "df")
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)
with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
assert node.filters.complevel == 9
assert node.filters.complib == "zlib"
# Set complib and check to see if compression is disabled
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df", complib="zlib")
result = read_hdf(tmpfile, "df")
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)
with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
assert node.filters.complevel == 0
assert node.filters.complib is None
# Check if not setting complib or complevel results in no compression
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df")
result = read_hdf(tmpfile, "df")
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)
with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
assert node.filters.complevel == 0
assert node.filters.complib is None
def test_complibs_default_settings_override(tmp_path, setup_path):
# Check if file-defaults can be overridden on a per table basis
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)
tmpfile = tmp_path / setup_path
store = HDFStore(tmpfile)
store.append("dfc", df, complevel=9, complib="blosc")
store.append("df", df)
store.close()
with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
assert node.filters.complevel == 0
assert node.filters.complib is None
for node in h5file.walk_nodes(where="/dfc", classname="Leaf"):
assert node.filters.complevel == 9
assert node.filters.complib == "blosc"
@pytest.mark.parametrize("lvl", range(10))
@pytest.mark.parametrize("lib", tables.filters.all_complibs)
@pytest.mark.filterwarnings("ignore:object name is not a valid")
@pytest.mark.skipif(
not PY311 and is_ci_environment() and is_platform_linux(),
reason="Segfaulting in a CI environment"
# with xfail, would sometimes raise UnicodeDecodeError
# invalid state byte
)
def test_complibs(tmp_path, lvl, lib, request):
# GH14478
if PY311 and is_platform_linux() and lib == "blosc2" and lvl != 0:
request.applymarker(
pytest.mark.xfail(reason=f"Fails for {lib} on Linux and PY > 3.11")
)
df = DataFrame(
np.ones((30, 4)), columns=list("ABCD"), index=np.arange(30).astype(np.str_)
)
# Skip the test if lzo is not available on this platform
if not tables.which_lib_version("lzo"):
pytest.skip("lzo not available")
# Skip the test if bzip2 is not available on this platform
if not tables.which_lib_version("bzip2"):
pytest.skip("bzip2 not available")
tmpfile = tmp_path / f"{lvl}_{lib}.h5"
gname = f"{lvl}_{lib}"
# Write and read file to see if data is consistent
df.to_hdf(tmpfile, key=gname, complib=lib, complevel=lvl)
result = read_hdf(tmpfile, gname)
tm.assert_frame_equal(result, df)
# Open file and check metadata for correct amount of compression
with tables.open_file(tmpfile, mode="r") as h5table:
for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):
assert node.filters.complevel == lvl
if lvl == 0:
assert node.filters.complib is None
else:
assert node.filters.complib == lib
@pytest.mark.skipif(
not is_platform_little_endian(), reason="platform is not little endian"
)
def test_encoding(setup_path):
with ensure_clean_store(setup_path) as store:
df = DataFrame({"A": "foo", "B": "bar"}, index=range(5))
df.loc[2, "A"] = np.nan
df.loc[3, "B"] = np.nan
_maybe_remove(store, "df")
store.append("df", df, encoding="ascii")
tm.assert_frame_equal(store["df"], df)
expected = df.reindex(columns=["A"])
result = store.select("df", Term("columns=A", encoding="ascii"))
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"val",
[
[b"E\xc9, 17", b"", b"a", b"b", b"c"],
[b"E\xc9, 17", b"a", b"b", b"c"],
[b"EE, 17", b"", b"a", b"b", b"c"],
[b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
[b"", b"a", b"b", b"c"],
[b"\xf8\xfc", b"a", b"b", b"c"],
[b"A\xf8\xfc", b"", b"a", b"b", b"c"],
[np.nan, b"", b"b", b"c"],
[b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
],
)
@pytest.mark.parametrize("dtype", ["category", None])
def test_latin_encoding(tmp_path, setup_path, dtype, val):
enc = "latin-1"
nan_rep = ""
key = "data"
val = [x.decode(enc) if isinstance(x, bytes) else x for x in val]
ser = Series(val, dtype=dtype)
store = tmp_path / setup_path
ser.to_hdf(store, key=key, format="table", encoding=enc, nan_rep=nan_rep)
retr = read_hdf(store, key)
# TODO:(3.0): once Categorical replace deprecation is enforced,
# we may be able to re-simplify the construction of s_nan
if dtype == "category":
if nan_rep in ser.cat.categories:
s_nan = ser.cat.remove_categories([nan_rep])
else:
s_nan = ser
else:
s_nan = ser.replace(nan_rep, np.nan)
tm.assert_series_equal(s_nan, retr)
def test_multiple_open_close(tmp_path, setup_path):
# gh-4409: open & close multiple times
path = tmp_path / setup_path
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)
df.to_hdf(path, key="df", mode="w", format="table")
# single
store = HDFStore(path)
assert "CLOSED" not in store.info()
assert store.is_open
store.close()
assert "CLOSED" in store.info()
assert not store.is_open
path = tmp_path / setup_path
if pytables._table_file_open_policy_is_strict:
# multiples
store1 = HDFStore(path)
msg = (
r"The file [\S]* is already opened\. Please close it before "
r"reopening in write mode\."
)
with pytest.raises(ValueError, match=msg):
HDFStore(path)
store1.close()
else:
# multiples
store1 = HDFStore(path)
store2 = HDFStore(path)
assert "CLOSED" not in store1.info()
assert "CLOSED" not in store2.info()
assert store1.is_open
assert store2.is_open
store1.close()
assert "CLOSED" in store1.info()
assert not store1.is_open
assert "CLOSED" not in store2.info()
assert store2.is_open
store2.close()
assert "CLOSED" in store1.info()
assert "CLOSED" in store2.info()
assert not store1.is_open
assert not store2.is_open
# nested close
store = HDFStore(path, mode="w")
store.append("df", df)
store2 = HDFStore(path)
store2.append("df2", df)
store2.close()
assert "CLOSED" in store2.info()
assert not store2.is_open
store.close()
assert "CLOSED" in store.info()
assert not store.is_open
# double closing
store = HDFStore(path, mode="w")
store.append("df", df)
store2 = HDFStore(path)
store.close()
assert "CLOSED" in store.info()
assert not store.is_open
store2.close()
assert "CLOSED" in store2.info()
assert not store2.is_open
# ops on a closed store
path = tmp_path / setup_path
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD"), dtype=object),
index=Index([f"i-{i}" for i in range(30)], dtype=object),
)
df.to_hdf(path, key="df", mode="w", format="table")
store = HDFStore(path)
store.close()
msg = r"[\S]* file is not open!"
with pytest.raises(ClosedFileError, match=msg):
store.keys()
with pytest.raises(ClosedFileError, match=msg):
"df" in store
with pytest.raises(ClosedFileError, match=msg):
len(store)
with pytest.raises(ClosedFileError, match=msg):
store["df"]
with pytest.raises(ClosedFileError, match=msg):
store.select("df")
with pytest.raises(ClosedFileError, match=msg):
store.get("df")
with pytest.raises(ClosedFileError, match=msg):
store.append("df2", df)
with pytest.raises(ClosedFileError, match=msg):
store.put("df3", df)
with pytest.raises(ClosedFileError, match=msg):
store.get_storer("df2")
with pytest.raises(ClosedFileError, match=msg):
store.remove("df2")
with pytest.raises(ClosedFileError, match=msg):
store.select("df")
msg = "'HDFStore' object has no attribute 'df'"
with pytest.raises(AttributeError, match=msg):
store.df
def test_fspath():
with tm.ensure_clean("foo.h5") as path:
with HDFStore(path) as store:
assert os.fspath(store) == str(path)

Some files were not shown because too many files have changed in this diff