@@ -0,0 +1,9 @@
import pytest


@pytest.fixture(params=["split", "records", "index", "columns", "values"])
def orient(request):
    """
    Fixture for orients excluding the table format.
    """
    return request.param
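
Since the fixture above is parametrized, pytest runs any test that requests
`orient` once per value. A minimal sketch of a consumer (a hypothetical test,
not part of this commit):

    def test_orient_is_parametrized(orient):
        # Executed five times, once per fixture param:
        # "split", "records", "index", "columns", "values".
        assert orient in {"split", "records", "index", "columns", "values"}
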
@@ -0,0 +1,130 @@
from io import (
    BytesIO,
    StringIO,
)

import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm


def test_compression_roundtrip(compression):
    df = pd.DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        index=["A", "B"],
        columns=["X", "Y", "Z"],
    )

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        tm.assert_frame_equal(df, pd.read_json(path, compression=compression))

        # explicitly ensure file was compressed.
        with tm.decompress_file(path, compression) as fh:
            result = fh.read().decode("utf8")
            data = StringIO(result)
        tm.assert_frame_equal(df, pd.read_json(data))


def test_read_zipped_json(datapath):
    uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
    uncompressed_df = pd.read_json(uncompressed_path)

    compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
    compressed_df = pd.read_json(compressed_path, compression="zip")

    tm.assert_frame_equal(uncompressed_df, compressed_df)


@td.skip_if_not_us_locale
@pytest.mark.single_cpu
def test_with_s3_url(compression, s3_public_bucket, s3so):
    # Bucket created in tests/io/conftest.py
    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))

    with tm.ensure_clean() as path:
        df.to_json(path, compression=compression)
        with open(path, "rb") as f:
            s3_public_bucket.put_object(Key="test-1", Body=f)

    roundtripped_df = pd.read_json(
        f"s3://{s3_public_bucket.name}/test-1",
        compression=compression,
        storage_options=s3so,
    )
    tm.assert_frame_equal(df, roundtripped_df)


def test_lines_with_compression(compression):
    with tm.ensure_clean() as path:
        df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
        df.to_json(path, orient="records", lines=True, compression=compression)
        roundtripped_df = pd.read_json(path, lines=True, compression=compression)
        tm.assert_frame_equal(df, roundtripped_df)


def test_chunksize_with_compression(compression):
    with tm.ensure_clean() as path:
        df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
        df.to_json(path, orient="records", lines=True, compression=compression)

        with pd.read_json(
            path, lines=True, chunksize=1, compression=compression
        ) as res:
            roundtripped_df = pd.concat(res)
        tm.assert_frame_equal(df, roundtripped_df)


def test_write_unsupported_compression_type():
    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            df.to_json(path, compression="unsupported")


def test_read_unsupported_compression_type():
    with tm.ensure_clean() as path:
        msg = "Unrecognized compression type: unsupported"
        with pytest.raises(ValueError, match=msg):
            pd.read_json(path, compression="unsupported")


@pytest.mark.parametrize(
    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
@pytest.mark.parametrize("to_infer", [True, False])
@pytest.mark.parametrize("read_infer", [True, False])
def test_to_json_compression(
    compression_only, read_infer, to_infer, compression_to_extension, infer_string
):
    with pd.option_context("future.infer_string", infer_string):
        # see gh-15008
        compression = compression_only

        # The file extension is appended below.
        filename = "test."
        filename += compression_to_extension[compression]

        df = pd.DataFrame({"A": [1]})

        to_compression = "infer" if to_infer else compression
        read_compression = "infer" if read_infer else compression

        with tm.ensure_clean(filename) as path:
            df.to_json(path, compression=to_compression)
            result = pd.read_json(path, compression=read_compression)
            tm.assert_frame_equal(result, df)


def test_to_json_compression_mode(compression):
    # GH 39985 (read_json does not support user-provided binary files)
    expected = pd.DataFrame({"A": [1]})

    with BytesIO() as buffer:
        expected.to_json(buffer, compression=compression)
        # df = pd.read_json(buffer, compression=compression)
        # tm.assert_frame_equal(expected, df)
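
As the commented-out lines note, read_json cannot consume the compressed
binary buffer directly. One way to verify the payload anyway is to rewind the
buffer and decompress it by hand; a sketch assuming gzip compression
(illustrative, not part of this commit):

    import gzip
    from io import BytesIO, StringIO

    import pandas as pd
    import pandas._testing as tm

    expected = pd.DataFrame({"A": [1]})
    buffer = BytesIO()
    expected.to_json(buffer, compression="gzip")
    buffer.seek(0)  # rewind before reading the compressed bytes back
    text = gzip.decompress(buffer.read()).decode("utf-8")
    tm.assert_frame_equal(pd.read_json(StringIO(text)), expected)
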
@@ -0,0 +1,21 @@
"""
Tests for the deprecated keyword arguments for `read_json`.
"""
from io import StringIO

import pandas as pd
import pandas._testing as tm

from pandas.io.json import read_json


def test_good_kwargs():
    df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2])

    with tm.assert_produces_warning(None):
        data1 = StringIO(df.to_json(orient="split"))
        tm.assert_frame_equal(df, read_json(data1, orient="split"))
        data2 = StringIO(df.to_json(orient="columns"))
        tm.assert_frame_equal(df, read_json(data2, orient="columns"))
        data3 = StringIO(df.to_json(orient="index"))
        tm.assert_frame_equal(df, read_json(data3, orient="index"))
@@ -0,0 +1,873 @@
"""Tests for Table Schema integration."""
from collections import OrderedDict
from io import StringIO
import json

import numpy as np
import pytest

from pandas.core.dtypes.dtypes import (
    CategoricalDtype,
    DatetimeTZDtype,
    PeriodDtype,
)

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm

from pandas.io.json._table_schema import (
    as_json_table_type,
    build_table_schema,
    convert_json_field_to_pandas_type,
    convert_pandas_type_to_json_field,
    set_default_names,
)


@pytest.fixture
def df_schema():
    return DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "c"],
            "C": pd.date_range("2016-01-01", freq="d", periods=4),
            "D": pd.timedelta_range("1h", periods=4, freq="min"),
        },
        index=pd.Index(range(4), name="idx"),
    )


@pytest.fixture
def df_table():
    return DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "c"],
            "C": pd.date_range("2016-01-01", freq="d", periods=4),
            "D": pd.timedelta_range("1h", periods=4, freq="min"),
            "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
            "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
            "G": [1.0, 2.0, 3, 4.0],
            "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
        },
        index=pd.Index(range(4), name="idx"),
    )


class TestBuildSchema:
    def test_build_table_schema(self, df_schema, using_infer_string):
        result = build_table_schema(df_schema, version=False)
        expected = {
            "fields": [
                {"name": "idx", "type": "integer"},
                {"name": "A", "type": "integer"},
                {"name": "B", "type": "string"},
                {"name": "C", "type": "datetime"},
                {"name": "D", "type": "duration"},
            ],
            "primaryKey": ["idx"],
        }
        if using_infer_string:
            expected["fields"][2] = {"name": "B", "type": "string", "extDtype": "str"}
        assert result == expected
        result = build_table_schema(df_schema)
        assert "pandas_version" in result

    def test_series(self):
        s = pd.Series([1, 2, 3], name="foo")
        result = build_table_schema(s, version=False)
        expected = {
            "fields": [
                {"name": "index", "type": "integer"},
                {"name": "foo", "type": "integer"},
            ],
            "primaryKey": ["index"],
        }
        assert result == expected
        result = build_table_schema(s)
        assert "pandas_version" in result

    def test_series_unnamed(self):
        result = build_table_schema(pd.Series([1, 2, 3]), version=False)
        expected = {
            "fields": [
                {"name": "index", "type": "integer"},
                {"name": "values", "type": "integer"},
            ],
            "primaryKey": ["index"],
        }
        assert result == expected

    def test_multiindex(self, df_schema, using_infer_string):
        df = df_schema
        idx = pd.MultiIndex.from_product([("a", "b"), (1, 2)])
        df.index = idx

        result = build_table_schema(df, version=False)
        expected = {
            "fields": [
                {"name": "level_0", "type": "string"},
                {"name": "level_1", "type": "integer"},
                {"name": "A", "type": "integer"},
                {"name": "B", "type": "string"},
                {"name": "C", "type": "datetime"},
                {"name": "D", "type": "duration"},
            ],
            "primaryKey": ["level_0", "level_1"],
        }
        if using_infer_string:
            expected["fields"][0] = {
                "name": "level_0",
                "type": "string",
                "extDtype": "str",
            }
            expected["fields"][3] = {"name": "B", "type": "string", "extDtype": "str"}
        assert result == expected

        df.index.names = ["idx0", None]
        expected["fields"][0]["name"] = "idx0"
        expected["primaryKey"] = ["idx0", "level_1"]
        result = build_table_schema(df, version=False)
        assert result == expected


class TestTableSchemaType:
    @pytest.mark.parametrize("int_type", [int, np.int16, np.int32, np.int64])
    def test_as_json_table_type_int_data(self, int_type):
        int_data = [1, 2, 3]
        assert as_json_table_type(np.array(int_data, dtype=int_type).dtype) == "integer"

    @pytest.mark.parametrize("float_type", [float, np.float16, np.float32, np.float64])
    def test_as_json_table_type_float_data(self, float_type):
        float_data = [1.0, 2.0, 3.0]
        assert (
            as_json_table_type(np.array(float_data, dtype=float_type).dtype) == "number"
        )

    @pytest.mark.parametrize("bool_type", [bool, np.bool_])
    def test_as_json_table_type_bool_data(self, bool_type):
        bool_data = [True, False]
        assert (
            as_json_table_type(np.array(bool_data, dtype=bool_type).dtype) == "boolean"
        )

    @pytest.mark.parametrize(
        "date_data",
        [
            pd.to_datetime(["2016"]),
            pd.to_datetime(["2016"], utc=True),
            pd.Series(pd.to_datetime(["2016"])),
            pd.Series(pd.to_datetime(["2016"], utc=True)),
            pd.period_range("2016", freq="Y", periods=3),
        ],
    )
    def test_as_json_table_type_date_data(self, date_data):
        assert as_json_table_type(date_data.dtype) == "datetime"

    @pytest.mark.parametrize(
        "str_data",
        [pd.Series(["a", "b"], dtype=object), pd.Index(["a", "b"], dtype=object)],
    )
    def test_as_json_table_type_string_data(self, str_data):
        assert as_json_table_type(str_data.dtype) == "string"

    @pytest.mark.parametrize(
        "cat_data",
        [
            pd.Categorical(["a"]),
            pd.Categorical([1]),
            pd.Series(pd.Categorical([1])),
            pd.CategoricalIndex([1]),
        ],
    )
    def test_as_json_table_type_categorical_data(self, cat_data):
        assert as_json_table_type(cat_data.dtype) == "any"

    # ------
    # dtypes
    # ------
    @pytest.mark.parametrize("int_dtype", [int, np.int16, np.int32, np.int64])
    def test_as_json_table_type_int_dtypes(self, int_dtype):
        assert as_json_table_type(int_dtype) == "integer"

    @pytest.mark.parametrize("float_dtype", [float, np.float16, np.float32, np.float64])
    def test_as_json_table_type_float_dtypes(self, float_dtype):
        assert as_json_table_type(float_dtype) == "number"

    @pytest.mark.parametrize("bool_dtype", [bool, np.bool_])
    def test_as_json_table_type_bool_dtypes(self, bool_dtype):
        assert as_json_table_type(bool_dtype) == "boolean"

    @pytest.mark.parametrize(
        "date_dtype",
        [
            np.dtype("<M8[ns]"),
            PeriodDtype("D"),
            DatetimeTZDtype("ns", "US/Central"),
        ],
    )
    def test_as_json_table_type_date_dtypes(self, date_dtype):
        # TODO: datetime.date? datetime.time?
        assert as_json_table_type(date_dtype) == "datetime"

    @pytest.mark.parametrize("td_dtype", [np.dtype("<m8[ns]")])
    def test_as_json_table_type_timedelta_dtypes(self, td_dtype):
        assert as_json_table_type(td_dtype) == "duration"

    @pytest.mark.parametrize("str_dtype", [object])  # TODO(GH#14904) flesh out dtypes?
    def test_as_json_table_type_string_dtypes(self, str_dtype):
        assert as_json_table_type(str_dtype) == "string"

    def test_as_json_table_type_categorical_dtypes(self):
        assert as_json_table_type(pd.Categorical(["a"]).dtype) == "any"
        assert as_json_table_type(CategoricalDtype()) == "any"


class TestTableOrient:
    def test_build_series(self):
        s = pd.Series([1, 2], name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [{"name": "id", "type": "integer"}, {"name": "a", "type": "integer"}]

        schema = {"fields": fields, "primaryKey": ["id"]}

        expected = OrderedDict(
            [
                ("schema", schema),
                (
                    "data",
                    [
                        OrderedDict([("id", 0), ("a", 1)]),
                        OrderedDict([("id", 1), ("a", 2)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_read_json_from_to_json_results(self):
        # GH32383
        df = DataFrame(
            {
                "_id": {"row_0": 0},
                "category": {"row_0": "Goods"},
                "recommender_id": {"row_0": 3},
                "recommender_name_jp": {"row_0": "浦田"},
                "recommender_name_en": {"row_0": "Urata"},
                "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"},
                "name_en": {"row_0": "Hakata Dolls Matsuo"},
            }
        )

        result1 = pd.read_json(StringIO(df.to_json()))
        result2 = DataFrame.from_dict(json.loads(df.to_json()))
        tm.assert_frame_equal(result1, df)
        tm.assert_frame_equal(result2, df)

    def test_to_json(self, df_table, using_infer_string):
        df = df_table
        df.index.name = "idx"
        result = df.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {"name": "A", "type": "integer"},
            {"name": "B", "type": "string"},
            {"name": "C", "type": "datetime"},
            {"name": "D", "type": "duration"},
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "E",
                "ordered": False,
                "type": "any",
            },
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "F",
                "ordered": True,
                "type": "any",
            },
            {"name": "G", "type": "number"},
            {"name": "H", "type": "datetime", "tz": "US/Central"},
        ]

        if using_infer_string:
            fields[2] = {"name": "B", "type": "string", "extDtype": "str"}

        schema = {"fields": fields, "primaryKey": ["idx"]}
        data = [
            OrderedDict(
                [
                    ("idx", 0),
                    ("A", 1),
                    ("B", "a"),
                    ("C", "2016-01-01T00:00:00.000"),
                    ("D", "P0DT1H0M0S"),
                    ("E", "a"),
                    ("F", "a"),
                    ("G", 1.0),
                    ("H", "2016-01-01T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 1),
                    ("A", 2),
                    ("B", "b"),
                    ("C", "2016-01-02T00:00:00.000"),
                    ("D", "P0DT1H1M0S"),
                    ("E", "b"),
                    ("F", "b"),
                    ("G", 2.0),
                    ("H", "2016-01-02T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 2),
                    ("A", 3),
                    ("B", "c"),
                    ("C", "2016-01-03T00:00:00.000"),
                    ("D", "P0DT1H2M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 3.0),
                    ("H", "2016-01-03T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 3),
                    ("A", 4),
                    ("B", "c"),
                    ("C", "2016-01-04T00:00:00.000"),
                    ("D", "P0DT1H3M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 4.0),
                    ("H", "2016-01-04T06:00:00.000Z"),
                ]
            ),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])

        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1.0, 2.0])
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {"name": "index", "type": "number"},
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", 1.0), ("values", 1)]),
                        OrderedDict([("index", 2.0), ("values", 1)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_to_json_period_index(self):
        idx = pd.period_range("2016", freq="Q-JAN", periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"freq": "QE-JAN", "name": "index", "type": "datetime"},
            {"name": "values", "type": "integer"},
        ]

        schema = {"fields": fields, "primaryKey": ["index"]}
        data = [
            OrderedDict([("index", "2015-11-01T00:00:00.000"), ("values", 1)]),
            OrderedDict([("index", "2016-02-01T00:00:00.000"), ("values", 1)]),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])

        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(["a", "b"]))
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {
                                "name": "index",
                                "type": "any",
                                "constraints": {"enum": ["a", "b"]},
                                "ordered": False,
                            },
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", "a"), ("values", 1)]),
                        OrderedDict([("index", "b"), ("values", 1)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_date_format_raises(self, df_table):
        msg = (
            "Trying to write with `orient='table'` and `date_format='epoch'`. Table "
            "Schema requires dates to be formatted with `date_format='iso'`"
        )
        with pytest.raises(ValueError, match=msg):
            df_table.to_json(orient="table", date_format="epoch")

        # others work
        df_table.to_json(orient="table", date_format="iso")
        df_table.to_json(orient="table")

    def test_convert_pandas_type_to_json_field_int(self, index_or_series):
        kind = index_or_series
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    def test_convert_pandas_type_to_json_field_float(self, index_or_series):
        kind = index_or_series
        data = [1.0, 2.0, 3.0]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize(
        "dt_args,extra_exp", [({}, {}), ({"utc": True}, {"tz": "UTC"})]
    )
    @pytest.mark.parametrize("wrapper", [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
        self, dt_args, extra_exp, wrapper
    ):
        data = [1.0, 2.0, 3.0]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name="values")
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": "datetime"}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range("2016", freq="Y-DEC", periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": "datetime", "freq": "YE-DEC"}
        assert result == expected

    @pytest.mark.parametrize("kind", [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize("ordered", [True, False])
    def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
        data = ["a", "b", "c"]
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name="cats")
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name="cats")

        result = convert_pandas_type_to_json_field(arr)
        expected = {
            "name": "cats",
            "type": "any",
            "constraints": {"enum": data},
            "ordered": ordered,
        }
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [
            ({"type": "integer"}, "int64"),
            ({"type": "number"}, "float64"),
            ({"type": "boolean"}, "bool"),
            ({"type": "duration"}, "timedelta64"),
            ({"type": "datetime"}, "datetime64[ns]"),
            ({"type": "datetime", "tz": "US/Hawaii"}, "datetime64[ns, US/Hawaii]"),
            ({"type": "any"}, "object"),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": False,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=False),
            ),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": True,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=True),
            ),
            ({"type": "string"}, None),
        ],
    )
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        field = {"name": "foo"}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        field = {"type": inp}
        with pytest.raises(
            ValueError, match=f"Unsupported or invalid field type: {inp}"
        ):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        s = pd.Series(pd.Categorical(["a", "b", "a"]))
        s.index.name = "idx"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {
                "constraints": {"enum": ["a", "b"]},
                "name": "values",
                "ordered": False,
                "type": "any",
            },
        ]

        expected = OrderedDict(
            [
                ("schema", {"fields": fields, "primaryKey": ["idx"]}),
                (
                    "data",
                    [
                        OrderedDict([("idx", 0), ("values", "a")]),
                        OrderedDict([("idx", 1), ("values", "b")]),
                        OrderedDict([("idx", 2), ("values", "a")]),
                    ],
                ),
            ]
        )

        assert result == expected

    @pytest.mark.parametrize(
        "idx,nm,prop",
        [
            (pd.Index([1]), "index", "name"),
            (pd.Index([1], name="myname"), "myname", "name"),
            (
                pd.MultiIndex.from_product([("a", "b"), ("c", "d")]),
                ["level_0", "level_1"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", "n2"]
                ),
                ["n1", "n2"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", None]
                ),
                ["n1", "level_1"],
                "names",
            ),
        ],
    )
    def test_set_names_unset(self, idx, nm, prop):
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index([], name="index"),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("level_0", "level_1")),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("foo", "level_1")),
        ],
    )
    def test_warns_non_roundtrippable_names(self, idx):
        # GH 19130
        df = DataFrame(index=idx)
        df.index.name = "index"
        with tm.assert_produces_warning():
            set_default_names(df)

    def test_timestamp_in_columns(self):
        df = DataFrame(
            [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")]
        )
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000"
        assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"

    @pytest.mark.parametrize(
        "case",
        [
            pd.Series([1], index=pd.Index([1], name="a"), name="a"),
            DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            DataFrame(
                {"A": [1]},
                index=pd.MultiIndex.from_arrays([["a"], [1]], names=["A", "a"]),
            ),
        ],
    )
    def test_overlapping_names(self, case):
        with pytest.raises(ValueError, match="Overlapping"):
            case.to_json(orient="table")

    def test_mi_falsey_name(self):
        # GH 16203
        df = DataFrame(
            np.random.default_rng(2).standard_normal((4, 4)),
            index=pd.MultiIndex.from_product([("A", "B"), ("a", "b")]),
        )
        result = [x["name"] for x in build_table_schema(df)["fields"]]
        assert result == ["level_0", "level_1", 0, 1, 2, 3]


class TestTableOrientReader:
    @pytest.mark.parametrize(
        "index_nm",
        [None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"ints": [1, 2, 3, 4]},
            {"objects": ["a", "b", "c", "d"]},
            {"objects": ["1", "2", "3", "4"]},
            {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
            {"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
            {
                "ordered_cats": pd.Series(
                    pd.Categorical(["a", "b", "c", "c"], ordered=True)
                )
            },
            {"floats": [1.0, 2.0, 3.0, 4.0]},
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"bools": [True, False, False, True]},
            {
                "timezones": pd.date_range(
                    "2016-01-01", freq="d", periods=4, tz="US/Central"
                )  # added in GH 35973
            },
        ],
    )
    def test_read_json_table_orient(self, index_nm, vals, recwarn):
        df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize("index_nm", [None, "idx", "index"])
    @pytest.mark.parametrize(
        "vals",
        [{"timedeltas": pd.timedelta_range("1h", periods=4, freq="min")}],
    )
    def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
        df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
        out = df.to_json(orient="table")
        with pytest.raises(NotImplementedError, match="can not yet read "):
            pd.read_json(out, orient="table")

    @pytest.mark.parametrize(
        "index_nm",
        [None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0"],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"ints": [1, 2, 3, 4]},
            {"objects": ["a", "b", "c", "d"]},
            {"objects": ["1", "2", "3", "4"]},
            {"date_ranges": pd.date_range("2016-01-01", freq="d", periods=4)},
            {"categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"]))},
            {
                "ordered_cats": pd.Series(
                    pd.Categorical(["a", "b", "c", "c"], ordered=True)
                )
            },
            {"floats": [1.0, 2.0, 3.0, 4.0]},
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"bools": [True, False, False, True]},
            {
                "timezones": pd.date_range(
                    "2016-01-01", freq="d", periods=4, tz="US/Central"
                )  # added in GH 35973
            },
        ],
    )
    def test_read_json_table_period_orient(self, index_nm, vals, recwarn):
        df = DataFrame(
            vals,
            index=pd.Index(
                (pd.Period(f"2022Q{q}") for q in range(1, 5)), name=index_nm
            ),
        )
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index(range(4)),
            pd.date_range(
                "2020-08-30",
                freq="d",
                periods=4,
            )._with_freq(None),
            pd.date_range(
                "2020-08-30", freq="d", periods=4, tz="US/Central"
            )._with_freq(None),
            pd.MultiIndex.from_product(
                [
                    pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),
                    ["x", "y"],
                ],
            ),
        ],
    )
    @pytest.mark.parametrize(
        "vals",
        [
            {"floats": [1.1, 2.2, 3.3, 4.4]},
            {"dates": pd.date_range("2020-08-30", freq="d", periods=4)},
            {
                "timezones": pd.date_range(
                    "2020-08-30", freq="d", periods=4, tz="Europe/London"
                )
            },
        ],
    )
    def test_read_json_table_timezones_orient(self, idx, vals, recwarn):
        # GH 35973
        df = DataFrame(vals, index=idx)
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    def test_comprehensive(self):
        df = DataFrame(
            {
                "A": [1, 2, 3, 4],
                "B": ["a", "b", "c", "c"],
                "C": pd.date_range("2016-01-01", freq="d", periods=4),
                # 'D': pd.timedelta_range('1h', periods=4, freq='min'),
                "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
                "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
                "G": [1.1, 2.2, 3.3, 4.4],
                "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
                "I": [True, False, False, True],
            },
            index=pd.Index(range(4), name="idx"),
        )

        out = StringIO(df.to_json(orient="table"))
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize(
        "index_names",
        [[None, None], ["foo", "bar"], ["foo", None], [None, "foo"], ["index", "foo"]],
    )
    def test_multiindex(self, index_names):
        # GH 18912
        df = DataFrame(
            [["Arr", "alpha", [1, 2, 3, 4]], ["Bee", "Beta", [10, 20, 30, 40]]],
            index=[["A", "B"], ["Null", "Eins"]],
            columns=["Aussprache", "Griechisch", "Args"],
        )
        df.index.names = index_names
        out = StringIO(df.to_json(orient="table"))
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)

    def test_empty_frame_roundtrip(self):
        # GH 21287
        df = DataFrame(columns=["a", "b", "c"])
        expected = df.copy()
        out = StringIO(df.to_json(orient="table"))
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(expected, result)

    def test_read_json_orient_table_old_schema_version(self):
        df_json = """
        {
            "schema":{
                "fields":[
                    {"name":"index","type":"integer"},
                    {"name":"a","type":"string"}
                ],
                "primaryKey":["index"],
                "pandas_version":"0.20.0"
            },
            "data":[
                {"index":0,"a":1},
                {"index":1,"a":2.0},
                {"index":2,"a":"s"}
            ]
        }
        """
        expected = DataFrame({"a": [1, 2.0, "s"]})
        result = pd.read_json(StringIO(df_json), orient="table")
        tm.assert_frame_equal(expected, result)

    @pytest.mark.parametrize("freq", ["M", "2M", "Q", "2Q", "Y", "2Y"])
    def test_read_json_table_orient_period_depr_freq(self, freq, recwarn):
        # GH#9586
        df = DataFrame(
            {"ints": [1, 2]},
            index=pd.PeriodIndex(["2020-01", "2021-06"], freq=freq),
        )
        out = df.to_json(orient="table")
        result = pd.read_json(out, orient="table")
        tm.assert_frame_equal(df, result)
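
The round-trips above work because orient="table" embeds a Table Schema next
to the data, letting read_json restore dtypes instead of guessing them. A
condensed sketch (illustrative, not part of this commit):

    from io import StringIO

    import pandas as pd

    df = pd.DataFrame({"a": pd.Categorical(["x", "y"])})
    payload = df.to_json(orient="table")
    restored = pd.read_json(StringIO(payload), orient="table")
    assert restored["a"].dtype == df["a"].dtype  # CategoricalDtype survives
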
| @ -0,0 +1,317 @@ | ||||
| """Tests for ExtensionDtype Table Schema integration.""" | ||||
|  | ||||
| from collections import OrderedDict | ||||
| import datetime as dt | ||||
| import decimal | ||||
| from io import StringIO | ||||
| import json | ||||
|  | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     NA, | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     array, | ||||
|     read_json, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
| from pandas.core.arrays.integer import Int64Dtype | ||||
| from pandas.core.arrays.string_ import StringDtype | ||||
| from pandas.core.series import Series | ||||
| from pandas.tests.extension.date import ( | ||||
|     DateArray, | ||||
|     DateDtype, | ||||
| ) | ||||
| from pandas.tests.extension.decimal.array import ( | ||||
|     DecimalArray, | ||||
|     DecimalDtype, | ||||
| ) | ||||
|  | ||||
| from pandas.io.json._table_schema import ( | ||||
|     as_json_table_type, | ||||
|     build_table_schema, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestBuildSchema: | ||||
|     def test_build_table_schema(self): | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "A": DateArray([dt.date(2021, 10, 10)]), | ||||
|                 "B": DecimalArray([decimal.Decimal(10)]), | ||||
|                 "C": array(["pandas"], dtype="string"), | ||||
|                 "D": array([10], dtype="Int64"), | ||||
|             } | ||||
|         ) | ||||
|         result = build_table_schema(df, version=False) | ||||
|         expected = { | ||||
|             "fields": [ | ||||
|                 {"name": "index", "type": "integer"}, | ||||
|                 {"name": "A", "type": "any", "extDtype": "DateDtype"}, | ||||
|                 {"name": "B", "type": "number", "extDtype": "decimal"}, | ||||
|                 {"name": "C", "type": "string", "extDtype": "string"}, | ||||
|                 {"name": "D", "type": "integer", "extDtype": "Int64"}, | ||||
|             ], | ||||
|             "primaryKey": ["index"], | ||||
|         } | ||||
|         assert result == expected | ||||
|         result = build_table_schema(df) | ||||
|         assert "pandas_version" in result | ||||
|  | ||||
|  | ||||
| class TestTableSchemaType: | ||||
|     @pytest.mark.parametrize( | ||||
|         "date_data", | ||||
|         [ | ||||
|             DateArray([dt.date(2021, 10, 10)]), | ||||
|             DateArray(dt.date(2021, 10, 10)), | ||||
|             Series(DateArray(dt.date(2021, 10, 10))), | ||||
|         ], | ||||
|     ) | ||||
|     def test_as_json_table_type_ext_date_array_dtype(self, date_data): | ||||
|         assert as_json_table_type(date_data.dtype) == "any" | ||||
|  | ||||
|     def test_as_json_table_type_ext_date_dtype(self): | ||||
|         assert as_json_table_type(DateDtype()) == "any" | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "decimal_data", | ||||
|         [ | ||||
|             DecimalArray([decimal.Decimal(10)]), | ||||
|             Series(DecimalArray([decimal.Decimal(10)])), | ||||
|         ], | ||||
|     ) | ||||
|     def test_as_json_table_type_ext_decimal_array_dtype(self, decimal_data): | ||||
|         assert as_json_table_type(decimal_data.dtype) == "number" | ||||
|  | ||||
|     def test_as_json_table_type_ext_decimal_dtype(self): | ||||
|         assert as_json_table_type(DecimalDtype()) == "number" | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "string_data", | ||||
|         [ | ||||
|             array(["pandas"], dtype="string"), | ||||
|             Series(array(["pandas"], dtype="string")), | ||||
|         ], | ||||
|     ) | ||||
|     def test_as_json_table_type_ext_string_array_dtype(self, string_data): | ||||
|         assert as_json_table_type(string_data.dtype) == "string" | ||||
|  | ||||
|     def test_as_json_table_type_ext_string_dtype(self): | ||||
|         assert as_json_table_type(StringDtype()) == "string" | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "integer_data", | ||||
|         [ | ||||
|             array([10], dtype="Int64"), | ||||
|             Series(array([10], dtype="Int64")), | ||||
|         ], | ||||
|     ) | ||||
|     def test_as_json_table_type_ext_integer_array_dtype(self, integer_data): | ||||
|         assert as_json_table_type(integer_data.dtype) == "integer" | ||||
|  | ||||
|     def test_as_json_table_type_ext_integer_dtype(self): | ||||
|         assert as_json_table_type(Int64Dtype()) == "integer" | ||||
|  | ||||
|  | ||||
| class TestTableOrient: | ||||
|     @pytest.fixture | ||||
|     def da(self): | ||||
|         return DateArray([dt.date(2021, 10, 10)]) | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def dc(self): | ||||
|         return DecimalArray([decimal.Decimal(10)]) | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def sa(self): | ||||
|         return array(["pandas"], dtype="string") | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def ia(self): | ||||
|         return array([10], dtype="Int64") | ||||
|  | ||||
|     @pytest.fixture | ||||
|     def df(self, da, dc, sa, ia): | ||||
|         return DataFrame( | ||||
|             { | ||||
|                 "A": da, | ||||
|                 "B": dc, | ||||
|                 "C": sa, | ||||
|                 "D": ia, | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|     def test_build_date_series(self, da): | ||||
|         s = Series(da, name="a") | ||||
|         s.index.name = "id" | ||||
|         result = s.to_json(orient="table", date_format="iso") | ||||
|         result = json.loads(result, object_pairs_hook=OrderedDict) | ||||
|  | ||||
|         assert "pandas_version" in result["schema"] | ||||
|         result["schema"].pop("pandas_version") | ||||
|  | ||||
|         fields = [ | ||||
|             {"name": "id", "type": "integer"}, | ||||
|             {"name": "a", "type": "any", "extDtype": "DateDtype"}, | ||||
|         ] | ||||
|  | ||||
|         schema = {"fields": fields, "primaryKey": ["id"]} | ||||
|  | ||||
|         expected = OrderedDict( | ||||
|             [ | ||||
|                 ("schema", schema), | ||||
|                 ("data", [OrderedDict([("id", 0), ("a", "2021-10-10T00:00:00.000")])]), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_build_decimal_series(self, dc): | ||||
|         s = Series(dc, name="a") | ||||
|         s.index.name = "id" | ||||
|         result = s.to_json(orient="table", date_format="iso") | ||||
|         result = json.loads(result, object_pairs_hook=OrderedDict) | ||||
|  | ||||
|         assert "pandas_version" in result["schema"] | ||||
|         result["schema"].pop("pandas_version") | ||||
|  | ||||
|         fields = [ | ||||
|             {"name": "id", "type": "integer"}, | ||||
|             {"name": "a", "type": "number", "extDtype": "decimal"}, | ||||
|         ] | ||||
|  | ||||
|         schema = {"fields": fields, "primaryKey": ["id"]} | ||||
|  | ||||
|         expected = OrderedDict( | ||||
|             [ | ||||
|                 ("schema", schema), | ||||
|                 ("data", [OrderedDict([("id", 0), ("a", 10.0)])]), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_build_string_series(self, sa): | ||||
|         s = Series(sa, name="a") | ||||
|         s.index.name = "id" | ||||
|         result = s.to_json(orient="table", date_format="iso") | ||||
|         result = json.loads(result, object_pairs_hook=OrderedDict) | ||||
|  | ||||
|         assert "pandas_version" in result["schema"] | ||||
|         result["schema"].pop("pandas_version") | ||||
|  | ||||
|         fields = [ | ||||
|             {"name": "id", "type": "integer"}, | ||||
|             {"name": "a", "type": "string", "extDtype": "string"}, | ||||
|         ] | ||||
|  | ||||
|         schema = {"fields": fields, "primaryKey": ["id"]} | ||||
|  | ||||
|         expected = OrderedDict( | ||||
|             [ | ||||
|                 ("schema", schema), | ||||
|                 ("data", [OrderedDict([("id", 0), ("a", "pandas")])]), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_build_int64_series(self, ia): | ||||
|         s = Series(ia, name="a") | ||||
|         s.index.name = "id" | ||||
|         result = s.to_json(orient="table", date_format="iso") | ||||
|         result = json.loads(result, object_pairs_hook=OrderedDict) | ||||
|  | ||||
|         assert "pandas_version" in result["schema"] | ||||
|         result["schema"].pop("pandas_version") | ||||
|  | ||||
|         fields = [ | ||||
|             {"name": "id", "type": "integer"}, | ||||
|             {"name": "a", "type": "integer", "extDtype": "Int64"}, | ||||
|         ] | ||||
|  | ||||
|         schema = {"fields": fields, "primaryKey": ["id"]} | ||||
|  | ||||
|         expected = OrderedDict( | ||||
|             [ | ||||
|                 ("schema", schema), | ||||
|                 ("data", [OrderedDict([("id", 0), ("a", 10)])]), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_to_json(self, df): | ||||
|         df = df.copy() | ||||
|         df.index.name = "idx" | ||||
|         result = df.to_json(orient="table", date_format="iso") | ||||
|         result = json.loads(result, object_pairs_hook=OrderedDict) | ||||
|  | ||||
|         assert "pandas_version" in result["schema"] | ||||
|         result["schema"].pop("pandas_version") | ||||
|  | ||||
|         fields = [ | ||||
|             OrderedDict({"name": "idx", "type": "integer"}), | ||||
|             OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}), | ||||
|             OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}), | ||||
|             OrderedDict({"name": "C", "type": "string", "extDtype": "string"}), | ||||
|             OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}), | ||||
|         ] | ||||
|  | ||||
|         schema = OrderedDict({"fields": fields, "primaryKey": ["idx"]}) | ||||
|         data = [ | ||||
|             OrderedDict( | ||||
|                 [ | ||||
|                     ("idx", 0), | ||||
|                     ("A", "2021-10-10T00:00:00.000"), | ||||
|                     ("B", 10.0), | ||||
|                     ("C", "pandas"), | ||||
|                     ("D", 10), | ||||
|                 ] | ||||
|             ) | ||||
|         ] | ||||
|         expected = OrderedDict([("schema", schema), ("data", data)]) | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_json_ext_dtype_reading_roundtrip(self): | ||||
|         # GH#40255 | ||||
|         df = DataFrame( | ||||
|             { | ||||
|                 "a": Series([2, NA], dtype="Int64"), | ||||
|                 "b": Series([1.5, NA], dtype="Float64"), | ||||
|                 "c": Series([True, NA], dtype="boolean"), | ||||
|             }, | ||||
|             index=Index([1, NA], dtype="Int64"), | ||||
|         ) | ||||
|         expected = df.copy() | ||||
|         data_json = df.to_json(orient="table", indent=4) | ||||
|         result = read_json(StringIO(data_json), orient="table") | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
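|     # Note (descriptive): orient="table" records each column's "extDtype" in | ||||
|     # the schema, which is what lets read_json rebuild Int64/Float64/boolean | ||||
|     # columns instead of falling back to float64/object on the way back in. | ||||
|  | ||||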
|     def test_json_ext_dtype_reading(self): | ||||
|         # GH#40255 | ||||
|         data_json = """{ | ||||
|             "schema":{ | ||||
|                 "fields":[ | ||||
|                     { | ||||
|                         "name":"a", | ||||
|                         "type":"integer", | ||||
|                         "extDtype":"Int64" | ||||
|                     } | ||||
|                 ] | ||||
|             }, | ||||
|             "data":[ | ||||
|                 { | ||||
|                     "a":2 | ||||
|                 }, | ||||
|                 { | ||||
|                     "a":null | ||||
|                 } | ||||
|             ] | ||||
|         }""" | ||||
|         result = read_json(StringIO(data_json), orient="table") | ||||
|         expected = DataFrame({"a": Series([2, NA], dtype="Int64")}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,907 @@ | ||||
| import json | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     Index, | ||||
|     Series, | ||||
|     json_normalize, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.io.json._normalize import nested_to_record | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def deep_nested(): | ||||
|     # deeply nested data | ||||
|     return [ | ||||
|         { | ||||
|             "country": "USA", | ||||
|             "states": [ | ||||
|                 { | ||||
|                     "name": "California", | ||||
|                     "cities": [ | ||||
|                         {"name": "San Francisco", "pop": 12345}, | ||||
|                         {"name": "Los Angeles", "pop": 12346}, | ||||
|                     ], | ||||
|                 }, | ||||
|                 { | ||||
|                     "name": "Ohio", | ||||
|                     "cities": [ | ||||
|                         {"name": "Columbus", "pop": 1234}, | ||||
|                         {"name": "Cleveland", "pop": 1236}, | ||||
|                     ], | ||||
|                 }, | ||||
|             ], | ||||
|         }, | ||||
|         { | ||||
|             "country": "Germany", | ||||
|             "states": [ | ||||
|                 {"name": "Bayern", "cities": [{"name": "Munich", "pop": 12347}]}, | ||||
|                 { | ||||
|                     "name": "Nordrhein-Westfalen", | ||||
|                     "cities": [ | ||||
|                         {"name": "Duesseldorf", "pop": 1238}, | ||||
|                         {"name": "Koeln", "pop": 1239}, | ||||
|                     ], | ||||
|                 }, | ||||
|             ], | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def state_data(): | ||||
|     return [ | ||||
|         { | ||||
|             "counties": [ | ||||
|                 {"name": "Dade", "population": 12345}, | ||||
|                 {"name": "Broward", "population": 40000}, | ||||
|                 {"name": "Palm Beach", "population": 60000}, | ||||
|             ], | ||||
|             "info": {"governor": "Rick Scott"}, | ||||
|             "shortname": "FL", | ||||
|             "state": "Florida", | ||||
|         }, | ||||
|         { | ||||
|             "counties": [ | ||||
|                 {"name": "Summit", "population": 1234}, | ||||
|                 {"name": "Cuyahoga", "population": 1337}, | ||||
|             ], | ||||
|             "info": {"governor": "John Kasich"}, | ||||
|             "shortname": "OH", | ||||
|             "state": "Ohio", | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def author_missing_data(): | ||||
|     return [ | ||||
|         {"info": None}, | ||||
|         { | ||||
|             "info": {"created_at": "11/08/1993", "last_updated": "26/05/2012"}, | ||||
|             "author_name": {"first": "Jane", "last_name": "Doe"}, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def missing_metadata(): | ||||
|     return [ | ||||
|         { | ||||
|             "name": "Alice", | ||||
|             "addresses": [ | ||||
|                 { | ||||
|                     "number": 9562, | ||||
|                     "street": "Morris St.", | ||||
|                     "city": "Massillon", | ||||
|                     "state": "OH", | ||||
|                     "zip": 44646, | ||||
|                 } | ||||
|             ], | ||||
|             "previous_residences": {"cities": [{"city_name": "Foo York City"}]}, | ||||
|         }, | ||||
|         { | ||||
|             "addresses": [ | ||||
|                 { | ||||
|                     "number": 8449, | ||||
|                     "street": "Spring St.", | ||||
|                     "city": "Elizabethton", | ||||
|                     "state": "TN", | ||||
|                     "zip": 37643, | ||||
|                 } | ||||
|             ], | ||||
|             "previous_residences": {"cities": [{"city_name": "Barmingham"}]}, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def max_level_test_input_data(): | ||||
|     """ | ||||
|     input data to test json_normalize with max_level param | ||||
|     """ | ||||
|     return [ | ||||
|         { | ||||
|             "CreatedBy": {"Name": "User001"}, | ||||
|             "Lookup": { | ||||
|                 "TextField": "Some text", | ||||
|                 "UserField": {"Id": "ID001", "Name": "Name001"}, | ||||
|             }, | ||||
|             "Image": {"a": "b"}, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
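| # Informal illustration of max_level: with max_level=1, flattening stops one | ||||
| # level down, so {"Lookup": {"UserField": {"Id": "ID001", ...}}} becomes a | ||||
| # "Lookup.UserField" key holding the still-nested user dict. | ||||
|  | ||||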
|  | ||||
| class TestJSONNormalize: | ||||
|     def test_simple_records(self): | ||||
|         recs = [ | ||||
|             {"a": 1, "b": 2, "c": 3}, | ||||
|             {"a": 4, "b": 5, "c": 6}, | ||||
|             {"a": 7, "b": 8, "c": 9}, | ||||
|             {"a": 10, "b": 11, "c": 12}, | ||||
|         ] | ||||
|  | ||||
|         result = json_normalize(recs) | ||||
|         expected = DataFrame(recs) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_simple_normalize(self, state_data): | ||||
|         result = json_normalize(state_data[0], "counties") | ||||
|         expected = DataFrame(state_data[0]["counties"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = json_normalize(state_data, "counties") | ||||
|  | ||||
|         expected = [] | ||||
|         for rec in state_data: | ||||
|             expected.extend(rec["counties"]) | ||||
|         expected = DataFrame(expected) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = json_normalize(state_data, "counties", meta="state") | ||||
|         expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2]) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
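|     # For orientation (informal): with meta="state", the result gains a | ||||
|     # "state" column whose parent value repeats across its counties, i.e. | ||||
|     # three "Florida" rows followed by two "Ohio" rows. | ||||
|  | ||||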
|     def test_fields_list_type_normalize(self): | ||||
|         parse_metadata_fields_list_type = [ | ||||
|             {"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}} | ||||
|         ] | ||||
|         result = json_normalize( | ||||
|             parse_metadata_fields_list_type, | ||||
|             record_path=["values"], | ||||
|             meta=[["metadata", "listdata"]], | ||||
|         ) | ||||
|         expected = DataFrame( | ||||
|             {0: [1, 2, 3], "metadata.listdata": [[1, 2], [1, 2], [1, 2]]} | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_empty_array(self): | ||||
|         result = json_normalize([]) | ||||
|         expected = DataFrame() | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "data, record_path, exception_type", | ||||
|         [ | ||||
|             ([{"a": 0}, {"a": 1}], None, None), | ||||
|             ({"a": [{"a": 0}, {"a": 1}]}, "a", None), | ||||
|             ('{"a": [{"a": 0}, {"a": 1}]}', None, NotImplementedError), | ||||
|             (None, None, NotImplementedError), | ||||
|         ], | ||||
|     ) | ||||
|     def test_accepted_input(self, data, record_path, exception_type): | ||||
|         if exception_type is not None: | ||||
|             with pytest.raises(exception_type, match=""): | ||||
|                 json_normalize(data, record_path=record_path) | ||||
|         else: | ||||
|             result = json_normalize(data, record_path=record_path) | ||||
|             expected = DataFrame([0, 1], columns=["a"]) | ||||
|             tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_simple_normalize_with_separator(self, deep_nested): | ||||
|         # GH 14883 | ||||
|         result = json_normalize({"A": {"A": 1, "B": 2}}) | ||||
|         expected = DataFrame([[1, 2]], columns=["A.A", "A.B"]) | ||||
|         tm.assert_frame_equal(result.reindex_like(expected), expected) | ||||
|  | ||||
|         result = json_normalize({"A": {"A": 1, "B": 2}}, sep="_") | ||||
|         expected = DataFrame([[1, 2]], columns=["A_A", "A_B"]) | ||||
|         tm.assert_frame_equal(result.reindex_like(expected), expected) | ||||
|  | ||||
|         result = json_normalize({"A": {"A": 1, "B": 2}}, sep="\u03c3") | ||||
|         expected = DataFrame([[1, 2]], columns=["A\u03c3A", "A\u03c3B"]) | ||||
|         tm.assert_frame_equal(result.reindex_like(expected), expected) | ||||
|  | ||||
|         result = json_normalize( | ||||
|             deep_nested, | ||||
|             ["states", "cities"], | ||||
|             meta=["country", ["states", "name"]], | ||||
|             sep="_", | ||||
|         ) | ||||
|         expected = Index(["name", "pop", "country", "states_name"]).sort_values() | ||||
|         assert result.columns.sort_values().equals(expected) | ||||
|  | ||||
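|     # Note (descriptive): sep applies both to flattened dict keys ("A_A") | ||||
|     # and to nested meta paths (["states", "name"] -> "states_name"). | ||||
|  | ||||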
|     def test_normalize_with_multichar_separator(self): | ||||
|         # GH #43831 | ||||
|         data = {"a": [1, 2], "b": {"b_1": 2, "b_2": (3, 4)}} | ||||
|         result = json_normalize(data, sep="__") | ||||
|         expected = DataFrame([[[1, 2], 2, (3, 4)]], columns=["a", "b__b_1", "b__b_2"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_value_array_record_prefix(self): | ||||
|         # GH 21536 | ||||
|         result = json_normalize({"A": [1, 2]}, "A", record_prefix="Prefix.") | ||||
|         expected = DataFrame([[1], [2]], columns=["Prefix.0"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_nested_object_record_path(self): | ||||
|         # GH 22706 | ||||
|         data = { | ||||
|             "state": "Florida", | ||||
|             "info": { | ||||
|                 "governor": "Rick Scott", | ||||
|                 "counties": [ | ||||
|                     {"name": "Dade", "population": 12345}, | ||||
|                     {"name": "Broward", "population": 40000}, | ||||
|                     {"name": "Palm Beach", "population": 60000}, | ||||
|                 ], | ||||
|             }, | ||||
|         } | ||||
|         result = json_normalize(data, record_path=["info", "counties"]) | ||||
|         expected = DataFrame( | ||||
|             [["Dade", 12345], ["Broward", 40000], ["Palm Beach", 60000]], | ||||
|             columns=["name", "population"], | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_more_deeply_nested(self, deep_nested): | ||||
|         result = json_normalize( | ||||
|             deep_nested, ["states", "cities"], meta=["country", ["states", "name"]] | ||||
|         ) | ||||
|         ex_data = { | ||||
|             "country": ["USA"] * 4 + ["Germany"] * 3, | ||||
|             "states.name": [ | ||||
|                 "California", | ||||
|                 "California", | ||||
|                 "Ohio", | ||||
|                 "Ohio", | ||||
|                 "Bayern", | ||||
|                 "Nordrhein-Westfalen", | ||||
|                 "Nordrhein-Westfalen", | ||||
|             ], | ||||
|             "name": [ | ||||
|                 "San Francisco", | ||||
|                 "Los Angeles", | ||||
|                 "Columbus", | ||||
|                 "Cleveland", | ||||
|                 "Munich", | ||||
|                 "Duesseldorf", | ||||
|                 "Koeln", | ||||
|             ], | ||||
|             "pop": [12345, 12346, 1234, 1236, 12347, 1238, 1239], | ||||
|         } | ||||
|  | ||||
|         expected = DataFrame(ex_data, columns=result.columns) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_shallow_nested(self): | ||||
|         data = [ | ||||
|             { | ||||
|                 "state": "Florida", | ||||
|                 "shortname": "FL", | ||||
|                 "info": {"governor": "Rick Scott"}, | ||||
|                 "counties": [ | ||||
|                     {"name": "Dade", "population": 12345}, | ||||
|                     {"name": "Broward", "population": 40000}, | ||||
|                     {"name": "Palm Beach", "population": 60000}, | ||||
|                 ], | ||||
|             }, | ||||
|             { | ||||
|                 "state": "Ohio", | ||||
|                 "shortname": "OH", | ||||
|                 "info": {"governor": "John Kasich"}, | ||||
|                 "counties": [ | ||||
|                     {"name": "Summit", "population": 1234}, | ||||
|                     {"name": "Cuyahoga", "population": 1337}, | ||||
|                 ], | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|         result = json_normalize( | ||||
|             data, "counties", ["state", "shortname", ["info", "governor"]] | ||||
|         ) | ||||
|         ex_data = { | ||||
|             "name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"], | ||||
|             "state": ["Florida"] * 3 + ["Ohio"] * 2, | ||||
|             "shortname": ["FL", "FL", "FL", "OH", "OH"], | ||||
|             "info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2, | ||||
|             "population": [12345, 40000, 60000, 1234, 1337], | ||||
|         } | ||||
|         expected = DataFrame(ex_data, columns=result.columns) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_nested_meta_path_with_nested_record_path(self, state_data): | ||||
|         # GH 27220 | ||||
|         result = json_normalize( | ||||
|             data=state_data, | ||||
|             record_path=["counties"], | ||||
|             meta=["state", "shortname", ["info", "governor"]], | ||||
|             errors="ignore", | ||||
|         ) | ||||
|  | ||||
|         ex_data = { | ||||
|             "name": ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"], | ||||
|             "population": [12345, 40000, 60000, 1234, 1337], | ||||
|             "state": ["Florida"] * 3 + ["Ohio"] * 2, | ||||
|             "shortname": ["FL"] * 3 + ["OH"] * 2, | ||||
|             "info.governor": ["Rick Scott"] * 3 + ["John Kasich"] * 2, | ||||
|         } | ||||
|  | ||||
|         expected = DataFrame(ex_data) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_meta_name_conflict(self): | ||||
|         data = [ | ||||
|             { | ||||
|                 "foo": "hello", | ||||
|                 "bar": "there", | ||||
|                 "data": [ | ||||
|                     {"foo": "something", "bar": "else"}, | ||||
|                     {"foo": "something2", "bar": "else2"}, | ||||
|                 ], | ||||
|             } | ||||
|         ] | ||||
|  | ||||
|         msg = r"Conflicting metadata name (foo|bar), need distinguishing prefix" | ||||
|         with pytest.raises(ValueError, match=msg): | ||||
|             json_normalize(data, "data", meta=["foo", "bar"]) | ||||
|  | ||||
|         result = json_normalize(data, "data", meta=["foo", "bar"], meta_prefix="meta") | ||||
|  | ||||
|         for val in ["metafoo", "metabar", "foo", "bar"]: | ||||
|             assert val in result | ||||
|  | ||||
|     def test_meta_parameter_not_modified(self): | ||||
|         # GH 18610 | ||||
|         data = [ | ||||
|             { | ||||
|                 "foo": "hello", | ||||
|                 "bar": "there", | ||||
|                 "data": [ | ||||
|                     {"foo": "something", "bar": "else"}, | ||||
|                     {"foo": "something2", "bar": "else2"}, | ||||
|                 ], | ||||
|             } | ||||
|         ] | ||||
|  | ||||
|         COLUMNS = ["foo", "bar"] | ||||
|         result = json_normalize(data, "data", meta=COLUMNS, meta_prefix="meta") | ||||
|  | ||||
|         assert COLUMNS == ["foo", "bar"] | ||||
|         for val in ["metafoo", "metabar", "foo", "bar"]: | ||||
|             assert val in result | ||||
|  | ||||
|     def test_record_prefix(self, state_data): | ||||
|         result = json_normalize(state_data[0], "counties") | ||||
|         expected = DataFrame(state_data[0]["counties"]) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         result = json_normalize( | ||||
|             state_data, "counties", meta="state", record_prefix="county_" | ||||
|         ) | ||||
|  | ||||
|         expected = [] | ||||
|         for rec in state_data: | ||||
|             expected.extend(rec["counties"]) | ||||
|         expected = DataFrame(expected) | ||||
|         expected = expected.rename(columns=lambda x: "county_" + x) | ||||
|         expected["state"] = np.array(["Florida", "Ohio"]).repeat([3, 2]) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_non_ascii_key(self): | ||||
|         testjson = ( | ||||
|             b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},' | ||||
|             b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]' | ||||
|         ).decode("utf8") | ||||
|  | ||||
|         testdata = { | ||||
|             b"\xc3\x9cnic\xc3\xb8de".decode("utf8"): [0, 1], | ||||
|             "sub.A": [1, 3], | ||||
|             "sub.B": [2, 4], | ||||
|         } | ||||
|         expected = DataFrame(testdata) | ||||
|  | ||||
|         result = json_normalize(json.loads(testjson)) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_missing_field(self, author_missing_data): | ||||
|         # GH20030: | ||||
|         result = json_normalize(author_missing_data) | ||||
|         ex_data = [ | ||||
|             { | ||||
|                 "info": np.nan, | ||||
|                 "info.created_at": np.nan, | ||||
|                 "info.last_updated": np.nan, | ||||
|                 "author_name.first": np.nan, | ||||
|                 "author_name.last_name": np.nan, | ||||
|             }, | ||||
|             { | ||||
|                 "info": None, | ||||
|                 "info.created_at": "11/08/1993", | ||||
|                 "info.last_updated": "26/05/2012", | ||||
|                 "author_name.first": "Jane", | ||||
|                 "author_name.last_name": "Doe", | ||||
|             }, | ||||
|         ] | ||||
|         expected = DataFrame(ex_data) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "max_level,expected", | ||||
|         [ | ||||
|             ( | ||||
|                 0, | ||||
|                 [ | ||||
|                     { | ||||
|                         "TextField": "Some text", | ||||
|                         "UserField": {"Id": "ID001", "Name": "Name001"}, | ||||
|                         "CreatedBy": {"Name": "User001"}, | ||||
|                         "Image": {"a": "b"}, | ||||
|                     }, | ||||
|                     { | ||||
|                         "TextField": "Some text", | ||||
|                         "UserField": {"Id": "ID001", "Name": "Name001"}, | ||||
|                         "CreatedBy": {"Name": "User001"}, | ||||
|                         "Image": {"a": "b"}, | ||||
|                     }, | ||||
|                 ], | ||||
|             ), | ||||
|             ( | ||||
|                 1, | ||||
|                 [ | ||||
|                     { | ||||
|                         "TextField": "Some text", | ||||
|                         "UserField.Id": "ID001", | ||||
|                         "UserField.Name": "Name001", | ||||
|                         "CreatedBy": {"Name": "User001"}, | ||||
|                         "Image": {"a": "b"}, | ||||
|                     }, | ||||
|                     { | ||||
|                         "TextField": "Some text", | ||||
|                         "UserField.Id": "ID001", | ||||
|                         "UserField.Name": "Name001", | ||||
|                         "CreatedBy": {"Name": "User001"}, | ||||
|                         "Image": {"a": "b"}, | ||||
|                     }, | ||||
|                 ], | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_max_level_with_records_path(self, max_level, expected): | ||||
|         # GH23843: Enhanced JSON normalize | ||||
|         test_input = [ | ||||
|             { | ||||
|                 "CreatedBy": {"Name": "User001"}, | ||||
|                 "Lookup": [ | ||||
|                     { | ||||
|                         "TextField": "Some text", | ||||
|                         "UserField": {"Id": "ID001", "Name": "Name001"}, | ||||
|                     }, | ||||
|                     { | ||||
|                         "TextField": "Some text", | ||||
|                         "UserField": {"Id": "ID001", "Name": "Name001"}, | ||||
|                     }, | ||||
|                 ], | ||||
|                 "Image": {"a": "b"}, | ||||
|                 "tags": [ | ||||
|                     {"foo": "something", "bar": "else"}, | ||||
|                     {"foo": "something2", "bar": "else2"}, | ||||
|                 ], | ||||
|             } | ||||
|         ] | ||||
|  | ||||
|         result = json_normalize( | ||||
|             test_input, | ||||
|             record_path=["Lookup"], | ||||
|             meta=[["CreatedBy"], ["Image"]], | ||||
|             max_level=max_level, | ||||
|         ) | ||||
|         expected_df = DataFrame(data=expected, columns=result.columns.values) | ||||
|         tm.assert_equal(expected_df, result) | ||||
|  | ||||
|     def test_nested_flattening_consistent(self): | ||||
|         # see gh-21537 | ||||
|         df1 = json_normalize([{"A": {"B": 1}}]) | ||||
|         df2 = json_normalize({"dummy": [{"A": {"B": 1}}]}, "dummy") | ||||
|  | ||||
|         # They should be the same. | ||||
|         tm.assert_frame_equal(df1, df2) | ||||
|  | ||||
|     def test_nonetype_record_path(self, nulls_fixture): | ||||
|         # see gh-30148 | ||||
|         # should not raise TypeError | ||||
|         result = json_normalize( | ||||
|             [ | ||||
|                 {"state": "Texas", "info": nulls_fixture}, | ||||
|                 {"state": "Florida", "info": [{"i": 2}]}, | ||||
|             ], | ||||
|             record_path=["info"], | ||||
|         ) | ||||
|         expected = DataFrame({"i": 2}, index=[0]) | ||||
|         tm.assert_equal(result, expected) | ||||
|  | ||||
|     @pytest.mark.parametrize("value", ["false", "true", "{}", "1", '"text"']) | ||||
|     def test_non_list_record_path_errors(self, value): | ||||
|         # see gh-30148, GH 26284 | ||||
|         parsed_value = json.loads(value) | ||||
|         test_input = {"state": "Texas", "info": parsed_value} | ||||
|         test_path = "info" | ||||
|         msg = ( | ||||
|             f"{test_input} has non list value {parsed_value} for path {test_path}. " | ||||
|             "Must be list or null." | ||||
|         ) | ||||
|         with pytest.raises(TypeError, match=msg): | ||||
|             json_normalize([test_input], record_path=[test_path]) | ||||
|  | ||||
|     def test_meta_non_iterable(self): | ||||
|         # GH 31507 | ||||
|         data = """[{"id": 99, "data": [{"one": 1, "two": 2}]}]""" | ||||
|  | ||||
|         result = json_normalize(json.loads(data), record_path=["data"], meta=["id"]) | ||||
|         expected = DataFrame( | ||||
|             {"one": [1], "two": [2], "id": np.array([99], dtype=object)} | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_generator(self, state_data): | ||||
|         # GH35923 Fix pd.json_normalize to not skip the first element of a | ||||
|         # generator input | ||||
|         def generator_data(): | ||||
|             yield from state_data[0]["counties"] | ||||
|  | ||||
|         result = json_normalize(generator_data()) | ||||
|         expected = DataFrame(state_data[0]["counties"]) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_top_column_with_leading_underscore(self): | ||||
|         # GH 49861 | ||||
|         data = {"_id": {"a1": 10, "l2": {"l3": 0}}, "gg": 4} | ||||
|         result = json_normalize(data, sep="_") | ||||
|         expected = DataFrame([[4, 10, 0]], columns=["gg", "_id_a1", "_id_l2_l3"]) | ||||
|  | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| class TestNestedToRecord: | ||||
|     def test_flat_stays_flat(self): | ||||
|         recs = [{"flat1": 1, "flat2": 2}, {"flat3": 3, "flat2": 4}] | ||||
|         result = nested_to_record(recs) | ||||
|         expected = recs | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_one_level_deep_flattens(self): | ||||
|         data = {"flat1": 1, "dict1": {"c": 1, "d": 2}} | ||||
|  | ||||
|         result = nested_to_record(data) | ||||
|         expected = {"dict1.c": 1, "dict1.d": 2, "flat1": 1} | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_nested_flattens(self): | ||||
|         data = { | ||||
|             "flat1": 1, | ||||
|             "dict1": {"c": 1, "d": 2}, | ||||
|             "nested": {"e": {"c": 1, "d": 2}, "d": 2}, | ||||
|         } | ||||
|  | ||||
|         result = nested_to_record(data) | ||||
|         expected = { | ||||
|             "dict1.c": 1, | ||||
|             "dict1.d": 2, | ||||
|             "flat1": 1, | ||||
|             "nested.d": 2, | ||||
|             "nested.e.c": 1, | ||||
|             "nested.e.d": 2, | ||||
|         } | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_json_normalize_errors(self, missing_metadata): | ||||
|         # GH14583: | ||||
|         # If meta keys are not always present a new option to set | ||||
|         # errors='ignore' has been implemented | ||||
|  | ||||
|         msg = ( | ||||
|             "Key 'name' not found. To replace missing values of " | ||||
|             "'name' with np.nan, pass in errors='ignore'" | ||||
|         ) | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             json_normalize( | ||||
|                 data=missing_metadata, | ||||
|                 record_path="addresses", | ||||
|                 meta="name", | ||||
|                 errors="raise", | ||||
|             ) | ||||
|  | ||||
|     def test_missing_meta(self, missing_metadata): | ||||
|         # GH25468 | ||||
|         # If metadata is nullable with errors set to ignore, the null values | ||||
|         # should be numpy.nan values | ||||
|         result = json_normalize( | ||||
|             data=missing_metadata, record_path="addresses", meta="name", errors="ignore" | ||||
|         ) | ||||
|         ex_data = [ | ||||
|             [9562, "Morris St.", "Massillon", "OH", 44646, "Alice"], | ||||
|             [8449, "Spring St.", "Elizabethton", "TN", 37643, np.nan], | ||||
|         ] | ||||
|         columns = ["number", "street", "city", "state", "zip", "name"] | ||||
|         expected = DataFrame(ex_data, columns=columns) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_missing_nested_meta(self): | ||||
|         # GH44312 | ||||
|         # If errors="ignore" and nested metadata is null, we should return nan | ||||
|         data = {"meta": "foo", "nested_meta": None, "value": [{"rec": 1}, {"rec": 2}]} | ||||
|         result = json_normalize( | ||||
|             data, | ||||
|             record_path="value", | ||||
|             meta=["meta", ["nested_meta", "leaf"]], | ||||
|             errors="ignore", | ||||
|         ) | ||||
|         ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]] | ||||
|         columns = ["rec", "meta", "nested_meta.leaf"] | ||||
|         expected = DataFrame(ex_data, columns=columns).astype( | ||||
|             {"nested_meta.leaf": object} | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|         # If errors="raise" and nested metadata is null, we should raise with the | ||||
|         # key of the first missing level | ||||
|         with pytest.raises(KeyError, match="'leaf' not found"): | ||||
|             json_normalize( | ||||
|                 data, | ||||
|                 record_path="value", | ||||
|                 meta=["meta", ["nested_meta", "leaf"]], | ||||
|                 errors="raise", | ||||
|             ) | ||||
|  | ||||
|     def test_missing_meta_multilevel_record_path_errors_raise(self, missing_metadata): | ||||
|         # GH41876 | ||||
|         # Ensure errors='raise' works as intended even when a record_path of length | ||||
|         # greater than one is passed in | ||||
|         msg = ( | ||||
|             "Key 'name' not found. To replace missing values of " | ||||
|             "'name' with np.nan, pass in errors='ignore'" | ||||
|         ) | ||||
|         with pytest.raises(KeyError, match=msg): | ||||
|             json_normalize( | ||||
|                 data=missing_metadata, | ||||
|                 record_path=["previous_residences", "cities"], | ||||
|                 meta="name", | ||||
|                 errors="raise", | ||||
|             ) | ||||
|  | ||||
|     def test_missing_meta_multilevel_record_path_errors_ignore(self, missing_metadata): | ||||
|         # GH41876 | ||||
|         # Ensure errors='ignore' works as intended even when a record_path of length | ||||
|         # greater than one is passed in | ||||
|         result = json_normalize( | ||||
|             data=missing_metadata, | ||||
|             record_path=["previous_residences", "cities"], | ||||
|             meta="name", | ||||
|             errors="ignore", | ||||
|         ) | ||||
|         ex_data = [ | ||||
|             ["Foo York City", "Alice"], | ||||
|             ["Barmingham", np.nan], | ||||
|         ] | ||||
|         columns = ["city_name", "name"] | ||||
|         expected = DataFrame(ex_data, columns=columns) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     def test_donot_drop_nonevalues(self): | ||||
|         # GH21356 | ||||
|         data = [ | ||||
|             {"info": None, "author_name": {"first": "Smith", "last_name": "Appleseed"}}, | ||||
|             { | ||||
|                 "info": {"created_at": "11/08/1993", "last_updated": "26/05/2012"}, | ||||
|                 "author_name": {"first": "Jane", "last_name": "Doe"}, | ||||
|             }, | ||||
|         ] | ||||
|         result = nested_to_record(data) | ||||
|         expected = [ | ||||
|             { | ||||
|                 "info": None, | ||||
|                 "author_name.first": "Smith", | ||||
|                 "author_name.last_name": "Appleseed", | ||||
|             }, | ||||
|             { | ||||
|                 "author_name.first": "Jane", | ||||
|                 "author_name.last_name": "Doe", | ||||
|                 "info.created_at": "11/08/1993", | ||||
|                 "info.last_updated": "26/05/2012", | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_nonetype_top_level_bottom_level(self): | ||||
|         # GH21158: If inner level json has a key with a null value | ||||
|         # make sure it does not do a new_d.pop twice and except | ||||
|         data = { | ||||
|             "id": None, | ||||
|             "location": { | ||||
|                 "country": { | ||||
|                     "state": { | ||||
|                         "id": None, | ||||
|                         "town.info": { | ||||
|                             "id": None, | ||||
|                             "region": None, | ||||
|                             "x": 49.151580810546875, | ||||
|                             "y": -33.148521423339844, | ||||
|                             "z": 27.572303771972656, | ||||
|                         }, | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|         } | ||||
|         result = nested_to_record(data) | ||||
|         expected = { | ||||
|             "id": None, | ||||
|             "location.country.state.id": None, | ||||
|             "location.country.state.town.info.id": None, | ||||
|             "location.country.state.town.info.region": None, | ||||
|             "location.country.state.town.info.x": 49.151580810546875, | ||||
|             "location.country.state.town.info.y": -33.148521423339844, | ||||
|             "location.country.state.town.info.z": 27.572303771972656, | ||||
|         } | ||||
|         assert result == expected | ||||
|  | ||||
|     def test_nonetype_multiple_levels(self): | ||||
|         # GH21158: If inner level json has a key with a null value | ||||
|         # make sure it does not do a new_d.pop twice and except | ||||
|         data = { | ||||
|             "id": None, | ||||
|             "location": { | ||||
|                 "id": None, | ||||
|                 "country": { | ||||
|                     "id": None, | ||||
|                     "state": { | ||||
|                         "id": None, | ||||
|                         "town.info": { | ||||
|                             "region": None, | ||||
|                             "x": 49.151580810546875, | ||||
|                             "y": -33.148521423339844, | ||||
|                             "z": 27.572303771972656, | ||||
|                         }, | ||||
|                     }, | ||||
|                 }, | ||||
|             }, | ||||
|         } | ||||
|         result = nested_to_record(data) | ||||
|         expected = { | ||||
|             "id": None, | ||||
|             "location.id": None, | ||||
|             "location.country.id": None, | ||||
|             "location.country.state.id": None, | ||||
|             "location.country.state.town.info.region": None, | ||||
|             "location.country.state.town.info.x": 49.151580810546875, | ||||
|             "location.country.state.town.info.y": -33.148521423339844, | ||||
|             "location.country.state.town.info.z": 27.572303771972656, | ||||
|         } | ||||
|         assert result == expected | ||||
|  | ||||
|     @pytest.mark.parametrize( | ||||
|         "max_level, expected", | ||||
|         [ | ||||
|             ( | ||||
|                 None, | ||||
|                 [ | ||||
|                     { | ||||
|                         "CreatedBy.Name": "User001", | ||||
|                         "Lookup.TextField": "Some text", | ||||
|                         "Lookup.UserField.Id": "ID001", | ||||
|                         "Lookup.UserField.Name": "Name001", | ||||
|                         "Image.a": "b", | ||||
|                     } | ||||
|                 ], | ||||
|             ), | ||||
|             ( | ||||
|                 0, | ||||
|                 [ | ||||
|                     { | ||||
|                         "CreatedBy": {"Name": "User001"}, | ||||
|                         "Lookup": { | ||||
|                             "TextField": "Some text", | ||||
|                             "UserField": {"Id": "ID001", "Name": "Name001"}, | ||||
|                         }, | ||||
|                         "Image": {"a": "b"}, | ||||
|                     } | ||||
|                 ], | ||||
|             ), | ||||
|             ( | ||||
|                 1, | ||||
|                 [ | ||||
|                     { | ||||
|                         "CreatedBy.Name": "User001", | ||||
|                         "Lookup.TextField": "Some text", | ||||
|                         "Lookup.UserField": {"Id": "ID001", "Name": "Name001"}, | ||||
|                         "Image.a": "b", | ||||
|                     } | ||||
|                 ], | ||||
|             ), | ||||
|         ], | ||||
|     ) | ||||
|     def test_with_max_level(self, max_level, expected, max_level_test_input_data): | ||||
|         # GH23843: Enhanced JSON normalize | ||||
|         output = nested_to_record(max_level_test_input_data, max_level=max_level) | ||||
|         assert output == expected | ||||
|  | ||||
|     def test_with_large_max_level(self): | ||||
|         # GH23843: Enhanced JSON normalize | ||||
|         max_level = 100 | ||||
|         input_data = [ | ||||
|             { | ||||
|                 "CreatedBy": { | ||||
|                     "user": { | ||||
|                         "name": {"firstname": "Leo", "LastName": "Thomson"}, | ||||
|                         "family_tree": { | ||||
|                             "father": { | ||||
|                                 "name": "Father001", | ||||
|                                 "father": { | ||||
|                                     "Name": "Father002", | ||||
|                                     "father": { | ||||
|                                         "name": "Father003", | ||||
|                                         "father": {"Name": "Father004"}, | ||||
|                                     }, | ||||
|                                 }, | ||||
|                             } | ||||
|                         }, | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         ] | ||||
|         expected = [ | ||||
|             { | ||||
|                 "CreatedBy.user.name.firstname": "Leo", | ||||
|                 "CreatedBy.user.name.LastName": "Thomson", | ||||
|                 "CreatedBy.user.family_tree.father.name": "Father001", | ||||
|                 "CreatedBy.user.family_tree.father.father.Name": "Father002", | ||||
|                 "CreatedBy.user.family_tree.father.father.father.name": "Father003", | ||||
|                 "CreatedBy.user.family_tree.father.father.father.father.Name": "Father004",  # noqa: E501 | ||||
|             } | ||||
|         ] | ||||
|         output = nested_to_record(input_data, max_level=max_level) | ||||
|         assert output == expected | ||||
|  | ||||
|     def test_series_non_zero_index(self): | ||||
|         # GH 19020 | ||||
|         data = { | ||||
|             0: {"id": 1, "name": "Foo", "elements": {"a": 1}}, | ||||
|             1: {"id": 2, "name": "Bar", "elements": {"b": 2}}, | ||||
|             2: {"id": 3, "name": "Baz", "elements": {"c": 3}}, | ||||
|         } | ||||
|         s = Series(data) | ||||
|         s.index = [1, 2, 3] | ||||
|         result = json_normalize(s) | ||||
|         expected = DataFrame( | ||||
|             { | ||||
|                 "id": [1, 2, 3], | ||||
|                 "name": ["Foo", "Bar", "Baz"], | ||||
|                 "elements.a": [1.0, np.nan, np.nan], | ||||
|                 "elements.b": [np.nan, 2.0, np.nan], | ||||
|                 "elements.c": [np.nan, np.nan, 3.0], | ||||
|             } | ||||
|         ) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
| @ -0,0 +1,2188 @@ lib/python3.11/site-packages/pandas/tests/io/json/test_pandas.py (new file; diff suppressed because it is too large) | ||||
							| @ -0,0 +1,543 @@ | ||||
| from collections.abc import Iterator | ||||
| from io import StringIO | ||||
| from pathlib import Path | ||||
|  | ||||
| import numpy as np | ||||
| import pytest | ||||
|  | ||||
| import pandas as pd | ||||
| from pandas import ( | ||||
|     DataFrame, | ||||
|     read_json, | ||||
| ) | ||||
| import pandas._testing as tm | ||||
|  | ||||
| from pandas.io.json._json import JsonReader | ||||
|  | ||||
| pytestmark = pytest.mark.filterwarnings( | ||||
|     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" | ||||
| ) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def lines_json_df(): | ||||
|     df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) | ||||
|     return df.to_json(lines=True, orient="records") | ||||
|  | ||||
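| # With the trailing newline, the fixture above should serialize to (sketch): | ||||
| #   {"A":1,"B":4} | ||||
| #   {"A":2,"B":5} | ||||
| #   {"A":3,"B":6} | ||||
|  | ||||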
|  | ||||
| @pytest.fixture(params=["ujson", "pyarrow"]) | ||||
| def engine(request): | ||||
|     if request.param == "pyarrow": | ||||
|         pytest.importorskip("pyarrow.json") | ||||
|     return request.param | ||||
|  | ||||
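| # The "pyarrow" param is skipped automatically when the optional pyarrow.json | ||||
| # module is unavailable, so the rest of the suite still runs on ujson alone. | ||||
|  | ||||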
|  | ||||
| def test_read_jsonl(): | ||||
|     # GH9180 | ||||
|     result = read_json(StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n'), lines=True) | ||||
|     expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_read_jsonl_engine_pyarrow(datapath, engine): | ||||
|     result = read_json( | ||||
|         datapath("io", "json", "data", "line_delimited.json"), | ||||
|         lines=True, | ||||
|         engine=engine, | ||||
|     ) | ||||
|     expected = DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]}) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
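| # Hedged usage note: the pyarrow engine only accepts a path-like input with | ||||
| # line-delimited JSON, e.g. read_json("data.jsonl", lines=True, | ||||
| # engine="pyarrow"); most non-path call shapes below are xfailed for this. | ||||
|  | ||||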
|  | ||||
| def test_read_datetime(request, engine): | ||||
|     # GH33787 | ||||
|     if engine == "pyarrow": | ||||
|         # GH 48893 | ||||
|         reason = "Pyarrow only supports a file path as an input and line delimited json" | ||||
|         request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError)) | ||||
|  | ||||
|     df = DataFrame( | ||||
|         [([1, 2], ["2020-03-05", "2020-04-08T09:58:49+00:00"], "hector")], | ||||
|         columns=["accounts", "date", "name"], | ||||
|     ) | ||||
|     json_line = df.to_json(lines=True, orient="records") | ||||
|  | ||||
|     if engine == "pyarrow": | ||||
|         result = read_json(StringIO(json_line), engine=engine) | ||||
|     else: | ||||
|         result = read_json(StringIO(json_line), engine=engine) | ||||
|     expected = DataFrame( | ||||
|         [[1, "2020-03-05", "hector"], [2, "2020-04-08T09:58:49+00:00", "hector"]], | ||||
|         columns=["accounts", "date", "name"], | ||||
|     ) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_read_jsonl_unicode_chars(): | ||||
|     # GH15132: non-ascii unicode characters | ||||
|     # \u201d == RIGHT DOUBLE QUOTATION MARK | ||||
|  | ||||
|     # simulate file handle | ||||
|     json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' | ||||
|     json = StringIO(json) | ||||
|     result = read_json(json, lines=True) | ||||
|     expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|     # simulate string | ||||
|     json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' | ||||
|     result = read_json(StringIO(json), lines=True) | ||||
|     expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_jsonl(): | ||||
|     # GH9180 | ||||
|     df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) | ||||
|     result = df.to_json(orient="records", lines=True) | ||||
|     expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n' | ||||
|     assert result == expected | ||||
|  | ||||
|     df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"]) | ||||
|     result = df.to_json(orient="records", lines=True) | ||||
|     expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' | ||||
|     assert result == expected | ||||
|     tm.assert_frame_equal(read_json(StringIO(result), lines=True), df) | ||||
|  | ||||
|     # GH15096: escaped characters in columns and data | ||||
|     df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) | ||||
|     result = df.to_json(orient="records", lines=True) | ||||
|     expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n' | ||||
|     assert result == expected | ||||
|     tm.assert_frame_equal(read_json(StringIO(result), lines=True), df) | ||||
|  | ||||
|  | ||||
| def test_to_jsonl_count_new_lines(): | ||||
|     # GH36888 | ||||
|     df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) | ||||
|     actual_new_lines_count = df.to_json(orient="records", lines=True).count("\n") | ||||
|     expected_new_lines_count = 2 | ||||
|     assert actual_new_lines_count == expected_new_lines_count | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("chunksize", [1, 1.0]) | ||||
| def test_readjson_chunks(request, lines_json_df, chunksize, engine): | ||||
|     # Basic test that read_json(chunks=True) gives the same result as | ||||
|     # read_json(chunks=False) | ||||
|     # GH17048: memory usage when lines=True | ||||
|  | ||||
|     if engine == "pyarrow": | ||||
|         # GH 48893 | ||||
|         reason = ( | ||||
|             "Pyarrow only supports a file path as an input and line delimited json" | ||||
|             "and doesn't support chunksize parameter." | ||||
|         ) | ||||
|         request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError)) | ||||
|  | ||||
|     unchunked = read_json(StringIO(lines_json_df), lines=True) | ||||
|     with read_json( | ||||
|         StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine | ||||
|     ) as reader: | ||||
|         chunked = pd.concat(reader) | ||||
|  | ||||
|     tm.assert_frame_equal(chunked, unchunked) | ||||
|  | ||||
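| # Usage sketch (illustrative, not part of the tests): with chunksize set, | ||||
| # read_json returns a JsonReader context manager yielding DataFrames lazily: | ||||
| # | ||||
| #   with read_json(StringIO(lines_json_df), lines=True, chunksize=1) as reader: | ||||
| #       for chunk in reader: | ||||
| #           ...  # each chunk here is a one-row DataFrame | ||||
|  | ||||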
|  | ||||
| def test_readjson_chunksize_requires_lines(lines_json_df, engine): | ||||
|     msg = "chunksize can only be passed if lines=True" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         with read_json( | ||||
|             StringIO(lines_json_df), lines=False, chunksize=2, engine=engine | ||||
|         ) as _: | ||||
|             pass | ||||
|  | ||||
|  | ||||
| def test_readjson_chunks_series(request, engine): | ||||
|     if engine == "pyarrow": | ||||
|         # GH 48893 | ||||
|         reason = ( | ||||
|             "Pyarrow only supports a file path as an input and line delimited json" | ||||
|             "and doesn't support chunksize parameter." | ||||
|         ) | ||||
|         request.applymarker(pytest.mark.xfail(reason=reason)) | ||||
|  | ||||
|     # Test reading line-format JSON to Series with chunksize param | ||||
|     s = pd.Series({"A": 1, "B": 2}) | ||||
|  | ||||
|     strio = StringIO(s.to_json(lines=True, orient="records")) | ||||
|     unchunked = read_json(strio, lines=True, typ="Series", engine=engine) | ||||
|  | ||||
|     strio = StringIO(s.to_json(lines=True, orient="records")) | ||||
|     with read_json( | ||||
|         strio, lines=True, typ="Series", chunksize=1, engine=engine | ||||
|     ) as reader: | ||||
|         chunked = pd.concat(reader) | ||||
|  | ||||
|     tm.assert_series_equal(chunked, unchunked) | ||||
|  | ||||
|  | ||||
| def test_readjson_each_chunk(request, lines_json_df, engine): | ||||
|     if engine == "pyarrow": | ||||
|         # GH 48893 | ||||
|         reason = ( | ||||
|             "Pyarrow only supports a file path as an input and line delimited json" | ||||
|             "and doesn't support chunksize parameter." | ||||
|         ) | ||||
|         request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError)) | ||||
|  | ||||
|     # Other tests check that the final result of read_json(chunksize=True) | ||||
|     # is correct. This checks the intermediate chunks. | ||||
|     with read_json( | ||||
|         StringIO(lines_json_df), lines=True, chunksize=2, engine=engine | ||||
|     ) as reader: | ||||
|         chunks = list(reader) | ||||
|     assert chunks[0].shape == (2, 2) | ||||
|     assert chunks[1].shape == (1, 2) | ||||
|  | ||||
|  | ||||
| def test_readjson_chunks_from_file(request, engine): | ||||
|     if engine == "pyarrow": | ||||
|         # GH 48893 | ||||
|         reason = ( | ||||
|             "Pyarrow only supports a file path as an input and line delimited json" | ||||
|             "and doesn't support chunksize parameter." | ||||
|         ) | ||||
|         request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError)) | ||||
|  | ||||
|     with tm.ensure_clean("test.json") as path: | ||||
|         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) | ||||
|         df.to_json(path, lines=True, orient="records") | ||||
|         with read_json(path, lines=True, chunksize=1, engine=engine) as reader: | ||||
|             chunked = pd.concat(reader) | ||||
|         unchunked = read_json(path, lines=True, engine=engine) | ||||
|         tm.assert_frame_equal(unchunked, chunked) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("chunksize", [None, 1]) | ||||
| def test_readjson_chunks_closes(chunksize): | ||||
|     with tm.ensure_clean("test.json") as path: | ||||
|         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) | ||||
|         df.to_json(path, lines=True, orient="records") | ||||
|         reader = JsonReader( | ||||
|             path, | ||||
|             orient=None, | ||||
|             typ="frame", | ||||
|             dtype=True, | ||||
|             convert_axes=True, | ||||
|             convert_dates=True, | ||||
|             keep_default_dates=True, | ||||
|             precise_float=False, | ||||
|             date_unit=None, | ||||
|             encoding=None, | ||||
|             lines=True, | ||||
|             chunksize=chunksize, | ||||
|             compression=None, | ||||
|             nrows=None, | ||||
|         ) | ||||
|         with reader: | ||||
|             reader.read() | ||||
|         assert ( | ||||
|             reader.handles.handle.closed | ||||
|         ), f"didn't close stream with chunksize = {chunksize}" | ||||
|  | ||||
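| # Note: JsonReader is constructed directly above only to reach its private | ||||
| # handles attribute; ordinary callers should go through read_json instead. | ||||
|  | ||||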
|  | ||||
| @pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"]) | ||||
| def test_readjson_invalid_chunksize(lines_json_df, chunksize, engine): | ||||
|     msg = r"'chunksize' must be an integer >=1" | ||||
|  | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         with read_json( | ||||
|             StringIO(lines_json_df), lines=True, chunksize=chunksize, engine=engine | ||||
|         ) as _: | ||||
|             pass | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("chunksize", [None, 1, 2]) | ||||
| def test_readjson_chunks_multiple_empty_lines(chunksize): | ||||
|     j = """ | ||||
|  | ||||
|     {"A":1,"B":4} | ||||
|  | ||||
|  | ||||
|  | ||||
|     {"A":2,"B":5} | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|     {"A":3,"B":6} | ||||
|     """ | ||||
|     orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) | ||||
|     test = read_json(StringIO(j), lines=True, chunksize=chunksize) | ||||
|     if chunksize is not None: | ||||
|         with test: | ||||
|             test = pd.concat(test) | ||||
|     tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}") | ||||
|  | ||||
|  | ||||
| def test_readjson_unicode(request, monkeypatch, engine): | ||||
|     if engine == "pyarrow": | ||||
|         # GH 48893 | ||||
|         reason = ( | ||||
|             "Pyarrow only supports a file path as an input and line delimited json" | ||||
|             "and doesn't support chunksize parameter." | ||||
|         ) | ||||
|         request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError)) | ||||
|  | ||||
|     with tm.ensure_clean("test.json") as path: | ||||
|         monkeypatch.setattr("locale.getpreferredencoding", lambda do_setlocale: "cp949") | ||||
|         with open(path, "w", encoding="utf-8") as f: | ||||
|             f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}') | ||||
|  | ||||
|         result = read_json(path, engine=engine) | ||||
|         expected = DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]}) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("nrows", [1, 2]) | ||||
| def test_readjson_nrows(nrows, engine): | ||||
|     # GH 33916 | ||||
|     # Test reading line-format JSON to Series with nrows param | ||||
|     jsonl = """{"a": 1, "b": 2} | ||||
|         {"a": 3, "b": 4} | ||||
|         {"a": 5, "b": 6} | ||||
|         {"a": 7, "b": 8}""" | ||||
|     result = read_json(StringIO(jsonl), lines=True, nrows=nrows) | ||||
|     expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] | ||||
|     tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("nrows,chunksize", [(2, 2), (4, 2)]) | ||||
| def test_readjson_nrows_chunks(request, nrows, chunksize, engine): | ||||
|     # GH 33916 | ||||
|     # Test reading line-format JSON to Series with nrows and chunksize param | ||||
|     if engine == "pyarrow": | ||||
|         # GH 48893 | ||||
|         reason = ( | ||||
|             "Pyarrow only supports a file path as an input and line delimited json" | ||||
|             "and doesn't support chunksize parameter." | ||||
|         ) | ||||
|         request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError)) | ||||
|  | ||||
|     jsonl = """{"a": 1, "b": 2} | ||||
|         {"a": 3, "b": 4} | ||||
|         {"a": 5, "b": 6} | ||||
|         {"a": 7, "b": 8}""" | ||||
|  | ||||
|     if engine != "pyarrow": | ||||
|         with read_json( | ||||
|             StringIO(jsonl), lines=True, nrows=nrows, chunksize=chunksize, engine=engine | ||||
|         ) as reader: | ||||
|             chunked = pd.concat(reader) | ||||
|     else: | ||||
|         with read_json( | ||||
|             jsonl, lines=True, nrows=nrows, chunksize=chunksize, engine=engine | ||||
|         ) as reader: | ||||
|             chunked = pd.concat(reader) | ||||
|     expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] | ||||
|     tm.assert_frame_equal(chunked, expected) | ||||
|  | ||||
|  | ||||
| def test_readjson_nrows_requires_lines(engine): | ||||
|     # GH 33916 | ||||
|     # Test ValueError raised if nrows is set without setting lines in read_json | ||||
|     jsonl = """{"a": 1, "b": 2} | ||||
|         {"a": 3, "b": 4} | ||||
|         {"a": 5, "b": 6} | ||||
|         {"a": 7, "b": 8}""" | ||||
|     msg = "nrows can only be passed if lines=True" | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         read_json(jsonl, lines=False, nrows=2, engine=engine) | ||||
|  | ||||
|  | ||||
| def test_readjson_lines_chunks_fileurl(request, datapath, engine): | ||||
|     # GH 27135 | ||||
|     # Test reading line-format JSON from file url | ||||
|     if engine == "pyarrow": | ||||
|         # GH 48893 | ||||
|         reason = ( | ||||
|             "Pyarrow only supports a file path as an input and line delimited json" | ||||
|             "and doesn't support chunksize parameter." | ||||
|         ) | ||||
|         request.applymarker(pytest.mark.xfail(reason=reason, raises=ValueError)) | ||||
|  | ||||
|     df_list_expected = [ | ||||
|         DataFrame([[1, 2]], columns=["a", "b"], index=[0]), | ||||
|         DataFrame([[3, 4]], columns=["a", "b"], index=[1]), | ||||
|         DataFrame([[5, 6]], columns=["a", "b"], index=[2]), | ||||
|     ] | ||||
|     os_path = datapath("io", "json", "data", "line_delimited.json") | ||||
|     file_url = Path(os_path).as_uri() | ||||
|     with read_json(file_url, lines=True, chunksize=1, engine=engine) as url_reader: | ||||
|         for index, chunk in enumerate(url_reader): | ||||
|             tm.assert_frame_equal(chunk, df_list_expected[index]) | ||||
|  | ||||
|  | ||||
| def test_chunksize_is_incremental(): | ||||
|     # See https://github.com/pandas-dev/pandas/issues/34548 | ||||
|     jsonl = ( | ||||
|         """{"a": 1, "b": 2} | ||||
|         {"a": 3, "b": 4} | ||||
|         {"a": 5, "b": 6} | ||||
|         {"a": 7, "b": 8}\n""" | ||||
|         * 1000 | ||||
|     ) | ||||
|  | ||||
|     class MyReader: | ||||
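|         # File-like wrapper that counts how often pandas touches the | ||||
|         # underlying stream, whether via read() or via iteration. | ||||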
|         def __init__(self, contents) -> None: | ||||
|             self.read_count = 0 | ||||
|             self.stringio = StringIO(contents) | ||||
|  | ||||
|         def read(self, *args): | ||||
|             self.read_count += 1 | ||||
|             return self.stringio.read(*args) | ||||
|  | ||||
|         def __iter__(self) -> Iterator: | ||||
|             self.read_count += 1 | ||||
|             return iter(self.stringio) | ||||
|  | ||||
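|     # 4000 input lines with chunksize=100 should yield ~40 chunks; the | ||||
|     # asserts below check that the stream is consumed through many small | ||||
|     # read() calls rather than a single up-front slurp. | ||||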
|     reader = MyReader(jsonl) | ||||
|     assert len(list(read_json(reader, lines=True, chunksize=100))) > 1 | ||||
|     assert reader.read_count > 10 | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("orient_", ["split", "index", "table"]) | ||||
| def test_to_json_append_orient(orient_): | ||||
|     # GH 35849 | ||||
|     # Test ValueError when orient is not 'records' | ||||
|     df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) | ||||
|     msg = ( | ||||
|         r"mode='a' \(append\) is only supported when " | ||||
|         "lines is True and orient is 'records'" | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.to_json(mode="a", orient=orient_) | ||||
|  | ||||
|  | ||||
| def test_to_json_append_lines(): | ||||
|     # GH 35849 | ||||
|     # Test ValueError when lines is not True | ||||
|     df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) | ||||
|     msg = ( | ||||
|         r"mode='a' \(append\) is only supported when " | ||||
|         "lines is True and orient is 'records'" | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.to_json(mode="a", lines=False, orient="records") | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize("mode_", ["r", "x"]) | ||||
| def test_to_json_append_mode(mode_): | ||||
|     # GH 35849 | ||||
|     # Test ValueError when mode is not supported option | ||||
|     df = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) | ||||
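|     # Note: no space after the period; the concatenated pattern must | ||||
|     # mirror the raised message verbatim for the regex match to succeed. | ||||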
|     msg = ( | ||||
|         f"mode={mode_} is not a valid option." | ||||
|         "Only 'w' and 'a' are currently supported." | ||||
|     ) | ||||
|     with pytest.raises(ValueError, match=msg): | ||||
|         df.to_json(mode=mode_, lines=False, orient="records") | ||||
|  | ||||
|  | ||||
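| # The only append form to_json supports is line-delimited records, i.e. | ||||
| # df.to_json(path, mode="a", lines=True, orient="records"); the tests | ||||
| # below round-trip files built that way. | ||||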
| def test_to_json_append_output_consistent_columns(): | ||||
|     # GH 35849 | ||||
|     # Testing that resulting output reads in as expected. | ||||
|     # Testing same columns, new rows | ||||
|     df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) | ||||
|     df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]}) | ||||
|  | ||||
|     expected = DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}) | ||||
|     with tm.ensure_clean("test.json") as path: | ||||
|         # Save dataframes to the same file | ||||
|         df1.to_json(path, lines=True, orient="records") | ||||
|         df2.to_json(path, mode="a", lines=True, orient="records") | ||||
|  | ||||
|         # Read path file | ||||
|         result = read_json(path, lines=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_json_append_output_inconsistent_columns(): | ||||
|     # GH 35849 | ||||
|     # Testing that resulting output reads in as expected. | ||||
|     # Testing one new column, one old column, new rows | ||||
|     df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) | ||||
|     df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) | ||||
|  | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             "col1": [1, 2, None, None], | ||||
|             "col2": ["a", "b", "e", "f"], | ||||
|             "col3": [np.nan, np.nan, "!", "#"], | ||||
|         } | ||||
|     ) | ||||
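|     # Rows from a frame that lacked a column read back with nulls in that | ||||
|     # column; with missing values present, int col1 is promoted to float. | ||||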
|     with tm.ensure_clean("test.json") as path: | ||||
|         # Save dataframes to the same file | ||||
|         df1.to_json(path, mode="a", lines=True, orient="records") | ||||
|         df3.to_json(path, mode="a", lines=True, orient="records") | ||||
|  | ||||
|         # Read path file | ||||
|         result = read_json(path, lines=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_json_append_output_different_columns(): | ||||
|     # GH 35849 | ||||
|     # Testing that resulting output reads in as expected. | ||||
|     # Testing same, differing and new columns | ||||
|     df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) | ||||
|     df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]}) | ||||
|     df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) | ||||
|     df4 = DataFrame({"col4": [True, False]}) | ||||
|  | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             "col1": [1, 2, 3, 4, None, None, None, None], | ||||
|             "col2": ["a", "b", "c", "d", "e", "f", np.nan, np.nan], | ||||
|             "col3": [np.nan, np.nan, np.nan, np.nan, "!", "#", np.nan, np.nan], | ||||
|             "col4": [None, None, None, None, None, None, True, False], | ||||
|         } | ||||
|     ).astype({"col4": "float"}) | ||||
|     with tm.ensure_clean("test.json") as path: | ||||
|         # Save dataframes to the same file | ||||
|         df1.to_json(path, mode="a", lines=True, orient="records") | ||||
|         df2.to_json(path, mode="a", lines=True, orient="records") | ||||
|         df3.to_json(path, mode="a", lines=True, orient="records") | ||||
|         df4.to_json(path, mode="a", lines=True, orient="records") | ||||
|  | ||||
|         # Read path file | ||||
|         result = read_json(path, lines=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
|  | ||||
|  | ||||
| def test_to_json_append_output_different_columns_reordered(): | ||||
|     # GH 35849 | ||||
|     # Testing that resulting output reads in as expected. | ||||
|     # Testing specific result column order. | ||||
|     df1 = DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) | ||||
|     df2 = DataFrame({"col1": [3, 4], "col2": ["c", "d"]}) | ||||
|     df3 = DataFrame({"col2": ["e", "f"], "col3": ["!", "#"]}) | ||||
|     df4 = DataFrame({"col4": [True, False]}) | ||||
|  | ||||
|     # df4, df3, df2, df1 (in that order) | ||||
|     expected = DataFrame( | ||||
|         { | ||||
|             "col4": [True, False, None, None, None, None, None, None], | ||||
|             "col2": [np.nan, np.nan, "e", "f", "c", "d", "a", "b"], | ||||
|             "col3": [np.nan, np.nan, "!", "#", np.nan, np.nan, np.nan, np.nan], | ||||
|             "col1": [None, None, None, None, 3, 4, 1, 2], | ||||
|         } | ||||
|     ).astype({"col4": "float"}) | ||||
|     with tm.ensure_clean("test.json") as path: | ||||
|         # Save dataframes to the same file | ||||
|         df4.to_json(path, mode="a", lines=True, orient="records") | ||||
|         df3.to_json(path, mode="a", lines=True, orient="records") | ||||
|         df2.to_json(path, mode="a", lines=True, orient="records") | ||||
|         df1.to_json(path, mode="a", lines=True, orient="records") | ||||
|  | ||||
|         # Read path file | ||||
|         result = read_json(path, lines=True) | ||||
|         tm.assert_frame_equal(result, expected) | ||||
							
								
								
									
lib/python3.11/site-packages/pandas/tests/io/json/test_ujson.py (new file, 1087 lines)
File diff suppressed because it is too large.