done

15  lib/python3.11/site-packages/pandas/io/json/__init__.py  (new file)
@@ -0,0 +1,15 @@
from pandas.io.json._json import (
    read_json,
    to_json,
    ujson_dumps,
    ujson_loads,
)
from pandas.io.json._table_schema import build_table_schema

__all__ = [
    "ujson_dumps",
    "ujson_loads",
    "read_json",
    "to_json",
    "build_table_schema",
]
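
Editor's note (not part of the commit): a minimal usage sketch of the API this __init__.py re-exports, assuming a standard pandas 2.x install.

    import pandas as pd
    from pandas.io.json import build_table_schema, ujson_dumps, ujson_loads

    df = pd.DataFrame({"a": [1, 2]})
    schema = build_table_schema(df)          # Table Schema dict describing df
    payload = ujson_dumps({"rows": [1, 2]})  # fast JSON encoding via ujson
    assert ujson_loads(payload) == {"rows": [1, 2]}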

1494  lib/python3.11/site-packages/pandas/io/json/_json.py  (new file)
(File diff suppressed because it is too large)

544  lib/python3.11/site-packages/pandas/io/json/_normalize.py  (new file)
@@ -0,0 +1,544 @@
# ---------------------------------------------------------------------
# JSON normalization routines
from __future__ import annotations

from collections import (
    abc,
    defaultdict,
)
import copy
from typing import (
    TYPE_CHECKING,
    Any,
    DefaultDict,
)

import numpy as np

from pandas._libs.writers import convert_json_to_lines

import pandas as pd
from pandas import DataFrame

if TYPE_CHECKING:
    from collections.abc import Iterable

    from pandas._typing import (
        IgnoreRaise,
        Scalar,
    )


def convert_to_line_delimits(s: str) -> str:
    """
    Helper function that converts JSON lists to line delimited JSON.
    """
    # Determine whether we have a JSON list to turn into lines; otherwise just
    # return the JSON object, since only lists can be converted.
    if not (s[0] == "[" and s[-1] == "]"):
        return s
    s = s[1:-1]

    return convert_json_to_lines(s)
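
# Editor's note (not part of the commit): a hedged sketch of the helper above.
# convert_json_to_lines (a pandas C helper) rewrites the top-level commas
# between array elements as newlines, so roughly:
#
#   convert_to_line_delimits('[{"a":1},{"a":2}]')  ->  '{"a":1}\n{"a":2}'
#
# Input that is not a complete JSON list is returned unchanged.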

def nested_to_record(
    ds,
    prefix: str = "",
    sep: str = ".",
    level: int = 0,
    max_level: int | None = None,
):
    """
    A simplified json_normalize

    Converts a nested dict into a flat dict ("record"); unlike json_normalize,
    it does not attempt to extract a subset of the data.

    Parameters
    ----------
    ds : dict or list of dicts
    prefix : str, optional, default: ""
        The prefix prepended to flattened keys below the top level.
    sep : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
    level : int, optional, default: 0
        The number of levels in the json string.

    max_level : int, optional, default: None
        The max depth to normalize.

    Returns
    -------
    d - dict or list of dicts, matching `ds`

    Examples
    --------
    >>> nested_to_record(
    ...     dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2))
    ... )
    {\
'flat1': 1, \
'dict1.c': 1, \
'dict1.d': 2, \
'nested.e.c': 1, \
'nested.e.d': 2, \
'nested.d': 2\
}
    """
    singleton = False
    if isinstance(ds, dict):
        ds = [ds]
        singleton = True
    new_ds = []
    for d in ds:
        new_d = copy.deepcopy(d)
        for k, v in d.items():
            # each key gets renamed with prefix
            if not isinstance(k, str):
                k = str(k)
            if level == 0:
                newkey = k
            else:
                newkey = prefix + sep + k

            # flatten if type is dict and
            # current dict level < maximum level provided;
            # only dicts get recurse-flattened,
            # and only at level>1 do we rename the rest of the keys
            if not isinstance(v, dict) or (
                max_level is not None and level >= max_level
            ):
                if level != 0:  # so we skip copying for top level, common case
                    v = new_d.pop(k)
                    new_d[newkey] = v
                continue

            v = new_d.pop(k)
            new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level))
        new_ds.append(new_d)

    if singleton:
        return new_ds[0]
    return new_ds
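
# Editor's note (not part of the commit): max_level caps the recursion depth;
# a hedged sketch of the behavior:
#
#   nested_to_record({"a": {"b": {"c": 1}}}, max_level=1)
#   -> {"a.b": {"c": 1}}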

def _normalise_json(
    data: Any,
    key_string: str,
    normalized_dict: dict[str, Any],
    separator: str,
) -> dict[str, Any]:
    """
    Main recursive function.
    Designed for the most basic use case of pd.json_normalize(data),
    intended as a performance improvement; see #15621.

    Parameters
    ----------
    data : Any
        Type dependent on types contained within nested Json
    key_string : str
        New key (with separator(s) in) for data
    normalized_dict : dict
        The new normalized/flattened Json dict
    separator : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
    """
    if isinstance(data, dict):
        for key, value in data.items():
            new_key = f"{key_string}{separator}{key}"

            if not key_string:
                new_key = new_key.removeprefix(separator)

            _normalise_json(
                data=value,
                key_string=new_key,
                normalized_dict=normalized_dict,
                separator=separator,
            )
    else:
        normalized_dict[key_string] = data
    return normalized_dict


def _normalise_json_ordered(data: dict[str, Any], separator: str) -> dict[str, Any]:
    """
    Order the top-level keys and then recursively go to depth

    Parameters
    ----------
    data : dict or list of dicts
    separator : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar

    Returns
    -------
    dict or list of dicts, matching `normalised_json_object`
    """
    top_dict_ = {k: v for k, v in data.items() if not isinstance(v, dict)}
    nested_dict_ = _normalise_json(
        data={k: v for k, v in data.items() if isinstance(v, dict)},
        key_string="",
        normalized_dict={},
        separator=separator,
    )
    return {**top_dict_, **nested_dict_}
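
# Editor's note (not part of the commit): a small sketch of the ordering
# contract above -- scalar (non-dict) top-level keys come first, then the
# flattened nested keys:
#
#   _normalise_json_ordered({"b": 1, "a": {"x": 2}}, separator=".")
#   -> {"b": 1, "a.x": 2}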

def _simple_json_normalize(
    ds: dict | list[dict],
    sep: str = ".",
) -> dict | list[dict] | Any:
    """
    An optimized basic json_normalize

    Converts a nested dict into a flat dict ("record"); unlike
    json_normalize and nested_to_record, it doesn't do anything clever,
    but for the most basic use cases it enhances performance,
    e.g. pd.json_normalize(data).

    Parameters
    ----------
    ds : dict or list of dicts
    sep : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar

    Returns
    -------
    d - dict or list of dicts, matching `normalised_json_object`

    Examples
    --------
    >>> _simple_json_normalize(
    ...     {
    ...         "flat1": 1,
    ...         "dict1": {"c": 1, "d": 2},
    ...         "nested": {"e": {"c": 1, "d": 2}, "d": 2},
    ...     }
    ... )
    {\
'flat1': 1, \
'dict1.c': 1, \
'dict1.d': 2, \
'nested.e.c': 1, \
'nested.e.d': 2, \
'nested.d': 2\
}

    """
    normalised_json_object = {}
    # expect a dictionary, as most jsons are. However, lists are perfectly valid
    if isinstance(ds, dict):
        normalised_json_object = _normalise_json_ordered(data=ds, separator=sep)
    elif isinstance(ds, list):
        normalised_json_list = [_simple_json_normalize(row, sep=sep) for row in ds]
        return normalised_json_list
    return normalised_json_object


def json_normalize(
    data: dict | list[dict],
    record_path: str | list | None = None,
    meta: str | list[str | list[str]] | None = None,
    meta_prefix: str | None = None,
    record_prefix: str | None = None,
    errors: IgnoreRaise = "raise",
    sep: str = ".",
    max_level: int | None = None,
) -> DataFrame:
    """
    Normalize semi-structured JSON data into a flat table.

    Parameters
    ----------
    data : dict or list of dicts
        Unserialized JSON objects.
    record_path : str or list of str, default None
        Path in each object to list of records. If not passed, data will be
        assumed to be an array of records.
    meta : list of paths (str or list of str), default None
        Fields to use as metadata for each record in resulting table.
    meta_prefix : str, default None
        If not None, prefix records with dotted path, e.g. foo.bar.field if
        meta is ['foo', 'bar'].
    record_prefix : str, default None
        If not None, prefix records with dotted path, e.g. foo.bar.field if
        path to records is ['foo', 'bar'].
    errors : {'raise', 'ignore'}, default 'raise'
        Configures error handling.

        * 'ignore' : will ignore KeyError if keys listed in meta are not
          always present.
        * 'raise' : will raise KeyError if keys listed in meta are not
          always present.
    sep : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar.
    max_level : int, default None
        Max number of levels (depth of dict) to normalize.
        If None, normalizes all levels.

    Returns
    -------
    frame : DataFrame
        The normalized data, represented as a flat table.

    Examples
    --------
    >>> data = [
    ...     {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
    ...     {"name": {"given": "Mark", "family": "Regner"}},
    ...     {"id": 2, "name": "Faye Raker"},
    ... ]
    >>> pd.json_normalize(data)
        id name.first name.last name.given name.family        name
    0  1.0     Coleen      Volk        NaN         NaN         NaN
    1  NaN        NaN       NaN       Mark      Regner         NaN
    2  2.0        NaN       NaN        NaN         NaN  Faye Raker

    >>> data = [
    ...     {
    ...         "id": 1,
    ...         "name": "Cole Volk",
    ...         "fitness": {"height": 130, "weight": 60},
    ...     },
    ...     {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
    ...     {
    ...         "id": 2,
    ...         "name": "Faye Raker",
    ...         "fitness": {"height": 130, "weight": 60},
    ...     },
    ... ]
    >>> pd.json_normalize(data, max_level=0)
        id        name                        fitness
    0  1.0   Cole Volk  {'height': 130, 'weight': 60}
    1  NaN    Mark Reg  {'height': 130, 'weight': 60}
    2  2.0  Faye Raker  {'height': 130, 'weight': 60}

    Normalizes nested data up to level 1.

    >>> data = [
    ...     {
    ...         "id": 1,
    ...         "name": "Cole Volk",
    ...         "fitness": {"height": 130, "weight": 60},
    ...     },
    ...     {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
    ...     {
    ...         "id": 2,
    ...         "name": "Faye Raker",
    ...         "fitness": {"height": 130, "weight": 60},
    ...     },
    ... ]
    >>> pd.json_normalize(data, max_level=1)
        id        name  fitness.height  fitness.weight
    0  1.0   Cole Volk             130              60
    1  NaN    Mark Reg             130              60
    2  2.0  Faye Raker             130              60

    >>> data = [
    ...     {
    ...         "state": "Florida",
    ...         "shortname": "FL",
    ...         "info": {"governor": "Rick Scott"},
    ...         "counties": [
    ...             {"name": "Dade", "population": 12345},
    ...             {"name": "Broward", "population": 40000},
    ...             {"name": "Palm Beach", "population": 60000},
    ...         ],
    ...     },
    ...     {
    ...         "state": "Ohio",
    ...         "shortname": "OH",
    ...         "info": {"governor": "John Kasich"},
    ...         "counties": [
    ...             {"name": "Summit", "population": 1234},
    ...             {"name": "Cuyahoga", "population": 1337},
    ...         ],
    ...     },
    ... ]
    >>> result = pd.json_normalize(
    ...     data, "counties", ["state", "shortname", ["info", "governor"]]
    ... )
    >>> result
             name  population    state shortname info.governor
    0        Dade       12345   Florida    FL    Rick Scott
    1     Broward       40000   Florida    FL    Rick Scott
    2  Palm Beach       60000   Florida    FL    Rick Scott
    3      Summit        1234   Ohio       OH    John Kasich
    4    Cuyahoga        1337   Ohio       OH    John Kasich

    >>> data = {"A": [1, 2]}
    >>> pd.json_normalize(data, "A", record_prefix="Prefix.")
        Prefix.0
    0          1
    1          2

    Returns normalized data with columns prefixed with the given string.
    """

    def _pull_field(
        js: dict[str, Any], spec: list | str, extract_record: bool = False
    ) -> Scalar | Iterable:
        """Internal function to pull field"""
        result = js
        try:
            if isinstance(spec, list):
                for field in spec:
                    if result is None:
                        raise KeyError(field)
                    result = result[field]
            else:
                result = result[spec]
        except KeyError as e:
            if extract_record:
                raise KeyError(
                    f"Key {e} not found. If specifying a record_path, all elements of "
                    f"data should have the path."
                ) from e
            if errors == "ignore":
                return np.nan
            else:
                raise KeyError(
                    f"Key {e} not found. To replace missing values of {e} with "
                    f"np.nan, pass in errors='ignore'"
                ) from e

        return result

    def _pull_records(js: dict[str, Any], spec: list | str) -> list:
        """
        Internal function to pull the field for records; similar to
        _pull_field, but required to return a list, and raises an error
        if it encounters a non-iterable value.
        """
        result = _pull_field(js, spec, extract_record=True)

        # GH 31507, GH 30145, GH 26284: if result is not a list, raise TypeError
        # if not null, otherwise return an empty list
        if not isinstance(result, list):
            if pd.isnull(result):
                result = []
            else:
                raise TypeError(
                    f"{js} has non list value {result} for path {spec}. "
                    "Must be list or null."
                )
        return result

    if isinstance(data, list) and not data:
        return DataFrame()
    elif isinstance(data, dict):
        # A bit of a hackjob
        data = [data]
    elif isinstance(data, abc.Iterable) and not isinstance(data, str):
        # GH35923 Fix pd.json_normalize to not skip the first element of a
        # generator input
        data = list(data)
    else:
        raise NotImplementedError

    # check to see if a simple recursive function is possible to
    # improve performance (see #15621), but only for cases such
    # as pd.json_normalize(data) or pd.json_normalize(data, sep=sep)
    if (
        record_path is None
        and meta is None
        and meta_prefix is None
        and record_prefix is None
        and max_level is None
    ):
        return DataFrame(_simple_json_normalize(data, sep=sep))

    if record_path is None:
        if any([isinstance(x, dict) for x in y.values()] for y in data):
            # naive normalization, this is idempotent for flat records
            # and potentially will inflate the data considerably for
            # deeply nested structures:
            #  {VeryLong: {b: 1, c: 2}} -> {VeryLong.b: 1, VeryLong.c: 2}
            #
            # TODO: handle record values which are lists, at least error
            #       reasonably
            data = nested_to_record(data, sep=sep, max_level=max_level)
        return DataFrame(data)
    elif not isinstance(record_path, list):
        record_path = [record_path]

    if meta is None:
        meta = []
    elif not isinstance(meta, list):
        meta = [meta]

    _meta = [m if isinstance(m, list) else [m] for m in meta]

    # Disastrously inefficient for now
    records: list = []
    lengths = []

    meta_vals: DefaultDict = defaultdict(list)
    meta_keys = [sep.join(val) for val in _meta]

    def _recursive_extract(data, path, seen_meta, level: int = 0) -> None:
        if isinstance(data, dict):
            data = [data]
        if len(path) > 1:
            for obj in data:
                for val, key in zip(_meta, meta_keys):
                    if level + 1 == len(val):
                        seen_meta[key] = _pull_field(obj, val[-1])

                _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1)
        else:
            for obj in data:
                recs = _pull_records(obj, path[0])
                recs = [
                    nested_to_record(r, sep=sep, max_level=max_level)
                    if isinstance(r, dict)
                    else r
                    for r in recs
                ]

                # For repeating the metadata later
                lengths.append(len(recs))
                for val, key in zip(_meta, meta_keys):
                    if level + 1 > len(val):
                        meta_val = seen_meta[key]
                    else:
                        meta_val = _pull_field(obj, val[level:])
                    meta_vals[key].append(meta_val)
                records.extend(recs)

    _recursive_extract(data, record_path, {}, level=0)

    result = DataFrame(records)

    if record_prefix is not None:
        result = result.rename(columns=lambda x: f"{record_prefix}{x}")

    # Data types, a problem
    for k, v in meta_vals.items():
        if meta_prefix is not None:
            k = meta_prefix + k

        if k in result:
            raise ValueError(
                f"Conflicting metadata name {k}, need distinguishing prefix "
            )
        # GH 37782

        values = np.array(v, dtype=object)

        if values.ndim > 1:
            # GH 37782
            values = np.empty((len(v),), dtype=object)
            for i, v in enumerate(v):
                values[i] = v

        result[k] = values.repeat(lengths)
    return result
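
Editor's note (not part of the commit): the `errors` parameter documented above has no example in the docstring; a minimal hedged sketch of the described behavior, where a missing meta key becomes NaN under errors="ignore":

    import pandas as pd

    data = [
        {"info": {"name": "Ada"}, "records": [{"x": 1}]},
        {"records": [{"x": 2}]},  # no "info" key in this object
    ]
    out = pd.json_normalize(
        data, record_path="records", meta=[["info", "name"]], errors="ignore"
    )
    # Roughly:
    #    x info.name
    # 0  1       Ada
    # 1  2       NaN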

387  lib/python3.11/site-packages/pandas/io/json/_table_schema.py  (new file)
@@ -0,0 +1,387 @@
| """ | ||||
| Table Schema builders | ||||
|  | ||||
| https://specs.frictionlessdata.io/table-schema/ | ||||
| """ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from typing import ( | ||||
|     TYPE_CHECKING, | ||||
|     Any, | ||||
|     cast, | ||||
| ) | ||||
| import warnings | ||||
|  | ||||
| from pandas._libs import lib | ||||
| from pandas._libs.json import ujson_loads | ||||
| from pandas._libs.tslibs import timezones | ||||
| from pandas._libs.tslibs.dtypes import freq_to_period_freqstr | ||||
| from pandas.util._exceptions import find_stack_level | ||||
|  | ||||
| from pandas.core.dtypes.base import _registry as registry | ||||
| from pandas.core.dtypes.common import ( | ||||
|     is_bool_dtype, | ||||
|     is_integer_dtype, | ||||
|     is_numeric_dtype, | ||||
|     is_string_dtype, | ||||
| ) | ||||
| from pandas.core.dtypes.dtypes import ( | ||||
|     CategoricalDtype, | ||||
|     DatetimeTZDtype, | ||||
|     ExtensionDtype, | ||||
|     PeriodDtype, | ||||
| ) | ||||
|  | ||||
| from pandas import DataFrame | ||||
| import pandas.core.common as com | ||||
|  | ||||
| from pandas.tseries.frequencies import to_offset | ||||
|  | ||||
| if TYPE_CHECKING: | ||||
|     from pandas._typing import ( | ||||
|         DtypeObj, | ||||
|         JSONSerializable, | ||||
|     ) | ||||
|  | ||||
|     from pandas import Series | ||||
|     from pandas.core.indexes.multi import MultiIndex | ||||
|  | ||||
|  | ||||
| TABLE_SCHEMA_VERSION = "1.4.0" | ||||
|  | ||||
|  | ||||
| def as_json_table_type(x: DtypeObj) -> str: | ||||
|     """ | ||||
|     Convert a NumPy / pandas type to its corresponding json_table. | ||||
|  | ||||
|     Parameters | ||||
|     ---------- | ||||
|     x : np.dtype or ExtensionDtype | ||||
|  | ||||
|     Returns | ||||
|     ------- | ||||
|     str | ||||
|         the Table Schema data types | ||||
|  | ||||
|     Notes | ||||
|     ----- | ||||
|     This table shows the relationship between NumPy / pandas dtypes, | ||||
|     and Table Schema dtypes. | ||||
|  | ||||
|     ==============  ================= | ||||
|     Pandas type     Table Schema type | ||||
|     ==============  ================= | ||||
|     int64           integer | ||||
|     float64         number | ||||
|     bool            boolean | ||||
|     datetime64[ns]  datetime | ||||
|     timedelta64[ns] duration | ||||
|     object          str | ||||
|     categorical     any | ||||
|     =============== ================= | ||||
|     """ | ||||
|     if is_integer_dtype(x): | ||||
|         return "integer" | ||||
|     elif is_bool_dtype(x): | ||||
|         return "boolean" | ||||
|     elif is_numeric_dtype(x): | ||||
|         return "number" | ||||
|     elif lib.is_np_dtype(x, "M") or isinstance(x, (DatetimeTZDtype, PeriodDtype)): | ||||
|         return "datetime" | ||||
|     elif lib.is_np_dtype(x, "m"): | ||||
|         return "duration" | ||||
|     elif is_string_dtype(x): | ||||
|         return "string" | ||||
|     else: | ||||
|         return "any" | ||||

def set_default_names(data):
    """Sets index names to 'index' for regular, or 'level_x' for Multi"""
    if com.all_not_none(*data.index.names):
        nms = data.index.names
        if len(nms) == 1 and data.index.name == "index":
            warnings.warn(
                "Index name of 'index' is not round-trippable.",
                stacklevel=find_stack_level(),
            )
        elif len(nms) > 1 and any(x.startswith("level_") for x in nms):
            warnings.warn(
                "Index names beginning with 'level_' are not round-trippable.",
                stacklevel=find_stack_level(),
            )
        return data

    data = data.copy()
    if data.index.nlevels > 1:
        data.index.names = com.fill_missing_names(data.index.names)
    else:
        data.index.name = data.index.name or "index"
    return data


def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]:
    dtype = arr.dtype
    name: JSONSerializable
    if arr.name is None:
        name = "values"
    else:
        name = arr.name
    field: dict[str, JSONSerializable] = {
        "name": name,
        "type": as_json_table_type(dtype),
    }

    if isinstance(dtype, CategoricalDtype):
        cats = dtype.categories
        ordered = dtype.ordered

        field["constraints"] = {"enum": list(cats)}
        field["ordered"] = ordered
    elif isinstance(dtype, PeriodDtype):
        field["freq"] = dtype.freq.freqstr
    elif isinstance(dtype, DatetimeTZDtype):
        if timezones.is_utc(dtype.tz):
            # timezone.utc has no "zone" attr
            field["tz"] = "UTC"
        else:
            # error: "tzinfo" has no attribute "zone"
            field["tz"] = dtype.tz.zone  # type: ignore[attr-defined]
    elif isinstance(dtype, ExtensionDtype):
        field["extDtype"] = dtype.name
    return field
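
# Editor's note (not part of the commit): a hedged sketch of the field
# descriptors produced above, assuming `import pandas as pd`:
#
#   convert_pandas_type_to_json_field(pd.Series([1, 2], name="a"))
#   -> {"name": "a", "type": "integer"}
#
#   convert_pandas_type_to_json_field(pd.Series(pd.Categorical(["x"]), name="c"))
#   -> {"name": "c", "type": "any",
#       "constraints": {"enum": ["x"]}, "ordered": False}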

def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype:
    """
    Converts a JSON field descriptor into its corresponding NumPy / pandas type

    Parameters
    ----------
    field
        A JSON field descriptor

    Returns
    -------
    dtype

    Raises
    ------
    ValueError
        If the type of the provided field is unknown or currently unsupported

    Examples
    --------
    >>> convert_json_field_to_pandas_type({"name": "an_int", "type": "integer"})
    'int64'

    >>> convert_json_field_to_pandas_type(
    ...     {
    ...         "name": "a_categorical",
    ...         "type": "any",
    ...         "constraints": {"enum": ["a", "b", "c"]},
    ...         "ordered": True,
    ...     }
    ... )
    CategoricalDtype(categories=['a', 'b', 'c'], ordered=True, categories_dtype=object)

    >>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"})
    'datetime64[ns]'

    >>> convert_json_field_to_pandas_type(
    ...     {"name": "a_datetime_with_tz", "type": "datetime", "tz": "US/Central"}
    ... )
    'datetime64[ns, US/Central]'
    """
    typ = field["type"]
    if typ == "string":
        return field.get("extDtype", None)
    elif typ == "integer":
        return field.get("extDtype", "int64")
    elif typ == "number":
        return field.get("extDtype", "float64")
    elif typ == "boolean":
        return field.get("extDtype", "bool")
    elif typ == "duration":
        return "timedelta64"
    elif typ == "datetime":
        if field.get("tz"):
            return f"datetime64[ns, {field['tz']}]"
        elif field.get("freq"):
            # GH#9586 rename frequency M to ME for offsets
            offset = to_offset(field["freq"])
            freq_n, freq_name = offset.n, offset.name
            freq = freq_to_period_freqstr(freq_n, freq_name)
            # GH#47747 using datetime over period to minimize the change surface
            return f"period[{freq}]"
        else:
            return "datetime64[ns]"
    elif typ == "any":
        if "constraints" in field and "ordered" in field:
            return CategoricalDtype(
                categories=field["constraints"]["enum"], ordered=field["ordered"]
            )
        elif "extDtype" in field:
            return registry.find(field["extDtype"])
        else:
            return "object"

    raise ValueError(f"Unsupported or invalid field type: {typ}")


def build_table_schema(
    data: DataFrame | Series,
    index: bool = True,
    primary_key: bool | None = None,
    version: bool = True,
) -> dict[str, JSONSerializable]:
    """
    Create a Table schema from ``data``.

    Parameters
    ----------
    data : Series, DataFrame
    index : bool, default True
        Whether to include ``data.index`` in the schema.
    primary_key : bool or None, default None
        Column names to designate as the primary key.
        The default `None` will set `'primaryKey'` to the index
        level or levels if the index is unique.
    version : bool, default True
        Whether to include a field `pandas_version` with the version
        of pandas that last revised the table schema. This version
        can be different from the installed pandas version.

    Returns
    -------
    dict

    Notes
    -----
    See `Table Schema
    <https://pandas.pydata.org/docs/user_guide/io.html#table-schema>`__ for
    conversion types.
    Timedeltas are converted to ISO 8601 duration format, with
    9 decimal places after the seconds field for nanosecond precision.

    Categoricals are converted to the `any` dtype, and use the `enum` field
    constraint to list the allowed values. The `ordered` attribute is included
    in an `ordered` field.

    Examples
    --------
    >>> from pandas.io.json._table_schema import build_table_schema
    >>> df = pd.DataFrame(
    ...     {'A': [1, 2, 3],
    ...      'B': ['a', 'b', 'c'],
    ...      'C': pd.date_range('2016-01-01', freq='d', periods=3),
    ...     }, index=pd.Index(range(3), name='idx'))
    >>> build_table_schema(df)
    {'fields': \
[{'name': 'idx', 'type': 'integer'}, \
{'name': 'A', 'type': 'integer'}, \
{'name': 'B', 'type': 'string'}, \
{'name': 'C', 'type': 'datetime'}], \
'primaryKey': ['idx'], \
'pandas_version': '1.4.0'}
    """
    if index is True:
        data = set_default_names(data)

    schema: dict[str, Any] = {}
    fields = []

    if index:
        if data.index.nlevels > 1:
            data.index = cast("MultiIndex", data.index)
            for level, name in zip(data.index.levels, data.index.names):
                new_field = convert_pandas_type_to_json_field(level)
                new_field["name"] = name
                fields.append(new_field)
        else:
            fields.append(convert_pandas_type_to_json_field(data.index))

    if data.ndim > 1:
        for column, s in data.items():
            fields.append(convert_pandas_type_to_json_field(s))
    else:
        fields.append(convert_pandas_type_to_json_field(data))

    schema["fields"] = fields
    if index and data.index.is_unique and primary_key is None:
        if data.index.nlevels == 1:
            schema["primaryKey"] = [data.index.name]
        else:
            schema["primaryKey"] = data.index.names
    elif primary_key is not None:
        schema["primaryKey"] = primary_key

    if version:
        schema["pandas_version"] = TABLE_SCHEMA_VERSION
    return schema


def parse_table_schema(json, precise_float: bool) -> DataFrame:
    """
    Builds a DataFrame from a given schema

    Parameters
    ----------
    json :
        A JSON table schema
    precise_float : bool
        Flag controlling precision when decoding string to double values, as
        dictated by ``read_json``

    Returns
    -------
    df : DataFrame

    Raises
    ------
    NotImplementedError
        If the JSON table schema contains either timezone or timedelta data

    Notes
    -----
        Because :func:`DataFrame.to_json` uses the string 'index' to denote a
        name-less :class:`Index`, this function sets the name of the returned
        :class:`DataFrame` to ``None`` when said string is encountered with a
        normal :class:`Index`. For a :class:`MultiIndex`, the same limitation
        applies to any strings beginning with 'level_'. Therefore, an
        :class:`Index` name of 'index' and :class:`MultiIndex` names starting
        with 'level_' are not supported.

    See Also
    --------
    build_table_schema : Inverse function.
    pandas.read_json
    """
    table = ujson_loads(json, precise_float=precise_float)
    col_order = [field["name"] for field in table["schema"]["fields"]]
    df = DataFrame(table["data"], columns=col_order)[col_order]

    dtypes = {
        field["name"]: convert_json_field_to_pandas_type(field)
        for field in table["schema"]["fields"]
    }

    # No ISO constructor for Timedelta as of yet, so need to raise
    if "timedelta64" in dtypes.values():
        raise NotImplementedError(
            'table="orient" can not yet read ISO-formatted Timedelta data'
        )

    df = df.astype(dtypes)

    if "primaryKey" in table["schema"]:
        df = df.set_index(table["schema"]["primaryKey"])
        if len(df.index.names) == 1:
            if df.index.name == "index":
                df.index.name = None
        else:
            df.index.names = [
                None if x.startswith("level_") else x for x in df.index.names
            ]

    return df
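
Editor's note (not part of the commit): parse_table_schema is the consumer side of orient="table"; a minimal hedged round-trip sketch using the public read_json/to_json API:

    from io import StringIO

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
    payload = df.to_json(orient="table")  # embeds the Table Schema
    roundtrip = pd.read_json(StringIO(payload), orient="table")
    assert roundtrip.equals(df)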