lib/python3.11/site-packages/pandas/core/reshape/api.py (new file, 41 lines)
@@ -0,0 +1,41 @@
from pandas.core.reshape.concat import concat
from pandas.core.reshape.encoding import (
    from_dummies,
    get_dummies,
)
from pandas.core.reshape.melt import (
    lreshape,
    melt,
    wide_to_long,
)
from pandas.core.reshape.merge import (
    merge,
    merge_asof,
    merge_ordered,
)
from pandas.core.reshape.pivot import (
    crosstab,
    pivot,
    pivot_table,
)
from pandas.core.reshape.tile import (
    cut,
    qcut,
)

__all__ = [
    "concat",
    "crosstab",
    "cut",
    "from_dummies",
    "get_dummies",
    "lreshape",
    "melt",
    "merge",
    "merge_asof",
    "merge_ordered",
    "pivot",
    "pivot_table",
    "qcut",
    "wide_to_long",
]
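The module above only re-exports reshape helpers; user code normally reaches the same callables through the top-level pandas namespace rather than pandas.core.reshape.api. A minimal sketch, assuming only that pandas is importable as pd:

import pandas as pd

df = pd.DataFrame({"key": ["a", "b"], "val": [1, 2]})
# concat, melt, get_dummies, etc. are the same callables listed in __all__ above
wide = pd.concat([df, df], ignore_index=True)
tidy = pd.melt(wide, id_vars="key", value_vars="val")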
lib/python3.11/site-packages/pandas/core/reshape/concat.py (new file, 894 lines)
@@ -0,0 +1,894 @@
|
||||
"""
|
||||
Concat routines.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import abc
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Callable,
|
||||
Literal,
|
||||
cast,
|
||||
overload,
|
||||
)
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._config import using_copy_on_write
|
||||
|
||||
from pandas.util._decorators import cache_readonly
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_bool,
|
||||
is_iterator,
|
||||
)
|
||||
from pandas.core.dtypes.concat import concat_compat
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDataFrame,
|
||||
ABCSeries,
|
||||
)
|
||||
from pandas.core.dtypes.missing import isna
|
||||
|
||||
from pandas.core.arrays.categorical import (
|
||||
factorize_from_iterable,
|
||||
factorize_from_iterables,
|
||||
)
|
||||
import pandas.core.common as com
|
||||
from pandas.core.indexes.api import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
all_indexes_same,
|
||||
default_index,
|
||||
ensure_index,
|
||||
get_objs_combined_axis,
|
||||
get_unanimous_names,
|
||||
)
|
||||
from pandas.core.internals import concatenate_managers
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Iterable,
|
||||
Mapping,
|
||||
)
|
||||
|
||||
from pandas._typing import (
|
||||
Axis,
|
||||
AxisInt,
|
||||
HashableT,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Concatenate DataFrame objects
|
||||
|
||||
|
||||
@overload
|
||||
def concat(
|
||||
objs: Iterable[DataFrame] | Mapping[HashableT, DataFrame],
|
||||
*,
|
||||
axis: Literal[0, "index"] = ...,
|
||||
join: str = ...,
|
||||
ignore_index: bool = ...,
|
||||
keys: Iterable[Hashable] | None = ...,
|
||||
levels=...,
|
||||
names: list[HashableT] | None = ...,
|
||||
verify_integrity: bool = ...,
|
||||
sort: bool = ...,
|
||||
copy: bool | None = ...,
|
||||
) -> DataFrame:
|
||||
...
|
||||
|
||||
|
||||
@overload
|
||||
def concat(
|
||||
objs: Iterable[Series] | Mapping[HashableT, Series],
|
||||
*,
|
||||
axis: Literal[0, "index"] = ...,
|
||||
join: str = ...,
|
||||
ignore_index: bool = ...,
|
||||
keys: Iterable[Hashable] | None = ...,
|
||||
levels=...,
|
||||
names: list[HashableT] | None = ...,
|
||||
verify_integrity: bool = ...,
|
||||
sort: bool = ...,
|
||||
copy: bool | None = ...,
|
||||
) -> Series:
|
||||
...
|
||||
|
||||
|
||||
@overload
|
||||
def concat(
|
||||
objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
|
||||
*,
|
||||
axis: Literal[0, "index"] = ...,
|
||||
join: str = ...,
|
||||
ignore_index: bool = ...,
|
||||
keys: Iterable[Hashable] | None = ...,
|
||||
levels=...,
|
||||
names: list[HashableT] | None = ...,
|
||||
verify_integrity: bool = ...,
|
||||
sort: bool = ...,
|
||||
copy: bool | None = ...,
|
||||
) -> DataFrame | Series:
|
||||
...
|
||||
|
||||
|
||||
@overload
|
||||
def concat(
|
||||
objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
|
||||
*,
|
||||
axis: Literal[1, "columns"],
|
||||
join: str = ...,
|
||||
ignore_index: bool = ...,
|
||||
keys: Iterable[Hashable] | None = ...,
|
||||
levels=...,
|
||||
names: list[HashableT] | None = ...,
|
||||
verify_integrity: bool = ...,
|
||||
sort: bool = ...,
|
||||
copy: bool | None = ...,
|
||||
) -> DataFrame:
|
||||
...
|
||||
|
||||
|
||||
@overload
|
||||
def concat(
|
||||
objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
|
||||
*,
|
||||
axis: Axis = ...,
|
||||
join: str = ...,
|
||||
ignore_index: bool = ...,
|
||||
keys: Iterable[Hashable] | None = ...,
|
||||
levels=...,
|
||||
names: list[HashableT] | None = ...,
|
||||
verify_integrity: bool = ...,
|
||||
sort: bool = ...,
|
||||
copy: bool | None = ...,
|
||||
) -> DataFrame | Series:
|
||||
...
|
||||
|
||||
|
||||
def concat(
|
||||
objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
|
||||
*,
|
||||
axis: Axis = 0,
|
||||
join: str = "outer",
|
||||
ignore_index: bool = False,
|
||||
keys: Iterable[Hashable] | None = None,
|
||||
levels=None,
|
||||
names: list[HashableT] | None = None,
|
||||
verify_integrity: bool = False,
|
||||
sort: bool = False,
|
||||
copy: bool | None = None,
|
||||
) -> DataFrame | Series:
|
||||
"""
|
||||
Concatenate pandas objects along a particular axis.
|
||||
|
||||
Allows optional set logic along the other axes.
|
||||
|
||||
Can also add a layer of hierarchical indexing on the concatenation axis,
|
||||
which may be useful if the labels are the same (or overlapping) on
|
||||
the passed axis number.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
objs : a sequence or mapping of Series or DataFrame objects
|
||||
If a mapping is passed, the sorted keys will be used as the `keys`
|
||||
argument, unless it is passed, in which case the values will be
|
||||
selected (see below). Any None objects will be dropped silently unless
|
||||
they are all None in which case a ValueError will be raised.
|
||||
axis : {0/'index', 1/'columns'}, default 0
|
||||
The axis to concatenate along.
|
||||
join : {'inner', 'outer'}, default 'outer'
|
||||
How to handle indexes on other axis (or axes).
|
||||
ignore_index : bool, default False
|
||||
If True, do not use the index values along the concatenation axis. The
|
||||
resulting axis will be labeled 0, ..., n - 1. This is useful if you are
|
||||
concatenating objects where the concatenation axis does not have
|
||||
meaningful indexing information. Note the index values on the other
|
||||
axes are still respected in the join.
|
||||
keys : sequence, default None
|
||||
If multiple levels passed, should contain tuples. Construct
|
||||
hierarchical index using the passed keys as the outermost level.
|
||||
levels : list of sequences, default None
|
||||
Specific levels (unique values) to use for constructing a
|
||||
MultiIndex. Otherwise they will be inferred from the keys.
|
||||
names : list, default None
|
||||
Names for the levels in the resulting hierarchical index.
|
||||
verify_integrity : bool, default False
|
||||
Check whether the new concatenated axis contains duplicates. This can
|
||||
be very expensive relative to the actual data concatenation.
|
||||
sort : bool, default False
|
||||
Sort non-concatenation axis if it is not already aligned. One exception to
|
||||
this is when the non-concatentation axis is a DatetimeIndex and join='outer'
|
||||
and the axis is not already aligned. In that case, the non-concatenation
|
||||
axis is always sorted lexicographically.
|
||||
copy : bool, default True
|
||||
If False, do not copy data unnecessarily.
|
||||
|
||||
Returns
|
||||
-------
|
||||
object, type of objs
|
||||
When concatenating all ``Series`` along the index (axis=0), a
|
||||
``Series`` is returned. When ``objs`` contains at least one
|
||||
``DataFrame``, a ``DataFrame`` is returned. When concatenating along
|
||||
the columns (axis=1), a ``DataFrame`` is returned.
|
||||
|
||||
See Also
|
||||
--------
|
||||
DataFrame.join : Join DataFrames using indexes.
|
||||
DataFrame.merge : Merge DataFrames by indexes or columns.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The keys, levels, and names arguments are all optional.
|
||||
|
||||
A walkthrough of how this method fits in with other tools for combining
|
||||
pandas objects can be found `here
|
||||
<https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.
|
||||
|
||||
It is not recommended to build DataFrames by adding single rows in a
|
||||
for loop. Build a list of rows and make a DataFrame in a single concat.
|
||||
|
||||
Examples
|
||||
--------
|
||||
Combine two ``Series``.
|
||||
|
||||
>>> s1 = pd.Series(['a', 'b'])
|
||||
>>> s2 = pd.Series(['c', 'd'])
|
||||
>>> pd.concat([s1, s2])
|
||||
0 a
|
||||
1 b
|
||||
0 c
|
||||
1 d
|
||||
dtype: object
|
||||
|
||||
Clear the existing index and reset it in the result
|
||||
by setting the ``ignore_index`` option to ``True``.
|
||||
|
||||
>>> pd.concat([s1, s2], ignore_index=True)
|
||||
0 a
|
||||
1 b
|
||||
2 c
|
||||
3 d
|
||||
dtype: object
|
||||
|
||||
Add a hierarchical index at the outermost level of
|
||||
the data with the ``keys`` option.
|
||||
|
||||
>>> pd.concat([s1, s2], keys=['s1', 's2'])
|
||||
s1 0 a
|
||||
1 b
|
||||
s2 0 c
|
||||
1 d
|
||||
dtype: object
|
||||
|
||||
Label the index keys you create with the ``names`` option.
|
||||
|
||||
>>> pd.concat([s1, s2], keys=['s1', 's2'],
|
||||
... names=['Series name', 'Row ID'])
|
||||
Series name Row ID
|
||||
s1 0 a
|
||||
1 b
|
||||
s2 0 c
|
||||
1 d
|
||||
dtype: object
|
||||
|
||||
Combine two ``DataFrame`` objects with identical columns.
|
||||
|
||||
>>> df1 = pd.DataFrame([['a', 1], ['b', 2]],
|
||||
... columns=['letter', 'number'])
|
||||
>>> df1
|
||||
letter number
|
||||
0 a 1
|
||||
1 b 2
|
||||
>>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
|
||||
... columns=['letter', 'number'])
|
||||
>>> df2
|
||||
letter number
|
||||
0 c 3
|
||||
1 d 4
|
||||
>>> pd.concat([df1, df2])
|
||||
letter number
|
||||
0 a 1
|
||||
1 b 2
|
||||
0 c 3
|
||||
1 d 4
|
||||
|
||||
Combine ``DataFrame`` objects with overlapping columns
|
||||
and return everything. Columns outside the intersection will
|
||||
be filled with ``NaN`` values.
|
||||
|
||||
>>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
|
||||
... columns=['letter', 'number', 'animal'])
|
||||
>>> df3
|
||||
letter number animal
|
||||
0 c 3 cat
|
||||
1 d 4 dog
|
||||
>>> pd.concat([df1, df3], sort=False)
|
||||
letter number animal
|
||||
0 a 1 NaN
|
||||
1 b 2 NaN
|
||||
0 c 3 cat
|
||||
1 d 4 dog
|
||||
|
||||
Combine ``DataFrame`` objects with overlapping columns
|
||||
and return only those that are shared by passing ``inner`` to
|
||||
the ``join`` keyword argument.
|
||||
|
||||
>>> pd.concat([df1, df3], join="inner")
|
||||
letter number
|
||||
0 a 1
|
||||
1 b 2
|
||||
0 c 3
|
||||
1 d 4
|
||||
|
||||
Combine ``DataFrame`` objects horizontally along the x axis by
|
||||
passing in ``axis=1``.
|
||||
|
||||
>>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],
|
||||
... columns=['animal', 'name'])
|
||||
>>> pd.concat([df1, df4], axis=1)
|
||||
letter number animal name
|
||||
0 a 1 bird polly
|
||||
1 b 2 monkey george
|
||||
|
||||
Prevent the result from including duplicate index values with the
|
||||
``verify_integrity`` option.
|
||||
|
||||
>>> df5 = pd.DataFrame([1], index=['a'])
|
||||
>>> df5
|
||||
0
|
||||
a 1
|
||||
>>> df6 = pd.DataFrame([2], index=['a'])
|
||||
>>> df6
|
||||
0
|
||||
a 2
|
||||
>>> pd.concat([df5, df6], verify_integrity=True)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: Indexes have overlapping values: ['a']
|
||||
|
||||
Append a single row to the end of a ``DataFrame`` object.
|
||||
|
||||
>>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
|
||||
>>> df7
|
||||
a b
|
||||
0 1 2
|
||||
>>> new_row = pd.Series({'a': 3, 'b': 4})
|
||||
>>> new_row
|
||||
a 3
|
||||
b 4
|
||||
dtype: int64
|
||||
>>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
|
||||
a b
|
||||
0 1 2
|
||||
1 3 4
|
||||
"""
|
||||
if copy is None:
|
||||
if using_copy_on_write():
|
||||
copy = False
|
||||
else:
|
||||
copy = True
|
||||
elif copy and using_copy_on_write():
|
||||
copy = False
|
||||
|
||||
op = _Concatenator(
|
||||
objs,
|
||||
axis=axis,
|
||||
ignore_index=ignore_index,
|
||||
join=join,
|
||||
keys=keys,
|
||||
levels=levels,
|
||||
names=names,
|
||||
verify_integrity=verify_integrity,
|
||||
copy=copy,
|
||||
sort=sort,
|
||||
)
|
||||
|
||||
return op.get_result()
|
||||
|
||||
|
||||
class _Concatenator:
|
||||
"""
|
||||
Orchestrates a concatenation operation for BlockManagers
|
||||
"""
|
||||
|
||||
sort: bool
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
|
||||
axis: Axis = 0,
|
||||
join: str = "outer",
|
||||
keys: Iterable[Hashable] | None = None,
|
||||
levels=None,
|
||||
names: list[HashableT] | None = None,
|
||||
ignore_index: bool = False,
|
||||
verify_integrity: bool = False,
|
||||
copy: bool = True,
|
||||
sort: bool = False,
|
||||
) -> None:
|
||||
if isinstance(objs, (ABCSeries, ABCDataFrame, str)):
|
||||
raise TypeError(
|
||||
"first argument must be an iterable of pandas "
|
||||
f'objects, you passed an object of type "{type(objs).__name__}"'
|
||||
)
|
||||
|
||||
if join == "outer":
|
||||
self.intersect = False
|
||||
elif join == "inner":
|
||||
self.intersect = True
|
||||
else: # pragma: no cover
|
||||
raise ValueError(
|
||||
"Only can inner (intersect) or outer (union) join the other axis"
|
||||
)
|
||||
|
||||
if not is_bool(sort):
|
||||
raise ValueError(
|
||||
f"The 'sort' keyword only accepts boolean values; {sort} was passed."
|
||||
)
|
||||
# Incompatible types in assignment (expression has type "Union[bool, bool_]",
|
||||
# variable has type "bool")
|
||||
self.sort = sort # type: ignore[assignment]
|
||||
|
||||
self.ignore_index = ignore_index
|
||||
self.verify_integrity = verify_integrity
|
||||
self.copy = copy
|
||||
|
||||
objs, keys = self._clean_keys_and_objs(objs, keys)
|
||||
|
||||
# figure out what our result ndim is going to be
|
||||
ndims = self._get_ndims(objs)
|
||||
sample, objs = self._get_sample_object(objs, ndims, keys, names, levels)
|
||||
|
||||
# Standardize axis parameter to int
|
||||
if sample.ndim == 1:
|
||||
from pandas import DataFrame
|
||||
|
||||
axis = DataFrame._get_axis_number(axis)
|
||||
self._is_frame = False
|
||||
self._is_series = True
|
||||
else:
|
||||
axis = sample._get_axis_number(axis)
|
||||
self._is_frame = True
|
||||
self._is_series = False
|
||||
|
||||
# Need to flip BlockManager axis in the DataFrame special case
|
||||
axis = sample._get_block_manager_axis(axis)
|
||||
|
||||
# if we have mixed ndims, then convert to highest ndim
|
||||
# creating column numbers as needed
|
||||
if len(ndims) > 1:
|
||||
objs = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis)
|
||||
|
||||
self.objs = objs
|
||||
|
||||
# note: this is the BlockManager axis (since DataFrame is transposed)
|
||||
self.bm_axis = axis
|
||||
self.axis = 1 - self.bm_axis if self._is_frame else 0
|
||||
self.keys = keys
|
||||
self.names = names or getattr(keys, "names", None)
|
||||
self.levels = levels
|
||||
|
||||
def _get_ndims(self, objs: list[Series | DataFrame]) -> set[int]:
|
||||
# figure out what our result ndim is going to be
|
||||
ndims = set()
|
||||
for obj in objs:
|
||||
if not isinstance(obj, (ABCSeries, ABCDataFrame)):
|
||||
msg = (
|
||||
f"cannot concatenate object of type '{type(obj)}'; "
|
||||
"only Series and DataFrame objs are valid"
|
||||
)
|
||||
raise TypeError(msg)
|
||||
|
||||
ndims.add(obj.ndim)
|
||||
return ndims
|
||||
|
||||
def _clean_keys_and_objs(
|
||||
self,
|
||||
objs: Iterable[Series | DataFrame] | Mapping[HashableT, Series | DataFrame],
|
||||
keys,
|
||||
) -> tuple[list[Series | DataFrame], Index | None]:
|
||||
if isinstance(objs, abc.Mapping):
|
||||
if keys is None:
|
||||
keys = list(objs.keys())
|
||||
objs_list = [objs[k] for k in keys]
|
||||
else:
|
||||
objs_list = list(objs)
|
||||
|
||||
if len(objs_list) == 0:
|
||||
raise ValueError("No objects to concatenate")
|
||||
|
||||
if keys is None:
|
||||
objs_list = list(com.not_none(*objs_list))
|
||||
else:
|
||||
# GH#1649
|
||||
clean_keys = []
|
||||
clean_objs = []
|
||||
if is_iterator(keys):
|
||||
keys = list(keys)
|
||||
if len(keys) != len(objs_list):
|
||||
# GH#43485
|
||||
warnings.warn(
|
||||
"The behavior of pd.concat with len(keys) != len(objs) is "
|
||||
"deprecated. In a future version this will raise instead of "
|
||||
"truncating to the smaller of the two sequences",
|
||||
FutureWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
for k, v in zip(keys, objs_list):
|
||||
if v is None:
|
||||
continue
|
||||
clean_keys.append(k)
|
||||
clean_objs.append(v)
|
||||
objs_list = clean_objs
|
||||
|
||||
if isinstance(keys, MultiIndex):
|
||||
# TODO: retain levels?
|
||||
keys = type(keys).from_tuples(clean_keys, names=keys.names)
|
||||
else:
|
||||
name = getattr(keys, "name", None)
|
||||
keys = Index(clean_keys, name=name, dtype=getattr(keys, "dtype", None))
|
||||
|
||||
if len(objs_list) == 0:
|
||||
raise ValueError("All objects passed were None")
|
||||
|
||||
return objs_list, keys
|
||||
|
||||
def _get_sample_object(
|
||||
self,
|
||||
objs: list[Series | DataFrame],
|
||||
ndims: set[int],
|
||||
keys,
|
||||
names,
|
||||
levels,
|
||||
) -> tuple[Series | DataFrame, list[Series | DataFrame]]:
|
||||
# get the sample
|
||||
# want the highest ndim that we have, and must be non-empty
|
||||
# unless all objs are empty
|
||||
sample: Series | DataFrame | None = None
|
||||
if len(ndims) > 1:
|
||||
max_ndim = max(ndims)
|
||||
for obj in objs:
|
||||
if obj.ndim == max_ndim and np.sum(obj.shape):
|
||||
sample = obj
|
||||
break
|
||||
|
||||
else:
|
||||
# filter out the empties if we do not have multi-index possibilities
|
||||
# note: keep empty Series, as they affect the result columns / name
|
||||
non_empties = [obj for obj in objs if sum(obj.shape) > 0 or obj.ndim == 1]
|
||||
|
||||
if len(non_empties) and (
|
||||
keys is None and names is None and levels is None and not self.intersect
|
||||
):
|
||||
objs = non_empties
|
||||
sample = objs[0]
|
||||
|
||||
if sample is None:
|
||||
sample = objs[0]
|
||||
return sample, objs
|
||||
|
||||
def _sanitize_mixed_ndim(
|
||||
self,
|
||||
objs: list[Series | DataFrame],
|
||||
sample: Series | DataFrame,
|
||||
ignore_index: bool,
|
||||
axis: AxisInt,
|
||||
) -> list[Series | DataFrame]:
|
||||
# if we have mixed ndims, then convert to highest ndim
|
||||
# creating column numbers as needed
|
||||
|
||||
new_objs = []
|
||||
|
||||
current_column = 0
|
||||
max_ndim = sample.ndim
|
||||
for obj in objs:
|
||||
ndim = obj.ndim
|
||||
if ndim == max_ndim:
|
||||
pass
|
||||
|
||||
elif ndim != max_ndim - 1:
|
||||
raise ValueError(
|
||||
"cannot concatenate unaligned mixed dimensional NDFrame objects"
|
||||
)
|
||||
|
||||
else:
|
||||
name = getattr(obj, "name", None)
|
||||
if ignore_index or name is None:
|
||||
if axis == 1:
|
||||
# doing a row-wise concatenation so need everything
|
||||
# to line up
|
||||
name = 0
|
||||
else:
|
||||
# doing a column-wise concatenation so need series
|
||||
# to have unique names
|
||||
name = current_column
|
||||
current_column += 1
|
||||
|
||||
obj = sample._constructor({name: obj}, copy=False)
|
||||
|
||||
new_objs.append(obj)
|
||||
|
||||
return new_objs
|
||||
|
||||
def get_result(self):
|
||||
cons: Callable[..., DataFrame | Series]
|
||||
sample: DataFrame | Series
|
||||
|
||||
# series only
|
||||
if self._is_series:
|
||||
sample = cast("Series", self.objs[0])
|
||||
|
||||
# stack blocks
|
||||
if self.bm_axis == 0:
|
||||
name = com.consensus_name_attr(self.objs)
|
||||
cons = sample._constructor
|
||||
|
||||
arrs = [ser._values for ser in self.objs]
|
||||
|
||||
res = concat_compat(arrs, axis=0)
|
||||
|
||||
new_index: Index
|
||||
if self.ignore_index:
|
||||
# We can avoid surprisingly-expensive _get_concat_axis
|
||||
new_index = default_index(len(res))
|
||||
else:
|
||||
new_index = self.new_axes[0]
|
||||
|
||||
mgr = type(sample._mgr).from_array(res, index=new_index)
|
||||
|
||||
result = sample._constructor_from_mgr(mgr, axes=mgr.axes)
|
||||
result._name = name
|
||||
return result.__finalize__(self, method="concat")
|
||||
|
||||
# combine as columns in a frame
|
||||
else:
|
||||
data = dict(zip(range(len(self.objs)), self.objs))
|
||||
|
||||
# GH28330 Preserves subclassed objects through concat
|
||||
cons = sample._constructor_expanddim
|
||||
|
||||
index, columns = self.new_axes
|
||||
df = cons(data, index=index, copy=self.copy)
|
||||
df.columns = columns
|
||||
return df.__finalize__(self, method="concat")
|
||||
|
||||
# combine block managers
|
||||
else:
|
||||
sample = cast("DataFrame", self.objs[0])
|
||||
|
||||
mgrs_indexers = []
|
||||
for obj in self.objs:
|
||||
indexers = {}
|
||||
for ax, new_labels in enumerate(self.new_axes):
|
||||
# ::-1 to convert BlockManager ax to DataFrame ax
|
||||
if ax == self.bm_axis:
|
||||
# Suppress reindexing on concat axis
|
||||
continue
|
||||
|
||||
# 1-ax to convert BlockManager axis to DataFrame axis
|
||||
obj_labels = obj.axes[1 - ax]
|
||||
if not new_labels.equals(obj_labels):
|
||||
indexers[ax] = obj_labels.get_indexer(new_labels)
|
||||
|
||||
mgrs_indexers.append((obj._mgr, indexers))
|
||||
|
||||
new_data = concatenate_managers(
|
||||
mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy
|
||||
)
|
||||
if not self.copy and not using_copy_on_write():
|
||||
new_data._consolidate_inplace()
|
||||
|
||||
out = sample._constructor_from_mgr(new_data, axes=new_data.axes)
|
||||
return out.__finalize__(self, method="concat")
|
||||
|
||||
def _get_result_dim(self) -> int:
|
||||
if self._is_series and self.bm_axis == 1:
|
||||
return 2
|
||||
else:
|
||||
return self.objs[0].ndim
|
||||
|
||||
@cache_readonly
|
||||
def new_axes(self) -> list[Index]:
|
||||
ndim = self._get_result_dim()
|
||||
return [
|
||||
self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
|
||||
for i in range(ndim)
|
||||
]
|
||||
|
||||
def _get_comb_axis(self, i: AxisInt) -> Index:
|
||||
data_axis = self.objs[0]._get_block_manager_axis(i)
|
||||
return get_objs_combined_axis(
|
||||
self.objs,
|
||||
axis=data_axis,
|
||||
intersect=self.intersect,
|
||||
sort=self.sort,
|
||||
copy=self.copy,
|
||||
)
|
||||
|
||||
@cache_readonly
|
||||
def _get_concat_axis(self) -> Index:
|
||||
"""
|
||||
Return index to be used along concatenation axis.
|
||||
"""
|
||||
if self._is_series:
|
||||
if self.bm_axis == 0:
|
||||
indexes = [x.index for x in self.objs]
|
||||
elif self.ignore_index:
|
||||
idx = default_index(len(self.objs))
|
||||
return idx
|
||||
elif self.keys is None:
|
||||
names: list[Hashable] = [None] * len(self.objs)
|
||||
num = 0
|
||||
has_names = False
|
||||
for i, x in enumerate(self.objs):
|
||||
if x.ndim != 1:
|
||||
raise TypeError(
|
||||
f"Cannot concatenate type 'Series' with "
|
||||
f"object of type '{type(x).__name__}'"
|
||||
)
|
||||
if x.name is not None:
|
||||
names[i] = x.name
|
||||
has_names = True
|
||||
else:
|
||||
names[i] = num
|
||||
num += 1
|
||||
if has_names:
|
||||
return Index(names)
|
||||
else:
|
||||
return default_index(len(self.objs))
|
||||
else:
|
||||
return ensure_index(self.keys).set_names(self.names)
|
||||
else:
|
||||
indexes = [x.axes[self.axis] for x in self.objs]
|
||||
|
||||
if self.ignore_index:
|
||||
idx = default_index(sum(len(i) for i in indexes))
|
||||
return idx
|
||||
|
||||
if self.keys is None:
|
||||
if self.levels is not None:
|
||||
raise ValueError("levels supported only when keys is not None")
|
||||
concat_axis = _concat_indexes(indexes)
|
||||
else:
|
||||
concat_axis = _make_concat_multiindex(
|
||||
indexes, self.keys, self.levels, self.names
|
||||
)
|
||||
|
||||
self._maybe_check_integrity(concat_axis)
|
||||
|
||||
return concat_axis
|
||||
|
||||
def _maybe_check_integrity(self, concat_index: Index):
|
||||
if self.verify_integrity:
|
||||
if not concat_index.is_unique:
|
||||
overlap = concat_index[concat_index.duplicated()].unique()
|
||||
raise ValueError(f"Indexes have overlapping values: {overlap}")
|
||||
|
||||
|
||||
def _concat_indexes(indexes) -> Index:
|
||||
return indexes[0].append(indexes[1:])
|
||||
|
||||
|
||||
def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
|
||||
if (levels is None and isinstance(keys[0], tuple)) or (
|
||||
levels is not None and len(levels) > 1
|
||||
):
|
||||
zipped = list(zip(*keys))
|
||||
if names is None:
|
||||
names = [None] * len(zipped)
|
||||
|
||||
if levels is None:
|
||||
_, levels = factorize_from_iterables(zipped)
|
||||
else:
|
||||
levels = [ensure_index(x) for x in levels]
|
||||
else:
|
||||
zipped = [keys]
|
||||
if names is None:
|
||||
names = [None]
|
||||
|
||||
if levels is None:
|
||||
levels = [ensure_index(keys).unique()]
|
||||
else:
|
||||
levels = [ensure_index(x) for x in levels]
|
||||
|
||||
for level in levels:
|
||||
if not level.is_unique:
|
||||
raise ValueError(f"Level values not unique: {level.tolist()}")
|
||||
|
||||
if not all_indexes_same(indexes) or not all(level.is_unique for level in levels):
|
||||
codes_list = []
|
||||
|
||||
# things are potentially different sizes, so compute the exact codes
|
||||
# for each level and pass those to MultiIndex.from_arrays
|
||||
|
||||
for hlevel, level in zip(zipped, levels):
|
||||
to_concat = []
|
||||
if isinstance(hlevel, Index) and hlevel.equals(level):
|
||||
lens = [len(idx) for idx in indexes]
|
||||
codes_list.append(np.repeat(np.arange(len(hlevel)), lens))
|
||||
else:
|
||||
for key, index in zip(hlevel, indexes):
|
||||
# Find matching codes, include matching nan values as equal.
|
||||
mask = (isna(level) & isna(key)) | (level == key)
|
||||
if not mask.any():
|
||||
raise ValueError(f"Key {key} not in level {level}")
|
||||
i = np.nonzero(mask)[0][0]
|
||||
|
||||
to_concat.append(np.repeat(i, len(index)))
|
||||
codes_list.append(np.concatenate(to_concat))
|
||||
|
||||
concat_index = _concat_indexes(indexes)
|
||||
|
||||
# these go at the end
|
||||
if isinstance(concat_index, MultiIndex):
|
||||
levels.extend(concat_index.levels)
|
||||
codes_list.extend(concat_index.codes)
|
||||
else:
|
||||
codes, categories = factorize_from_iterable(concat_index)
|
||||
levels.append(categories)
|
||||
codes_list.append(codes)
|
||||
|
||||
if len(names) == len(levels):
|
||||
names = list(names)
|
||||
else:
|
||||
# make sure that all of the passed indices have the same nlevels
|
||||
if not len({idx.nlevels for idx in indexes}) == 1:
|
||||
raise AssertionError(
|
||||
"Cannot concat indices that do not have the same number of levels"
|
||||
)
|
||||
|
||||
# also copies
|
||||
names = list(names) + list(get_unanimous_names(*indexes))
|
||||
|
||||
return MultiIndex(
|
||||
levels=levels, codes=codes_list, names=names, verify_integrity=False
|
||||
)
|
||||
|
||||
new_index = indexes[0]
|
||||
n = len(new_index)
|
||||
kpieces = len(indexes)
|
||||
|
||||
# also copies
|
||||
new_names = list(names)
|
||||
new_levels = list(levels)
|
||||
|
||||
# construct codes
|
||||
new_codes = []
|
||||
|
||||
# do something a bit more speedy
|
||||
|
||||
for hlevel, level in zip(zipped, levels):
|
||||
hlevel_index = ensure_index(hlevel)
|
||||
mapped = level.get_indexer(hlevel_index)
|
||||
|
||||
mask = mapped == -1
|
||||
if mask.any():
|
||||
raise ValueError(
|
||||
f"Values not found in passed level: {hlevel_index[mask]!s}"
|
||||
)
|
||||
|
||||
new_codes.append(np.repeat(mapped, n))
|
||||
|
||||
if isinstance(new_index, MultiIndex):
|
||||
new_levels.extend(new_index.levels)
|
||||
new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
|
||||
else:
|
||||
new_levels.append(new_index.unique())
|
||||
single_codes = new_index.unique().get_indexer(new_index)
|
||||
new_codes.append(np.tile(single_codes, kpieces))
|
||||
|
||||
if len(new_names) < len(new_levels):
|
||||
new_names.extend(new_index.names)
|
||||
|
||||
return MultiIndex(
|
||||
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
|
||||
)
|
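The keys/names handling implemented by _make_concat_multiindex above is what produces the hierarchical result index. A short usage sketch of that behaviour, using only the documented concat API:

import pandas as pd

s1 = pd.Series([1, 2], index=["x", "y"])
s2 = pd.Series([3, 4], index=["x", "y"])
# keys become the outermost index level; names labels both levels
out = pd.concat([s1, s2], keys=["first", "second"], names=["source", "idx"])
# out.index is a MultiIndex: ("first", "x"), ("first", "y"), ("second", "x"), ("second", "y")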
lib/python3.11/site-packages/pandas/core/reshape/encoding.py (new file, 571 lines)
@@ -0,0 +1,571 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Iterable,
|
||||
)
|
||||
import itertools
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
cast,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import missing as libmissing
|
||||
from pandas._libs.sparse import IntIndex
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_integer_dtype,
|
||||
is_list_like,
|
||||
is_object_dtype,
|
||||
pandas_dtype,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import (
|
||||
ArrowDtype,
|
||||
CategoricalDtype,
|
||||
)
|
||||
|
||||
from pandas.core.arrays import SparseArray
|
||||
from pandas.core.arrays.categorical import factorize_from_iterable
|
||||
from pandas.core.arrays.string_ import StringDtype
|
||||
from pandas.core.frame import DataFrame
|
||||
from pandas.core.indexes.api import (
|
||||
Index,
|
||||
default_index,
|
||||
)
|
||||
from pandas.core.series import Series
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import NpDtype
|
||||
|
||||
|
||||
def get_dummies(
|
||||
data,
|
||||
prefix=None,
|
||||
prefix_sep: str | Iterable[str] | dict[str, str] = "_",
|
||||
dummy_na: bool = False,
|
||||
columns=None,
|
||||
sparse: bool = False,
|
||||
drop_first: bool = False,
|
||||
dtype: NpDtype | None = None,
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Convert categorical variable into dummy/indicator variables.
|
||||
|
||||
Each variable is converted into as many 0/1 variables as there are different
|
||||
values. Columns in the output are each named after a value; if the input is
|
||||
a DataFrame, the name of the original variable is prepended to the value.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : array-like, Series, or DataFrame
|
||||
Data of which to get dummy indicators.
|
||||
prefix : str, list of str, or dict of str, default None
|
||||
String to prepend to the names of the generated dummy columns.
|
||||
Pass a list with length equal to the number of columns
|
||||
when calling get_dummies on a DataFrame. Alternatively, `prefix`
|
||||
can be a dictionary mapping column names to prefixes.
|
||||
prefix_sep : str, default '_'
|
||||
If appending prefix, separator/delimiter to use. Or pass a
|
||||
list or dictionary as with `prefix`.
|
||||
dummy_na : bool, default False
|
||||
Add a column to indicate NaNs, if False NaNs are ignored.
|
||||
columns : list-like, default None
|
||||
Column names in the DataFrame to be encoded.
|
||||
If `columns` is None then all the columns with
|
||||
`object`, `string`, or `category` dtype will be converted.
|
||||
sparse : bool, default False
|
||||
Whether the dummy-encoded columns should be backed by
|
||||
a :class:`SparseArray` (True) or a regular NumPy array (False).
|
||||
drop_first : bool, default False
|
||||
Whether to get k-1 dummies out of k categorical levels by removing the
|
||||
first level.
|
||||
dtype : dtype, default bool
|
||||
Data type for new columns. Only a single dtype is allowed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
Dummy-coded data. If `data` contains other columns than the
|
||||
dummy-coded one(s), these will be prepended, unaltered, to the result.
|
||||
|
||||
See Also
|
||||
--------
|
||||
Series.str.get_dummies : Convert Series of strings to dummy codes.
|
||||
:func:`~pandas.from_dummies` : Convert dummy codes to categorical ``DataFrame``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Reference :ref:`the user guide <reshaping.dummies>` for more examples.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> s = pd.Series(list('abca'))
|
||||
|
||||
>>> pd.get_dummies(s)
|
||||
a b c
|
||||
0 True False False
|
||||
1 False True False
|
||||
2 False False True
|
||||
3 True False False
|
||||
|
||||
>>> s1 = ['a', 'b', np.nan]
|
||||
|
||||
>>> pd.get_dummies(s1)
|
||||
a b
|
||||
0 True False
|
||||
1 False True
|
||||
2 False False
|
||||
|
||||
>>> pd.get_dummies(s1, dummy_na=True)
|
||||
a b NaN
|
||||
0 True False False
|
||||
1 False True False
|
||||
2 False False True
|
||||
|
||||
>>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
|
||||
... 'C': [1, 2, 3]})
|
||||
|
||||
>>> pd.get_dummies(df, prefix=['col1', 'col2'])
|
||||
C col1_a col1_b col2_a col2_b col2_c
|
||||
0 1 True False False True False
|
||||
1 2 False True True False False
|
||||
2 3 True False False False True
|
||||
|
||||
>>> pd.get_dummies(pd.Series(list('abcaa')))
|
||||
a b c
|
||||
0 True False False
|
||||
1 False True False
|
||||
2 False False True
|
||||
3 True False False
|
||||
4 True False False
|
||||
|
||||
>>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
|
||||
b c
|
||||
0 False False
|
||||
1 True False
|
||||
2 False True
|
||||
3 False False
|
||||
4 False False
|
||||
|
||||
>>> pd.get_dummies(pd.Series(list('abc')), dtype=float)
|
||||
a b c
|
||||
0 1.0 0.0 0.0
|
||||
1 0.0 1.0 0.0
|
||||
2 0.0 0.0 1.0
|
||||
"""
|
||||
from pandas.core.reshape.concat import concat
|
||||
|
||||
dtypes_to_encode = ["object", "string", "category"]
|
||||
|
||||
if isinstance(data, DataFrame):
|
||||
# determine columns being encoded
|
||||
if columns is None:
|
||||
data_to_encode = data.select_dtypes(include=dtypes_to_encode)
|
||||
elif not is_list_like(columns):
|
||||
raise TypeError("Input must be a list-like for parameter `columns`")
|
||||
else:
|
||||
data_to_encode = data[columns]
|
||||
|
||||
# validate prefixes and separator to avoid silently dropping cols
|
||||
def check_len(item, name: str):
|
||||
if is_list_like(item):
|
||||
if not len(item) == data_to_encode.shape[1]:
|
||||
len_msg = (
|
||||
f"Length of '{name}' ({len(item)}) did not match the "
|
||||
"length of the columns being encoded "
|
||||
f"({data_to_encode.shape[1]})."
|
||||
)
|
||||
raise ValueError(len_msg)
|
||||
|
||||
check_len(prefix, "prefix")
|
||||
check_len(prefix_sep, "prefix_sep")
|
||||
|
||||
if isinstance(prefix, str):
|
||||
prefix = itertools.cycle([prefix])
|
||||
if isinstance(prefix, dict):
|
||||
prefix = [prefix[col] for col in data_to_encode.columns]
|
||||
|
||||
if prefix is None:
|
||||
prefix = data_to_encode.columns
|
||||
|
||||
# validate separators
|
||||
if isinstance(prefix_sep, str):
|
||||
prefix_sep = itertools.cycle([prefix_sep])
|
||||
elif isinstance(prefix_sep, dict):
|
||||
prefix_sep = [prefix_sep[col] for col in data_to_encode.columns]
|
||||
|
||||
with_dummies: list[DataFrame]
|
||||
if data_to_encode.shape == data.shape:
|
||||
# Encoding the entire df, do not prepend any dropped columns
|
||||
with_dummies = []
|
||||
elif columns is not None:
|
||||
# Encoding only cols specified in columns. Get all cols not in
|
||||
# columns to prepend to result.
|
||||
with_dummies = [data.drop(columns, axis=1)]
|
||||
else:
|
||||
# Encoding only object and category dtype columns. Get remaining
|
||||
# columns to prepend to result.
|
||||
with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)]
|
||||
|
||||
for col, pre, sep in zip(data_to_encode.items(), prefix, prefix_sep):
|
||||
# col is (column_name, column), use just column data here
|
||||
dummy = _get_dummies_1d(
|
||||
col[1],
|
||||
prefix=pre,
|
||||
prefix_sep=sep,
|
||||
dummy_na=dummy_na,
|
||||
sparse=sparse,
|
||||
drop_first=drop_first,
|
||||
dtype=dtype,
|
||||
)
|
||||
with_dummies.append(dummy)
|
||||
result = concat(with_dummies, axis=1)
|
||||
else:
|
||||
result = _get_dummies_1d(
|
||||
data,
|
||||
prefix,
|
||||
prefix_sep,
|
||||
dummy_na,
|
||||
sparse=sparse,
|
||||
drop_first=drop_first,
|
||||
dtype=dtype,
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _get_dummies_1d(
|
||||
data,
|
||||
prefix,
|
||||
prefix_sep: str | Iterable[str] | dict[str, str] = "_",
|
||||
dummy_na: bool = False,
|
||||
sparse: bool = False,
|
||||
drop_first: bool = False,
|
||||
dtype: NpDtype | None = None,
|
||||
) -> DataFrame:
|
||||
from pandas.core.reshape.concat import concat
|
||||
|
||||
# Series avoids inconsistent NaN handling
|
||||
codes, levels = factorize_from_iterable(Series(data, copy=False))
|
||||
|
||||
if dtype is None and hasattr(data, "dtype"):
|
||||
input_dtype = data.dtype
|
||||
if isinstance(input_dtype, CategoricalDtype):
|
||||
input_dtype = input_dtype.categories.dtype
|
||||
|
||||
if isinstance(input_dtype, ArrowDtype):
|
||||
import pyarrow as pa
|
||||
|
||||
dtype = ArrowDtype(pa.bool_()) # type: ignore[assignment]
|
||||
elif (
|
||||
isinstance(input_dtype, StringDtype)
|
||||
and input_dtype.na_value is libmissing.NA
|
||||
):
|
||||
dtype = pandas_dtype("boolean") # type: ignore[assignment]
|
||||
else:
|
||||
dtype = np.dtype(bool)
|
||||
elif dtype is None:
|
||||
dtype = np.dtype(bool)
|
||||
|
||||
_dtype = pandas_dtype(dtype)
|
||||
|
||||
if is_object_dtype(_dtype):
|
||||
raise ValueError("dtype=object is not a valid dtype for get_dummies")
|
||||
|
||||
def get_empty_frame(data) -> DataFrame:
|
||||
index: Index | np.ndarray
|
||||
if isinstance(data, Series):
|
||||
index = data.index
|
||||
else:
|
||||
index = default_index(len(data))
|
||||
return DataFrame(index=index)
|
||||
|
||||
# if all NaN
|
||||
if not dummy_na and len(levels) == 0:
|
||||
return get_empty_frame(data)
|
||||
|
||||
codes = codes.copy()
|
||||
if dummy_na:
|
||||
codes[codes == -1] = len(levels)
|
||||
levels = levels.insert(len(levels), np.nan)
|
||||
|
||||
# if dummy_na, we just fake a nan level. drop_first will drop it again
|
||||
if drop_first and len(levels) == 1:
|
||||
return get_empty_frame(data)
|
||||
|
||||
number_of_cols = len(levels)
|
||||
|
||||
if prefix is None:
|
||||
dummy_cols = levels
|
||||
else:
|
||||
dummy_cols = Index([f"{prefix}{prefix_sep}{level}" for level in levels])
|
||||
|
||||
index: Index | None
|
||||
if isinstance(data, Series):
|
||||
index = data.index
|
||||
else:
|
||||
index = None
|
||||
|
||||
if sparse:
|
||||
fill_value: bool | float
|
||||
if is_integer_dtype(dtype):
|
||||
fill_value = 0
|
||||
elif dtype == np.dtype(bool):
|
||||
fill_value = False
|
||||
else:
|
||||
fill_value = 0.0
|
||||
|
||||
sparse_series = []
|
||||
N = len(data)
|
||||
sp_indices: list[list] = [[] for _ in range(len(dummy_cols))]
|
||||
mask = codes != -1
|
||||
codes = codes[mask]
|
||||
n_idx = np.arange(N)[mask]
|
||||
|
||||
for ndx, code in zip(n_idx, codes):
|
||||
sp_indices[code].append(ndx)
|
||||
|
||||
if drop_first:
|
||||
# remove first categorical level to avoid perfect collinearity
|
||||
# GH12042
|
||||
sp_indices = sp_indices[1:]
|
||||
dummy_cols = dummy_cols[1:]
|
||||
for col, ixs in zip(dummy_cols, sp_indices):
|
||||
sarr = SparseArray(
|
||||
np.ones(len(ixs), dtype=dtype),
|
||||
sparse_index=IntIndex(N, ixs),
|
||||
fill_value=fill_value,
|
||||
dtype=dtype,
|
||||
)
|
||||
sparse_series.append(Series(data=sarr, index=index, name=col, copy=False))
|
||||
|
||||
return concat(sparse_series, axis=1, copy=False)
|
||||
|
||||
else:
|
||||
# ensure ndarray layout is column-major
|
||||
shape = len(codes), number_of_cols
|
||||
dummy_dtype: NpDtype
|
||||
if isinstance(_dtype, np.dtype):
|
||||
dummy_dtype = _dtype
|
||||
else:
|
||||
dummy_dtype = np.bool_
|
||||
dummy_mat = np.zeros(shape=shape, dtype=dummy_dtype, order="F")
|
||||
dummy_mat[np.arange(len(codes)), codes] = 1
|
||||
|
||||
if not dummy_na:
|
||||
# reset NaN GH4446
|
||||
dummy_mat[codes == -1] = 0
|
||||
|
||||
if drop_first:
|
||||
# remove first GH12042
|
||||
dummy_mat = dummy_mat[:, 1:]
|
||||
dummy_cols = dummy_cols[1:]
|
||||
return DataFrame(dummy_mat, index=index, columns=dummy_cols, dtype=_dtype)
|
||||
|
||||
|
||||
def from_dummies(
|
||||
data: DataFrame,
|
||||
sep: None | str = None,
|
||||
default_category: None | Hashable | dict[str, Hashable] = None,
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Create a categorical ``DataFrame`` from a ``DataFrame`` of dummy variables.
|
||||
|
||||
Inverts the operation performed by :func:`~pandas.get_dummies`.
|
||||
|
||||
.. versionadded:: 1.5.0
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : DataFrame
|
||||
Data which contains dummy-coded variables in the form of integer columns of
|
||||
1's and 0's.
|
||||
sep : str, default None
|
||||
Separator used in the column names of the dummy categories; it is the
|
||||
character indicating the separation of the categorical names from the prefixes.
|
||||
For example, if your column names are 'prefix_A' and 'prefix_B',
|
||||
you can strip the underscore by specifying sep='_'.
|
||||
default_category : None, Hashable or dict of Hashables, default None
|
||||
The default category is the implied category when a value has none of the
|
||||
listed categories specified with a one, i.e. if all dummies in a row are
|
||||
zero. Can be a single value for all variables or a dict directly mapping
|
||||
the default categories to a prefix of a variable.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
Categorical data decoded from the dummy input-data.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
* When the input ``DataFrame`` ``data`` contains NA values.
|
||||
* When the input ``DataFrame`` ``data`` contains column names with separators
|
||||
that do not match the separator specified with ``sep``.
|
||||
* When a ``dict`` passed to ``default_category`` does not include an implied
|
||||
category for each prefix.
|
||||
* When a value in ``data`` has more than one category assigned to it.
|
||||
* When ``default_category=None`` and a value in ``data`` has no category
|
||||
assigned to it.
|
||||
TypeError
|
||||
* When the input ``data`` is not of type ``DataFrame``.
|
||||
* When the input ``DataFrame`` ``data`` contains non-dummy data.
|
||||
* When the passed ``sep`` is of a wrong data type.
|
||||
* When the passed ``default_category`` is of a wrong data type.
|
||||
|
||||
See Also
|
||||
--------
|
||||
:func:`~pandas.get_dummies` : Convert ``Series`` or ``DataFrame`` to dummy codes.
|
||||
:class:`~pandas.Categorical` : Represent a categorical variable in classic R / S-plus fashion.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The columns of the passed dummy data should only include 1's and 0's,
|
||||
or boolean values.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = pd.DataFrame({"a": [1, 0, 0, 1], "b": [0, 1, 0, 0],
|
||||
... "c": [0, 0, 1, 0]})
|
||||
|
||||
>>> df
|
||||
a b c
|
||||
0 1 0 0
|
||||
1 0 1 0
|
||||
2 0 0 1
|
||||
3 1 0 0
|
||||
|
||||
>>> pd.from_dummies(df)
|
||||
0 a
|
||||
1 b
|
||||
2 c
|
||||
3 a
|
||||
|
||||
>>> df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0],
|
||||
... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0],
|
||||
... "col2_c": [0, 0, 1]})
|
||||
|
||||
>>> df
|
||||
col1_a col1_b col2_a col2_b col2_c
|
||||
0 1 0 0 1 0
|
||||
1 0 1 1 0 0
|
||||
2 1 0 0 0 1
|
||||
|
||||
>>> pd.from_dummies(df, sep="_")
|
||||
col1 col2
|
||||
0 a b
|
||||
1 b a
|
||||
2 a c
|
||||
|
||||
>>> df = pd.DataFrame({"col1_a": [1, 0, 0], "col1_b": [0, 1, 0],
|
||||
... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0],
|
||||
... "col2_c": [0, 0, 0]})
|
||||
|
||||
>>> df
|
||||
col1_a col1_b col2_a col2_b col2_c
|
||||
0 1 0 0 1 0
|
||||
1 0 1 1 0 0
|
||||
2 0 0 0 0 0
|
||||
|
||||
>>> pd.from_dummies(df, sep="_", default_category={"col1": "d", "col2": "e"})
|
||||
col1 col2
|
||||
0 a b
|
||||
1 b a
|
||||
2 d e
|
||||
"""
|
||||
from pandas.core.reshape.concat import concat
|
||||
|
||||
if not isinstance(data, DataFrame):
|
||||
raise TypeError(
|
||||
"Expected 'data' to be a 'DataFrame'; "
|
||||
f"Received 'data' of type: {type(data).__name__}"
|
||||
)
|
||||
|
||||
col_isna_mask = cast(Series, data.isna().any())
|
||||
|
||||
if col_isna_mask.any():
|
||||
raise ValueError(
|
||||
"Dummy DataFrame contains NA value in column: "
|
||||
f"'{col_isna_mask.idxmax()}'"
|
||||
)
|
||||
|
||||
# index data with a list of all columns that are dummies
|
||||
try:
|
||||
data_to_decode = data.astype("boolean", copy=False)
|
||||
except TypeError:
|
||||
raise TypeError("Passed DataFrame contains non-dummy data")
|
||||
|
||||
# collect prefixes and get lists to slice data for each prefix
|
||||
variables_slice = defaultdict(list)
|
||||
if sep is None:
|
||||
variables_slice[""] = list(data.columns)
|
||||
elif isinstance(sep, str):
|
||||
for col in data_to_decode.columns:
|
||||
prefix = col.split(sep)[0]
|
||||
if len(prefix) == len(col):
|
||||
raise ValueError(f"Separator not specified for column: {col}")
|
||||
variables_slice[prefix].append(col)
|
||||
else:
|
||||
raise TypeError(
|
||||
"Expected 'sep' to be of type 'str' or 'None'; "
|
||||
f"Received 'sep' of type: {type(sep).__name__}"
|
||||
)
|
||||
|
||||
if default_category is not None:
|
||||
if isinstance(default_category, dict):
|
||||
if not len(default_category) == len(variables_slice):
|
||||
len_msg = (
|
||||
f"Length of 'default_category' ({len(default_category)}) "
|
||||
f"did not match the length of the columns being encoded "
|
||||
f"({len(variables_slice)})"
|
||||
)
|
||||
raise ValueError(len_msg)
|
||||
elif isinstance(default_category, Hashable):
|
||||
default_category = dict(
|
||||
zip(variables_slice, [default_category] * len(variables_slice))
|
||||
)
|
||||
else:
|
||||
raise TypeError(
|
||||
"Expected 'default_category' to be of type "
|
||||
"'None', 'Hashable', or 'dict'; "
|
||||
"Received 'default_category' of type: "
|
||||
f"{type(default_category).__name__}"
|
||||
)
|
||||
|
||||
cat_data = {}
|
||||
for prefix, prefix_slice in variables_slice.items():
|
||||
if sep is None:
|
||||
cats = prefix_slice.copy()
|
||||
else:
|
||||
cats = [col[len(prefix + sep) :] for col in prefix_slice]
|
||||
assigned = data_to_decode.loc[:, prefix_slice].sum(axis=1)
|
||||
if any(assigned > 1):
|
||||
raise ValueError(
|
||||
"Dummy DataFrame contains multi-assignment(s); "
|
||||
f"First instance in row: {assigned.idxmax()}"
|
||||
)
|
||||
if any(assigned == 0):
|
||||
if isinstance(default_category, dict):
|
||||
cats.append(default_category[prefix])
|
||||
else:
|
||||
raise ValueError(
|
||||
"Dummy DataFrame contains unassigned value(s); "
|
||||
f"First instance in row: {assigned.idxmin()}"
|
||||
)
|
||||
data_slice = concat(
|
||||
(data_to_decode.loc[:, prefix_slice], assigned == 0), axis=1
|
||||
)
|
||||
else:
|
||||
data_slice = data_to_decode.loc[:, prefix_slice]
|
||||
cats_array = data._constructor_sliced(cats, dtype=data.columns.dtype)
|
||||
# get indices of True entries along axis=1
|
||||
true_values = data_slice.idxmax(axis=1)
|
||||
indexer = data_slice.columns.get_indexer_for(true_values)
|
||||
cat_data[prefix] = cats_array.take(indexer).set_axis(data.index)
|
||||
|
||||
result = DataFrame(cat_data)
|
||||
if sep is not None:
|
||||
result.columns = result.columns.astype(data.columns.dtype)
|
||||
return result
|
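get_dummies and from_dummies above are inverses of one another when a prefix separator is used. A minimal round-trip sketch, using only the documented API:

import pandas as pd

df = pd.DataFrame({"colour": ["red", "blue", "red"]})
dummies = pd.get_dummies(df)  # boolean columns "colour_blue", "colour_red"
restored = pd.from_dummies(dummies, sep="_")
# restored["colour"] holds the original values: red, blue, red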
lib/python3.11/site-packages/pandas/core/reshape/melt.py (new file, 512 lines)
@@ -0,0 +1,512 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.util._decorators import Appender
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
from pandas.core.dtypes.concat import concat_compat
|
||||
from pandas.core.dtypes.missing import notna
|
||||
|
||||
import pandas.core.algorithms as algos
|
||||
from pandas.core.indexes.api import MultiIndex
|
||||
from pandas.core.reshape.concat import concat
|
||||
from pandas.core.reshape.util import tile_compat
|
||||
from pandas.core.shared_docs import _shared_docs
|
||||
from pandas.core.tools.numeric import to_numeric
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Hashable
|
||||
|
||||
from pandas._typing import AnyArrayLike
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
|
||||
def ensure_list_vars(arg_vars, variable: str, columns) -> list:
|
||||
if arg_vars is not None:
|
||||
if not is_list_like(arg_vars):
|
||||
return [arg_vars]
|
||||
elif isinstance(columns, MultiIndex) and not isinstance(arg_vars, list):
|
||||
raise ValueError(
|
||||
f"{variable} must be a list of tuples when columns are a MultiIndex"
|
||||
)
|
||||
else:
|
||||
return list(arg_vars)
|
||||
else:
|
||||
return []
|
||||
|
||||
|
||||
@Appender(_shared_docs["melt"] % {"caller": "pd.melt(df, ", "other": "DataFrame.melt"})
|
||||
def melt(
|
||||
frame: DataFrame,
|
||||
id_vars=None,
|
||||
value_vars=None,
|
||||
var_name=None,
|
||||
value_name: Hashable = "value",
|
||||
col_level=None,
|
||||
ignore_index: bool = True,
|
||||
) -> DataFrame:
|
||||
if value_name in frame.columns:
|
||||
raise ValueError(
|
||||
f"value_name ({value_name}) cannot match an element in "
|
||||
"the DataFrame columns."
|
||||
)
|
||||
id_vars = ensure_list_vars(id_vars, "id_vars", frame.columns)
|
||||
value_vars_was_not_none = value_vars is not None
|
||||
value_vars = ensure_list_vars(value_vars, "value_vars", frame.columns)
|
||||
|
||||
if id_vars or value_vars:
|
||||
if col_level is not None:
|
||||
level = frame.columns.get_level_values(col_level)
|
||||
else:
|
||||
level = frame.columns
|
||||
labels = id_vars + value_vars
|
||||
idx = level.get_indexer_for(labels)
|
||||
missing = idx == -1
|
||||
if missing.any():
|
||||
missing_labels = [
|
||||
lab for lab, not_found in zip(labels, missing) if not_found
|
||||
]
|
||||
raise KeyError(
|
||||
"The following id_vars or value_vars are not present in "
|
||||
f"the DataFrame: {missing_labels}"
|
||||
)
|
||||
if value_vars_was_not_none:
|
||||
frame = frame.iloc[:, algos.unique(idx)]
|
||||
else:
|
||||
frame = frame.copy()
|
||||
else:
|
||||
frame = frame.copy()
|
||||
|
||||
if col_level is not None: # allow list or other?
|
||||
# frame is a copy
|
||||
frame.columns = frame.columns.get_level_values(col_level)
|
||||
|
||||
if var_name is None:
|
||||
if isinstance(frame.columns, MultiIndex):
|
||||
if len(frame.columns.names) == len(set(frame.columns.names)):
|
||||
var_name = frame.columns.names
|
||||
else:
|
||||
var_name = [f"variable_{i}" for i in range(len(frame.columns.names))]
|
||||
else:
|
||||
var_name = [
|
||||
frame.columns.name if frame.columns.name is not None else "variable"
|
||||
]
|
||||
elif is_list_like(var_name):
|
||||
raise ValueError(f"{var_name=} must be a scalar.")
|
||||
else:
|
||||
var_name = [var_name]
|
||||
|
||||
num_rows, K = frame.shape
|
||||
num_cols_adjusted = K - len(id_vars)
|
||||
|
||||
mdata: dict[Hashable, AnyArrayLike] = {}
|
||||
for col in id_vars:
|
||||
id_data = frame.pop(col)
|
||||
if not isinstance(id_data.dtype, np.dtype):
|
||||
# i.e. ExtensionDtype
|
||||
if num_cols_adjusted > 0:
|
||||
mdata[col] = concat([id_data] * num_cols_adjusted, ignore_index=True)
|
||||
else:
|
||||
# We can't concat empty list. (GH 46044)
|
||||
mdata[col] = type(id_data)([], name=id_data.name, dtype=id_data.dtype)
|
||||
else:
|
||||
mdata[col] = np.tile(id_data._values, num_cols_adjusted)
|
||||
|
||||
mcolumns = id_vars + var_name + [value_name]
|
||||
|
||||
if frame.shape[1] > 0 and not any(
|
||||
not isinstance(dt, np.dtype) and dt._supports_2d for dt in frame.dtypes
|
||||
):
|
||||
mdata[value_name] = concat(
|
||||
[frame.iloc[:, i] for i in range(frame.shape[1])]
|
||||
).values
|
||||
else:
|
||||
mdata[value_name] = frame._values.ravel("F")
|
||||
for i, col in enumerate(var_name):
|
||||
mdata[col] = frame.columns._get_level_values(i).repeat(num_rows)
|
||||
|
||||
result = frame._constructor(mdata, columns=mcolumns)
|
||||
|
||||
if not ignore_index:
|
||||
result.index = tile_compat(frame.index, num_cols_adjusted)
|
||||
|
||||
return result
|
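# melt() takes its docstring from _shared_docs["melt"] via the Appender decorator
# above, so no usage example appears inline here. A minimal sketch of the
# documented behaviour (public pandas API only, assuming pandas imported as pd):
#
#     df = pd.DataFrame({"id": [1, 2], "a": [10, 20], "b": [30, 40]})
#     tidy = pd.melt(df, id_vars="id", value_vars=["a", "b"])
#     # tidy has four rows, one per (id, variable) pair,
#     # with columns "id", "variable", "value"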
||||
|
||||
|
||||
def lreshape(data: DataFrame, groups: dict, dropna: bool = True) -> DataFrame:
|
||||
"""
|
||||
Reshape wide-format data to long. Generalized inverse of DataFrame.pivot.
|
||||
|
||||
Accepts a dictionary, ``groups``, in which each key is a new column name
|
||||
and each value is a list of old column names that will be "melted" under
|
||||
the new column name as part of the reshape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data : DataFrame
|
||||
The wide-format DataFrame.
|
||||
groups : dict
|
||||
{new_name : list_of_columns}.
|
||||
dropna : bool, default True
|
||||
Do not include columns whose entries are all NaN.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
Reshaped DataFrame.
|
||||
|
||||
See Also
|
||||
--------
|
||||
melt : Unpivot a DataFrame from wide to long format, optionally leaving
|
||||
identifiers set.
|
||||
pivot : Create a spreadsheet-style pivot table as a DataFrame.
|
||||
DataFrame.pivot : Pivot without aggregation that can handle
|
||||
non-numeric data.
|
||||
DataFrame.pivot_table : Generalization of pivot that can handle
|
||||
duplicate values for one index/column pair.
|
||||
DataFrame.unstack : Pivot based on the index values instead of a
|
||||
column.
|
||||
wide_to_long : Wide panel to long format. Less flexible but more
|
||||
user-friendly than melt.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
|
||||
... 'team': ['Red Sox', 'Yankees'],
|
||||
... 'year1': [2007, 2007], 'year2': [2008, 2008]})
|
||||
>>> data
|
||||
hr1 hr2 team year1 year2
|
||||
0 514 545 Red Sox 2007 2008
|
||||
1 573 526 Yankees 2007 2008
|
||||
|
||||
>>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
|
||||
team year hr
|
||||
0 Red Sox 2007 514
|
||||
1 Yankees 2007 573
|
||||
2 Red Sox 2008 545
|
||||
3 Yankees 2008 526
|
||||
"""
|
||||
mdata = {}
|
||||
pivot_cols = []
|
||||
all_cols: set[Hashable] = set()
|
||||
K = len(next(iter(groups.values())))
|
||||
for target, names in groups.items():
|
||||
if len(names) != K:
|
||||
raise ValueError("All column lists must be same length")
|
||||
to_concat = [data[col]._values for col in names]
|
||||
|
||||
mdata[target] = concat_compat(to_concat)
|
||||
pivot_cols.append(target)
|
||||
all_cols = all_cols.union(names)
|
||||
|
||||
id_cols = list(data.columns.difference(all_cols))
|
||||
for col in id_cols:
|
||||
mdata[col] = np.tile(data[col]._values, K)
|
||||
|
||||
if dropna:
|
||||
mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool)
|
||||
for c in pivot_cols:
|
||||
mask &= notna(mdata[c])
|
||||
if not mask.all():
|
||||
mdata = {k: v[mask] for k, v in mdata.items()}
|
||||
|
||||
return data._constructor(mdata, columns=id_cols + pivot_cols)
|
||||
|
||||
|
||||
def wide_to_long(
|
||||
df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"
|
||||
) -> DataFrame:
|
||||
r"""
|
||||
Unpivot a DataFrame from wide to long format.
|
||||
|
||||
Less flexible but more user-friendly than melt.
|
||||
|
||||
With stubnames ['A', 'B'], this function expects to find one or more
|
||||
group of columns with format
|
||||
A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,...
|
||||
You specify what you want to call this suffix in the resulting long format
|
||||
with `j` (for example `j='year'`)
|
||||
|
||||
Each row of these wide variables is assumed to be uniquely identified by
|
||||
`i` (can be a single column name or a list of column names)
|
||||
|
||||
All remaining variables in the data frame are left intact.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : DataFrame
|
||||
The wide-format DataFrame.
|
||||
stubnames : str or list-like
|
||||
The stub name(s). The wide format variables are assumed to
|
||||
start with the stub names.
|
||||
i : str or list-like
|
||||
Column(s) to use as id variable(s).
|
||||
j : str
|
||||
The name of the sub-observation variable. What you wish to name your
|
||||
suffix in the long format.
|
||||
sep : str, default ""
|
||||
A character indicating the separation of the variable names
|
||||
in the wide format, to be stripped from the names in the long format.
|
||||
For example, if your column names are A-suffix1, A-suffix2, you
|
||||
can strip the hyphen by specifying `sep='-'`.
|
||||
suffix : str, default '\\d+'
|
||||
A regular expression capturing the wanted suffixes. '\\d+' captures
|
||||
numeric suffixes. Suffixes with no numbers could be specified with the
|
||||
negated character class '\\D+'. You can also further disambiguate
|
||||
suffixes, for example, if your wide variables are of the form A-one,
|
||||
B-two,.., and you have an unrelated column A-rating, you can ignore the
|
||||
last one by specifying `suffix='(!?one|two)'`. When all suffixes are
|
||||
numeric, they are cast to int64/float64.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
A DataFrame that contains each stub name as a variable, with new index
|
||||
(i, j).
|
||||
|
||||
See Also
|
||||
--------
|
||||
melt : Unpivot a DataFrame from wide to long format, optionally leaving
|
||||
identifiers set.
|
||||
pivot : Create a spreadsheet-style pivot table as a DataFrame.
|
||||
DataFrame.pivot : Pivot without aggregation that can handle
|
||||
non-numeric data.
|
||||
DataFrame.pivot_table : Generalization of pivot that can handle
|
||||
duplicate values for one index/column pair.
|
||||
DataFrame.unstack : Pivot based on the index values instead of a
|
||||
column.
|
||||
|
||||
Notes
|
||||
-----
|
||||
All extra variables are left untouched. This simply uses
|
||||
`pandas.melt` under the hood, but is hard-coded to "do the right thing"
|
||||
in a typical case.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> np.random.seed(123)
|
||||
>>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"},
|
||||
... "A1980" : {0 : "d", 1 : "e", 2 : "f"},
|
||||
... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7},
|
||||
... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1},
|
||||
... "X" : dict(zip(range(3), np.random.randn(3)))
|
||||
... })
|
||||
>>> df["id"] = df.index
|
||||
>>> df
|
||||
A1970 A1980 B1970 B1980 X id
|
||||
0 a d 2.5 3.2 -1.085631 0
|
||||
1 b e 1.2 1.3 0.997345 1
|
||||
2 c f 0.7 0.1 0.282978 2
|
||||
>>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
|
||||
... # doctest: +NORMALIZE_WHITESPACE
|
||||
X A B
|
||||
id year
|
||||
0 1970 -1.085631 a 2.5
|
||||
1 1970 0.997345 b 1.2
|
||||
2 1970 0.282978 c 0.7
|
||||
0 1980 -1.085631 d 3.2
|
||||
1 1980 0.997345 e 1.3
|
||||
2 1980 0.282978 f 0.1
|
||||
|
||||
With multiple id columns
|
||||
|
||||
>>> df = pd.DataFrame({
|
||||
... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
|
||||
... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
|
||||
... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
|
||||
... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
|
||||
... })
|
||||
>>> df
|
||||
famid birth ht1 ht2
|
||||
0 1 1 2.8 3.4
|
||||
1 1 2 2.9 3.8
|
||||
2 1 3 2.2 2.9
|
||||
3 2 1 2.0 3.2
|
||||
4 2 2 1.8 2.8
|
||||
5 2 3 1.9 2.4
|
||||
6 3 1 2.2 3.3
|
||||
7 3 2 2.3 3.4
|
||||
8 3 3 2.1 2.9
|
||||
>>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
|
||||
>>> l
|
||||
... # doctest: +NORMALIZE_WHITESPACE
|
||||
ht
|
||||
famid birth age
|
||||
1 1 1 2.8
|
||||
2 3.4
|
||||
2 1 2.9
|
||||
2 3.8
|
||||
3 1 2.2
|
||||
2 2.9
|
||||
2 1 1 2.0
|
||||
2 3.2
|
||||
2 1 1.8
|
||||
2 2.8
|
||||
3 1 1.9
|
||||
2 2.4
|
||||
3 1 1 2.2
|
||||
2 3.3
|
||||
2 1 2.3
|
||||
2 3.4
|
||||
3 1 2.1
|
||||
2 2.9
|
||||
|
||||
Going from long back to wide just takes some creative use of `unstack`
|
||||
|
||||
>>> w = l.unstack()
|
||||
>>> w.columns = w.columns.map('{0[0]}{0[1]}'.format)
|
||||
>>> w.reset_index()
|
||||
famid birth ht1 ht2
|
||||
0 1 1 2.8 3.4
|
||||
1 1 2 2.9 3.8
|
||||
2 1 3 2.2 2.9
|
||||
3 2 1 2.0 3.2
|
||||
4 2 2 1.8 2.8
|
||||
5 2 3 1.9 2.4
|
||||
6 3 1 2.2 3.3
|
||||
7 3 2 2.3 3.4
|
||||
8 3 3 2.1 2.9
|
||||
|
||||
Less wieldy column names are also handled
|
||||
|
||||
>>> np.random.seed(0)
|
||||
>>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3),
|
||||
... 'A(weekly)-2011': np.random.rand(3),
|
||||
... 'B(weekly)-2010': np.random.rand(3),
|
||||
... 'B(weekly)-2011': np.random.rand(3),
|
||||
... 'X' : np.random.randint(3, size=3)})
|
||||
>>> df['id'] = df.index
|
||||
>>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
|
||||
A(weekly)-2010 A(weekly)-2011 B(weekly)-2010 B(weekly)-2011 X id
|
||||
0 0.548814 0.544883 0.437587 0.383442 0 0
|
||||
1 0.715189 0.423655 0.891773 0.791725 1 1
|
||||
2 0.602763 0.645894 0.963663 0.528895 1 2
|
||||
|
||||
>>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id',
|
||||
... j='year', sep='-')
|
||||
... # doctest: +NORMALIZE_WHITESPACE
|
||||
X A(weekly) B(weekly)
|
||||
id year
|
||||
0 2010 0 0.548814 0.437587
|
||||
1 2010 1 0.715189 0.891773
|
||||
2 2010 1 0.602763 0.963663
|
||||
0 2011 0 0.544883 0.383442
|
||||
1 2011 1 0.423655 0.791725
|
||||
2 2011 1 0.645894 0.528895
|
||||
|
||||
If we have many columns, we could also use a regex to find our
|
||||
stubnames and pass that list on to wide_to_long
|
||||
|
||||
>>> stubnames = sorted(
|
||||
... set([match[0] for match in df.columns.str.findall(
|
||||
... r'[A-B]\(.*\)').values if match != []])
|
||||
... )
|
||||
>>> list(stubnames)
|
||||
['A(weekly)', 'B(weekly)']
|
||||
|
||||
All of the above examples have integers as suffixes. It is possible to
|
||||
have non-integers as suffixes.
|
||||
|
||||
>>> df = pd.DataFrame({
|
||||
... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
|
||||
... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
|
||||
... 'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
|
||||
... 'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
|
||||
... })
|
||||
>>> df
|
||||
famid birth ht_one ht_two
|
||||
0 1 1 2.8 3.4
|
||||
1 1 2 2.9 3.8
|
||||
2 1 3 2.2 2.9
|
||||
3 2 1 2.0 3.2
|
||||
4 2 2 1.8 2.8
|
||||
5 2 3 1.9 2.4
|
||||
6 3 1 2.2 3.3
|
||||
7 3 2 2.3 3.4
|
||||
8 3 3 2.1 2.9
|
||||
|
||||
>>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age',
|
||||
... sep='_', suffix=r'\w+')
|
||||
>>> l
|
||||
... # doctest: +NORMALIZE_WHITESPACE
|
||||
ht
|
||||
famid birth age
|
||||
1 1 one 2.8
|
||||
two 3.4
|
||||
2 one 2.9
|
||||
two 3.8
|
||||
3 one 2.2
|
||||
two 2.9
|
||||
2 1 one 2.0
|
||||
two 3.2
|
||||
2 one 1.8
|
||||
two 2.8
|
||||
3 one 1.9
|
||||
two 2.4
|
||||
3 1 one 2.2
|
||||
two 3.3
|
||||
2 one 2.3
|
||||
two 3.4
|
||||
3 one 2.1
|
||||
two 2.9
|
||||
"""

    def get_var_names(df, stub: str, sep: str, suffix: str):
        regex = rf"^{re.escape(stub)}{re.escape(sep)}{suffix}$"
        return df.columns[df.columns.str.match(regex)]

    def melt_stub(df, stub: str, i, j, value_vars, sep: str):
        newdf = melt(
            df,
            id_vars=i,
            value_vars=value_vars,
            value_name=stub.rstrip(sep),
            var_name=j,
        )
        newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "", regex=True)

        # GH17627 Cast numerics suffixes to int/float
        try:
            newdf[j] = to_numeric(newdf[j])
        except (TypeError, ValueError, OverflowError):
            # TODO: anything else to catch?
            pass

        return newdf.set_index(i + [j])

    if not is_list_like(stubnames):
        stubnames = [stubnames]
    else:
        stubnames = list(stubnames)

    if df.columns.isin(stubnames).any():
        raise ValueError("stubname can't be identical to a column name")

    if not is_list_like(i):
        i = [i]
    else:
        i = list(i)

    if df[i].duplicated().any():
        raise ValueError("the id variables need to uniquely identify each row")

    _melted = []
    value_vars_flattened = []
    for stub in stubnames:
        value_var = get_var_names(df, stub, sep, suffix)
        value_vars_flattened.extend(value_var)
        _melted.append(melt_stub(df, stub, i, j, value_var, sep))

    melted = concat(_melted, axis=1)
    id_vars = df.columns.difference(value_vars_flattened)
    new = df[id_vars]

    if len(i) == 1:
        return new.set_index(i).join(melted)
    else:
        return new.merge(melted.reset_index(), on=i).set_index(i + [j])
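End of the melt.py hunk. For orientation only (not part of the committed file), a minimal hedged sketch of the public entry point defined above, pandas.wide_to_long, with invented column names and data:

import pandas as pd

df = pd.DataFrame(
    {
        "id": [0, 1],
        "score_2020": [10, 20],
        "score_2021": [30, 40],
    }
)

# Columns matching "score" + sep + numeric suffix are stacked; the suffix
# becomes the values of the new index level "year".
long_df = pd.wide_to_long(df, stubnames="score", i="id", j="year", sep="_")
print(long_df)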
2762
lib/python3.11/site-packages/pandas/core/reshape/merge.py
Normal file
File diff suppressed because it is too large
899
lib/python3.11/site-packages/pandas/core/reshape/pivot.py
Normal file
@ -0,0 +1,899 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Sequence,
|
||||
)
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Callable,
|
||||
Literal,
|
||||
cast,
|
||||
)
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import lib
|
||||
from pandas.util._decorators import (
|
||||
Appender,
|
||||
Substitution,
|
||||
)
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
|
||||
from pandas.core.dtypes.common import (
|
||||
is_list_like,
|
||||
is_nested_list_like,
|
||||
is_scalar,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDataFrame,
|
||||
ABCSeries,
|
||||
)
|
||||
|
||||
import pandas.core.common as com
|
||||
from pandas.core.frame import _shared_docs
|
||||
from pandas.core.groupby import Grouper
|
||||
from pandas.core.indexes.api import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
get_objs_combined_axis,
|
||||
)
|
||||
from pandas.core.reshape.concat import concat
|
||||
from pandas.core.reshape.util import cartesian_product
|
||||
from pandas.core.series import Series
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
AggFuncType,
|
||||
AggFuncTypeBase,
|
||||
AggFuncTypeDict,
|
||||
IndexLabel,
|
||||
)
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
|
||||
# Note: We need to make sure `frame` is imported before `pivot`, otherwise
|
||||
# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
|
||||
@Substitution("\ndata : DataFrame")
|
||||
@Appender(_shared_docs["pivot_table"], indents=1)
|
||||
def pivot_table(
|
||||
data: DataFrame,
|
||||
values=None,
|
||||
index=None,
|
||||
columns=None,
|
||||
aggfunc: AggFuncType = "mean",
|
||||
fill_value=None,
|
||||
margins: bool = False,
|
||||
dropna: bool = True,
|
||||
margins_name: Hashable = "All",
|
||||
observed: bool | lib.NoDefault = lib.no_default,
|
||||
sort: bool = True,
|
||||
) -> DataFrame:
|
||||
index = _convert_by(index)
|
||||
columns = _convert_by(columns)
|
||||
|
||||
if isinstance(aggfunc, list):
|
||||
pieces: list[DataFrame] = []
|
||||
keys = []
|
||||
for func in aggfunc:
|
||||
_table = __internal_pivot_table(
|
||||
data,
|
||||
values=values,
|
||||
index=index,
|
||||
columns=columns,
|
||||
fill_value=fill_value,
|
||||
aggfunc=func,
|
||||
margins=margins,
|
||||
dropna=dropna,
|
||||
margins_name=margins_name,
|
||||
observed=observed,
|
||||
sort=sort,
|
||||
)
|
||||
pieces.append(_table)
|
||||
keys.append(getattr(func, "__name__", func))
|
||||
|
||||
table = concat(pieces, keys=keys, axis=1)
|
||||
return table.__finalize__(data, method="pivot_table")
|
||||
|
||||
table = __internal_pivot_table(
|
||||
data,
|
||||
values,
|
||||
index,
|
||||
columns,
|
||||
aggfunc,
|
||||
fill_value,
|
||||
margins,
|
||||
dropna,
|
||||
margins_name,
|
||||
observed,
|
||||
sort,
|
||||
)
|
||||
return table.__finalize__(data, method="pivot_table")
|
||||
|
||||
|
||||
def __internal_pivot_table(
|
||||
data: DataFrame,
|
||||
values,
|
||||
index,
|
||||
columns,
|
||||
aggfunc: AggFuncTypeBase | AggFuncTypeDict,
|
||||
fill_value,
|
||||
margins: bool,
|
||||
dropna: bool,
|
||||
margins_name: Hashable,
|
||||
observed: bool | lib.NoDefault,
|
||||
sort: bool,
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Helper of :func:`pandas.pivot_table` for any non-list ``aggfunc``.
|
||||
"""
|
||||
keys = index + columns
|
||||
|
||||
values_passed = values is not None
|
||||
if values_passed:
|
||||
if is_list_like(values):
|
||||
values_multi = True
|
||||
values = list(values)
|
||||
else:
|
||||
values_multi = False
|
||||
values = [values]
|
||||
|
||||
# GH14938 Make sure value labels are in data
|
||||
for i in values:
|
||||
if i not in data:
|
||||
raise KeyError(i)
|
||||
|
||||
to_filter = []
|
||||
for x in keys + values:
|
||||
if isinstance(x, Grouper):
|
||||
x = x.key
|
||||
try:
|
||||
if x in data:
|
||||
to_filter.append(x)
|
||||
except TypeError:
|
||||
pass
|
||||
if len(to_filter) < len(data.columns):
|
||||
data = data[to_filter]
|
||||
|
||||
else:
|
||||
values = data.columns
|
||||
for key in keys:
|
||||
try:
|
||||
values = values.drop(key)
|
||||
except (TypeError, ValueError, KeyError):
|
||||
pass
|
||||
values = list(values)
|
||||
|
||||
observed_bool = False if observed is lib.no_default else observed
|
||||
grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
|
||||
if observed is lib.no_default and any(
|
||||
ping._passed_categorical for ping in grouped._grouper.groupings
|
||||
):
|
||||
warnings.warn(
|
||||
"The default value of observed=False is deprecated and will change "
|
||||
"to observed=True in a future version of pandas. Specify "
|
||||
"observed=False to silence this warning and retain the current behavior",
|
||||
category=FutureWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
agged = grouped.agg(aggfunc)
|
||||
|
||||
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
|
||||
agged = agged.dropna(how="all")
|
||||
|
||||
table = agged
|
||||
|
||||
# GH17038, this check should only happen if index is defined (not None)
|
||||
if table.index.nlevels > 1 and index:
|
||||
# Related GH #17123
|
||||
# If index_names are integers, determine whether the integers refer
|
||||
# to the level position or name.
|
||||
index_names = agged.index.names[: len(index)]
|
||||
to_unstack = []
|
||||
for i in range(len(index), len(keys)):
|
||||
name = agged.index.names[i]
|
||||
if name is None or name in index_names:
|
||||
to_unstack.append(i)
|
||||
else:
|
||||
to_unstack.append(name)
|
||||
table = agged.unstack(to_unstack, fill_value=fill_value)
|
||||
|
||||
if not dropna:
|
||||
if isinstance(table.index, MultiIndex):
|
||||
m = MultiIndex.from_arrays(
|
||||
cartesian_product(table.index.levels), names=table.index.names
|
||||
)
|
||||
table = table.reindex(m, axis=0, fill_value=fill_value)
|
||||
|
||||
if isinstance(table.columns, MultiIndex):
|
||||
m = MultiIndex.from_arrays(
|
||||
cartesian_product(table.columns.levels), names=table.columns.names
|
||||
)
|
||||
table = table.reindex(m, axis=1, fill_value=fill_value)
|
||||
|
||||
if sort is True and isinstance(table, ABCDataFrame):
|
||||
table = table.sort_index(axis=1)
|
||||
|
||||
if fill_value is not None:
|
||||
table = table.fillna(fill_value)
|
||||
if aggfunc is len and not observed and lib.is_integer(fill_value):
|
||||
# TODO: can we avoid this? this used to be handled by
|
||||
# downcast="infer" in fillna
|
||||
table = table.astype(np.int64)
|
||||
|
||||
if margins:
|
||||
if dropna:
|
||||
data = data[data.notna().all(axis=1)]
|
||||
table = _add_margins(
|
||||
table,
|
||||
data,
|
||||
values,
|
||||
rows=index,
|
||||
cols=columns,
|
||||
aggfunc=aggfunc,
|
||||
observed=dropna,
|
||||
margins_name=margins_name,
|
||||
fill_value=fill_value,
|
||||
)
|
||||
|
||||
# discard the top level
|
||||
if values_passed and not values_multi and table.columns.nlevels > 1:
|
||||
table.columns = table.columns.droplevel(0)
|
||||
if len(index) == 0 and len(columns) > 0:
|
||||
table = table.T
|
||||
|
||||
# GH 15193 Make sure empty columns are removed if dropna=True
|
||||
if isinstance(table, ABCDataFrame) and dropna:
|
||||
table = table.dropna(how="all", axis=1)
|
||||
|
||||
return table
|
||||
|
||||
|
||||
def _add_margins(
|
||||
table: DataFrame | Series,
|
||||
data: DataFrame,
|
||||
values,
|
||||
rows,
|
||||
cols,
|
||||
aggfunc,
|
||||
observed: bool,
|
||||
margins_name: Hashable = "All",
|
||||
fill_value=None,
|
||||
):
|
||||
if not isinstance(margins_name, str):
|
||||
raise ValueError("margins_name argument must be a string")
|
||||
|
||||
msg = f'Conflicting name "{margins_name}" in margins'
|
||||
for level in table.index.names:
|
||||
if margins_name in table.index.get_level_values(level):
|
||||
raise ValueError(msg)
|
||||
|
||||
grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name)
|
||||
|
||||
if table.ndim == 2:
|
||||
# i.e. DataFrame
|
||||
for level in table.columns.names[1:]:
|
||||
if margins_name in table.columns.get_level_values(level):
|
||||
raise ValueError(msg)
|
||||
|
||||
key: str | tuple[str, ...]
|
||||
if len(rows) > 1:
|
||||
key = (margins_name,) + ("",) * (len(rows) - 1)
|
||||
else:
|
||||
key = margins_name
|
||||
|
||||
if not values and isinstance(table, ABCSeries):
|
||||
# If there are no values and the table is a series, then there is only
|
||||
# one column in the data. Compute grand margin and return it.
|
||||
return table._append(table._constructor({key: grand_margin[margins_name]}))
|
||||
|
||||
elif values:
|
||||
marginal_result_set = _generate_marginal_results(
|
||||
table, data, values, rows, cols, aggfunc, observed, margins_name
|
||||
)
|
||||
if not isinstance(marginal_result_set, tuple):
|
||||
return marginal_result_set
|
||||
result, margin_keys, row_margin = marginal_result_set
|
||||
else:
|
||||
# no values, and table is a DataFrame
|
||||
assert isinstance(table, ABCDataFrame)
|
||||
marginal_result_set = _generate_marginal_results_without_values(
|
||||
table, data, rows, cols, aggfunc, observed, margins_name
|
||||
)
|
||||
if not isinstance(marginal_result_set, tuple):
|
||||
return marginal_result_set
|
||||
result, margin_keys, row_margin = marginal_result_set
|
||||
|
||||
row_margin = row_margin.reindex(result.columns, fill_value=fill_value)
|
||||
# populate grand margin
|
||||
for k in margin_keys:
|
||||
if isinstance(k, str):
|
||||
row_margin[k] = grand_margin[k]
|
||||
else:
|
||||
row_margin[k] = grand_margin[k[0]]
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
margin_dummy = DataFrame(row_margin, columns=Index([key])).T
|
||||
|
||||
row_names = result.index.names
|
||||
# check the result column and leave floats
|
||||
|
||||
for dtype in set(result.dtypes):
|
||||
if isinstance(dtype, ExtensionDtype):
|
||||
# Can hold NA already
|
||||
continue
|
||||
|
||||
cols = result.select_dtypes([dtype]).columns
|
||||
margin_dummy[cols] = margin_dummy[cols].apply(
|
||||
maybe_downcast_to_dtype, args=(dtype,)
|
||||
)
|
||||
result = result._append(margin_dummy)
|
||||
result.index.names = row_names
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _compute_grand_margin(
|
||||
data: DataFrame, values, aggfunc, margins_name: Hashable = "All"
|
||||
):
|
||||
if values:
|
||||
grand_margin = {}
|
||||
for k, v in data[values].items():
|
||||
try:
|
||||
if isinstance(aggfunc, str):
|
||||
grand_margin[k] = getattr(v, aggfunc)()
|
||||
elif isinstance(aggfunc, dict):
|
||||
if isinstance(aggfunc[k], str):
|
||||
grand_margin[k] = getattr(v, aggfunc[k])()
|
||||
else:
|
||||
grand_margin[k] = aggfunc[k](v)
|
||||
else:
|
||||
grand_margin[k] = aggfunc(v)
|
||||
except TypeError:
|
||||
pass
|
||||
return grand_margin
|
||||
else:
|
||||
return {margins_name: aggfunc(data.index)}
|
||||
|
||||
|
||||
def _generate_marginal_results(
|
||||
table,
|
||||
data: DataFrame,
|
||||
values,
|
||||
rows,
|
||||
cols,
|
||||
aggfunc,
|
||||
observed: bool,
|
||||
margins_name: Hashable = "All",
|
||||
):
|
||||
margin_keys: list | Index
|
||||
if len(cols) > 0:
|
||||
# need to "interleave" the margins
|
||||
table_pieces = []
|
||||
margin_keys = []
|
||||
|
||||
def _all_key(key):
|
||||
return (key, margins_name) + ("",) * (len(cols) - 1)
|
||||
|
||||
if len(rows) > 0:
|
||||
margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc)
|
||||
cat_axis = 1
|
||||
|
||||
for key, piece in table.T.groupby(level=0, observed=observed):
|
||||
piece = piece.T
|
||||
all_key = _all_key(key)
|
||||
|
||||
# we are going to mutate this, so need to copy!
|
||||
piece = piece.copy()
|
||||
piece[all_key] = margin[key]
|
||||
|
||||
table_pieces.append(piece)
|
||||
margin_keys.append(all_key)
|
||||
else:
|
||||
from pandas import DataFrame
|
||||
|
||||
cat_axis = 0
|
||||
for key, piece in table.groupby(level=0, observed=observed):
|
||||
if len(cols) > 1:
|
||||
all_key = _all_key(key)
|
||||
else:
|
||||
all_key = margins_name
|
||||
table_pieces.append(piece)
|
||||
# GH31016 this is to calculate margin for each group, and assign
|
||||
# corresponded key as index
|
||||
transformed_piece = DataFrame(piece.apply(aggfunc)).T
|
||||
if isinstance(piece.index, MultiIndex):
|
||||
# We are adding an empty level
|
||||
transformed_piece.index = MultiIndex.from_tuples(
|
||||
[all_key], names=piece.index.names + [None]
|
||||
)
|
||||
else:
|
||||
transformed_piece.index = Index([all_key], name=piece.index.name)
|
||||
|
||||
# append piece for margin into table_piece
|
||||
table_pieces.append(transformed_piece)
|
||||
margin_keys.append(all_key)
|
||||
|
||||
if not table_pieces:
|
||||
# GH 49240
|
||||
return table
|
||||
else:
|
||||
result = concat(table_pieces, axis=cat_axis)
|
||||
|
||||
if len(rows) == 0:
|
||||
return result
|
||||
else:
|
||||
result = table
|
||||
margin_keys = table.columns
|
||||
|
||||
if len(cols) > 0:
|
||||
row_margin = data[cols + values].groupby(cols, observed=observed).agg(aggfunc)
|
||||
row_margin = row_margin.stack(future_stack=True)
|
||||
|
||||
# GH#26568. Use names instead of indices in case of numeric names
|
||||
new_order_indices = [len(cols)] + list(range(len(cols)))
|
||||
new_order_names = [row_margin.index.names[i] for i in new_order_indices]
|
||||
row_margin.index = row_margin.index.reorder_levels(new_order_names)
|
||||
else:
|
||||
row_margin = data._constructor_sliced(np.nan, index=result.columns)
|
||||
|
||||
return result, margin_keys, row_margin
|
||||
|
||||
|
||||
def _generate_marginal_results_without_values(
|
||||
table: DataFrame,
|
||||
data: DataFrame,
|
||||
rows,
|
||||
cols,
|
||||
aggfunc,
|
||||
observed: bool,
|
||||
margins_name: Hashable = "All",
|
||||
):
|
||||
margin_keys: list | Index
|
||||
if len(cols) > 0:
|
||||
# need to "interleave" the margins
|
||||
margin_keys = []
|
||||
|
||||
def _all_key():
|
||||
if len(cols) == 1:
|
||||
return margins_name
|
||||
return (margins_name,) + ("",) * (len(cols) - 1)
|
||||
|
||||
if len(rows) > 0:
|
||||
margin = data.groupby(rows, observed=observed)[rows].apply(aggfunc)
|
||||
all_key = _all_key()
|
||||
table[all_key] = margin
|
||||
result = table
|
||||
margin_keys.append(all_key)
|
||||
|
||||
else:
|
||||
margin = data.groupby(level=0, axis=0, observed=observed).apply(aggfunc)
|
||||
all_key = _all_key()
|
||||
table[all_key] = margin
|
||||
result = table
|
||||
margin_keys.append(all_key)
|
||||
return result
|
||||
else:
|
||||
result = table
|
||||
margin_keys = table.columns
|
||||
|
||||
if len(cols):
|
||||
row_margin = data.groupby(cols, observed=observed)[cols].apply(aggfunc)
|
||||
else:
|
||||
row_margin = Series(np.nan, index=result.columns)
|
||||
|
||||
return result, margin_keys, row_margin
|
||||
|
||||
|
||||
def _convert_by(by):
|
||||
if by is None:
|
||||
by = []
|
||||
elif (
|
||||
is_scalar(by)
|
||||
or isinstance(by, (np.ndarray, Index, ABCSeries, Grouper))
|
||||
or callable(by)
|
||||
):
|
||||
by = [by]
|
||||
else:
|
||||
by = list(by)
|
||||
return by
|
||||
|
||||
|
||||
@Substitution("\ndata : DataFrame")
|
||||
@Appender(_shared_docs["pivot"], indents=1)
|
||||
def pivot(
|
||||
data: DataFrame,
|
||||
*,
|
||||
columns: IndexLabel,
|
||||
index: IndexLabel | lib.NoDefault = lib.no_default,
|
||||
values: IndexLabel | lib.NoDefault = lib.no_default,
|
||||
) -> DataFrame:
|
||||
columns_listlike = com.convert_to_list_like(columns)
|
||||
|
||||
# If columns is None we will create a MultiIndex level with None as name
|
||||
# which might cause duplicated names because None is the default for
|
||||
# level names
|
||||
data = data.copy(deep=False)
|
||||
data.index = data.index.copy()
|
||||
data.index.names = [
|
||||
name if name is not None else lib.no_default for name in data.index.names
|
||||
]
|
||||
|
||||
indexed: DataFrame | Series
|
||||
if values is lib.no_default:
|
||||
if index is not lib.no_default:
|
||||
cols = com.convert_to_list_like(index)
|
||||
else:
|
||||
cols = []
|
||||
|
||||
append = index is lib.no_default
|
||||
# error: Unsupported operand types for + ("List[Any]" and "ExtensionArray")
|
||||
# error: Unsupported left operand type for + ("ExtensionArray")
|
||||
indexed = data.set_index(
|
||||
cols + columns_listlike, append=append # type: ignore[operator]
|
||||
)
|
||||
else:
|
||||
index_list: list[Index] | list[Series]
|
||||
if index is lib.no_default:
|
||||
if isinstance(data.index, MultiIndex):
|
||||
# GH 23955
|
||||
index_list = [
|
||||
data.index.get_level_values(i) for i in range(data.index.nlevels)
|
||||
]
|
||||
else:
|
||||
index_list = [
|
||||
data._constructor_sliced(data.index, name=data.index.name)
|
||||
]
|
||||
else:
|
||||
index_list = [data[idx] for idx in com.convert_to_list_like(index)]
|
||||
|
||||
data_columns = [data[col] for col in columns_listlike]
|
||||
index_list.extend(data_columns)
|
||||
multiindex = MultiIndex.from_arrays(index_list)
|
||||
|
||||
if is_list_like(values) and not isinstance(values, tuple):
|
||||
# Exclude tuple because it is seen as a single column name
|
||||
values = cast(Sequence[Hashable], values)
|
||||
indexed = data._constructor(
|
||||
data[values]._values, index=multiindex, columns=values
|
||||
)
|
||||
else:
|
||||
indexed = data._constructor_sliced(data[values]._values, index=multiindex)
|
||||
# error: Argument 1 to "unstack" of "DataFrame" has incompatible type "Union
|
||||
# [List[Any], ExtensionArray, ndarray[Any, Any], Index, Series]"; expected
|
||||
# "Hashable"
|
||||
result = indexed.unstack(columns_listlike) # type: ignore[arg-type]
|
||||
result.index.names = [
|
||||
name if name is not lib.no_default else None for name in result.index.names
|
||||
]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def crosstab(
|
||||
index,
|
||||
columns,
|
||||
values=None,
|
||||
rownames=None,
|
||||
colnames=None,
|
||||
aggfunc=None,
|
||||
margins: bool = False,
|
||||
margins_name: Hashable = "All",
|
||||
dropna: bool = True,
|
||||
normalize: bool | Literal[0, 1, "all", "index", "columns"] = False,
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Compute a simple cross tabulation of two (or more) factors.
|
||||
|
||||
By default, computes a frequency table of the factors unless an
|
||||
array of values and an aggregation function are passed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
index : array-like, Series, or list of arrays/Series
|
||||
Values to group by in the rows.
|
||||
columns : array-like, Series, or list of arrays/Series
|
||||
Values to group by in the columns.
|
||||
values : array-like, optional
|
||||
Array of values to aggregate according to the factors.
|
||||
Requires `aggfunc` be specified.
|
||||
rownames : sequence, default None
|
||||
If passed, must match number of row arrays passed.
|
||||
colnames : sequence, default None
|
||||
If passed, must match number of column arrays passed.
|
||||
aggfunc : function, optional
|
||||
If specified, requires `values` be specified as well.
|
||||
margins : bool, default False
|
||||
Add row/column margins (subtotals).
|
||||
margins_name : str, default 'All'
|
||||
Name of the row/column that will contain the totals
|
||||
when margins is True.
|
||||
dropna : bool, default True
|
||||
Do not include columns whose entries are all NaN.
|
||||
normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False
|
||||
Normalize by dividing all values by the sum of values.
|
||||
|
||||
- If passed 'all' or `True`, will normalize over all values.
|
||||
- If passed 'index' will normalize over each row.
|
||||
- If passed 'columns' will normalize over each column.
|
||||
- If margins is `True`, will also normalize margin values.
|
||||
|
||||
Returns
|
||||
-------
|
||||
DataFrame
|
||||
Cross tabulation of the data.
|
||||
|
||||
See Also
|
||||
--------
|
||||
DataFrame.pivot : Reshape data based on column values.
|
||||
pivot_table : Create a pivot table as a DataFrame.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Any Series passed will have their name attributes used unless row or column
|
||||
names for the cross-tabulation are specified.
|
||||
|
||||
Any input passed containing Categorical data will have **all** of its
|
||||
categories included in the cross-tabulation, even if the actual data does
|
||||
not contain any instances of a particular category.
|
||||
|
||||
In the event that there aren't overlapping indexes an empty DataFrame will
|
||||
be returned.
|
||||
|
||||
Reference :ref:`the user guide <reshaping.crosstabulations>` for more examples.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar",
|
||||
... "bar", "bar", "foo", "foo", "foo"], dtype=object)
|
||||
>>> b = np.array(["one", "one", "one", "two", "one", "one",
|
||||
... "one", "two", "two", "two", "one"], dtype=object)
|
||||
>>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny",
|
||||
... "shiny", "dull", "shiny", "shiny", "shiny"],
|
||||
... dtype=object)
|
||||
>>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
|
||||
b one two
|
||||
c dull shiny dull shiny
|
||||
a
|
||||
bar 1 2 1 0
|
||||
foo 2 2 1 2
|
||||
|
||||
Here 'c' and 'f' are not represented in the data and will not be
|
||||
shown in the output because dropna is True by default. Set
|
||||
dropna=False to preserve categories with no data.
|
||||
|
||||
>>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
|
||||
>>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
|
||||
>>> pd.crosstab(foo, bar)
|
||||
col_0 d e
|
||||
row_0
|
||||
a 1 0
|
||||
b 0 1
|
||||
>>> pd.crosstab(foo, bar, dropna=False)
|
||||
col_0 d e f
|
||||
row_0
|
||||
a 1 0 0
|
||||
b 0 1 0
|
||||
c 0 0 0
|
||||
"""
|
||||
if values is None and aggfunc is not None:
|
||||
raise ValueError("aggfunc cannot be used without values.")
|
||||
|
||||
if values is not None and aggfunc is None:
|
||||
raise ValueError("values cannot be used without an aggfunc.")
|
||||
|
||||
if not is_nested_list_like(index):
|
||||
index = [index]
|
||||
if not is_nested_list_like(columns):
|
||||
columns = [columns]
|
||||
|
||||
common_idx = None
|
||||
pass_objs = [x for x in index + columns if isinstance(x, (ABCSeries, ABCDataFrame))]
|
||||
if pass_objs:
|
||||
common_idx = get_objs_combined_axis(pass_objs, intersect=True, sort=False)
|
||||
|
||||
rownames = _get_names(index, rownames, prefix="row")
|
||||
colnames = _get_names(columns, colnames, prefix="col")
|
||||
|
||||
# duplicate names mapped to unique names for pivot op
|
||||
(
|
||||
rownames_mapper,
|
||||
unique_rownames,
|
||||
colnames_mapper,
|
||||
unique_colnames,
|
||||
) = _build_names_mapper(rownames, colnames)
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
data = {
|
||||
**dict(zip(unique_rownames, index)),
|
||||
**dict(zip(unique_colnames, columns)),
|
||||
}
|
||||
df = DataFrame(data, index=common_idx)
|
||||
|
||||
if values is None:
|
||||
df["__dummy__"] = 0
|
||||
kwargs = {"aggfunc": len, "fill_value": 0}
|
||||
else:
|
||||
df["__dummy__"] = values
|
||||
kwargs = {"aggfunc": aggfunc}
|
||||
|
||||
# error: Argument 7 to "pivot_table" of "DataFrame" has incompatible type
|
||||
# "**Dict[str, object]"; expected "Union[...]"
|
||||
table = df.pivot_table(
|
||||
"__dummy__",
|
||||
index=unique_rownames,
|
||||
columns=unique_colnames,
|
||||
margins=margins,
|
||||
margins_name=margins_name,
|
||||
dropna=dropna,
|
||||
observed=False,
|
||||
**kwargs, # type: ignore[arg-type]
|
||||
)
|
||||
|
||||
# Post-process
|
||||
if normalize is not False:
|
||||
table = _normalize(
|
||||
table, normalize=normalize, margins=margins, margins_name=margins_name
|
||||
)
|
||||
|
||||
table = table.rename_axis(index=rownames_mapper, axis=0)
|
||||
table = table.rename_axis(columns=colnames_mapper, axis=1)
|
||||
|
||||
return table
|
||||
|
||||
|
||||
def _normalize(
|
||||
table: DataFrame, normalize, margins: bool, margins_name: Hashable = "All"
|
||||
) -> DataFrame:
|
||||
if not isinstance(normalize, (bool, str)):
|
||||
axis_subs = {0: "index", 1: "columns"}
|
||||
try:
|
||||
normalize = axis_subs[normalize]
|
||||
except KeyError as err:
|
||||
raise ValueError("Not a valid normalize argument") from err
|
||||
|
||||
if margins is False:
|
||||
# Actual Normalizations
|
||||
normalizers: dict[bool | str, Callable] = {
|
||||
"all": lambda x: x / x.sum(axis=1).sum(axis=0),
|
||||
"columns": lambda x: x / x.sum(),
|
||||
"index": lambda x: x.div(x.sum(axis=1), axis=0),
|
||||
}
|
||||
|
||||
normalizers[True] = normalizers["all"]
|
||||
|
||||
try:
|
||||
f = normalizers[normalize]
|
||||
except KeyError as err:
|
||||
raise ValueError("Not a valid normalize argument") from err
|
||||
|
||||
table = f(table)
|
||||
table = table.fillna(0)
|
||||
|
||||
elif margins is True:
|
||||
# keep index and column of pivoted table
|
||||
table_index = table.index
|
||||
table_columns = table.columns
|
||||
last_ind_or_col = table.iloc[-1, :].name
|
||||
|
||||
# check if margin name is not in (for MI cases) and not equal to last
|
||||
# index/column and save the column and index margin
|
||||
if (margins_name not in last_ind_or_col) & (margins_name != last_ind_or_col):
|
||||
raise ValueError(f"{margins_name} not in pivoted DataFrame")
|
||||
column_margin = table.iloc[:-1, -1]
|
||||
index_margin = table.iloc[-1, :-1]
|
||||
|
||||
# keep the core table
|
||||
table = table.iloc[:-1, :-1]
|
||||
|
||||
# Normalize core
|
||||
table = _normalize(table, normalize=normalize, margins=False)
|
||||
|
||||
# Fix Margins
|
||||
if normalize == "columns":
|
||||
column_margin = column_margin / column_margin.sum()
|
||||
table = concat([table, column_margin], axis=1)
|
||||
table = table.fillna(0)
|
||||
table.columns = table_columns
|
||||
|
||||
elif normalize == "index":
|
||||
index_margin = index_margin / index_margin.sum()
|
||||
table = table._append(index_margin)
|
||||
table = table.fillna(0)
|
||||
table.index = table_index
|
||||
|
||||
elif normalize == "all" or normalize is True:
|
||||
column_margin = column_margin / column_margin.sum()
|
||||
index_margin = index_margin / index_margin.sum()
|
||||
index_margin.loc[margins_name] = 1
|
||||
table = concat([table, column_margin], axis=1)
|
||||
table = table._append(index_margin)
|
||||
|
||||
table = table.fillna(0)
|
||||
table.index = table_index
|
||||
table.columns = table_columns
|
||||
|
||||
else:
|
||||
raise ValueError("Not a valid normalize argument")
|
||||
|
||||
else:
|
||||
raise ValueError("Not a valid margins argument")
|
||||
|
||||
return table
|
||||
|
||||
|
||||
def _get_names(arrs, names, prefix: str = "row"):
|
||||
if names is None:
|
||||
names = []
|
||||
for i, arr in enumerate(arrs):
|
||||
if isinstance(arr, ABCSeries) and arr.name is not None:
|
||||
names.append(arr.name)
|
||||
else:
|
||||
names.append(f"{prefix}_{i}")
|
||||
else:
|
||||
if len(names) != len(arrs):
|
||||
raise AssertionError("arrays and names must have the same length")
|
||||
if not isinstance(names, list):
|
||||
names = list(names)
|
||||
|
||||
return names
|
||||
|
||||
|
||||
def _build_names_mapper(
|
||||
rownames: list[str], colnames: list[str]
|
||||
) -> tuple[dict[str, str], list[str], dict[str, str], list[str]]:
|
||||
"""
|
||||
Given the names of a DataFrame's rows and columns, returns a set of unique row
|
||||
and column names and mappers that convert to original names.
|
||||
|
||||
A row or column name is replaced if it is duplicate among the rows of the inputs,
|
||||
among the columns of the inputs or between the rows and the columns.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
rownames: list[str]
|
||||
colnames: list[str]
|
||||
|
||||
Returns
|
||||
-------
|
||||
Tuple(Dict[str, str], List[str], Dict[str, str], List[str])
|
||||
|
||||
rownames_mapper: dict[str, str]
|
||||
a dictionary with new row names as keys and original rownames as values
|
||||
unique_rownames: list[str]
|
||||
a list of rownames with duplicate names replaced by dummy names
|
||||
colnames_mapper: dict[str, str]
|
||||
a dictionary with new column names as keys and original column names as values
|
||||
unique_colnames: list[str]
|
||||
a list of column names with duplicate names replaced by dummy names
|
||||
|
||||
"""
|
||||
|
||||
def get_duplicates(names):
|
||||
seen: set = set()
|
||||
return {name for name in names if name not in seen}
|
||||
|
||||
shared_names = set(rownames).intersection(set(colnames))
|
||||
dup_names = get_duplicates(rownames) | get_duplicates(colnames) | shared_names
|
||||
|
||||
rownames_mapper = {
|
||||
f"row_{i}": name for i, name in enumerate(rownames) if name in dup_names
|
||||
}
|
||||
unique_rownames = [
|
||||
f"row_{i}" if name in dup_names else name for i, name in enumerate(rownames)
|
||||
]
|
||||
|
||||
colnames_mapper = {
|
||||
f"col_{i}": name for i, name in enumerate(colnames) if name in dup_names
|
||||
}
|
||||
unique_colnames = [
|
||||
f"col_{i}" if name in dup_names else name for i, name in enumerate(colnames)
|
||||
]
|
||||
|
||||
return rownames_mapper, unique_rownames, colnames_mapper, unique_colnames
|
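End of the pivot.py hunk. For orientation only (not part of the committed file), a small hedged sketch that exercises the helpers defined above: margins=True runs through _add_margins, and normalize runs through _normalize. Data and column names are invented:

import pandas as pd

df = pd.DataFrame(
    {
        "city": ["NY", "NY", "SF", "SF"],
        "kind": ["a", "b", "a", "b"],
        "value": [1.0, 2.0, 3.0, 4.0],
    }
)

# pivot_table with margins adds an "All" row/column via _add_margins.
print(
    pd.pivot_table(
        df, values="value", index="city", columns="kind", aggfunc="sum", margins=True
    )
)

# crosstab with normalize="index" divides each row by its sum via _normalize.
print(pd.crosstab(df["city"], df["kind"], normalize="index"))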
989
lib/python3.11/site-packages/pandas/core/reshape/reshape.py
Normal file
@ -0,0 +1,989 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
cast,
|
||||
)
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas._libs.reshape as libreshape
|
||||
from pandas.errors import PerformanceWarning
|
||||
from pandas.util._decorators import cache_readonly
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.core.dtypes.cast import (
|
||||
find_common_type,
|
||||
maybe_promote,
|
||||
)
|
||||
from pandas.core.dtypes.common import (
|
||||
ensure_platform_int,
|
||||
is_1d_only_ea_dtype,
|
||||
is_integer,
|
||||
needs_i8_conversion,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import ExtensionDtype
|
||||
from pandas.core.dtypes.missing import notna
|
||||
|
||||
import pandas.core.algorithms as algos
|
||||
from pandas.core.algorithms import (
|
||||
factorize,
|
||||
unique,
|
||||
)
|
||||
from pandas.core.arrays.categorical import factorize_from_iterable
|
||||
from pandas.core.construction import ensure_wrapped_if_datetimelike
|
||||
from pandas.core.frame import DataFrame
|
||||
from pandas.core.indexes.api import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
)
|
||||
from pandas.core.reshape.concat import concat
|
||||
from pandas.core.series import Series
|
||||
from pandas.core.sorting import (
|
||||
compress_group_index,
|
||||
decons_obs_group_ids,
|
||||
get_compressed_ids,
|
||||
get_group_index,
|
||||
get_group_index_sorter,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
ArrayLike,
|
||||
Level,
|
||||
npt,
|
||||
)
|
||||
|
||||
from pandas.core.arrays import ExtensionArray
|
||||
from pandas.core.indexes.frozen import FrozenList
|
||||
|
||||
|
||||
class _Unstacker:
|
||||
"""
|
||||
Helper class to unstack data / pivot with multi-level index
|
||||
|
||||
Parameters
|
||||
----------
|
||||
index : MultiIndex
|
||||
level : int or str, default last level
|
||||
Level to "unstack". Accepts a name for the level.
|
||||
fill_value : scalar, optional
|
||||
Default value to fill in missing values if subgroups do not have the
|
||||
same set of labels. By default, missing values will be replaced with
|
||||
the default fill value for that data type, NaN for float, NaT for
|
||||
datetimelike, etc. For integer types, by default data will converted to
|
||||
float and missing values will be set to NaN.
|
||||
constructor : object
|
||||
Pandas ``DataFrame`` or subclass used to create unstacked
|
||||
response. If None, DataFrame will be used.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
|
||||
... ('two', 'a'), ('two', 'b')])
|
||||
>>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)
|
||||
>>> s
|
||||
one a 1
|
||||
b 2
|
||||
two a 3
|
||||
b 4
|
||||
dtype: int64
|
||||
|
||||
>>> s.unstack(level=-1)
|
||||
a b
|
||||
one 1 2
|
||||
two 3 4
|
||||
|
||||
>>> s.unstack(level=0)
|
||||
one two
|
||||
a 1 3
|
||||
b 2 4
|
||||
|
||||
Returns
|
||||
-------
|
||||
unstacked : DataFrame
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, index: MultiIndex, level: Level, constructor, sort: bool = True
|
||||
) -> None:
|
||||
self.constructor = constructor
|
||||
self.sort = sort
|
||||
|
||||
self.index = index.remove_unused_levels()
|
||||
|
||||
self.level = self.index._get_level_number(level)
|
||||
|
||||
# when index includes `nan`, need to lift levels/strides by 1
|
||||
self.lift = 1 if -1 in self.index.codes[self.level] else 0
|
||||
|
||||
# Note: the "pop" below alters these in-place.
|
||||
self.new_index_levels = list(self.index.levels)
|
||||
self.new_index_names = list(self.index.names)
|
||||
|
||||
self.removed_name = self.new_index_names.pop(self.level)
|
||||
self.removed_level = self.new_index_levels.pop(self.level)
|
||||
self.removed_level_full = index.levels[self.level]
|
||||
if not self.sort:
|
||||
unique_codes = unique(self.index.codes[self.level])
|
||||
self.removed_level = self.removed_level.take(unique_codes)
|
||||
self.removed_level_full = self.removed_level_full.take(unique_codes)
|
||||
|
||||
# Bug fix GH 20601
|
||||
# If the data frame is too big, the number of unique index combinations
# will cause int32 overflow on windows environments.
# We want to check and raise a warning before this happens
|
||||
num_rows = np.max([index_level.size for index_level in self.new_index_levels])
|
||||
num_columns = self.removed_level.size
|
||||
|
||||
# GH20601: This forces an overflow if the number of cells is too high.
|
||||
num_cells = num_rows * num_columns
|
||||
|
||||
# GH 26314: Previous ValueError raised was too restrictive for many users.
|
||||
if num_cells > np.iinfo(np.int32).max:
|
||||
warnings.warn(
|
||||
f"The following operation may generate {num_cells} cells "
|
||||
f"in the resulting pandas object.",
|
||||
PerformanceWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
|
||||
self._make_selectors()
|
||||
|
||||
@cache_readonly
|
||||
def _indexer_and_to_sort(
|
||||
self,
|
||||
) -> tuple[
|
||||
npt.NDArray[np.intp],
|
||||
list[np.ndarray], # each has _some_ signed integer dtype
|
||||
]:
|
||||
v = self.level
|
||||
|
||||
codes = list(self.index.codes)
|
||||
levs = list(self.index.levels)
|
||||
to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
|
||||
sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]])
|
||||
|
||||
comp_index, obs_ids = get_compressed_ids(to_sort, sizes)
|
||||
ngroups = len(obs_ids)
|
||||
|
||||
indexer = get_group_index_sorter(comp_index, ngroups)
|
||||
return indexer, to_sort
|
||||
|
||||
@cache_readonly
|
||||
def sorted_labels(self) -> list[np.ndarray]:
|
||||
indexer, to_sort = self._indexer_and_to_sort
|
||||
if self.sort:
|
||||
return [line.take(indexer) for line in to_sort]
|
||||
return to_sort
|
||||
|
||||
def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
|
||||
if self.sort:
|
||||
indexer, _ = self._indexer_and_to_sort
|
||||
|
||||
sorted_values = algos.take_nd(values, indexer, axis=0)
|
||||
return sorted_values
|
||||
return values
|
||||
|
||||
def _make_selectors(self):
|
||||
new_levels = self.new_index_levels
|
||||
|
||||
# make the mask
|
||||
remaining_labels = self.sorted_labels[:-1]
|
||||
level_sizes = tuple(len(x) for x in new_levels)
|
||||
|
||||
comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes)
|
||||
ngroups = len(obs_ids)
|
||||
|
||||
comp_index = ensure_platform_int(comp_index)
|
||||
stride = self.index.levshape[self.level] + self.lift
|
||||
self.full_shape = ngroups, stride
|
||||
|
||||
selector = self.sorted_labels[-1] + stride * comp_index + self.lift
|
||||
mask = np.zeros(np.prod(self.full_shape), dtype=bool)
|
||||
mask.put(selector, True)
|
||||
|
||||
if mask.sum() < len(self.index):
|
||||
raise ValueError("Index contains duplicate entries, cannot reshape")
|
||||
|
||||
self.group_index = comp_index
|
||||
self.mask = mask
|
||||
if self.sort:
|
||||
self.compressor = comp_index.searchsorted(np.arange(ngroups))
|
||||
else:
|
||||
self.compressor = np.sort(np.unique(comp_index, return_index=True)[1])
|
||||
|
||||
@cache_readonly
|
||||
def mask_all(self) -> bool:
|
||||
return bool(self.mask.all())
|
||||
|
||||
@cache_readonly
|
||||
def arange_result(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.bool_]]:
|
||||
# We cache this for reuse in ExtensionBlock._unstack
|
||||
dummy_arr = np.arange(len(self.index), dtype=np.intp)
|
||||
new_values, mask = self.get_new_values(dummy_arr, fill_value=-1)
|
||||
return new_values, mask.any(0)
|
||||
# TODO: in all tests we have mask.any(0).all(); can we rely on that?
|
||||
|
||||
def get_result(self, values, value_columns, fill_value) -> DataFrame:
|
||||
if values.ndim == 1:
|
||||
values = values[:, np.newaxis]
|
||||
|
||||
if value_columns is None and values.shape[1] != 1: # pragma: no cover
|
||||
raise ValueError("must pass column labels for multi-column data")
|
||||
|
||||
values, _ = self.get_new_values(values, fill_value)
|
||||
columns = self.get_new_columns(value_columns)
|
||||
index = self.new_index
|
||||
|
||||
return self.constructor(
|
||||
values, index=index, columns=columns, dtype=values.dtype
|
||||
)
|
||||
|
||||
def get_new_values(self, values, fill_value=None):
|
||||
if values.ndim == 1:
|
||||
values = values[:, np.newaxis]
|
||||
|
||||
sorted_values = self._make_sorted_values(values)
|
||||
|
||||
# place the values
|
||||
length, width = self.full_shape
|
||||
stride = values.shape[1]
|
||||
result_width = width * stride
|
||||
result_shape = (length, result_width)
|
||||
mask = self.mask
|
||||
mask_all = self.mask_all
|
||||
|
||||
# we can simply reshape if we don't have a mask
|
||||
if mask_all and len(values):
|
||||
# TODO: Under what circumstances can we rely on sorted_values
|
||||
# matching values? When that holds, we can slice instead
|
||||
# of take (in particular for EAs)
|
||||
new_values = (
|
||||
sorted_values.reshape(length, width, stride)
|
||||
.swapaxes(1, 2)
|
||||
.reshape(result_shape)
|
||||
)
|
||||
new_mask = np.ones(result_shape, dtype=bool)
|
||||
return new_values, new_mask
|
||||
|
||||
dtype = values.dtype
|
||||
|
||||
# if our mask is all True, then we can use our existing dtype
|
||||
if mask_all:
|
||||
dtype = values.dtype
|
||||
new_values = np.empty(result_shape, dtype=dtype)
|
||||
else:
|
||||
if isinstance(dtype, ExtensionDtype):
|
||||
# GH#41875
|
||||
# We are assuming that fill_value can be held by this dtype,
|
||||
# unlike the non-EA case that promotes.
|
||||
cls = dtype.construct_array_type()
|
||||
new_values = cls._empty(result_shape, dtype=dtype)
|
||||
new_values[:] = fill_value
|
||||
else:
|
||||
dtype, fill_value = maybe_promote(dtype, fill_value)
|
||||
new_values = np.empty(result_shape, dtype=dtype)
|
||||
new_values.fill(fill_value)
|
||||
|
||||
name = dtype.name
|
||||
new_mask = np.zeros(result_shape, dtype=bool)
|
||||
|
||||
# we need to convert to a basic dtype
|
||||
# and possibly coerce an input to our output dtype
|
||||
# e.g. ints -> floats
|
||||
if needs_i8_conversion(values.dtype):
|
||||
sorted_values = sorted_values.view("i8")
|
||||
new_values = new_values.view("i8")
|
||||
else:
|
||||
sorted_values = sorted_values.astype(name, copy=False)
|
||||
|
||||
# fill in our values & mask
|
||||
libreshape.unstack(
|
||||
sorted_values,
|
||||
mask.view("u1"),
|
||||
stride,
|
||||
length,
|
||||
width,
|
||||
new_values,
|
||||
new_mask.view("u1"),
|
||||
)
|
||||
|
||||
# reconstruct dtype if needed
|
||||
if needs_i8_conversion(values.dtype):
|
||||
# view as datetime64 so we can wrap in DatetimeArray and use
|
||||
# DTA's view method
|
||||
new_values = new_values.view("M8[ns]")
|
||||
new_values = ensure_wrapped_if_datetimelike(new_values)
|
||||
new_values = new_values.view(values.dtype)
|
||||
|
||||
return new_values, new_mask
|
||||
|
||||
def get_new_columns(self, value_columns: Index | None):
|
||||
if value_columns is None:
|
||||
if self.lift == 0:
|
||||
return self.removed_level._rename(name=self.removed_name)
|
||||
|
||||
lev = self.removed_level.insert(0, item=self.removed_level._na_value)
|
||||
return lev.rename(self.removed_name)
|
||||
|
||||
stride = len(self.removed_level) + self.lift
|
||||
width = len(value_columns)
|
||||
propagator = np.repeat(np.arange(width), stride)
|
||||
|
||||
new_levels: FrozenList | list[Index]
|
||||
|
||||
if isinstance(value_columns, MultiIndex):
|
||||
# error: Cannot determine type of "__add__" [has-type]
|
||||
new_levels = value_columns.levels + ( # type: ignore[has-type]
|
||||
self.removed_level_full,
|
||||
)
|
||||
new_names = value_columns.names + (self.removed_name,)
|
||||
|
||||
new_codes = [lab.take(propagator) for lab in value_columns.codes]
|
||||
else:
|
||||
new_levels = [
|
||||
value_columns,
|
||||
self.removed_level_full,
|
||||
]
|
||||
new_names = [value_columns.name, self.removed_name]
|
||||
new_codes = [propagator]
|
||||
|
||||
repeater = self._repeater
|
||||
|
||||
# The entire level is then just a repetition of the single chunk:
|
||||
new_codes.append(np.tile(repeater, width))
|
||||
return MultiIndex(
|
||||
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
|
||||
)
|
||||
|
||||
@cache_readonly
|
||||
def _repeater(self) -> np.ndarray:
|
||||
# The two indices differ only if the unstacked level had unused items:
|
||||
if len(self.removed_level_full) != len(self.removed_level):
|
||||
# In this case, we remap the new codes to the original level:
|
||||
repeater = self.removed_level_full.get_indexer(self.removed_level)
|
||||
if self.lift:
|
||||
repeater = np.insert(repeater, 0, -1)
|
||||
else:
|
||||
# Otherwise, we just use each level item exactly once:
|
||||
stride = len(self.removed_level) + self.lift
|
||||
repeater = np.arange(stride) - self.lift
|
||||
|
||||
return repeater
|
||||
|
||||
@cache_readonly
|
||||
def new_index(self) -> MultiIndex:
|
||||
# Does not depend on values or value_columns
|
||||
result_codes = [lab.take(self.compressor) for lab in self.sorted_labels[:-1]]
|
||||
|
||||
# construct the new index
|
||||
if len(self.new_index_levels) == 1:
|
||||
level, level_codes = self.new_index_levels[0], result_codes[0]
|
||||
if (level_codes == -1).any():
|
||||
level = level.insert(len(level), level._na_value)
|
||||
return level.take(level_codes).rename(self.new_index_names[0])
|
||||
|
||||
return MultiIndex(
|
||||
levels=self.new_index_levels,
|
||||
codes=result_codes,
|
||||
names=self.new_index_names,
|
||||
verify_integrity=False,
|
||||
)
|
||||
|
||||
|
||||
def _unstack_multiple(
|
||||
data: Series | DataFrame, clocs, fill_value=None, sort: bool = True
|
||||
):
|
||||
if len(clocs) == 0:
|
||||
return data
|
||||
|
||||
# NOTE: This doesn't deal with hierarchical columns yet
|
||||
|
||||
index = data.index
|
||||
index = cast(MultiIndex, index) # caller is responsible for checking
|
||||
|
||||
# GH 19966 Make sure if MultiIndexed index has tuple name, they will be
|
||||
# recognised as a whole
|
||||
if clocs in index.names:
|
||||
clocs = [clocs]
|
||||
clocs = [index._get_level_number(i) for i in clocs]
|
||||
|
||||
rlocs = [i for i in range(index.nlevels) if i not in clocs]
|
||||
|
||||
clevels = [index.levels[i] for i in clocs]
|
||||
ccodes = [index.codes[i] for i in clocs]
|
||||
cnames = [index.names[i] for i in clocs]
|
||||
rlevels = [index.levels[i] for i in rlocs]
|
||||
rcodes = [index.codes[i] for i in rlocs]
|
||||
rnames = [index.names[i] for i in rlocs]
|
||||
|
||||
shape = tuple(len(x) for x in clevels)
|
||||
group_index = get_group_index(ccodes, shape, sort=False, xnull=False)
|
||||
|
||||
comp_ids, obs_ids = compress_group_index(group_index, sort=False)
|
||||
recons_codes = decons_obs_group_ids(comp_ids, obs_ids, shape, ccodes, xnull=False)
|
||||
|
||||
if not rlocs:
|
||||
# Everything is in clocs, so the dummy df has a regular index
|
||||
dummy_index = Index(obs_ids, name="__placeholder__")
|
||||
else:
|
||||
dummy_index = MultiIndex(
|
||||
levels=rlevels + [obs_ids],
|
||||
codes=rcodes + [comp_ids],
|
||||
names=rnames + ["__placeholder__"],
|
||||
verify_integrity=False,
|
||||
)
|
||||
|
||||
if isinstance(data, Series):
|
||||
dummy = data.copy()
|
||||
dummy.index = dummy_index
|
||||
|
||||
unstacked = dummy.unstack("__placeholder__", fill_value=fill_value, sort=sort)
|
||||
new_levels = clevels
|
||||
new_names = cnames
|
||||
new_codes = recons_codes
|
||||
else:
|
||||
if isinstance(data.columns, MultiIndex):
|
||||
result = data
|
||||
while clocs:
|
||||
val = clocs.pop(0)
|
||||
result = result.unstack(val, fill_value=fill_value, sort=sort)
|
||||
clocs = [v if v < val else v - 1 for v in clocs]
|
||||
|
||||
return result
|
||||
|
||||
# GH#42579 deep=False to avoid consolidating
|
||||
dummy_df = data.copy(deep=False)
|
||||
dummy_df.index = dummy_index
|
||||
|
||||
unstacked = dummy_df.unstack(
|
||||
"__placeholder__", fill_value=fill_value, sort=sort
|
||||
)
|
||||
if isinstance(unstacked, Series):
|
||||
unstcols = unstacked.index
|
||||
else:
|
||||
unstcols = unstacked.columns
|
||||
assert isinstance(unstcols, MultiIndex) # for mypy
|
||||
new_levels = [unstcols.levels[0]] + clevels
|
||||
new_names = [data.columns.name] + cnames
|
||||
|
||||
new_codes = [unstcols.codes[0]]
|
||||
new_codes.extend(rec.take(unstcols.codes[-1]) for rec in recons_codes)
|
||||
|
||||
new_columns = MultiIndex(
|
||||
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
|
||||
)
|
||||
|
||||
if isinstance(unstacked, Series):
|
||||
unstacked.index = new_columns
|
||||
else:
|
||||
unstacked.columns = new_columns
|
||||
|
||||
return unstacked
|
||||
|
||||
|
||||
def unstack(obj: Series | DataFrame, level, fill_value=None, sort: bool = True):
|
||||
if isinstance(level, (tuple, list)):
|
||||
if len(level) != 1:
|
||||
# _unstack_multiple only handles MultiIndexes,
|
||||
# and isn't needed for a single level
|
||||
return _unstack_multiple(obj, level, fill_value=fill_value, sort=sort)
|
||||
else:
|
||||
level = level[0]
|
||||
|
||||
if not is_integer(level) and not level == "__placeholder__":
|
||||
# check if level is valid in case of regular index
|
||||
obj.index._get_level_number(level)
|
||||
|
||||
if isinstance(obj, DataFrame):
|
||||
if isinstance(obj.index, MultiIndex):
|
||||
return _unstack_frame(obj, level, fill_value=fill_value, sort=sort)
|
||||
else:
|
||||
return obj.T.stack(future_stack=True)
|
||||
elif not isinstance(obj.index, MultiIndex):
|
||||
# GH 36113
|
||||
# Give nicer error messages when unstack a Series whose
|
||||
# Index is not a MultiIndex.
|
||||
raise ValueError(
|
||||
f"index must be a MultiIndex to unstack, {type(obj.index)} was passed"
|
||||
)
|
||||
else:
|
||||
if is_1d_only_ea_dtype(obj.dtype):
|
||||
return _unstack_extension_series(obj, level, fill_value, sort=sort)
|
||||
unstacker = _Unstacker(
|
||||
obj.index, level=level, constructor=obj._constructor_expanddim, sort=sort
|
||||
)
|
||||
return unstacker.get_result(
|
||||
obj._values, value_columns=None, fill_value=fill_value
|
||||
)
|
||||
|
||||
|
||||
def _unstack_frame(
|
||||
obj: DataFrame, level, fill_value=None, sort: bool = True
|
||||
) -> DataFrame:
|
||||
assert isinstance(obj.index, MultiIndex) # checked by caller
|
||||
unstacker = _Unstacker(
|
||||
obj.index, level=level, constructor=obj._constructor, sort=sort
|
||||
)
|
||||
|
||||
if not obj._can_fast_transpose:
|
||||
mgr = obj._mgr.unstack(unstacker, fill_value=fill_value)
|
||||
return obj._constructor_from_mgr(mgr, axes=mgr.axes)
|
||||
else:
|
||||
return unstacker.get_result(
|
||||
obj._values, value_columns=obj.columns, fill_value=fill_value
|
||||
)


def _unstack_extension_series(
    series: Series, level, fill_value, sort: bool
) -> DataFrame:
    """
    Unstack an ExtensionArray-backed Series.

    The ExtensionDtype is preserved.

    Parameters
    ----------
    series : Series
        A Series with an ExtensionArray for values
    level : Any
        The level name or number.
    fill_value : Any
        The user-level (not physical storage) fill value to use for
        missing values introduced by the reshape. Passed to
        ``series.values.take``.
    sort : bool
        Whether to sort the resulting MultiIndex levels.

    Returns
    -------
    DataFrame
        Each column of the DataFrame will have the same dtype as
        the input Series.
    """
    # Defer to the logic in ExtensionBlock._unstack
    df = series.to_frame()
    result = df.unstack(level=level, fill_value=fill_value, sort=sort)

    # equiv: result.droplevel(level=0, axis=1)
    #  but this avoids an extra copy
    result.columns = result.columns._drop_level_numbers([0])
    return result
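

# Illustrative sketch, not part of the pandas source: the EA path above is what
# lets an unstacked Series keep its extension dtype. Assuming the nullable
# "Int64" dtype, every column of the result stays Int64 instead of falling back
# to float64/object. The helper name below is hypothetical.
def _demo_unstack_preserves_extension_dtype():
    import pandas as pd

    s = pd.Series(
        pd.array([1, 2, 3, 4], dtype="Int64"),
        index=pd.MultiIndex.from_product([["a", "b"], ["x", "y"]]),
    )
    result = s.unstack()
    # Each column keeps the nullable Int64 dtype.
    assert all(str(dtype) == "Int64" for dtype in result.dtypes)
    return result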


def stack(frame: DataFrame, level=-1, dropna: bool = True, sort: bool = True):
    """
    Convert DataFrame to Series with multi-level Index. Columns become the
    second level of the resulting hierarchical index

    Returns
    -------
    stacked : Series or DataFrame
    """

    def stack_factorize(index):
        if index.is_unique:
            return index, np.arange(len(index))
        codes, categories = factorize_from_iterable(index)
        return categories, codes

    N, K = frame.shape

    # Will also convert negative level numbers and check if out of bounds.
    level_num = frame.columns._get_level_number(level)

    if isinstance(frame.columns, MultiIndex):
        return _stack_multi_columns(
            frame, level_num=level_num, dropna=dropna, sort=sort
        )
    elif isinstance(frame.index, MultiIndex):
        new_levels = list(frame.index.levels)
        new_codes = [lab.repeat(K) for lab in frame.index.codes]

        clev, clab = stack_factorize(frame.columns)
        new_levels.append(clev)
        new_codes.append(np.tile(clab, N).ravel())

        new_names = list(frame.index.names)
        new_names.append(frame.columns.name)
        new_index = MultiIndex(
            levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
        )
    else:
        levels, (ilab, clab) = zip(*map(stack_factorize, (frame.index, frame.columns)))
        codes = ilab.repeat(K), np.tile(clab, N).ravel()
        new_index = MultiIndex(
            levels=levels,
            codes=codes,
            names=[frame.index.name, frame.columns.name],
            verify_integrity=False,
        )

    new_values: ArrayLike
    if not frame.empty and frame._is_homogeneous_type:
        # For homogeneous EAs, frame._values will coerce to object. So
        # we concatenate instead.
        dtypes = list(frame.dtypes._values)
        dtype = dtypes[0]

        if isinstance(dtype, ExtensionDtype):
            arr = dtype.construct_array_type()
            new_values = arr._concat_same_type(
                [col._values for _, col in frame.items()]
            )
            new_values = _reorder_for_extension_array_stack(new_values, N, K)
        else:
            # homogeneous, non-EA
            new_values = frame._values.ravel()

    else:
        # non-homogeneous
        new_values = frame._values.ravel()

    if dropna:
        mask = notna(new_values)
        new_values = new_values[mask]
        new_index = new_index[mask]

    return frame._constructor_sliced(new_values, index=new_index)
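

# Illustrative sketch, not part of the pandas source: for flat columns the
# branch above repeats each row label K times and tiles the K column labels N
# times, so the stacked values read row by row. The helper name is hypothetical.
def _demo_stack_flat_columns():
    import pandas as pd

    df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=["r0", "r1"])
    stacked = df.stack()
    # Pairs (r0, x) -> 1, (r0, y) -> 3, (r1, x) -> 2, (r1, y) -> 4.
    assert list(stacked) == [1, 3, 2, 4]
    return stacked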


def stack_multiple(frame: DataFrame, level, dropna: bool = True, sort: bool = True):
    # If all passed levels match up to column names, no
    # ambiguity about what to do
    if all(lev in frame.columns.names for lev in level):
        result = frame
        for lev in level:
            result = stack(result, lev, dropna=dropna, sort=sort)

    # Otherwise, level numbers may change as each successive level is stacked
    elif all(isinstance(lev, int) for lev in level):
        # As each stack is done, the level numbers decrease, so we need
        # to account for that when level is a sequence of ints
        result = frame
        # _get_level_number() checks level numbers are in range and converts
        # negative numbers to positive
        level = [frame.columns._get_level_number(lev) for lev in level]

        while level:
            lev = level.pop(0)
            result = stack(result, lev, dropna=dropna, sort=sort)
            # Decrement all level numbers greater than current, as these
            # have now shifted down by one
            level = [v if v <= lev else v - 1 for v in level]

    else:
        raise ValueError(
            "level should contain all level names or all level "
            "numbers, not a mixture of the two."
        )

    return result
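

# Illustrative sketch, not part of the pandas source: stacking several column
# levels in one call peels them off one at a time, as the loop above does. A
# hedged example through the public API (hypothetical helper name):
def _demo_stack_two_column_levels():
    import pandas as pd

    cols = pd.MultiIndex.from_product([["a", "b"], ["x", "y"]], names=["l1", "l2"])
    df = pd.DataFrame([[1, 2, 3, 4]], index=["r0"], columns=cols)
    # Both column levels move into the row index, leaving a Series of length 4.
    stacked = df.stack(["l1", "l2"])
    assert stacked.shape == (4,)
    return stacked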


def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex:
    """Creates a MultiIndex from the first N-1 levels of this MultiIndex."""
    if len(columns.levels) <= 2:
        return columns.levels[0]._rename(name=columns.names[0])

    levs = [
        [lev[c] if c >= 0 else None for c in codes]
        for lev, codes in zip(columns.levels[:-1], columns.codes[:-1])
    ]

    # Remove duplicate tuples in the MultiIndex.
    tuples = zip(*levs)
    unique_tuples = (key for key, _ in itertools.groupby(tuples))
    new_levs = zip(*unique_tuples)

    # The dtype of each level must be explicitly set to avoid inferring the wrong type.
    # See GH-36991.
    return MultiIndex.from_arrays(
        [
            # Not all indices can accept None values.
            Index(new_lev, dtype=lev.dtype) if None not in new_lev else new_lev
            for new_lev, lev in zip(new_levs, columns.levels)
        ],
        names=columns.names[:-1],
    )


def _stack_multi_columns(
    frame: DataFrame, level_num: int = -1, dropna: bool = True, sort: bool = True
) -> DataFrame:
    def _convert_level_number(level_num: int, columns: Index):
        """
        Logic for converting the level number to something we can safely pass
        to swaplevel.

        If `level_num` matches a column name return the name from
        position `level_num`, otherwise return `level_num`.
        """
        if level_num in columns.names:
            return columns.names[level_num]

        return level_num

    this = frame.copy(deep=False)
    mi_cols = this.columns  # cast(MultiIndex, this.columns)
    assert isinstance(mi_cols, MultiIndex)  # caller is responsible

    # this makes life much simpler
    if level_num != mi_cols.nlevels - 1:
        # roll levels to put selected level at end
        roll_columns = mi_cols
        for i in range(level_num, mi_cols.nlevels - 1):
            # Need to check if the ints conflict with level names
            lev1 = _convert_level_number(i, roll_columns)
            lev2 = _convert_level_number(i + 1, roll_columns)
            roll_columns = roll_columns.swaplevel(lev1, lev2)
        this.columns = mi_cols = roll_columns

    if not mi_cols._is_lexsorted() and sort:
        # Workaround the edge case where 0 is one of the column names,
        # which interferes with trying to sort based on the first
        # level
        level_to_sort = _convert_level_number(0, mi_cols)
        this = this.sort_index(level=level_to_sort, axis=1)
        mi_cols = this.columns

    mi_cols = cast(MultiIndex, mi_cols)
    new_columns = _stack_multi_column_index(mi_cols)

    # time to ravel the values
    new_data = {}
    level_vals = mi_cols.levels[-1]
    level_codes = unique(mi_cols.codes[-1])
    if sort:
        level_codes = np.sort(level_codes)
    level_vals_nan = level_vals.insert(len(level_vals), None)

    level_vals_used = np.take(level_vals_nan, level_codes)
    levsize = len(level_codes)
    drop_cols = []
    for key in new_columns:
        try:
            loc = this.columns.get_loc(key)
        except KeyError:
            drop_cols.append(key)
            continue

        # can make more efficient?
        # we almost always return a slice
        # but if unsorted can get a boolean
        # indexer
        if not isinstance(loc, slice):
            slice_len = len(loc)
        else:
            slice_len = loc.stop - loc.start

        if slice_len != levsize:
            chunk = this.loc[:, this.columns[loc]]
            chunk.columns = level_vals_nan.take(chunk.columns.codes[-1])
            value_slice = chunk.reindex(columns=level_vals_used).values
        else:
            subset = this.iloc[:, loc]
            dtype = find_common_type(subset.dtypes.tolist())
            if isinstance(dtype, ExtensionDtype):
                # TODO(EA2D): won't need special case, can go through .values
                #  paths below (might change to ._values)
                value_slice = dtype.construct_array_type()._concat_same_type(
                    [x._values.astype(dtype, copy=False) for _, x in subset.items()]
                )
                N, K = subset.shape
                idx = np.arange(N * K).reshape(K, N).T.ravel()
                value_slice = value_slice.take(idx)
            else:
                value_slice = subset.values

        if value_slice.ndim > 1:
            # i.e. not extension
            value_slice = value_slice.ravel()

        new_data[key] = value_slice

    if len(drop_cols) > 0:
        new_columns = new_columns.difference(drop_cols)

    N = len(this)

    if isinstance(this.index, MultiIndex):
        new_levels = list(this.index.levels)
        new_names = list(this.index.names)
        new_codes = [lab.repeat(levsize) for lab in this.index.codes]
    else:
        old_codes, old_levels = factorize_from_iterable(this.index)
        new_levels = [old_levels]
        new_codes = [old_codes.repeat(levsize)]
        new_names = [this.index.name]  # something better?

    new_levels.append(level_vals)
    new_codes.append(np.tile(level_codes, N))
    new_names.append(frame.columns.names[level_num])

    new_index = MultiIndex(
        levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
    )

    result = frame._constructor(new_data, index=new_index, columns=new_columns)

    if frame.columns.nlevels > 1:
        desired_columns = frame.columns._drop_level_numbers([level_num]).unique()
        if not result.columns.equals(desired_columns):
            result = result[desired_columns]

    # more efficient way to go about this? can do the whole masking biz but
    # will only save a small amount of time...
    if dropna:
        result = result.dropna(axis=0, how="all")

    return result


def _reorder_for_extension_array_stack(
    arr: ExtensionArray, n_rows: int, n_columns: int
) -> ExtensionArray:
    """
    Re-orders the values when stacking multiple extension-arrays.

    The indirect stacking method used for EAs requires a followup
    take to get the order correct.

    Parameters
    ----------
    arr : ExtensionArray
    n_rows, n_columns : int
        The number of rows and columns in the original DataFrame.

    Returns
    -------
    taken : ExtensionArray
        The original `arr` with elements re-ordered appropriately

    Examples
    --------
    >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
    >>> _reorder_for_extension_array_stack(arr, 2, 3)
    array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1')

    >>> _reorder_for_extension_array_stack(arr, 3, 2)
    array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1')
    """
    # final take to get the order correct.
    # idx is an indexer like
    # [c0r0, c1r0, c2r0, ...,
    #  c0r1, c1r1, c2r1, ...]
    idx = np.arange(n_rows * n_columns).reshape(n_columns, n_rows).T.ravel()
    return arr.take(idx)


def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
    if frame.columns.nunique() != len(frame.columns):
        raise ValueError("Columns with duplicate values are not supported in stack")

    # If we need to drop `level` from columns, it needs to be in descending order
    drop_levnums = sorted(level, reverse=True)
    stack_cols = frame.columns._drop_level_numbers(
        [k for k in range(frame.columns.nlevels) if k not in level][::-1]
    )
    if len(level) > 1:
        # Arrange columns in the order we want to take them, e.g. level=[2, 0, 1]
        sorter = np.argsort(level)
        ordered_stack_cols = stack_cols._reorder_ilevels(sorter)
    else:
        ordered_stack_cols = stack_cols

    stack_cols_unique = stack_cols.unique()
    ordered_stack_cols_unique = ordered_stack_cols.unique()

    # Grab data for each unique index to be stacked
    buf = []
    for idx in stack_cols_unique:
        if len(frame.columns) == 1:
            data = frame.copy()
        else:
            # Take the data from frame corresponding to this idx value
            if len(level) == 1:
                idx = (idx,)
            gen = iter(idx)
            column_indexer = tuple(
                next(gen) if k in level else slice(None)
                for k in range(frame.columns.nlevels)
            )
            data = frame.loc[:, column_indexer]

        if len(level) < frame.columns.nlevels:
            data.columns = data.columns._drop_level_numbers(drop_levnums)
        elif stack_cols.nlevels == 1:
            if data.ndim == 1:
                data.name = 0
            else:
                data.columns = RangeIndex(len(data.columns))
        buf.append(data)

    result: Series | DataFrame
    if len(buf) > 0 and not frame.empty:
        result = concat(buf)
        ratio = len(result) // len(frame)
    else:
        # input is empty
        if len(level) < frame.columns.nlevels:
            # concat column order may be different from dropping the levels
            new_columns = frame.columns._drop_level_numbers(drop_levnums).unique()
        else:
            new_columns = [0]
        result = DataFrame(columns=new_columns, dtype=frame._values.dtype)
        ratio = 0

    if len(level) < frame.columns.nlevels:
        # concat column order may be different from dropping the levels
        desired_columns = frame.columns._drop_level_numbers(drop_levnums).unique()
        if not result.columns.equals(desired_columns):
            result = result[desired_columns]

    # Construct the correct MultiIndex by combining the frame's index and
    # stacked columns.
    index_levels: list | FrozenList
    if isinstance(frame.index, MultiIndex):
        index_levels = frame.index.levels
        index_codes = list(np.tile(frame.index.codes, (1, ratio)))
    else:
        codes, uniques = factorize(frame.index, use_na_sentinel=False)
        index_levels = [uniques]
        index_codes = list(np.tile(codes, (1, ratio)))
    if isinstance(stack_cols, MultiIndex):
        column_levels = ordered_stack_cols.levels
        column_codes = ordered_stack_cols.drop_duplicates().codes
    else:
        column_levels = [ordered_stack_cols.unique()]
        column_codes = [factorize(ordered_stack_cols_unique, use_na_sentinel=False)[0]]
    column_codes = [np.repeat(codes, len(frame)) for codes in column_codes]
    result.index = MultiIndex(
        levels=index_levels + column_levels,
        codes=index_codes + column_codes,
        names=frame.index.names + list(ordered_stack_cols.names),
        verify_integrity=False,
    )

    # sort result, but faster than calling sort_index since we know the order we need
    len_df = len(frame)
    n_uniques = len(ordered_stack_cols_unique)
    indexer = np.arange(n_uniques)
    idxs = np.tile(len_df * indexer, len_df) + np.repeat(np.arange(len_df), n_uniques)
    result = result.take(idxs)

    # Reshape/rename if needed and dropna
    if result.ndim == 2 and frame.columns.nlevels == len(level):
        if len(result.columns) == 0:
            result = Series(index=result.index)
        else:
            result = result.iloc[:, 0]
    if result.ndim == 1:
        result.name = None

    return result
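

# Illustrative sketch, not part of the pandas source: stack_v3 appears to back
# the `future_stack=True` behaviour (pandas 2.1+), which keeps NA rows instead
# of dropping them. Hypothetical helper name.
def _demo_future_stack_keeps_na():
    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"x": [1.0, np.nan], "y": [np.nan, 4.0]})
    legacy = df.stack()                   # drops the two NaN entries by default
    future = df.stack(future_stack=True)  # keeps all four entries
    assert len(legacy) == 2 and len(future) == 4
    return future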
638
lib/python3.11/site-packages/pandas/core/reshape/tile.py
Normal file
638
lib/python3.11/site-packages/pandas/core/reshape/tile.py
Normal file
@ -0,0 +1,638 @@
"""
Quantilization functions and related stuff
"""
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
)

import numpy as np

from pandas._libs import (
    Timedelta,
    Timestamp,
    lib,
)

from pandas.core.dtypes.common import (
    ensure_platform_int,
    is_bool_dtype,
    is_integer,
    is_list_like,
    is_numeric_dtype,
    is_scalar,
)
from pandas.core.dtypes.dtypes import (
    CategoricalDtype,
    DatetimeTZDtype,
    ExtensionDtype,
)
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas import (
    Categorical,
    Index,
    IntervalIndex,
)
import pandas.core.algorithms as algos
from pandas.core.arrays.datetimelike import dtype_to_unit

if TYPE_CHECKING:
    from pandas._typing import (
        DtypeObj,
        IntervalLeftRight,
    )


def cut(
    x,
    bins,
    right: bool = True,
    labels=None,
    retbins: bool = False,
    precision: int = 3,
    include_lowest: bool = False,
    duplicates: str = "raise",
    ordered: bool = True,
):
    """
    Bin values into discrete intervals.

    Use `cut` when you need to segment and sort data values into bins. This
    function is also useful for going from a continuous variable to a
    categorical variable. For example, `cut` could convert ages to groups of
    age ranges. Supports binning into an equal number of bins, or a
    pre-specified array of bins.

    Parameters
    ----------
    x : array-like
        The input array to be binned. Must be 1-dimensional.
    bins : int, sequence of scalars, or IntervalIndex
        The criteria to bin by.

        * int : Defines the number of equal-width bins in the range of `x`. The
          range of `x` is extended by .1% on each side to include the minimum
          and maximum values of `x`.
        * sequence of scalars : Defines the bin edges allowing for non-uniform
          width. No extension of the range of `x` is done.
        * IntervalIndex : Defines the exact bins to be used. Note that
          IntervalIndex for `bins` must be non-overlapping.

    right : bool, default True
        Indicates whether `bins` includes the rightmost edge or not. If
        ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
        indicate (1,2], (2,3], (3,4]. This argument is ignored when
        `bins` is an IntervalIndex.
    labels : array or False, default None
        Specifies the labels for the returned bins. Must be the same length as
        the resulting bins. If False, returns only integer indicators of the
        bins. This affects the type of the output container (see below).
        This argument is ignored when `bins` is an IntervalIndex. If True,
        raises an error. When `ordered=False`, labels must be provided.
    retbins : bool, default False
        Whether to return the bins or not. Useful when bins is provided
        as a scalar.
    precision : int, default 3
        The precision at which to store and display the bins labels.
    include_lowest : bool, default False
        Whether the first interval should be left-inclusive or not.
    duplicates : {default 'raise', 'drop'}, optional
        If bin edges are not unique, raise ValueError or drop non-uniques.
    ordered : bool, default True
        Whether the labels are ordered or not. Applies to returned types
        Categorical and Series (with Categorical dtype). If True,
        the resulting categorical will be ordered. If False, the resulting
        categorical will be unordered (labels must be provided).

    Returns
    -------
    out : Categorical, Series, or ndarray
        An array-like object representing the respective bin for each value
        of `x`. The type depends on the value of `labels`.

        * None (default) : returns a Series for Series `x` or a
          Categorical for all other inputs. The values stored within
          are Interval dtype.

        * sequence of scalars : returns a Series for Series `x` or a
          Categorical for all other inputs. The values stored within
          are whatever the type in the sequence is.

        * False : returns an ndarray of integers.

    bins : numpy.ndarray or IntervalIndex.
        The computed or specified bins. Only returned when `retbins=True`.
        For scalar or sequence `bins`, this is an ndarray with the computed
        bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For
        an IntervalIndex `bins`, this is equal to `bins`.

    See Also
    --------
    qcut : Discretize variable into equal-sized buckets based on rank
        or based on sample quantiles.
    Categorical : Array type for storing data that come from a
        fixed set of values.
    Series : One-dimensional array with axis labels (including time series).
    IntervalIndex : Immutable Index implementing an ordered, sliceable set.

    Notes
    -----
    Any NA values will be NA in the result. Out of bounds values will be NA in
    the resulting Series or Categorical object.

    Reference :ref:`the user guide <reshaping.tile.cut>` for more examples.

    Examples
    --------
    Discretize into three equal-sized bins.

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3)
    ... # doctest: +ELLIPSIS
    [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True)
    ... # doctest: +ELLIPSIS
    ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
    array([0.994, 3.   , 5.   , 7.   ]))

    Discovers the same bins, but assign them specific labels. Notice that
    the returned Categorical's categories are `labels` and is ordered.

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]),
    ...        3, labels=["bad", "medium", "good"])
    ['bad', 'good', 'medium', 'medium', 'good', 'bad']
    Categories (3, object): ['bad' < 'medium' < 'good']

    ``ordered=False`` will result in unordered categories when labels are passed.
    This parameter can be used to allow non-unique labels:

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
    ...        labels=["B", "A", "B"], ordered=False)
    ['B', 'B', 'A', 'A', 'B', 'B']
    Categories (2, object): ['A', 'B']

    ``labels=False`` implies you just want the bins back.

    >>> pd.cut([0, 1, 1, 2], bins=4, labels=False)
    array([0, 1, 1, 3])

    Passing a Series as an input returns a Series with categorical dtype:

    >>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> pd.cut(s, 3)
    ... # doctest: +ELLIPSIS
    a    (1.992, 4.667]
    b    (1.992, 4.667]
    c    (4.667, 7.333]
    d     (7.333, 10.0]
    e     (7.333, 10.0]
    dtype: category
    Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ...

    Passing a Series as an input returns a Series with mapping value.
    It is used to map numerically to intervals based on bins.

    >>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False)
    ... # doctest: +ELLIPSIS
    (a    1.0
     b    2.0
     c    3.0
     d    4.0
     e    NaN
     dtype: float64,
     array([ 0,  2,  4,  6,  8, 10]))

    Use `drop` optional when bins is not unique

    >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
    ...        right=False, duplicates='drop')
    ... # doctest: +ELLIPSIS
    (a    1.0
     b    2.0
     c    3.0
     d    3.0
     e    NaN
     dtype: float64,
     array([ 0,  2,  4,  6, 10]))

    Passing an IntervalIndex for `bins` results in those categories exactly.
    Notice that values not covered by the IntervalIndex are set to NaN. 0
    is to the left of the first bin (which is closed on the right), and 1.5
    falls between two bins.

    >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
    >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins)
    [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]]
    Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]]
    """
    # NOTE: this binning code is changed a bit from histogram for var(x) == 0

    original = x
    x_idx = _preprocess_for_cut(x)
    x_idx, _ = _coerce_to_type(x_idx)

    if not np.iterable(bins):
        bins = _nbins_to_bins(x_idx, bins, right)

    elif isinstance(bins, IntervalIndex):
        if bins.is_overlapping:
            raise ValueError("Overlapping IntervalIndex is not accepted.")

    else:
        bins = Index(bins)
        if not bins.is_monotonic_increasing:
            raise ValueError("bins must increase monotonically.")

    fac, bins = _bins_to_cuts(
        x_idx,
        bins,
        right=right,
        labels=labels,
        precision=precision,
        include_lowest=include_lowest,
        duplicates=duplicates,
        ordered=ordered,
    )

    return _postprocess_for_cut(fac, bins, retbins, original)


def qcut(
    x,
    q,
    labels=None,
    retbins: bool = False,
    precision: int = 3,
    duplicates: str = "raise",
):
    """
    Quantile-based discretization function.

    Discretize variable into equal-sized buckets based on rank or based
    on sample quantiles. For example 1000 values for 10 quantiles would
    produce a Categorical object indicating quantile membership for each data point.

    Parameters
    ----------
    x : 1d ndarray or Series
    q : int or list-like of float
        Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
        array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles.
    labels : array or False, default None
        Used as labels for the resulting bins. Must be of the same length as
        the resulting bins. If False, return only integer indicators of the
        bins. If True, raises an error.
    retbins : bool, optional
        Whether to return the (bins, labels) or not. Can be useful if bins
        is given as a scalar.
    precision : int, optional
        The precision at which to store and display the bins labels.
    duplicates : {default 'raise', 'drop'}, optional
        If bin edges are not unique, raise ValueError or drop non-uniques.

    Returns
    -------
    out : Categorical or Series or array of integers if labels is False
        The return type (Categorical or Series) depends on the input: a Series
        of type category if input is a Series else Categorical. Bins are
        represented as categories when categorical data is returned.
    bins : ndarray of floats
        Returned only if `retbins` is True.

    Notes
    -----
    Out of bounds values will be NA in the resulting Categorical object

    Examples
    --------
    >>> pd.qcut(range(5), 4)
    ... # doctest: +ELLIPSIS
    [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]]
    Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ...

    >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"])
    ... # doctest: +SKIP
    [good, good, medium, bad, bad]
    Categories (3, object): [good < medium < bad]

    >>> pd.qcut(range(5), 4, labels=False)
    array([0, 0, 1, 2, 3])
    """
    original = x
    x_idx = _preprocess_for_cut(x)
    x_idx, _ = _coerce_to_type(x_idx)

    quantiles = np.linspace(0, 1, q + 1) if is_integer(q) else q

    bins = x_idx.to_series().dropna().quantile(quantiles)

    fac, bins = _bins_to_cuts(
        x_idx,
        Index(bins),
        labels=labels,
        precision=precision,
        include_lowest=True,
        duplicates=duplicates,
    )

    return _postprocess_for_cut(fac, bins, retbins, original)


def _nbins_to_bins(x_idx: Index, nbins: int, right: bool) -> Index:
    """
    If a user passed an integer N for bins, convert this to a sequence of N
    equal(ish)-sized bins.
    """
    if is_scalar(nbins) and nbins < 1:
        raise ValueError("`bins` should be a positive integer.")

    if x_idx.size == 0:
        raise ValueError("Cannot cut empty array")

    rng = (x_idx.min(), x_idx.max())
    mn, mx = rng

    if is_numeric_dtype(x_idx.dtype) and (np.isinf(mn) or np.isinf(mx)):
        # GH#24314
        raise ValueError(
            "cannot specify integer `bins` when input data contains infinity"
        )

    if mn == mx:  # adjust end points before binning
        if _is_dt_or_td(x_idx.dtype):
            # using seconds=1 is pretty arbitrary here
            # error: Argument 1 to "dtype_to_unit" has incompatible type
            # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]"
            unit = dtype_to_unit(x_idx.dtype)  # type: ignore[arg-type]
            td = Timedelta(seconds=1).as_unit(unit)
            # Use DatetimeArray/TimedeltaArray method instead of linspace
            # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
            # has no attribute "_generate_range"
            bins = x_idx._values._generate_range(  # type: ignore[union-attr]
                start=mn - td, end=mx + td, periods=nbins + 1, freq=None, unit=unit
            )
        else:
            mn -= 0.001 * abs(mn) if mn != 0 else 0.001
            mx += 0.001 * abs(mx) if mx != 0 else 0.001

            bins = np.linspace(mn, mx, nbins + 1, endpoint=True)
    else:  # adjust end points after binning
        if _is_dt_or_td(x_idx.dtype):
            # Use DatetimeArray/TimedeltaArray method instead of linspace

            # error: Argument 1 to "dtype_to_unit" has incompatible type
            # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]"
            unit = dtype_to_unit(x_idx.dtype)  # type: ignore[arg-type]
            # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
            # has no attribute "_generate_range"
            bins = x_idx._values._generate_range(  # type: ignore[union-attr]
                start=mn, end=mx, periods=nbins + 1, freq=None, unit=unit
            )
        else:
            bins = np.linspace(mn, mx, nbins + 1, endpoint=True)
        adj = (mx - mn) * 0.001  # 0.1% of the range
        if right:
            bins[0] -= adj
        else:
            bins[-1] += adj

    return Index(bins)
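

# Illustrative sketch, not part of the pandas source: the numeric branch above
# widens one outer edge by 0.1% of the data range so the extreme value is not
# left outside its half-open bin. Hypothetical helper name.
def _demo_edge_adjustment():
    import numpy as np

    mn, mx, nbins = 1.0, 7.0, 3
    bins = np.linspace(mn, mx, nbins + 1, endpoint=True)  # [1., 3., 5., 7.]
    adj = (mx - mn) * 0.001  # 0.1% of the range
    bins[0] -= adj  # right-closed bins: stretch the left edge
    # The minimum value 1.0 now falls inside (0.994, 3.0].
    assert bins[0] < mn <= bins[1]
    return bins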


def _bins_to_cuts(
    x_idx: Index,
    bins: Index,
    right: bool = True,
    labels=None,
    precision: int = 3,
    include_lowest: bool = False,
    duplicates: str = "raise",
    ordered: bool = True,
):
    if not ordered and labels is None:
        raise ValueError("'labels' must be provided if 'ordered = False'")

    if duplicates not in ["raise", "drop"]:
        raise ValueError(
            "invalid value for 'duplicates' parameter, valid options are: raise, drop"
        )

    result: Categorical | np.ndarray

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        ids = bins.get_indexer(x_idx)
        cat_dtype = CategoricalDtype(bins, ordered=True)
        result = Categorical.from_codes(ids, dtype=cat_dtype, validate=False)
        return result, bins

    unique_bins = algos.unique(bins)
    if len(unique_bins) < len(bins) and len(bins) != 2:
        if duplicates == "raise":
            raise ValueError(
                f"Bin edges must be unique: {repr(bins)}.\n"
                f"You can drop duplicate edges by setting the 'duplicates' kwarg"
            )
        bins = unique_bins

    side: Literal["left", "right"] = "left" if right else "right"

    try:
        ids = bins.searchsorted(x_idx, side=side)
    except TypeError as err:
        # e.g. test_datetime_nan_error if bins are DatetimeArray and x_idx
        # is integers
        if x_idx.dtype.kind == "m":
            raise ValueError("bins must be of timedelta64 dtype") from err
        elif x_idx.dtype.kind == bins.dtype.kind == "M":
            raise ValueError(
                "Cannot use timezone-naive bins with timezone-aware values, "
                "or vice-versa"
            ) from err
        elif x_idx.dtype.kind == "M":
            raise ValueError("bins must be of datetime64 dtype") from err
        else:
            raise
    ids = ensure_platform_int(ids)

    if include_lowest:
        ids[x_idx == bins[0]] = 1

    na_mask = isna(x_idx) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if not (labels is None or is_list_like(labels)):
            raise ValueError(
                "Bin labels must either be False, None or passed in as a "
                "list-like argument"
            )

        if labels is None:
            labels = _format_labels(
                bins, precision, right=right, include_lowest=include_lowest
            )
        elif ordered and len(set(labels)) != len(labels):
            raise ValueError(
                "labels must be unique if ordered=True; pass ordered=False "
                "for duplicate labels"
            )
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError(
                    "Bin labels must be one fewer than the number of bin edges"
                )

        if not isinstance(getattr(labels, "dtype", None), CategoricalDtype):
            labels = Categorical(
                labels,
                categories=labels if len(set(labels)) == len(labels) else None,
                ordered=ordered,
            )
        # TODO: handle mismatch between categorical label order and pandas.cut order.
        np.putmask(ids, na_mask, 0)
        result = algos.take_nd(labels, ids - 1)

    else:
        result = ids - 1
        if has_nas:
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)

    return result, bins
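

# Illustrative sketch, not part of the pandas source: the non-IntervalIndex path
# above is essentially a searchsorted over the bin edges; an id of 0 or
# len(bins) means the value fell outside the edges and becomes NA. Hypothetical
# helper name.
def _demo_searchsorted_bin_ids():
    import numpy as np

    edges = np.array([0.0, 2.0, 4.0, 6.0])
    values = np.array([-1.0, 0.5, 2.0, 5.0, 7.0])
    ids = edges.searchsorted(values, side="left")  # right-closed bins
    # -1.0 -> 0 (below all edges) and 7.0 -> 4 == len(edges) (above all edges)
    # would both be masked as NA; 2.0 lands in the first bin (0.0, 2.0].
    assert list(ids) == [0, 1, 1, 3, 4]
    return ids - 1  # zero-based bin numbers, as when labels=False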


def _coerce_to_type(x: Index) -> tuple[Index, DtypeObj | None]:
    """
    if the passed data is of datetime/timedelta, bool or nullable int type,
    this method converts it to numeric so that cut or qcut method can
    handle it
    """
    dtype: DtypeObj | None = None

    if _is_dt_or_td(x.dtype):
        dtype = x.dtype
    elif is_bool_dtype(x.dtype):
        # GH 20303
        x = x.astype(np.int64)
    # To support cut and qcut for IntegerArray we convert to float dtype.
    # Will properly support in the future.
    # https://github.com/pandas-dev/pandas/pull/31290
    # https://github.com/pandas-dev/pandas/issues/31389
    elif isinstance(x.dtype, ExtensionDtype) and is_numeric_dtype(x.dtype):
        x_arr = x.to_numpy(dtype=np.float64, na_value=np.nan)
        x = Index(x_arr)

    return Index(x), dtype


def _is_dt_or_td(dtype: DtypeObj) -> bool:
    # Note: the dtype here comes from an Index.dtype, so we know that any
    # dt64/td64 dtype is of a supported unit.
    return isinstance(dtype, DatetimeTZDtype) or lib.is_np_dtype(dtype, "mM")


def _format_labels(
    bins: Index,
    precision: int,
    right: bool = True,
    include_lowest: bool = False,
):
    """based on the dtype, return our labels"""
    closed: IntervalLeftRight = "right" if right else "left"

    formatter: Callable[[Any], Timestamp] | Callable[[Any], Timedelta]

    if _is_dt_or_td(bins.dtype):
        # error: Argument 1 to "dtype_to_unit" has incompatible type
        # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]"
        unit = dtype_to_unit(bins.dtype)  # type: ignore[arg-type]
        formatter = lambda x: x
        adjust = lambda x: x - Timedelta(1, unit=unit).as_unit(unit)
    else:
        precision = _infer_precision(precision, bins)
        formatter = lambda x: _round_frac(x, precision)
        adjust = lambda x: x - 10 ** (-precision)

    breaks = [formatter(b) for b in bins]
    if right and include_lowest:
        # adjust lhs of first interval by precision to account for being right closed
        breaks[0] = adjust(breaks[0])

    if _is_dt_or_td(bins.dtype):
        # error: "Index" has no attribute "as_unit"
        breaks = type(bins)(breaks).as_unit(unit)  # type: ignore[attr-defined]

    return IntervalIndex.from_breaks(breaks, closed=closed)


def _preprocess_for_cut(x) -> Index:
    """
    handles preprocessing for cut where we convert passed
    input to array, strip the index information and store it
    separately
    """
    # Check that the passed array is a Pandas or Numpy object
    # We don't want to strip away a Pandas data-type here (e.g. datetimetz)
    ndim = getattr(x, "ndim", None)
    if ndim is None:
        x = np.asarray(x)
    if x.ndim != 1:
        raise ValueError("Input array must be 1 dimensional")

    return Index(x)


def _postprocess_for_cut(fac, bins, retbins: bool, original):
    """
    handles post processing for the cut method where
    we combine the index information if the originally passed
    datatype was a series
    """
    if isinstance(original, ABCSeries):
        fac = original._constructor(fac, index=original.index, name=original.name)

    if not retbins:
        return fac

    if isinstance(bins, Index) and is_numeric_dtype(bins.dtype):
        bins = bins._values

    return fac, bins


def _round_frac(x, precision: int):
    """
    Round the fractional part of the given number
    """
    if not np.isfinite(x) or x == 0:
        return x
    else:
        frac, whole = np.modf(x)
        if whole == 0:
            digits = -int(np.floor(np.log10(abs(frac)))) - 1 + precision
        else:
            digits = precision
        return np.around(x, digits)


def _infer_precision(base_precision: int, bins: Index) -> int:
    """
    Infer an appropriate precision for _round_frac
    """
    for precision in range(base_precision, 20):
        levels = np.asarray([_round_frac(b, precision) for b in bins])
        if algos.unique(levels).size == bins.size:
            return precision
    return base_precision  # default
85
lib/python3.11/site-packages/pandas/core/reshape/util.py
Normal file
85
lib/python3.11/site-packages/pandas/core/reshape/util.py
Normal file
@ -0,0 +1,85 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from pandas.core.dtypes.common import is_list_like

if TYPE_CHECKING:
    from pandas._typing import NumpyIndexT


def cartesian_product(X) -> list[np.ndarray]:
    """
    Numpy version of itertools.product.
    Sometimes faster (for large inputs)...

    Parameters
    ----------
    X : list-like of list-likes

    Returns
    -------
    product : list of ndarrays

    Examples
    --------
    >>> cartesian_product([list('ABC'), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='<U1'), array([1, 2, 1, 2, 1, 2])]

    See Also
    --------
    itertools.product : Cartesian product of input iterables. Equivalent to
        nested for-loops.
    """
    msg = "Input must be a list-like of list-likes"
    if not is_list_like(X):
        raise TypeError(msg)
    for x in X:
        if not is_list_like(x):
            raise TypeError(msg)

    if len(X) == 0:
        return []

    lenX = np.fromiter((len(x) for x in X), dtype=np.intp)
    cumprodX = np.cumprod(lenX)

    if np.any(cumprodX < 0):
        raise ValueError("Product space too large to allocate arrays!")

    a = np.roll(cumprodX, 1)
    a[0] = 1

    if cumprodX[-1] != 0:
        b = cumprodX[-1] / cumprodX
    else:
        # if any factor is empty, the cartesian product is empty
        b = np.zeros_like(cumprodX)

    # error: Argument of type "int_" cannot be assigned to parameter "num" of
    # type "int" in function "tile_compat"
    return [
        tile_compat(
            np.repeat(x, b[i]),
            np.prod(a[i]),
        )
        for i, x in enumerate(X)
    ]


def tile_compat(arr: NumpyIndexT, num: int) -> NumpyIndexT:
    """
    Index compat for np.tile.

    Notes
    -----
    Does not support multi-dimensional `num`.
    """
    if isinstance(arr, np.ndarray):
        return np.tile(arr, num)

    # Otherwise we have an Index
    taker = np.tile(np.arange(len(arr)), num)
    return arr.take(taker)
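

# Illustrative sketch, not part of the pandas source: np.tile works on ndarrays
# but not on Index objects, so tile_compat above falls back to taking a tiled
# positional indexer. Hypothetical helper name.
def _demo_tile_compat_index():
    import numpy as np
    import pandas as pd

    idx = pd.Index(["a", "b"])
    taker = np.tile(np.arange(len(idx)), 3)
    tiled = idx.take(taker)
    assert list(tiled) == ["a", "b", "a", "b", "a", "b"]
    return tiled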