This commit is contained in:
2025-09-07 22:09:54 +02:00
parent e1b817252c
commit 2fc0d000b6
7796 changed files with 2159515 additions and 933 deletions

View File

@ -0,0 +1,24 @@
from pandas.core.reshape.concat import concat as concat
from pandas.core.reshape.encoding import (
from_dummies as from_dummies,
get_dummies as get_dummies,
)
from pandas.core.reshape.melt import (
lreshape as lreshape,
melt as melt,
wide_to_long as wide_to_long,
)
from pandas.core.reshape.merge import (
merge as merge,
merge_asof as merge_asof,
merge_ordered as merge_ordered,
)
from pandas.core.reshape.pivot import (
crosstab as crosstab,
pivot as pivot,
pivot_table as pivot_table,
)
from pandas.core.reshape.tile import (
cut as cut,
qcut as qcut,
)

View File

@ -0,0 +1,179 @@
from collections.abc import (
Iterable,
Mapping,
Sequence,
)
from typing import (
Literal,
overload,
)
from pandas import (
DataFrame,
Series,
)
from typing_extensions import Never
from pandas._typing import (
S2,
Axis,
AxisIndex,
HashableT1,
HashableT2,
HashableT3,
HashableT4,
)
# Overloads for pandas.concat. Overload order is significant to type
# checkers: the element-type-specific cases (all-DataFrame, Series[S2])
# precede the broader unions, and each input shape is then repeated
# with `| None` elements. An input containing nothing but None values
# is typed as returning Never.
# All-DataFrame input (optionally a mapping keyed by hashables).
@overload
def concat( # type: ignore[overload-overlap]
    objs: Iterable[DataFrame] | Mapping[HashableT1, DataFrame],
    *,
    axis: Axis = ...,
    join: Literal["inner", "outer"] = ...,
    ignore_index: bool = ...,
    keys: Iterable[HashableT2] | None = ...,
    levels: Sequence[list[HashableT3] | tuple[HashableT3, ...]] | None = ...,
    names: list[HashableT4] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool = ...,
) -> DataFrame: ...
# Series with a concrete element type S2 preserve Series[S2]; AxisIndex
# restricts axis to the index axis for a Series result.
@overload
def concat( # pyright: ignore[reportOverlappingOverload]
    objs: Iterable[Series[S2]],
    *,
    axis: AxisIndex = ...,
    join: Literal["inner", "outer"] = ...,
    ignore_index: bool = ...,
    keys: Iterable[HashableT2] | None = ...,
    levels: Sequence[list[HashableT3] | tuple[HashableT3, ...]] | None = ...,
    names: list[HashableT4] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool = ...,
) -> Series[S2]: ...
# Series of unknown or mixed element type -> plain Series.
@overload
def concat( # type: ignore[overload-overlap]
    objs: Iterable[Series] | Mapping[HashableT1, Series],
    *,
    axis: AxisIndex = ...,
    join: Literal["inner", "outer"] = ...,
    ignore_index: bool = ...,
    keys: Iterable[HashableT2] | None = ...,
    levels: Sequence[list[HashableT3] | tuple[HashableT3, ...]] | None = ...,
    names: list[HashableT4] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool = ...,
) -> Series: ...
# Mixed Series/DataFrame input -> DataFrame.
@overload
def concat( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload]
    objs: Iterable[Series | DataFrame] | Mapping[HashableT1, Series | DataFrame],
    *,
    axis: Axis = ...,
    join: Literal["inner", "outer"] = ...,
    ignore_index: bool = ...,
    keys: Iterable[HashableT2] | None = ...,
    levels: Sequence[list[HashableT3] | tuple[HashableT3, ...]] | None = ...,
    names: list[HashableT4] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool = ...,
) -> DataFrame: ...
# Nothing but None values: no result type is possible, hence Never.
@overload
def concat(
    objs: Iterable[None] | Mapping[HashableT1, None],
    *,
    axis: Axis = ...,
    join: Literal["inner", "outer"] = ...,
    ignore_index: bool = ...,
    keys: Iterable[HashableT2] | None = ...,
    levels: Sequence[list[HashableT3] | tuple[HashableT3, ...]] | None = ...,
    names: list[HashableT4] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool = ...,
) -> Never: ...
# The remaining overloads mirror the shapes above but additionally
# permit None entries alongside the data objects.
@overload
def concat( # type: ignore[overload-overlap]
    objs: Iterable[DataFrame | None] | Mapping[HashableT1, DataFrame | None],
    *,
    axis: Axis = ...,
    join: Literal["inner", "outer"] = ...,
    ignore_index: bool = ...,
    keys: Iterable[HashableT2] | None = ...,
    levels: Sequence[list[HashableT3] | tuple[HashableT3, ...]] | None = ...,
    names: list[HashableT4] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool = ...,
) -> DataFrame: ...
@overload
def concat( # type: ignore[overload-overlap]
    objs: Iterable[Series | None] | Mapping[HashableT1, Series | None],
    *,
    axis: AxisIndex = ...,
    join: Literal["inner", "outer"] = ...,
    ignore_index: bool = ...,
    keys: Iterable[HashableT2] | None = ...,
    levels: Sequence[list[HashableT3] | tuple[HashableT3, ...]] | None = ...,
    names: list[HashableT4] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool = ...,
) -> Series: ...
@overload
def concat(
    objs: (
        Iterable[Series | DataFrame | None]
        | Mapping[HashableT1, Series | DataFrame | None]
    ),
    *,
    axis: Axis = ...,
    join: Literal["inner", "outer"] = ...,
    ignore_index: bool = ...,
    keys: Iterable[HashableT2] | None = ...,
    levels: Sequence[list[HashableT3] | tuple[HashableT3, ...]] | None = ...,
    names: list[HashableT4] | None = ...,
    verify_integrity: bool = ...,
    sort: bool = ...,
    copy: bool = ...,
) -> DataFrame: ...
# Including either of the next 2 overloads causes mypy to complain about
# test_pandas.py:test_types_concat() in assert_type(pd.concat([s, s2]), pd.Series)
# It thinks that pd.concat([s, s2]) is Any. This may be due to Series
# being Generic, or to the axis argument being unspecified, which leaves
# partial overlap with the first 2 overloads.
#
# @overload
# def concat(
# objs: Union[
# Iterable[Union[Series, DataFrame]], Mapping[HashableT, Union[Series, DataFrame]]
# ],
# axis: Literal[0, "index"] = ...,
# join: str = ...,
# ignore_index: bool = ...,
# keys=...,
# levels=...,
# names=...,
# verify_integrity: bool = ...,
# sort: bool = ...,
# copy: bool = ...,
# ) -> Union[DataFrame, Series]: ...
# @overload
# def concat(
# objs: Union[
# Iterable[Union[Series, DataFrame]], Mapping[HashableT, Union[Series, DataFrame]]
# ],
# axis: Axis = ...,
# join: str = ...,
# ignore_index: bool = ...,
# keys=...,
# levels=...,
# names=...,
# verify_integrity: bool = ...,
# sort: bool = ...,
# copy: bool = ...,
# ) -> Union[DataFrame, Series]: ...

View File

@ -0,0 +1,29 @@
from collections.abc import (
Hashable,
Iterable,
)
from pandas import DataFrame
from pandas._typing import (
AnyArrayLike,
Dtype,
HashableT1,
HashableT2,
)
# Signature stub for pandas.get_dummies (dummy/indicator-variable
# encoding). `prefix`/`prefix_sep` shape the generated column names;
# `columns` restricts encoding to the listed DataFrame columns;
# `dtype` sets the dtype of the resulting indicator columns.
def get_dummies(
    data: AnyArrayLike | DataFrame,
    prefix: str | Iterable[str] | dict[HashableT1, str] | None = None,
    prefix_sep: str = "_",
    dummy_na: bool = False,
    columns: list[HashableT2] | None = None,
    sparse: bool = False,
    drop_first: bool = False,
    dtype: Dtype | None = None,
) -> DataFrame: ...
# Signature stub for pandas.from_dummies (inverse of get_dummies per
# the pandas docs): rebuilds categorical data from indicator columns.
def from_dummies(
    data: DataFrame,
    sep: str | None = None,
    default_category: Hashable | dict[str, Hashable] | None = None,
) -> DataFrame: ...

View File

@ -0,0 +1,29 @@
from collections.abc import Hashable
import numpy as np
from pandas.core.frame import DataFrame
from pandas._typing import HashableT
# Signature stub for pandas.melt (wide-to-long unpivot of a DataFrame).
# `id_vars`/`value_vars` select identifier and measured columns;
# `var_name`/`value_name` name the generated variable/value columns.
def melt(
    frame: DataFrame,
    id_vars: tuple | list | np.ndarray | None = None,
    value_vars: tuple | list | np.ndarray | None = None,
    var_name: str | None = None,
    value_name: Hashable = "value",
    col_level: int | str | None = None,
    ignore_index: bool = True,
) -> DataFrame: ...
# Signature stub for pandas.lreshape: reshape wide data to long using
# an explicit {result column -> source columns} `groups` mapping.
def lreshape(
    data: DataFrame,
    groups: dict[HashableT, list[HashableT]],
    dropna: bool = True,
) -> DataFrame: ...
# Signature stub for pandas.wide_to_long. `stubnames` are the wide
# column-name prefixes, `i` the id column(s), `j` the new sub-
# observation column; `suffix` is a regex (default matches digits).
def wide_to_long(
    df: DataFrame,
    stubnames: str | list[str],
    i: str | list[str],
    j: str,
    sep: str = "",
    suffix: str = "\\d+",
) -> DataFrame: ...

View File

@ -0,0 +1,93 @@
from datetime import timedelta
from typing import (
Literal,
overload,
)
from pandas import (
DataFrame,
Series,
Timedelta,
)
from pandas._typing import (
AnyArrayLike,
HashableT,
JoinHow,
Label,
MergeHow,
Suffixes,
ValidationOptions,
)
# Signature stub for pandas.merge (database-style join of two objects).
# NOTE(review): unlike the other parameters, which spell out their
# runtime defaults, `suffixes` defaults to `...` -- confirm this is
# intentional (Suffixes' default may have no literal spelling here).
def merge(
    left: DataFrame | Series,
    right: DataFrame | Series,
    how: MergeHow = "inner",
    on: Label | list[HashableT] | AnyArrayLike | None = None,
    left_on: Label | list[HashableT] | AnyArrayLike | None = None,
    right_on: Label | list[HashableT] | AnyArrayLike | None = None,
    left_index: bool = False,
    right_index: bool = False,
    sort: bool = False,
    suffixes: Suffixes = ...,
    indicator: bool | str = False,
    validate: ValidationOptions | None = None,
) -> DataFrame: ...
# Overloads for pandas.merge_ordered. Grouped merging via
# left_by/right_by is typed as available only in the
# DataFrame-x-DataFrame case; when a Series is involved on either
# side, both grouping parameters are pinned to None.
@overload
def merge_ordered(
    left: DataFrame,
    right: DataFrame,
    on: Label | list[HashableT] | None = ...,
    left_on: Label | list[HashableT] | None = ...,
    right_on: Label | list[HashableT] | None = ...,
    left_by: Label | list[HashableT] | None = ...,
    right_by: Label | list[HashableT] | None = ...,
    fill_method: Literal["ffill"] | None = ...,
    suffixes: Suffixes = ...,
    how: JoinHow = ...,
) -> DataFrame: ...
# Series on the left: no by-grouping allowed.
@overload
def merge_ordered(
    left: Series,
    right: DataFrame | Series,
    on: Label | list[HashableT] | None = ...,
    left_on: Label | list[HashableT] | None = ...,
    right_on: Label | list[HashableT] | None = ...,
    left_by: None = ...,
    right_by: None = ...,
    fill_method: Literal["ffill"] | None = ...,
    # NOTE(review): this spelled-out union looks intended to match the
    # Suffixes alias used by the sibling overloads -- confirm and unify.
    suffixes: (
        list[str | None] | tuple[str, str] | tuple[None, str] | tuple[str, None]
    ) = ...,
    how: JoinHow = ...,
) -> DataFrame: ...
# Series on the right: no by-grouping allowed.
@overload
def merge_ordered(
    left: DataFrame | Series,
    right: Series,
    on: Label | list[HashableT] | None = ...,
    left_on: Label | list[HashableT] | None = ...,
    right_on: Label | list[HashableT] | None = ...,
    left_by: None = ...,
    right_by: None = ...,
    fill_method: Literal["ffill"] | None = ...,
    suffixes: Suffixes = ...,
    how: JoinHow = ...,
) -> DataFrame: ...
# Signature stub for pandas.merge_asof (ordered nearest-key join).
# `tolerance` accepts int or timedelta-like bounds on the match
# distance; `direction` selects backward/forward/nearest matching.
def merge_asof(
    left: DataFrame | Series,
    right: DataFrame | Series,
    on: Label | None = None,
    left_on: Label | None = None,
    right_on: Label | None = None,
    left_index: bool = False,
    right_index: bool = False,
    by: Label | list[HashableT] | None = None,
    left_by: Label | list[HashableT] | None = None,
    right_by: Label | list[HashableT] | None = None,
    suffixes: Suffixes = ...,
    tolerance: int | timedelta | Timedelta | None = None,
    allow_exact_matches: bool = True,
    direction: Literal["backward", "forward", "nearest"] = "backward",
) -> DataFrame: ...

View File

@ -0,0 +1,149 @@
from collections.abc import (
Callable,
Hashable,
Mapping,
Sequence,
)
import datetime
from typing import (
Literal,
overload,
)
import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.base import Index
from pandas.core.series import Series
from typing_extensions import TypeAlias
from pandas._typing import (
AnyArrayLike,
ArrayLike,
HashableT1,
HashableT2,
HashableT3,
Label,
Scalar,
ScalarT,
npt,
)
# Aliases shared by the pivot_table/pivot/crosstab signatures below.
# A per-group aggregation callable: Series in, scalar out.
_PivotAggCallable: TypeAlias = Callable[[Series], ScalarT]
# An aggregation spec: callable, NumPy ufunc, or a named reduction.
_PivotAggFunc: TypeAlias = (
    _PivotAggCallable
    | np.ufunc
    | Literal["mean", "sum", "count", "min", "max", "median", "std", "var"]
)
# Concrete non-iterable hashable scalar types, spelled out so that
# iterable hashables (e.g. tuples) do not match.
_NonIterableHashable: TypeAlias = (
    str
    | datetime.date
    | datetime.datetime
    | datetime.timedelta
    | bool
    | int
    | float
    | complex
    | pd.Timestamp
    | pd.Timedelta
)
# Accepted forms of pivot_table's index/columns/values arguments.
_PivotTableIndexTypes: TypeAlias = (
    Label | Sequence[HashableT1] | Series | Grouper | None
)
_PivotTableColumnsTypes: TypeAlias = (
    Label | Sequence[HashableT2] | Series | Grouper | None
)
_PivotTableValuesTypes: TypeAlias = Label | Sequence[HashableT3] | None
# Either a pandas array-like or a raw array-like.
_ExtendedAnyArrayLike: TypeAlias = AnyArrayLike | ArrayLike
# Overloads for pandas.pivot_table. The base case takes the standard
# index/columns forms; the two keyword-only variants widen the *other*
# axis to also accept Index or an ndarray, which is only permitted
# when this axis is a Grouper (see note below).
@overload
def pivot_table(
    data: DataFrame,
    values: _PivotTableValuesTypes = ...,
    index: _PivotTableIndexTypes = ...,
    columns: _PivotTableColumnsTypes = ...,
    aggfunc: (
        _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc]
    ) = ...,
    fill_value: Scalar | None = ...,
    margins: bool = ...,
    dropna: bool = ...,
    margins_name: str = ...,
    observed: bool = ...,
    sort: bool = ...,
) -> DataFrame: ...
# Can only use Index or ndarray when index or columns is a Grouper
@overload
def pivot_table(
    data: DataFrame,
    values: _PivotTableValuesTypes = ...,
    *,
    index: Grouper,
    columns: _PivotTableColumnsTypes | Index | npt.NDArray = ...,
    aggfunc: (
        _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc]
    ) = ...,
    fill_value: Scalar | None = ...,
    margins: bool = ...,
    dropna: bool = ...,
    margins_name: str = ...,
    observed: bool = ...,
    sort: bool = ...,
) -> DataFrame: ...
@overload
def pivot_table(
    data: DataFrame,
    values: _PivotTableValuesTypes = ...,
    index: _PivotTableIndexTypes | Index | npt.NDArray = ...,
    *,
    columns: Grouper,
    aggfunc: (
        _PivotAggFunc | Sequence[_PivotAggFunc] | Mapping[Hashable, _PivotAggFunc]
    ) = ...,
    fill_value: Scalar | None = ...,
    margins: bool = ...,
    dropna: bool = ...,
    margins_name: str = ...,
    observed: bool = ...,
    sort: bool = ...,
) -> DataFrame: ...
# Signature stub for pandas.pivot (reshape without aggregation).
# All three selectors are keyword-only; each accepts either a single
# non-iterable key or a sequence of keys.
def pivot(
    data: DataFrame,
    *,
    index: _NonIterableHashable | Sequence[HashableT1] = ...,
    columns: _NonIterableHashable | Sequence[HashableT2] = ...,
    values: _NonIterableHashable | Sequence[HashableT3] = ...,
) -> DataFrame: ...
# Overloads for pandas.crosstab. When `values` is supplied, `aggfunc`
# is required (keyword-only) to aggregate it; without values, aggfunc
# must be None and the table holds plain frequencies.
@overload
def crosstab(
    index: list | _ExtendedAnyArrayLike | list[Sequence | _ExtendedAnyArrayLike],
    columns: list | _ExtendedAnyArrayLike | list[Sequence | _ExtendedAnyArrayLike],
    values: list | _ExtendedAnyArrayLike,
    rownames: list[HashableT1] | None = ...,
    colnames: list[HashableT2] | None = ...,
    *,
    aggfunc: str | np.ufunc | Callable[[Series], float],
    margins: bool = ...,
    margins_name: str = ...,
    dropna: bool = ...,
    normalize: bool | Literal[0, 1, "all", "index", "columns"] = ...,
) -> DataFrame: ...
# Frequency table: no values, no aggregation.
@overload
def crosstab(
    index: list | _ExtendedAnyArrayLike | list[Sequence | _ExtendedAnyArrayLike],
    columns: list | _ExtendedAnyArrayLike | list[Sequence | _ExtendedAnyArrayLike],
    values: None = ...,
    rownames: list[HashableT1] | None = ...,
    colnames: list[HashableT2] | None = ...,
    aggfunc: None = ...,
    margins: bool = ...,
    margins_name: str = ...,
    dropna: bool = ...,
    normalize: bool | Literal[0, 1, "all", "index", "columns"] = ...,
) -> DataFrame: ...

View File

@ -0,0 +1,273 @@
from collections.abc import Sequence
from typing import (
Literal,
overload,
)
import numpy as np
from pandas import (
Categorical,
CategoricalDtype,
DatetimeIndex,
Index,
Interval,
IntervalIndex,
Series,
Timestamp,
)
from pandas.core.series import TimestampSeries
from pandas._typing import (
IntervalT,
Label,
npt,
)
# Overloads for pandas.cut. The return type is driven by three inputs:
#  * labels=False -> integer bin codes instead of labelled categories
#  * retbins=True -> a (result, bins) 2-tuple; the bins component is an
#    ndarray, DatetimeIndex, or IntervalIndex depending on the input
#  * the type of x -> array-like inputs yield ndarray/Categorical,
#    Series inputs yield Series
# retbins=True variants come first.
# Array-like x, numeric bins, labels=False -> (codes, bin edges).
@overload
def cut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    bins: int | Series | Index[int] | Index[float] | Sequence[int] | Sequence[float],
    right: bool = ...,
    *,
    labels: Literal[False],
    retbins: Literal[True],
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray]: ...
# Explicit IntervalIndex bins are returned unchanged in type.
@overload
def cut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    bins: IntervalIndex[IntervalT],
    right: bool = ...,
    *,
    labels: Literal[False],
    retbins: Literal[True],
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> tuple[npt.NDArray[np.intp], IntervalIndex[IntervalT]]: ...
# Datetime-valued Series with datetime-like bins -> DatetimeIndex edges.
@overload
def cut( # pyright: ignore[reportOverlappingOverload]
    x: TimestampSeries,
    bins: (
        int
        | TimestampSeries
        | DatetimeIndex
        | Sequence[Timestamp]
        | Sequence[np.datetime64]
    ),
    right: bool = ...,
    labels: Literal[False] | Sequence[Label] | None = ...,
    *,
    retbins: Literal[True],
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> tuple[Series, DatetimeIndex]: ...
# Datetime-valued Series with Timestamp interval bins.
@overload
def cut(
    x: TimestampSeries,
    bins: IntervalIndex[Interval[Timestamp]],
    right: bool = ...,
    labels: Sequence[Label] | None = ...,
    *,
    retbins: Literal[True],
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> tuple[Series, DatetimeIndex]: ...
# General Series with numeric bins -> (Series, ndarray edges).
@overload
def cut(
    x: Series,
    bins: int | Series | Index[int] | Index[float] | Sequence[int] | Sequence[float],
    right: bool = ...,
    labels: Literal[False] | Sequence[Label] | None = ...,
    *,
    retbins: Literal[True],
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> tuple[Series, npt.NDArray]: ...
# General Series with numeric interval bins -> (Series, IntervalIndex).
@overload
def cut(
    x: Series,
    bins: IntervalIndex[Interval[int]] | IntervalIndex[Interval[float]],
    right: bool = ...,
    labels: Sequence[Label] | None = ...,
    *,
    retbins: Literal[True],
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> tuple[Series, npt.NDArray]: ...
# Array-like x with labels -> (Categorical, ndarray edges).
@overload
def cut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    bins: int | Series | Index[int] | Index[float] | Sequence[int] | Sequence[float],
    right: bool = ...,
    labels: Sequence[Label] | None = ...,
    *,
    retbins: Literal[True],
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> tuple[Categorical, npt.NDArray]: ...
# Array-like x with IntervalIndex bins -> (Categorical, same bins type).
@overload
def cut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    bins: IntervalIndex[IntervalT],
    right: bool = ...,
    labels: Sequence[Label] | None = ...,
    *,
    retbins: Literal[True],
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> tuple[Categorical, IntervalIndex[IntervalT]]: ...
# retbins=False variants (result only) follow.
# Array-like x, labels=False -> integer codes only.
@overload
def cut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    bins: (
        int
        | Series
        | Index[int]
        | Index[float]
        | Sequence[int]
        | Sequence[float]
        | IntervalIndex
    ),
    right: bool = ...,
    *,
    labels: Literal[False],
    retbins: Literal[False] = ...,
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> npt.NDArray[np.intp]: ...
# Datetime-valued Series -> categorical-dtyped Series.
@overload
def cut(
    x: TimestampSeries,
    bins: (
        int
        | TimestampSeries
        | DatetimeIndex
        | Sequence[Timestamp]
        | Sequence[np.datetime64]
        | IntervalIndex[Interval[Timestamp]]
    ),
    right: bool = ...,
    labels: Literal[False] | Sequence[Label] | None = ...,
    retbins: Literal[False] = ...,
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> Series[CategoricalDtype]: ...
# General Series -> Series.
@overload
def cut(
    x: Series,
    bins: (
        int
        | Series
        | Index[int]
        | Index[float]
        | Sequence[int]
        | Sequence[float]
        | IntervalIndex
    ),
    right: bool = ...,
    labels: Literal[False] | Sequence[Label] | None = ...,
    retbins: Literal[False] = ...,
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> Series: ...
# Array-like x with labels -> Categorical.
@overload
def cut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    bins: (
        int
        | Series
        | Index[int]
        | Index[float]
        | Sequence[int]
        | Sequence[float]
        | IntervalIndex
    ),
    right: bool = ...,
    labels: Sequence[Label] | None = ...,
    retbins: Literal[False] = ...,
    precision: int = ...,
    include_lowest: bool = ...,
    duplicates: Literal["raise", "drop"] = ...,
    ordered: bool = ...,
) -> Categorical: ...
# Overloads for pandas.qcut (quantile-based binning). Return types
# follow the same scheme as cut: labels=False yields integer codes,
# retbins=True yields a (result, bin edges) tuple, and Series input
# yields a Series result.
# Array-like x, labels=False -> integer codes.
@overload
def qcut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    q: int | Sequence[float] | Series[float] | Index[float] | npt.NDArray,
    *,
    labels: Literal[False],
    retbins: Literal[False] = ...,
    precision: int = ...,
    duplicates: Literal["raise", "drop"] = ...,
) -> npt.NDArray[np.intp]: ...
# Array-like x with labels -> Categorical.
@overload
def qcut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    q: int | Sequence[float] | Series[float] | Index[float] | npt.NDArray,
    labels: Sequence[Label] | None = ...,
    retbins: Literal[False] = ...,
    precision: int = ...,
    duplicates: Literal["raise", "drop"] = ...,
) -> Categorical: ...
# Series x -> Series.
@overload
def qcut(
    x: Series,
    q: int | Sequence[float] | Series[float] | Index[float] | npt.NDArray,
    labels: Literal[False] | Sequence[Label] | None = ...,
    retbins: Literal[False] = ...,
    precision: int = ...,
    duplicates: Literal["raise", "drop"] = ...,
) -> Series: ...
# retbins=True variants: result plus computed quantile edges.
@overload
def qcut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    q: int | Sequence[float] | Series[float] | Index[float] | npt.NDArray,
    *,
    labels: Literal[False],
    retbins: Literal[True],
    precision: int = ...,
    duplicates: Literal["raise", "drop"] = ...,
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.double]]: ...
@overload
def qcut(
    x: Series,
    q: int | Sequence[float] | Series[float] | Index[float] | npt.NDArray,
    labels: Literal[False] | Sequence[Label] | None = ...,
    *,
    retbins: Literal[True],
    precision: int = ...,
    duplicates: Literal["raise", "drop"] = ...,
) -> tuple[Series, npt.NDArray[np.double]]: ...
@overload
def qcut(
    x: Index | npt.NDArray | Sequence[int] | Sequence[float],
    q: int | Sequence[float] | Series[float] | Index[float] | npt.NDArray,
    labels: Sequence[Label] | None = ...,
    *,
    retbins: Literal[True],
    precision: int = ...,
    duplicates: Literal["raise", "drop"] = ...,
) -> tuple[Categorical, npt.NDArray[np.double]]: ...

View File

@ -0,0 +1 @@
# Untyped signature stub. X is presumably a sequence of array-likes
# whose cartesian product is computed -- TODO confirm against the
# runtime implementation and add annotations.
def cartesian_product(X): ...