lib/python3.11/site-packages/narwhals/_pandas_like/namespace.py (new file, 441 lines)
@@ -0,0 +1,441 @@
from __future__ import annotations

import operator
import warnings
from functools import reduce
from itertools import chain
from typing import TYPE_CHECKING, Any, Literal, Protocol, overload

from narwhals._compliant import CompliantThen, EagerNamespace, EagerWhen
from narwhals._expression_parsing import (
    combine_alias_output_names,
    combine_evaluate_output_names,
)
from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals._pandas_like.selectors import PandasSelectorNamespace
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals._pandas_like.typing import NativeDataFrameT, NativeSeriesT
from narwhals._pandas_like.utils import is_non_nullable_boolean
from narwhals._utils import zip_strict

if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence

    from typing_extensions import TypeAlias

    from narwhals._compliant.typing import ScalarKwargs
    from narwhals._utils import Implementation, Version
    from narwhals.typing import IntoDType, NonNestedLiteral


Incomplete: TypeAlias = Any
"""Escape hatch, but leaving a trace that this isn't ideal."""


_Vertical: TypeAlias = Literal[0]
_Horizontal: TypeAlias = Literal[1]
Axis: TypeAlias = Literal[_Vertical, _Horizontal]

VERTICAL: _Vertical = 0
HORIZONTAL: _Horizontal = 1


class PandasLikeNamespace(
    EagerNamespace[
        PandasLikeDataFrame,
        PandasLikeSeries,
        PandasLikeExpr,
        NativeDataFrameT,
        NativeSeriesT,
    ]
):
    @property
    def _dataframe(self) -> type[PandasLikeDataFrame]:
        return PandasLikeDataFrame

    @property
    def _expr(self) -> type[PandasLikeExpr]:
        return PandasLikeExpr

    @property
    def _series(self) -> type[PandasLikeSeries]:
        return PandasLikeSeries

    @property
    def selectors(self) -> PandasSelectorNamespace:
        return PandasSelectorNamespace.from_namespace(self)

    def __init__(self, implementation: Implementation, version: Version) -> None:
        self._implementation = implementation
        self._version = version

    def coalesce(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            align = self._series._align_full_broadcast
            series = align(*(s for _expr in exprs for s in _expr(df)))
            return [
                reduce(lambda x, y: x.fill_null(y, strategy=None, limit=None), series)
            ]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="coalesce",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )
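
    # Illustration (hypothetical row values, not from the original source):
    # `coalesce` keeps the first non-null value per row across the inputs, e.g.
    #   [1, None, None], [None, 2, None], [9, 9, 9]  ->  [1, 2, 9]
    # because each successive series only fills the nulls left by the previous one.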

    def lit(self, value: NonNestedLiteral, dtype: IntoDType | None) -> PandasLikeExpr:
        def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:
            pandas_series = self._series.from_iterable(
                data=[value],
                name="literal",
                index=df._native_frame.index[0:1],
                context=self,
            )
            if dtype:
                return pandas_series.cast(dtype)
            return pandas_series

        return PandasLikeExpr(
            lambda df: [_lit_pandas_series(df)],
            depth=0,
            function_name="lit",
            evaluate_output_names=lambda _df: ["literal"],
            alias_output_names=None,
            implementation=self._implementation,
            version=self._version,
        )

    def len(self) -> PandasLikeExpr:
        return PandasLikeExpr(
            lambda df: [
                self._series.from_iterable(
                    [len(df._native_frame)], name="len", index=[0], context=self
                )
            ],
            depth=0,
            function_name="len",
            evaluate_output_names=lambda _df: ["len"],
            alias_output_names=None,
            implementation=self._implementation,
            version=self._version,
        )

    # --- horizontal ---
    def sum_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            align = self._series._align_full_broadcast
            it = chain.from_iterable(expr(df) for expr in exprs)
            series = align(*it)
            native_series = (s.fill_null(0, None, None) for s in series)
            return [reduce(operator.add, native_series)]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="sum_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )
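
    # Illustration (hypothetical row values): nulls are filled with 0 before the
    # reduction above, so a row of inputs (1, None, 3) sums to 4 and a row where
    # every input is null sums to 0 rather than null.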

    def all_horizontal(
        self, *exprs: PandasLikeExpr, ignore_nulls: bool
    ) -> PandasLikeExpr:
        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            align = self._series._align_full_broadcast
            series = [s for _expr in exprs for s in _expr(df)]
            if not ignore_nulls and any(
                s.native.dtype == "object" and s.is_null().any() for s in series
            ):
                # classical NumPy boolean columns don't support missing values, so
                # only do the full scan with `is_null` if we have `object` dtype.
                msg = "Cannot use `ignore_nulls=False` in `all_horizontal` for non-nullable NumPy-backed pandas Series when nulls are present."
                raise ValueError(msg)
            it = (
                (
                    # NumPy-backed 'bool' dtype can't contain nulls so doesn't need filling.
                    s if is_non_nullable_boolean(s) else s.fill_null(True, None, None)
                    for s in series
                )
                if ignore_nulls
                else iter(series)
            )
            return [reduce(operator.and_, align(*it))]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="all_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def any_horizontal(
        self, *exprs: PandasLikeExpr, ignore_nulls: bool
    ) -> PandasLikeExpr:
        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            align = self._series._align_full_broadcast
            series = [s for _expr in exprs for s in _expr(df)]
            if not ignore_nulls and any(
                s.native.dtype == "object" and s.is_null().any() for s in series
            ):
                # classical NumPy boolean columns don't support missing values, so
                # only do the full scan with `is_null` if we have `object` dtype.
                msg = "Cannot use `ignore_nulls=False` in `any_horizontal` for non-nullable NumPy-backed pandas Series when nulls are present."
                raise ValueError(msg)
            it = (
                (
                    # NumPy-backed 'bool' dtype can't contain nulls so doesn't need filling.
                    s if is_non_nullable_boolean(s) else s.fill_null(False, None, None)
                    for s in series
                )
                if ignore_nulls
                else iter(series)
            )
            return [reduce(operator.or_, align(*it))]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="any_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def mean_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            expr_results = [s for _expr in exprs for s in _expr(df)]
            align = self._series._align_full_broadcast
            series = align(
                *(s.fill_null(0, strategy=None, limit=None) for s in expr_results)
            )
            non_na = align(*(1 - s.is_null() for s in expr_results))
            return [reduce(operator.add, series) / reduce(operator.add, non_na)]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="mean_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )
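
    # Illustration (hypothetical row values): nulls are excluded from both the
    # numerator (filled with 0) and the non-null count, so a row of inputs
    # (1, None, 3) gives (1 + 0 + 3) / 2 == 2.0.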

    def min_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            it = chain.from_iterable(expr(df) for expr in exprs)
            align = self._series._align_full_broadcast
            series = align(*it)

            return [
                PandasLikeSeries(
                    self.concat(
                        (s.to_frame() for s in series), how="horizontal"
                    )._native_frame.min(axis=1),
                    implementation=self._implementation,
                    version=self._version,
                ).alias(series[0].name)
            ]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="min_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    def max_horizontal(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            it = chain.from_iterable(expr(df) for expr in exprs)
            align = self._series._align_full_broadcast
            series = align(*it)

            return [
                PandasLikeSeries(
                    self.concat(
                        (s.to_frame() for s in series), how="horizontal"
                    ).native.max(axis=1),
                    implementation=self._implementation,
                    version=self._version,
                ).alias(series[0].name)
            ]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="max_horizontal",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )

    @property
    def _concat(self) -> _NativeConcat[NativeDataFrameT, NativeSeriesT]:
        """Concatenate pandas objects along a particular axis.

        Return the **native** equivalent of `pd.concat`.
        """
        return self._implementation.to_native_namespace().concat
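
    # Illustration (assumes a plain pandas backend): `to_native_namespace()` then
    # returns the `pandas` module itself, so `self._concat(dfs, axis=HORIZONTAL)`
    # is simply `pandas.concat(dfs, axis=1)` over the native frames/series.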

    def _concat_diagonal(self, dfs: Sequence[NativeDataFrameT], /) -> NativeDataFrameT:
        if self._implementation.is_pandas() and self._backend_version < (3,):
            return self._concat(dfs, axis=VERTICAL, copy=False)
        return self._concat(dfs, axis=VERTICAL)

    def _concat_horizontal(
        self, dfs: Sequence[NativeDataFrameT | NativeSeriesT], /
    ) -> NativeDataFrameT:
        if self._implementation.is_cudf():
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore",
                    message="The behavior of array concatenation with empty entries is deprecated",
                    category=FutureWarning,
                )
                return self._concat(dfs, axis=HORIZONTAL)
        elif self._implementation.is_pandas() and self._backend_version < (3,):
            return self._concat(dfs, axis=HORIZONTAL, copy=False)
        return self._concat(dfs, axis=HORIZONTAL)

    def _concat_vertical(self, dfs: Sequence[NativeDataFrameT], /) -> NativeDataFrameT:
        cols_0 = dfs[0].columns
        for i, df in enumerate(dfs[1:], start=1):
            cols_current = df.columns
            if not (
                (len(cols_current) == len(cols_0)) and (cols_current == cols_0).all()
            ):
                msg = (
                    "unable to vstack, column names don't match:\n"
                    f" - dataframe 0: {cols_0.to_list()}\n"
                    f" - dataframe {i}: {cols_current.to_list()}\n"
                )
                raise TypeError(msg)
        if self._implementation.is_pandas() and self._backend_version < (3,):
            return self._concat(dfs, axis=VERTICAL, copy=False)
        return self._concat(dfs, axis=VERTICAL)

    def when(self, predicate: PandasLikeExpr) -> PandasWhen[NativeSeriesT]:
        return PandasWhen[NativeSeriesT].from_expr(predicate, context=self)

    def concat_str(
        self, *exprs: PandasLikeExpr, separator: str, ignore_nulls: bool
    ) -> PandasLikeExpr:
        string = self._version.dtypes.String()

        def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
            expr_results = [s for _expr in exprs for s in _expr(df)]
            align = self._series._align_full_broadcast
            series = align(*(s.cast(string) for s in expr_results))
            null_mask = align(*(s.is_null() for s in expr_results))

            if not ignore_nulls:
                null_mask_result = reduce(operator.or_, null_mask)
                result = reduce(lambda x, y: x + separator + y, series).zip_with(
                    ~null_mask_result, None
                )
            else:
                # NOTE: Trying to help `mypy` later
                # error: Cannot determine type of "values"  [has-type]
                values: list[PandasLikeSeries]
                init_value, *values = [
                    s.zip_with(~nm, "") for s, nm in zip_strict(series, null_mask)
                ]

                sep_array = init_value.from_iterable(
                    data=[separator] * len(init_value),
                    name="sep",
                    index=init_value.native.index,
                    context=self,
                )
                separators = (sep_array.zip_with(~nm, "") for nm in null_mask[:-1])
                result = reduce(
                    operator.add,
                    (s + v for s, v in zip_strict(separators, values)),
                    init_value,
                )

            return [result]

        return self._expr._from_callable(
            func=func,
            depth=max(x._depth for x in exprs) + 1,
            function_name="concat_str",
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            context=self,
        )
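
    # Illustration (hypothetical row values) with separator="-": for inputs
    # ("a", None, "c"), ignore_nulls=True yields "a-c" (null entries and their
    # separators are masked out above), while ignore_nulls=False yields a null
    # result because the combined null mask marks the whole row.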


class _NativeConcat(Protocol[NativeDataFrameT, NativeSeriesT]):
    @overload
    def __call__(
        self,
        objs: Iterable[NativeDataFrameT],
        *,
        axis: _Vertical,
        copy: bool | None = ...,
    ) -> NativeDataFrameT: ...
    @overload
    def __call__(
        self, objs: Iterable[NativeSeriesT], *, axis: _Vertical, copy: bool | None = ...
    ) -> NativeSeriesT: ...
    @overload
    def __call__(
        self,
        objs: Iterable[NativeDataFrameT | NativeSeriesT],
        *,
        axis: _Horizontal,
        copy: bool | None = ...,
    ) -> NativeDataFrameT: ...
    @overload
    def __call__(
        self,
        objs: Iterable[NativeDataFrameT | NativeSeriesT],
        *,
        axis: Axis,
        copy: bool | None = ...,
    ) -> NativeDataFrameT | NativeSeriesT: ...

    def __call__(
        self,
        objs: Iterable[NativeDataFrameT | NativeSeriesT],
        *,
        axis: Axis,
        copy: bool | None = None,
    ) -> NativeDataFrameT | NativeSeriesT: ...


class PandasWhen(
    EagerWhen[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr, NativeSeriesT]
):
    @property
    # Signature of "_then" incompatible with supertype "CompliantWhen"
    # ArrowWhen seems to follow the same pattern, but no mypy complaint there?
    def _then(self) -> type[PandasThen]:  # type: ignore[override]
        return PandasThen

    def _if_then_else(
        self,
        when: NativeSeriesT,
        then: NativeSeriesT,
        otherwise: NativeSeriesT | NonNestedLiteral,
    ) -> NativeSeriesT:
        where: Incomplete = then.where
        return where(when) if otherwise is None else where(when, otherwise)
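
    # Illustration (assumes a native pandas Series): `Series.where(cond, other)`
    # keeps values where `cond` is True and substitutes `other` elsewhere
    # (missing values when `other` is omitted), which is why an absent
    # `otherwise` produces nulls here.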


class PandasThen(
    CompliantThen[PandasLikeDataFrame, PandasLikeSeries, PandasLikeExpr, PandasWhen],
    PandasLikeExpr,
):
    _depth: int = 0
    _scalar_kwargs: ScalarKwargs = {}  # noqa: RUF012
    _function_name: str = "whenthen"