done
This commit is contained in:
411
lib/python3.11/site-packages/narwhals/_compliant/series.py
Normal file
411
lib/python3.11/site-packages/narwhals/_compliant/series.py
Normal file
@ -0,0 +1,411 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Generic, Literal, Protocol
|
||||
|
||||
from narwhals._compliant.any_namespace import (
|
||||
CatNamespace,
|
||||
DateTimeNamespace,
|
||||
ListNamespace,
|
||||
StringNamespace,
|
||||
StructNamespace,
|
||||
)
|
||||
from narwhals._compliant.column import CompliantColumn
|
||||
from narwhals._compliant.typing import (
|
||||
CompliantSeriesT_co,
|
||||
EagerDataFrameAny,
|
||||
EagerSeriesT_co,
|
||||
NativeSeriesT,
|
||||
NativeSeriesT_co,
|
||||
)
|
||||
from narwhals._translate import FromIterable, FromNative, NumpyConvertible, ToNarwhals
|
||||
from narwhals._typing_compat import TypeVar, assert_never
|
||||
from narwhals._utils import (
|
||||
_StoresCompliant,
|
||||
_StoresNative,
|
||||
is_compliant_series,
|
||||
is_sized_multi_index_selector,
|
||||
unstable,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable, Iterator, Sequence
|
||||
from types import ModuleType
|
||||
|
||||
import pandas as pd
|
||||
import polars as pl
|
||||
import pyarrow as pa
|
||||
from typing_extensions import NotRequired, Self, TypedDict
|
||||
|
||||
from narwhals._compliant.dataframe import CompliantDataFrame
|
||||
from narwhals._compliant.namespace import EagerNamespace
|
||||
from narwhals._utils import Implementation, Version, _LimitedContext
|
||||
from narwhals.dtypes import DType
|
||||
from narwhals.series import Series
|
||||
from narwhals.typing import (
|
||||
Into1DArray,
|
||||
IntoDType,
|
||||
MultiIndexSelector,
|
||||
RollingInterpolationMethod,
|
||||
SizedMultiIndexSelector,
|
||||
_1DArray,
|
||||
_SliceIndex,
|
||||
)
|
||||
|
||||
class HistData(TypedDict, Generic[NativeSeriesT, "_CountsT_co"]):
|
||||
breakpoint: NotRequired[list[float] | _1DArray | list[Any]]
|
||||
count: NativeSeriesT | _1DArray | _CountsT_co | list[Any]
|
||||
|
||||
|
||||
_CountsT_co = TypeVar("_CountsT_co", bound="Iterable[Any]", covariant=True)
|
||||
|
||||
__all__ = [
|
||||
"CompliantSeries",
|
||||
"EagerSeries",
|
||||
"EagerSeriesCatNamespace",
|
||||
"EagerSeriesDateTimeNamespace",
|
||||
"EagerSeriesHist",
|
||||
"EagerSeriesListNamespace",
|
||||
"EagerSeriesNamespace",
|
||||
"EagerSeriesStringNamespace",
|
||||
"EagerSeriesStructNamespace",
|
||||
]
|
||||
|
||||
|
||||
class CompliantSeries(
|
||||
NumpyConvertible["_1DArray", "Into1DArray"],
|
||||
FromIterable,
|
||||
FromNative[NativeSeriesT],
|
||||
ToNarwhals["Series[NativeSeriesT]"],
|
||||
CompliantColumn,
|
||||
Protocol[NativeSeriesT],
|
||||
):
|
||||
# NOTE: `narwhals`
|
||||
_implementation: Implementation
|
||||
|
||||
@property
|
||||
def native(self) -> NativeSeriesT: ...
|
||||
def __narwhals_series__(self) -> Self:
|
||||
return self
|
||||
|
||||
def __native_namespace__(self) -> ModuleType: ...
|
||||
@classmethod
|
||||
def from_native(cls, data: NativeSeriesT, /, *, context: _LimitedContext) -> Self: ...
|
||||
def to_narwhals(self) -> Series[NativeSeriesT]:
|
||||
return self._version.series(self, level="full")
|
||||
|
||||
def _with_native(self, series: Any) -> Self: ...
|
||||
def _with_version(self, version: Version) -> Self: ...
|
||||
|
||||
# NOTE: `polars`
|
||||
@property
|
||||
def dtype(self) -> DType: ...
|
||||
@property
|
||||
def name(self) -> str: ...
|
||||
def __array__(self, dtype: Any, *, copy: bool | None) -> _1DArray: ...
|
||||
def __contains__(self, other: Any) -> bool: ...
|
||||
def __getitem__(self, item: MultiIndexSelector[Self]) -> Any: ...
|
||||
def __iter__(self) -> Iterator[Any]: ...
|
||||
def __len__(self) -> int:
|
||||
return len(self.native)
|
||||
|
||||
@classmethod
|
||||
def from_numpy(cls, data: Into1DArray, /, *, context: _LimitedContext) -> Self: ...
|
||||
@classmethod
|
||||
def from_iterable(
|
||||
cls,
|
||||
data: Iterable[Any],
|
||||
/,
|
||||
*,
|
||||
context: _LimitedContext,
|
||||
name: str = "",
|
||||
dtype: IntoDType | None = None,
|
||||
) -> Self: ...
|
||||
def __radd__(self, other: Any) -> Self: ...
|
||||
def __rand__(self, other: Any) -> Self: ...
|
||||
def __rmul__(self, other: Any) -> Self: ...
|
||||
def __ror__(self, other: Any) -> Self: ...
|
||||
def all(self) -> bool: ...
|
||||
def any(self) -> bool: ...
|
||||
def arg_max(self) -> int: ...
|
||||
def arg_min(self) -> int: ...
|
||||
def arg_true(self) -> Self: ...
|
||||
def count(self) -> int: ...
|
||||
def filter(self, predicate: Any) -> Self: ...
|
||||
def gather_every(self, n: int, offset: int) -> Self: ...
|
||||
def head(self, n: int) -> Self: ...
|
||||
def is_empty(self) -> bool:
|
||||
return self.len() == 0
|
||||
|
||||
def is_sorted(self, *, descending: bool) -> bool: ...
|
||||
def item(self, index: int | None) -> Any: ...
|
||||
def kurtosis(self) -> float | None: ...
|
||||
def len(self) -> int: ...
|
||||
def max(self) -> Any: ...
|
||||
def mean(self) -> float: ...
|
||||
def median(self) -> float: ...
|
||||
def min(self) -> Any: ...
|
||||
def n_unique(self) -> int: ...
|
||||
def null_count(self) -> int: ...
|
||||
def quantile(
|
||||
self, quantile: float, interpolation: RollingInterpolationMethod
|
||||
) -> float: ...
|
||||
def sample(
|
||||
self,
|
||||
n: int | None,
|
||||
*,
|
||||
fraction: float | None,
|
||||
with_replacement: bool,
|
||||
seed: int | None,
|
||||
) -> Self: ...
|
||||
def scatter(self, indices: int | Sequence[int], values: Any) -> Self: ...
|
||||
def shift(self, n: int) -> Self: ...
|
||||
def skew(self) -> float | None: ...
|
||||
def sort(self, *, descending: bool, nulls_last: bool) -> Self: ...
|
||||
def std(self, *, ddof: int) -> float: ...
|
||||
def sum(self) -> float: ...
|
||||
def tail(self, n: int) -> Self: ...
|
||||
def to_arrow(self) -> pa.Array[Any]: ...
|
||||
def to_dummies(
|
||||
self, *, separator: str, drop_first: bool
|
||||
) -> CompliantDataFrame[Self, Any, Any, Any]: ...
|
||||
def to_frame(self) -> CompliantDataFrame[Self, Any, Any, Any]: ...
|
||||
def to_list(self) -> list[Any]: ...
|
||||
def to_pandas(self) -> pd.Series[Any]: ...
|
||||
def to_polars(self) -> pl.Series: ...
|
||||
def unique(self, *, maintain_order: bool = False) -> Self: ...
|
||||
def value_counts(
|
||||
self, *, sort: bool, parallel: bool, name: str | None, normalize: bool
|
||||
) -> CompliantDataFrame[Self, Any, Any, Any]: ...
|
||||
def var(self, *, ddof: int) -> float: ...
|
||||
def zip_with(self, mask: Any, other: Any) -> Self: ...
|
||||
|
||||
# NOTE: *Technically* `polars`
|
||||
@unstable
|
||||
def hist_from_bins(
|
||||
self, bins: list[float], *, include_breakpoint: bool
|
||||
) -> CompliantDataFrame[Self, Any, Any, Any]:
|
||||
"""`Series.hist(bins=..., bin_count=None)`."""
|
||||
...
|
||||
|
||||
@unstable
|
||||
def hist_from_bin_count(
|
||||
self, bin_count: int, *, include_breakpoint: bool
|
||||
) -> CompliantDataFrame[Self, Any, Any, Any]:
|
||||
"""`Series.hist(bins=None, bin_count=...)`."""
|
||||
...
|
||||
|
||||
|
||||
class EagerSeries(CompliantSeries[NativeSeriesT], Protocol[NativeSeriesT]):
|
||||
_native_series: Any
|
||||
_implementation: Implementation
|
||||
_version: Version
|
||||
_broadcast: bool
|
||||
|
||||
@property
|
||||
def _backend_version(self) -> tuple[int, ...]:
|
||||
return self._implementation._backend_version()
|
||||
|
||||
@classmethod
|
||||
def _align_full_broadcast(cls, *series: Self) -> Sequence[Self]:
|
||||
"""Ensure all of `series` have the same length (and index if `pandas`).
|
||||
|
||||
Scalars get broadcasted to the full length of the longest Series.
|
||||
|
||||
This is useful when you need to construct a full Series anyway, such as:
|
||||
|
||||
DataFrame.select(...)
|
||||
|
||||
It should not be used in binary operations, such as:
|
||||
|
||||
nw.col("a") - nw.col("a").mean()
|
||||
|
||||
because then it's more efficient to extract the right-hand-side's single element as a scalar.
|
||||
"""
|
||||
...
|
||||
|
||||
def _from_scalar(self, value: Any) -> Self:
|
||||
return self.from_iterable([value], name=self.name, context=self)
|
||||
|
||||
def _with_native(
|
||||
self, series: NativeSeriesT, *, preserve_broadcast: bool = False
|
||||
) -> Self:
|
||||
"""Return a new `CompliantSeries`, wrapping the native `series`.
|
||||
|
||||
In cases when operations are known to not affect whether a result should
|
||||
be broadcast, we can pass `preserve_broadcast=True`.
|
||||
Set this with care - it should only be set for unary expressions which don't
|
||||
change length or order, such as `.alias` or `.fill_null`. If in doubt, don't
|
||||
set it, you probably don't need it.
|
||||
"""
|
||||
...
|
||||
|
||||
def __narwhals_namespace__(
|
||||
self,
|
||||
) -> EagerNamespace[Any, Self, Any, Any, NativeSeriesT]: ...
|
||||
|
||||
def _gather(self, rows: SizedMultiIndexSelector[NativeSeriesT]) -> Self: ...
|
||||
def _gather_slice(self, rows: _SliceIndex | range) -> Self: ...
|
||||
def __getitem__(self, item: MultiIndexSelector[Self]) -> Self:
|
||||
if isinstance(item, (slice, range)):
|
||||
return self._gather_slice(item)
|
||||
if is_compliant_series(item):
|
||||
return self._gather(item.native)
|
||||
elif is_sized_multi_index_selector(item): # noqa: RET505
|
||||
return self._gather(item)
|
||||
assert_never(item)
|
||||
|
||||
@property
|
||||
def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT]: ...
|
||||
@property
|
||||
def dt(self) -> EagerSeriesDateTimeNamespace[Self, NativeSeriesT]: ...
|
||||
@property
|
||||
def cat(self) -> EagerSeriesCatNamespace[Self, NativeSeriesT]: ...
|
||||
@property
|
||||
def list(self) -> EagerSeriesListNamespace[Self, NativeSeriesT]: ...
|
||||
@property
|
||||
def struct(self) -> EagerSeriesStructNamespace[Self, NativeSeriesT]: ...
|
||||
|
||||
|
||||
class _SeriesNamespace( # type: ignore[misc]
|
||||
_StoresCompliant[CompliantSeriesT_co],
|
||||
_StoresNative[NativeSeriesT_co],
|
||||
Protocol[CompliantSeriesT_co, NativeSeriesT_co],
|
||||
):
|
||||
_compliant_series: CompliantSeriesT_co
|
||||
|
||||
@property
|
||||
def compliant(self) -> CompliantSeriesT_co:
|
||||
return self._compliant_series
|
||||
|
||||
@property
|
||||
def implementation(self) -> Implementation:
|
||||
return self.compliant._implementation
|
||||
|
||||
@property
|
||||
def backend_version(self) -> tuple[int, ...]:
|
||||
return self.implementation._backend_version()
|
||||
|
||||
@property
|
||||
def version(self) -> Version:
|
||||
return self.compliant._version
|
||||
|
||||
@property
|
||||
def native(self) -> NativeSeriesT_co:
|
||||
return self._compliant_series.native # type: ignore[no-any-return]
|
||||
|
||||
def with_native(self, series: Any, /) -> CompliantSeriesT_co:
|
||||
return self.compliant._with_native(series)
|
||||
|
||||
|
||||
class EagerSeriesNamespace(
|
||||
_SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
|
||||
Generic[EagerSeriesT_co, NativeSeriesT_co],
|
||||
):
|
||||
_compliant_series: EagerSeriesT_co
|
||||
|
||||
def __init__(self, series: EagerSeriesT_co, /) -> None:
|
||||
self._compliant_series = series
|
||||
|
||||
|
||||
class EagerSeriesCatNamespace( # type: ignore[misc]
|
||||
_SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
|
||||
CatNamespace[EagerSeriesT_co],
|
||||
Protocol[EagerSeriesT_co, NativeSeriesT_co],
|
||||
): ...
|
||||
|
||||
|
||||
class EagerSeriesDateTimeNamespace( # type: ignore[misc]
|
||||
_SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
|
||||
DateTimeNamespace[EagerSeriesT_co],
|
||||
Protocol[EagerSeriesT_co, NativeSeriesT_co],
|
||||
): ...
|
||||
|
||||
|
||||
class EagerSeriesListNamespace( # type: ignore[misc]
|
||||
_SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
|
||||
ListNamespace[EagerSeriesT_co],
|
||||
Protocol[EagerSeriesT_co, NativeSeriesT_co],
|
||||
): ...
|
||||
|
||||
|
||||
class EagerSeriesStringNamespace( # type: ignore[misc]
|
||||
_SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
|
||||
StringNamespace[EagerSeriesT_co],
|
||||
Protocol[EagerSeriesT_co, NativeSeriesT_co],
|
||||
): ...
|
||||
|
||||
|
||||
class EagerSeriesStructNamespace( # type: ignore[misc]
|
||||
_SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
|
||||
StructNamespace[EagerSeriesT_co],
|
||||
Protocol[EagerSeriesT_co, NativeSeriesT_co],
|
||||
): ...
|
||||
|
||||
|
||||
class EagerSeriesHist(Protocol[NativeSeriesT, _CountsT_co]):
|
||||
_series: EagerSeries[NativeSeriesT]
|
||||
_breakpoint: bool
|
||||
_data: HistData[NativeSeriesT, _CountsT_co]
|
||||
|
||||
@property
|
||||
def native(self) -> NativeSeriesT:
|
||||
return self._series.native
|
||||
|
||||
@classmethod
|
||||
def from_series(
|
||||
cls, series: EagerSeries[NativeSeriesT], *, include_breakpoint: bool
|
||||
) -> Self:
|
||||
obj = cls.__new__(cls)
|
||||
obj._series = series
|
||||
obj._breakpoint = include_breakpoint
|
||||
return obj
|
||||
|
||||
def to_frame(self) -> EagerDataFrameAny: ...
|
||||
def _linear_space( # NOTE: Roughly `pl.linear_space`
|
||||
self,
|
||||
start: float,
|
||||
end: float,
|
||||
num_samples: int,
|
||||
*,
|
||||
closed: Literal["both", "none"] = "both",
|
||||
) -> _1DArray: ...
|
||||
|
||||
# NOTE: *Could* be handled at narwhals-level
|
||||
def is_empty_series(self) -> bool: ...
|
||||
|
||||
# NOTE: **Should** be handled at narwhals-level
|
||||
def data_empty(self) -> HistData[NativeSeriesT, _CountsT_co]:
|
||||
return {"breakpoint": [], "count": []} if self._breakpoint else {"count": []}
|
||||
|
||||
# NOTE: *Could* be handled at narwhals-level, **iff** we add `nw.repeat`, `nw.linear_space`
|
||||
# See https://github.com/narwhals-dev/narwhals/pull/2839#discussion_r2215630696
|
||||
def series_empty(
|
||||
self, arg: int | list[float], /
|
||||
) -> HistData[NativeSeriesT, _CountsT_co]: ...
|
||||
|
||||
def with_bins(self, bins: list[float], /) -> Self:
|
||||
if len(bins) <= 1:
|
||||
self._data = self.data_empty()
|
||||
elif self.is_empty_series():
|
||||
self._data = self.series_empty(bins)
|
||||
else:
|
||||
self._data = self._calculate_hist(bins)
|
||||
return self
|
||||
|
||||
def with_bin_count(self, bin_count: int, /) -> Self:
|
||||
if bin_count == 0:
|
||||
self._data = self.data_empty()
|
||||
elif self.is_empty_series():
|
||||
self._data = self.series_empty(bin_count)
|
||||
else:
|
||||
self._data = self._calculate_hist(self._calculate_bins(bin_count))
|
||||
return self
|
||||
|
||||
def _calculate_breakpoint(self, arg: int | list[float], /) -> list[float] | _1DArray:
|
||||
bins = self._linear_space(0, 1, arg + 1) if isinstance(arg, int) else arg
|
||||
return bins[1:]
|
||||
|
||||
def _calculate_bins(self, bin_count: int) -> _1DArray: ...
|
||||
def _calculate_hist(
|
||||
self, bins: list[float] | _1DArray
|
||||
) -> HistData[NativeSeriesT, _CountsT_co]: ...
|
Reference in New Issue
Block a user