lib/python3.11/site-packages/narwhals/_ibis/dataframe.py (new file, 432 lines)
@@ -0,0 +1,432 @@
from __future__ import annotations

import operator
from io import BytesIO
from typing import TYPE_CHECKING, Any, Literal, cast

import ibis
import ibis.expr.types as ir

from narwhals._ibis.utils import evaluate_exprs, native_to_narwhals_dtype
from narwhals._sql.dataframe import SQLLazyFrame
from narwhals._utils import (
    Implementation,
    ValidateBackendVersion,
    Version,
    not_implemented,
    parse_columns_to_drop,
    zip_strict,
)
from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError

if TYPE_CHECKING:
    from collections.abc import Iterable, Iterator, Mapping, Sequence
    from pathlib import Path
    from types import ModuleType

    import pandas as pd
    import pyarrow as pa
    from ibis.expr.operations import Binary
    from typing_extensions import Self, TypeAlias, TypeIs

    from narwhals._compliant.typing import CompliantDataFrameAny
    from narwhals._ibis.expr import IbisExpr
    from narwhals._ibis.group_by import IbisGroupBy
    from narwhals._ibis.namespace import IbisNamespace
    from narwhals._ibis.series import IbisInterchangeSeries
    from narwhals._typing import _EagerAllowedImpl
    from narwhals._utils import _LimitedContext
    from narwhals.dataframe import LazyFrame
    from narwhals.dtypes import DType
    from narwhals.stable.v1 import DataFrame as DataFrameV1
    from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy

JoinPredicates: TypeAlias = "Sequence[ir.BooleanColumn] | Sequence[str]"


class IbisLazyFrame(
    SQLLazyFrame["IbisExpr", "ir.Table", "LazyFrame[ir.Table] | DataFrameV1[ir.Table]"],
    ValidateBackendVersion,
):
    _implementation = Implementation.IBIS

    def __init__(
        self, df: ir.Table, *, version: Version, validate_backend_version: bool = False
    ) -> None:
        self._native_frame: ir.Table = df
        self._version = version
        self._cached_schema: dict[str, DType] | None = None
        self._cached_columns: list[str] | None = None
        if validate_backend_version:
            self._validate_backend_version()

    @staticmethod
    def _is_native(obj: ir.Table | Any) -> TypeIs[ir.Table]:
        return isinstance(obj, ir.Table)

    @classmethod
    def from_native(cls, data: ir.Table, /, *, context: _LimitedContext) -> Self:
        return cls(data, version=context._version)

    def to_narwhals(self) -> LazyFrame[ir.Table] | DataFrameV1[ir.Table]:
        if self._version is Version.V1:
            from narwhals.stable.v1 import DataFrame

            return DataFrame(self, level="interchange")
        return self._version.lazyframe(self, level="lazy")

    def __narwhals_dataframe__(self) -> Self:  # pragma: no cover
        # Keep around for backcompat.
        if self._version is not Version.V1:
            msg = "__narwhals_dataframe__ is not implemented for IbisLazyFrame"
            raise AttributeError(msg)
        return self

    def __narwhals_lazyframe__(self) -> Self:
        return self

    def __native_namespace__(self) -> ModuleType:
        return ibis

    def __narwhals_namespace__(self) -> IbisNamespace:
        from narwhals._ibis.namespace import IbisNamespace

        return IbisNamespace(version=self._version)

    def get_column(self, name: str) -> IbisInterchangeSeries:
        from narwhals._ibis.series import IbisInterchangeSeries

        return IbisInterchangeSeries(self.native.select(name), version=self._version)

    def _iter_columns(self) -> Iterator[ir.Expr]:
        for name in self.columns:
            yield self.native[name]

    def collect(
        self, backend: _EagerAllowedImpl | None, **kwargs: Any
    ) -> CompliantDataFrameAny:
        if backend is None or backend is Implementation.PYARROW:
            from narwhals._arrow.dataframe import ArrowDataFrame

            return ArrowDataFrame(
                self.native.to_pyarrow(),
                validate_backend_version=True,
                version=self._version,
                validate_column_names=True,
            )

        if backend is Implementation.PANDAS:
            from narwhals._pandas_like.dataframe import PandasLikeDataFrame

            return PandasLikeDataFrame(
                self.native.to_pandas(),
                implementation=Implementation.PANDAS,
                validate_backend_version=True,
                version=self._version,
                validate_column_names=True,
            )

        if backend is Implementation.POLARS:
            from narwhals._polars.dataframe import PolarsDataFrame

            return PolarsDataFrame(
                self.native.to_polars(),
                validate_backend_version=True,
                version=self._version,
            )

        msg = f"Unsupported `backend` value: {backend}"  # pragma: no cover
        raise ValueError(msg)  # pragma: no cover

    def head(self, n: int) -> Self:
        return self._with_native(self.native.head(n))

    def simple_select(self, *column_names: str) -> Self:
        return self._with_native(self.native.select(*column_names))

    def aggregate(self, *exprs: IbisExpr) -> Self:
        selection = [
            cast("ir.Scalar", val.name(name))
            for name, val in evaluate_exprs(self, *exprs)
        ]
        return self._with_native(self.native.aggregate(selection))

    def select(self, *exprs: IbisExpr) -> Self:
        selection = [val.name(name) for name, val in evaluate_exprs(self, *exprs)]
        if not selection:
            msg = "At least one expression must be provided to `select` with the Ibis backend."
            raise ValueError(msg)

        t = self.native.select(*selection)
        return self._with_native(t)

    def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
        columns_to_drop = parse_columns_to_drop(self, columns, strict=strict)
        selection = (col for col in self.columns if col not in columns_to_drop)
        return self._with_native(self.native.select(*selection))

    def lazy(self, backend: None = None, **_: None) -> Self:
        # The `backend` argument has no effect but we keep it here for
        # backwards compatibility because in `narwhals.stable.v1`
        # function `.from_native()` will return a DataFrame for Ibis.

        if backend is not None:  # pragma: no cover
            msg = "`backend` argument is not supported for Ibis"
            raise ValueError(msg)
        return self

    def with_columns(self, *exprs: IbisExpr) -> Self:
        new_columns_map = dict(evaluate_exprs(self, *exprs))
        return self._with_native(self.native.mutate(**new_columns_map))

    def filter(self, predicate: IbisExpr) -> Self:
        # `[0]` is safe as the predicate's expression only returns a single column
        mask = cast("ir.BooleanValue", predicate(self)[0])
        return self._with_native(self.native.filter(mask))

    @property
    def schema(self) -> dict[str, DType]:
        if self._cached_schema is None:
            # Note: prefer `self._cached_schema` over `functools.cached_property`
            # due to Python3.13 failures.
            self._cached_schema = {
                name: native_to_narwhals_dtype(dtype, self._version)
                for name, dtype in self.native.schema().fields.items()
            }
        return self._cached_schema

    @property
    def columns(self) -> list[str]:
        if self._cached_columns is None:
            self._cached_columns = (
                list(self.schema)
                if self._cached_schema is not None
                else list(self.native.columns)
            )
        return self._cached_columns

    def to_pandas(self) -> pd.DataFrame:
        # only if version is v1, keep around for backcompat
        return self.native.to_pandas()

    def to_arrow(self) -> pa.Table:
        # only if version is v1, keep around for backcompat
        return self.native.to_pyarrow()

    def _with_version(self, version: Version) -> Self:
        return self.__class__(self.native, version=version)

    def _with_native(self, df: ir.Table) -> Self:
        return self.__class__(df, version=self._version)

    def group_by(
        self, keys: Sequence[str] | Sequence[IbisExpr], *, drop_null_keys: bool
    ) -> IbisGroupBy:
        from narwhals._ibis.group_by import IbisGroupBy

        return IbisGroupBy(self, keys, drop_null_keys=drop_null_keys)

    def rename(self, mapping: Mapping[str, str]) -> Self:
        def _rename(col: str) -> str:
            return mapping.get(col, col)

        return self._with_native(self.native.rename(_rename))

    @staticmethod
    def _join_drop_duplicate_columns(df: ir.Table, columns: Iterable[str], /) -> ir.Table:
        """Ibis adds a suffix to the right table col, even when it matches the left during a join."""
        duplicates = set(df.columns).intersection(columns)
        return df.drop(*duplicates) if duplicates else df

    def join(
        self,
        other: Self,
        *,
        how: JoinStrategy,
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self:
        how_native = "outer" if how == "full" else how
        rname = "{name}" + suffix
        if other == self:
            # Ibis does not support self-references unless created as a view
            other = self._with_native(other.native.view())
        if how_native == "cross":
            joined = self.native.join(other.native, how=how_native, rname=rname)
            return self._with_native(joined)
        # help mypy
        assert left_on is not None  # noqa: S101
        assert right_on is not None  # noqa: S101
        predicates = self._convert_predicates(other, left_on, right_on)
        joined = self.native.join(other.native, predicates, how=how_native, rname=rname)
        if how_native == "left":
            right_names = (n + suffix for n in right_on)
            joined = self._join_drop_duplicate_columns(joined, right_names)
            it = (cast("Binary", p.op()) for p in predicates if not isinstance(p, str))
            to_drop = []
            for pred in it:
                right = pred.right.name
                # Mirrors how polars works.
                if right not in self.columns and pred.left.name != right:
                    to_drop.append(right)
            if to_drop:
                joined = joined.drop(*to_drop)
        return self._with_native(joined)

    def join_asof(
        self,
        other: Self,
        *,
        left_on: str,
        right_on: str,
        by_left: Sequence[str] | None,
        by_right: Sequence[str] | None,
        strategy: AsofJoinStrategy,
        suffix: str,
    ) -> Self:
        rname = "{name}" + suffix
        strategy_op = {"backward": operator.ge, "forward": operator.le}
        predicates: JoinPredicates = []
        if op := strategy_op.get(strategy):
            on: ir.BooleanColumn = op(self.native[left_on], other.native[right_on])
        else:
            msg = "Only `backward` and `forward` strategies are currently supported for Ibis"
            raise NotImplementedError(msg)
        if by_left is not None and by_right is not None:
            predicates = self._convert_predicates(other, by_left, by_right)
        joined = self.native.asof_join(other.native, on, predicates, rname=rname)
        joined = self._join_drop_duplicate_columns(joined, [right_on + suffix])
        if by_right is not None:
            right_names = (n + suffix for n in by_right)
            joined = self._join_drop_duplicate_columns(joined, right_names)
        return self._with_native(joined)

    def _convert_predicates(
        self, other: Self, left_on: Sequence[str], right_on: Sequence[str]
    ) -> JoinPredicates:
        if left_on == right_on:
            return left_on
        return [
            cast("ir.BooleanColumn", (self.native[left] == other.native[right]))
            for left, right in zip_strict(left_on, right_on)
        ]

    def collect_schema(self) -> dict[str, DType]:
        return {
            name: native_to_narwhals_dtype(dtype, self._version)
            for name, dtype in self.native.schema().fields.items()
        }

    def unique(
        self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
    ) -> Self:
        if subset_ := subset if keep == "any" else (subset or self.columns):
            # Sanitise input
            if any(x not in self.columns for x in subset_):
                msg = f"Columns {set(subset_).difference(self.columns)} not found in {self.columns}."
                raise ColumnNotFoundError(msg)

            mapped_keep: dict[str, Literal["first"] | None] = {
                "any": "first",
                "none": None,
            }
            to_keep = mapped_keep[keep]
            return self._with_native(self.native.distinct(on=subset_, keep=to_keep))
        return self._with_native(self.native.distinct(on=subset))

    def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
        if isinstance(descending, bool):
            descending = [descending for _ in range(len(by))]

        sort_cols: list[Any] = []

        for i in range(len(by)):
            direction_fn = ibis.desc if descending[i] else ibis.asc
            col = direction_fn(by[i], nulls_first=not nulls_last)
            sort_cols.append(col)

        return self._with_native(self.native.order_by(*sort_cols))

    def top_k(self, k: int, *, by: Iterable[str], reverse: bool | Sequence[bool]) -> Self:
        if isinstance(reverse, bool):
            reverse = [reverse] * len(list(by))
        sort_cols = []

        for is_reverse, by_col in zip_strict(reverse, by):
            direction_fn = ibis.asc if is_reverse else ibis.desc
            col = direction_fn(by_col, nulls_first=False)
            sort_cols.append(cast("ir.Column", col))

        return self._with_native(self.native.order_by(*sort_cols).head(k))

    def drop_nulls(self, subset: Sequence[str] | None) -> Self:
        subset_ = subset if subset is not None else self.columns
        return self._with_native(self.native.drop_null(subset_))

    def explode(self, columns: Sequence[str]) -> Self:
        dtypes = self._version.dtypes
        schema = self.collect_schema()
        for col in columns:
            dtype = schema[col]

            if dtype != dtypes.List:
                msg = (
                    f"`explode` operation not supported for dtype `{dtype}`, "
                    "expected List type"
                )
                raise InvalidOperationError(msg)

        if len(columns) != 1:
            msg = (
                "Exploding on multiple columns is not supported with Ibis backend since "
                "we cannot guarantee that the exploded columns have matching element counts."
            )
            raise NotImplementedError(msg)

        return self._with_native(self.native.unnest(columns[0], keep_empty=True))

    def unpivot(
        self,
        on: Sequence[str] | None,
        index: Sequence[str] | None,
        variable_name: str,
        value_name: str,
    ) -> Self:
        import ibis.selectors as s

        index_: Sequence[str] = [] if index is None else index
        on_: Sequence[str] = (
            [c for c in self.columns if c not in index_] if on is None else on
        )

        # Discard columns not in the index
        final_columns = list(dict.fromkeys([*index_, variable_name, value_name]))

        unpivoted = self.native.pivot_longer(
            s.cols(*on_), names_to=variable_name, values_to=value_name
        )
        return self._with_native(unpivoted.select(*final_columns))

    def with_row_index(self, name: str, order_by: Sequence[str]) -> Self:
        to_select = [
            ibis.row_number().over(ibis.window(order_by=order_by)).name(name),
            ibis.selectors.all(),
        ]
        return self._with_native(self.native.select(*to_select))

    def sink_parquet(self, file: str | Path | BytesIO) -> None:
        if isinstance(file, BytesIO):  # pragma: no cover
            msg = "Writing to BytesIO is not supported for Ibis backend."
            raise NotImplementedError(msg)
        self.native.to_parquet(file)

    gather_every = not_implemented.deprecated(
        "`LazyFrame.gather_every` is deprecated and will be removed in a future version."
    )
    tail = not_implemented.deprecated(
        "`LazyFrame.tail` is deprecated and will be removed in a future version."
    )

    # Intentionally not implemented, as Ibis does its own expression rewriting.
    _evaluate_window_expr = not_implemented()
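Illustrative sketch (not part of the committed files): how `IbisLazyFrame` is typically reached through the public narwhals API, assuming a narwhals build that includes the Ibis lazy backend from this diff and a hypothetical in-memory `ibis.memtable`. Calling `collect()` with no backend argument takes the PyArrow branch of `IbisLazyFrame.collect` shown above.

import ibis
import narwhals as nw

# hypothetical example data; any ibis.Table works the same way
tbl = ibis.memtable({"a": [1, 2, 3], "b": ["x", "y", "z"]})

lf = nw.from_native(tbl)  # wrapped as an IbisLazyFrame under the hood
result = (
    lf.with_columns((nw.col("a") * 2).alias("a2"))
    .select("a2", "b")
    .collect()  # no backend given -> converted via Table.to_pyarrow()
)
print(result.to_native())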
lib/python3.11/site-packages/narwhals/_ibis/expr.py (new file, 347 lines)
@@ -0,0 +1,347 @@
from __future__ import annotations

import operator
from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast

import ibis

from narwhals._ibis.expr_dt import IbisExprDateTimeNamespace
from narwhals._ibis.expr_list import IbisExprListNamespace
from narwhals._ibis.expr_str import IbisExprStringNamespace
from narwhals._ibis.expr_struct import IbisExprStructNamespace
from narwhals._ibis.utils import is_floating, lit, narwhals_to_native_dtype
from narwhals._sql.expr import SQLExpr
from narwhals._utils import Implementation, Version, not_implemented, zip_strict

if TYPE_CHECKING:
    from collections.abc import Iterator, Sequence

    import ibis.expr.types as ir
    from typing_extensions import Self

    from narwhals._compliant import WindowInputs
    from narwhals._compliant.typing import (
        AliasNames,
        EvalNames,
        EvalSeries,
        WindowFunction,
    )
    from narwhals._expression_parsing import ExprKind, ExprMetadata
    from narwhals._ibis.dataframe import IbisLazyFrame
    from narwhals._ibis.namespace import IbisNamespace
    from narwhals._utils import _LimitedContext
    from narwhals.typing import IntoDType, RankMethod, RollingInterpolationMethod

    ExprT = TypeVar("ExprT", bound=ir.Value)
    IbisWindowFunction = WindowFunction[IbisLazyFrame, ir.Value]
    IbisWindowInputs = WindowInputs[ir.Value]


class IbisExpr(SQLExpr["IbisLazyFrame", "ir.Value"]):
    _implementation = Implementation.IBIS

    def __init__(
        self,
        call: EvalSeries[IbisLazyFrame, ir.Value],
        window_function: IbisWindowFunction | None = None,
        *,
        evaluate_output_names: EvalNames[IbisLazyFrame],
        alias_output_names: AliasNames | None,
        version: Version,
        implementation: Implementation = Implementation.IBIS,
    ) -> None:
        self._call = call
        self._evaluate_output_names = evaluate_output_names
        self._alias_output_names = alias_output_names
        self._version = version
        self._metadata: ExprMetadata | None = None
        self._window_function: IbisWindowFunction | None = window_function

    @property
    def window_function(self) -> IbisWindowFunction:
        def default_window_func(
            df: IbisLazyFrame, window_inputs: IbisWindowInputs
        ) -> Sequence[ir.Value]:
            return [
                expr.over(
                    ibis.window(
                        group_by=window_inputs.partition_by,
                        order_by=self._sort(*window_inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._window_function or default_window_func

    def _window_expression(
        self,
        expr: ir.Value,
        partition_by: Sequence[str | ir.Value] = (),
        order_by: Sequence[str | ir.Column] = (),
        rows_start: int | None = None,
        rows_end: int | None = None,
        *,
        descending: Sequence[bool] | None = None,
        nulls_last: Sequence[bool] | None = None,
    ) -> ir.Value:
        if rows_start is not None and rows_end is not None:
            rows_between = {"preceding": -rows_start, "following": rows_end}
        elif rows_end is not None:
            rows_between = {"following": rows_end}
        elif rows_start is not None:  # pragma: no cover
            rows_between = {"preceding": -rows_start}
        else:
            rows_between = {}
        window = ibis.window(
            group_by=partition_by,
            order_by=self._sort(*order_by, descending=descending, nulls_last=nulls_last),
            **rows_between,
        )
        return expr.over(window)

    def __narwhals_namespace__(self) -> IbisNamespace:  # pragma: no cover
        from narwhals._ibis.namespace import IbisNamespace

        return IbisNamespace(version=self._version)

    def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
        # Ibis does its own broadcasting.
        return self

    def _sort(
        self,
        *cols: ir.Column | str,
        descending: Sequence[bool] | None = None,
        nulls_last: Sequence[bool] | None = None,
    ) -> Iterator[ir.Column]:
        descending = descending or [False] * len(cols)
        nulls_last = nulls_last or [False] * len(cols)
        mapping = {
            (False, False): partial(ibis.asc, nulls_first=True),
            (False, True): partial(ibis.asc, nulls_first=False),
            (True, False): partial(ibis.desc, nulls_first=True),
            (True, True): partial(ibis.desc, nulls_first=False),
        }
        yield from (
            cast("ir.Column", mapping[(_desc, _nulls_last)](col))
            for col, _desc, _nulls_last in zip_strict(cols, descending, nulls_last)
        )

    @classmethod
    def from_column_names(
        cls: type[Self],
        evaluate_column_names: EvalNames[IbisLazyFrame],
        /,
        *,
        context: _LimitedContext,
    ) -> Self:
        def func(df: IbisLazyFrame) -> Sequence[ir.Column]:
            return [df.native[name] for name in evaluate_column_names(df)]

        return cls(
            func,
            evaluate_output_names=evaluate_column_names,
            alias_output_names=None,
            version=context._version,
        )

    @classmethod
    def from_column_indices(cls, *column_indices: int, context: _LimitedContext) -> Self:
        def func(df: IbisLazyFrame) -> Sequence[ir.Column]:
            return [df.native[i] for i in column_indices]

        return cls(
            func,
            evaluate_output_names=cls._eval_names_indices(column_indices),
            alias_output_names=None,
            version=context._version,
        )

    def _with_binary(self, op: Callable[..., ir.Value], other: Self | Any) -> Self:
        return self._with_callable(op, other=other)

    def _with_elementwise(
        self, op: Callable[..., ir.Value], /, **expressifiable_args: Self | Any
    ) -> Self:
        return self._with_callable(op, **expressifiable_args)

    @classmethod
    def _alias_native(cls, expr: ExprT, name: str, /) -> ExprT:
        return cast("ExprT", expr.name(name))

    def __invert__(self) -> Self:
        invert = cast("Callable[..., ir.Value]", operator.invert)
        return self._with_callable(invert)

    def all(self) -> Self:
        return self._with_callable(lambda expr: expr.all().fill_null(lit(True)))

    def any(self) -> Self:
        return self._with_callable(lambda expr: expr.any().fill_null(lit(False)))

    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> Self:
        if interpolation != "linear":
            msg = "Only linear interpolation methods are supported for Ibis quantile."
            raise NotImplementedError(msg)
        return self._with_callable(lambda expr: expr.quantile(quantile))

    def clip(self, lower_bound: Any, upper_bound: Any) -> Self:
        def _clip(
            expr: ir.NumericValue, lower: Any | None = None, upper: Any | None = None
        ) -> ir.NumericValue:
            return expr.clip(lower=lower, upper=upper)

        if lower_bound is None:
            return self._with_callable(_clip, upper=upper_bound)
        if upper_bound is None:
            return self._with_callable(_clip, lower=lower_bound)
        return self._with_callable(_clip, lower=lower_bound, upper=upper_bound)

    def n_unique(self) -> Self:
        return self._with_callable(
            lambda expr: expr.nunique() + expr.isnull().any().cast("int8")
        )

    def len(self) -> Self:
        def func(df: IbisLazyFrame) -> Sequence[ir.IntegerScalar]:
            return [df.native.count() for _ in self._evaluate_output_names(df)]

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    def std(self, ddof: int) -> Self:
        def _std(expr: ir.NumericColumn, ddof: int) -> ir.Value:
            if ddof == 0:
                return expr.std(how="pop")
            if ddof == 1:
                return expr.std(how="sample")
            n_samples = expr.count()
            std_pop = expr.std(how="pop")
            ddof_lit = lit(ddof)
            return std_pop * n_samples.sqrt() / (n_samples - ddof_lit).sqrt()

        return self._with_callable(lambda expr: _std(expr, ddof))

    def var(self, ddof: int) -> Self:
        def _var(expr: ir.NumericColumn, ddof: int) -> ir.Value:
            if ddof == 0:
                return expr.var(how="pop")
            if ddof == 1:
                return expr.var(how="sample")
            n_samples = expr.count()
            var_pop = expr.var(how="pop")
            ddof_lit = lit(ddof)
            return var_pop * n_samples / (n_samples - ddof_lit)

        return self._with_callable(lambda expr: _var(expr, ddof))

    def null_count(self) -> Self:
        return self._with_callable(lambda expr: expr.isnull().sum())

    def is_nan(self) -> Self:
        def func(expr: ir.FloatingValue | Any) -> ir.Value:
            otherwise = expr.isnan() if is_floating(expr.type()) else False
            return ibis.ifelse(expr.isnull(), None, otherwise)

        return self._with_callable(func)

    def is_finite(self) -> Self:
        return self._with_callable(lambda expr: ~(expr.isinf() | expr.isnan()))

    def is_in(self, other: Sequence[Any]) -> Self:
        return self._with_callable(lambda expr: expr.isin(other))

    def fill_null(self, value: Self | Any, strategy: Any, limit: int | None) -> Self:
        # Ibis doesn't yet allow ignoring nulls in first/last with window functions, which makes forward/backward
        # strategies inconsistent when there are nulls present: https://github.com/ibis-project/ibis/issues/9539
        if strategy is not None:
            msg = "`strategy` is not supported for the Ibis backend"
            raise NotImplementedError(msg)
        if limit is not None:
            msg = "`limit` is not supported for the Ibis backend"  # pragma: no cover
            raise NotImplementedError(msg)

        def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value:
            return expr.fill_null(value)

        return self._with_callable(_fill_null, value=value)

    def cast(self, dtype: IntoDType) -> Self:
        def _func(expr: ir.Column) -> ir.Value:
            native_dtype = narwhals_to_native_dtype(dtype, self._version)
            # ibis `cast` overloads do not include DataType, only literals
            return expr.cast(native_dtype)  # type: ignore[unused-ignore]

        return self._with_callable(_func)

    def is_unique(self) -> Self:
        return self._with_callable(
            lambda expr: expr.isnull().count().over(ibis.window(group_by=(expr))) == 1
        )

    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        def _rank(expr: ir.Column) -> ir.Value:
            order_by = next(self._sort(expr, descending=[descending], nulls_last=[True]))
            window = ibis.window(order_by=order_by)

            if method == "dense":
                rank_ = order_by.dense_rank()
            elif method == "ordinal":
                rank_ = ibis.row_number().over(window)
            else:
                rank_ = order_by.rank()

            # Ibis uses 0-based ranking. Add 1 to match polars 1-based rank.
            rank_ = rank_ + lit(1)

            # For "max" and "average", adjust using the count of rows in the partition.
            if method == "max":
                # Define a window partitioned by expr (i.e. each distinct value)
                partition = ibis.window(group_by=[expr])
                cnt = expr.count().over(partition)
                rank_ = rank_ + cnt - lit(1)
            elif method == "average":
                partition = ibis.window(group_by=[expr])
                cnt = expr.count().over(partition)
                avg = cast("ir.NumericValue", (cnt - lit(1)) / lit(2.0))
                rank_ = rank_ + avg

            return ibis.cases((expr.notnull(), rank_))

        return self._with_callable(_rank)

    @property
    def str(self) -> IbisExprStringNamespace:
        return IbisExprStringNamespace(self)

    @property
    def dt(self) -> IbisExprDateTimeNamespace:
        return IbisExprDateTimeNamespace(self)

    @property
    def list(self) -> IbisExprListNamespace:
        return IbisExprListNamespace(self)

    @property
    def struct(self) -> IbisExprStructNamespace:
        return IbisExprStructNamespace(self)

    # NOTE: https://github.com/ibis-project/ibis/issues/10542
    cum_prod = not_implemented()

    # NOTE: https://github.com/ibis-project/ibis/issues/11176
    skew = not_implemented()
    kurtosis = not_implemented()

    _count_star = not_implemented()

    # Intentionally not implemented, as Ibis does its own expression rewriting.
    _push_down_window_function = not_implemented()
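Illustrative sketch (not part of the committed files): the `std`/`var` methods above fall back to rescaling the population statistic when `ddof` is neither 0 nor 1. A quick stdlib-only check of that identity, using made-up numbers:

import math
import statistics

xs = [1.0, 2.0, 4.0, 7.0]
n = len(xs)
ddof = 2  # arbitrary ddof > 1, just to exercise the fallback branch

std_pop = statistics.pstdev(xs)  # population std, i.e. ddof == 0
rescaled = std_pop * math.sqrt(n) / math.sqrt(n - ddof)  # same formula as `_std`

# direct computation with the same ddof, for comparison
mean = sum(xs) / n
direct = math.sqrt(sum((x - mean) ** 2 for x in xs) / (n - ddof))

assert math.isclose(rescaled, direct)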
lib/python3.11/site-packages/narwhals/_ibis/expr_dt.py (new file, 83 lines)
@@ -0,0 +1,83 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable

from narwhals._duration import Interval
from narwhals._ibis.utils import (
    UNITS_DICT_BUCKET,
    UNITS_DICT_TRUNCATE,
    timedelta_to_ibis_interval,
)
from narwhals._sql.expr_dt import SQLExprDateTimeNamesSpace
from narwhals._utils import not_implemented

if TYPE_CHECKING:
    import ibis.expr.types as ir

    from narwhals._ibis.expr import IbisExpr
    from narwhals._ibis.utils import BucketUnit, TruncateUnit


class IbisExprDateTimeNamespace(SQLExprDateTimeNamesSpace["IbisExpr"]):
    def millisecond(self) -> IbisExpr:
        return self.compliant._with_callable(lambda expr: expr.millisecond())

    def microsecond(self) -> IbisExpr:
        return self.compliant._with_callable(lambda expr: expr.microsecond())

    def to_string(self, format: str) -> IbisExpr:
        return self.compliant._with_callable(lambda expr: expr.strftime(format))

    def weekday(self) -> IbisExpr:
        # Ibis uses 0-6 for Monday-Sunday. Add 1 to match polars.
        return self.compliant._with_callable(lambda expr: expr.day_of_week.index() + 1)

    def _bucket(self, kwds: dict[BucketUnit, Any], /) -> Callable[..., ir.TimestampValue]:
        def fn(expr: ir.TimestampValue) -> ir.TimestampValue:
            return expr.bucket(**kwds)

        return fn

    def _truncate(self, unit: TruncateUnit, /) -> Callable[..., ir.TimestampValue]:
        def fn(expr: ir.TimestampValue) -> ir.TimestampValue:
            return expr.truncate(unit)

        return fn

    def truncate(self, every: str) -> IbisExpr:
        interval = Interval.parse(every)
        multiple, unit = interval.multiple, interval.unit
        if unit == "q":
            multiple, unit = 3 * multiple, "mo"
        if multiple != 1:
            if self.compliant._backend_version < (7, 1):  # pragma: no cover
                msg = "Truncating datetimes with multiples of the unit is only supported in Ibis >= 7.1."
                raise NotImplementedError(msg)
            fn = self._bucket({UNITS_DICT_BUCKET[unit]: multiple})
        else:
            fn = self._truncate(UNITS_DICT_TRUNCATE[unit])
        return self.compliant._with_callable(fn)

    def offset_by(self, every: str) -> IbisExpr:
        interval = Interval.parse_no_constraints(every)
        unit = interval.unit
        if unit in {"y", "q", "mo", "d", "ns"}:
            msg = f"Offsetting by {unit} is not yet supported for ibis."
            raise NotImplementedError(msg)
        offset = timedelta_to_ibis_interval(interval.to_timedelta())
        return self.compliant._with_callable(lambda expr: expr.add(offset))

    def replace_time_zone(self, time_zone: str | None) -> IbisExpr:
        if time_zone is None:
            return self.compliant._with_callable(lambda expr: expr.cast("timestamp"))
        msg = "`replace_time_zone` with non-null `time_zone` not yet implemented for Ibis"  # pragma: no cover
        raise NotImplementedError(msg)

    nanosecond = not_implemented()
    total_minutes = not_implemented()
    total_seconds = not_implemented()
    total_milliseconds = not_implemented()
    total_microseconds = not_implemented()
    total_nanoseconds = not_implemented()
    convert_time_zone = not_implemented()
    timestamp = not_implemented()
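Illustrative sketch (not part of the committed files): `truncate` above dispatches to `Column.truncate` for a plain unit and to `TimestampValue.bucket` (Ibis >= 7.1, per the check in the code) when a multiple such as "3mo" is requested. A minimal sketch of the two underlying Ibis calls, assuming a hypothetical in-memory table:

import ibis

t = ibis.memtable({"ts": ["2024-01-15 10:30:00", "2024-05-20 08:00:00"]})
t = t.mutate(ts=t.ts.cast("timestamp"))

per_month = t.ts.truncate("M")        # single unit -> the `_truncate` path
per_quarter = t.ts.bucket(months=3)   # multiple of a unit -> the `_bucket` path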
lib/python3.11/site-packages/narwhals/_ibis/expr_list.py (new file, 29 lines)
@@ -0,0 +1,29 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import ListNamespace

if TYPE_CHECKING:
    import ibis.expr.types as ir

    from narwhals._ibis.expr import IbisExpr
    from narwhals.typing import NonNestedLiteral


class IbisExprListNamespace(LazyExprNamespace["IbisExpr"], ListNamespace["IbisExpr"]):
    def len(self) -> IbisExpr:
        return self.compliant._with_callable(lambda expr: expr.length())

    def unique(self) -> IbisExpr:
        return self.compliant._with_callable(lambda expr: expr.unique())

    def contains(self, item: NonNestedLiteral) -> IbisExpr:
        return self.compliant._with_callable(lambda expr: expr.contains(item))

    def get(self, index: int) -> IbisExpr:
        def _get(expr: ir.ArrayColumn) -> ir.Column:
            return expr[index]

        return self.compliant._with_callable(_get)
lib/python3.11/site-packages/narwhals/_ibis/expr_str.py (new file, 83 lines)
@@ -0,0 +1,83 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Callable

from ibis.expr.datatypes import Timestamp

from narwhals._sql.expr_str import SQLExprStringNamespace
from narwhals._utils import _is_naive_format, not_implemented

if TYPE_CHECKING:
    import ibis.expr.types as ir
    from typing_extensions import TypeAlias

    from narwhals._ibis.expr import IbisExpr

    IntoStringValue: TypeAlias = "str | ir.StringValue"


class IbisExprStringNamespace(SQLExprStringNamespace["IbisExpr"]):
    def strip_chars(self, characters: str | None) -> IbisExpr:
        if characters is not None:
            msg = "Ibis does not support `characters` argument in `str.strip_chars`"
            raise NotImplementedError(msg)

        return self.compliant._with_callable(lambda expr: expr.strip())

    def _replace_all(
        self, pattern: IntoStringValue, value: IntoStringValue
    ) -> Callable[..., ir.StringValue]:
        def fn(expr: ir.StringColumn) -> ir.StringValue:
            return expr.re_replace(pattern, value)

        return fn

    def _replace_all_literal(
        self, pattern: IntoStringValue, value: IntoStringValue
    ) -> Callable[..., ir.StringValue]:
        def fn(expr: ir.StringColumn) -> ir.StringValue:
            return expr.replace(pattern, value)  # pyright: ignore[reportArgumentType]

        return fn

    def replace_all(
        self, pattern: str, value: str | IbisExpr, *, literal: bool
    ) -> IbisExpr:
        fn = self._replace_all_literal if literal else self._replace_all
        if isinstance(value, str):
            return self.compliant._with_callable(fn(pattern, value))
        return self.compliant._with_elementwise(
            lambda expr, value: fn(pattern, value)(expr), value=value
        )

    def _to_datetime(self, format: str) -> Callable[..., ir.TimestampValue]:
        def fn(expr: ir.StringColumn) -> ir.TimestampValue:
            return expr.as_timestamp(format)

        return fn

    def _to_datetime_naive(self, format: str) -> Callable[..., ir.TimestampValue]:
        def fn(expr: ir.StringColumn) -> ir.TimestampValue:
            dtype: Any = Timestamp(timezone=None)
            return expr.as_timestamp(format).cast(dtype)

        return fn

    def to_datetime(self, format: str | None) -> IbisExpr:
        if format is None:
            msg = "Cannot infer format with Ibis backend"
            raise NotImplementedError(msg)
        fn = self._to_datetime_naive if _is_naive_format(format) else self._to_datetime
        return self.compliant._with_callable(fn(format))

    def to_date(self, format: str | None) -> IbisExpr:
        if format is None:
            msg = "Cannot infer format with Ibis backend"
            raise NotImplementedError(msg)

        def fn(expr: ir.StringColumn) -> ir.DateValue:
            return expr.as_date(format)

        return self.compliant._with_callable(fn)

    replace = not_implemented()
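Illustrative sketch (not part of the committed files): `to_datetime` requires an explicit format with the Ibis backend, and a naive format string routes through `_to_datetime_naive`, so the result is cast to a timezone-free timestamp. Assuming the public narwhals API and a hypothetical `ibis.memtable`:

import ibis
import narwhals as nw

t = ibis.memtable({"s": ["2024-01-15 10:30:00", "2024-05-20 08:00:00"]})

out = (
    nw.from_native(t)
    .select(nw.col("s").str.to_datetime(format="%Y-%m-%d %H:%M:%S"))
    .collect()
)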
lib/python3.11/site-packages/narwhals/_ibis/expr_struct.py (new file, 19 lines)
@@ -0,0 +1,19 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import StructNamespace

if TYPE_CHECKING:
    import ibis.expr.types as ir

    from narwhals._ibis.expr import IbisExpr


class IbisExprStructNamespace(LazyExprNamespace["IbisExpr"], StructNamespace["IbisExpr"]):
    def field(self, name: str) -> IbisExpr:
        def func(expr: ir.StructColumn) -> ir.Column:
            return expr[name]

        return self.compliant._with_callable(func).alias(name)
lib/python3.11/site-packages/narwhals/_ibis/group_by.py (new file, 32 lines)
@@ -0,0 +1,32 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from narwhals._sql.group_by import SQLGroupBy

if TYPE_CHECKING:
    from collections.abc import Sequence

    import ibis.expr.types as ir  # noqa: F401

    from narwhals._ibis.dataframe import IbisLazyFrame
    from narwhals._ibis.expr import IbisExpr


class IbisGroupBy(SQLGroupBy["IbisLazyFrame", "IbisExpr", "ir.Value"]):
    def __init__(
        self,
        df: IbisLazyFrame,
        keys: Sequence[str] | Sequence[IbisExpr],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        frame, self._keys, self._output_key_names = self._parse_keys(df, keys=keys)
        self._compliant_frame = frame.drop_nulls(self._keys) if drop_null_keys else frame

    def agg(self, *exprs: IbisExpr) -> IbisLazyFrame:
        native = self.compliant.native
        return self.compliant._with_native(
            native.group_by(self._keys).aggregate(*self._evaluate_exprs(exprs))
        ).rename(dict(zip(self._keys, self._output_key_names)))
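Illustrative sketch (not part of the committed files): `IbisGroupBy.agg` is what runs when grouping an Ibis-backed narwhals LazyFrame; the keys go to `Table.group_by` and the aggregations to `.aggregate`. Assuming hypothetical data:

import ibis
import narwhals as nw

tbl = ibis.memtable({"g": ["a", "a", "b"], "v": [1, 2, 3]})

out = (
    nw.from_native(tbl)
    .group_by("g")
    .agg(nw.col("v").sum().alias("total"))
    .sort("g")
    .collect()
)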
lib/python3.11/site-packages/narwhals/_ibis/namespace.py (new file, 160 lines)
@@ -0,0 +1,160 @@
from __future__ import annotations

import operator
from functools import reduce
from itertools import chain
from typing import TYPE_CHECKING, Any

import ibis
import ibis.expr.types as ir

from narwhals._expression_parsing import (
    combine_alias_output_names,
    combine_evaluate_output_names,
)
from narwhals._ibis.dataframe import IbisLazyFrame
from narwhals._ibis.expr import IbisExpr
from narwhals._ibis.selectors import IbisSelectorNamespace
from narwhals._ibis.utils import function, lit, narwhals_to_native_dtype
from narwhals._sql.namespace import SQLNamespace
from narwhals._sql.when_then import SQLThen, SQLWhen
from narwhals._utils import Implementation, requires

if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence

    from narwhals._utils import Version
    from narwhals.typing import ConcatMethod, IntoDType, PythonLiteral


class IbisNamespace(SQLNamespace[IbisLazyFrame, IbisExpr, "ir.Table", "ir.Value"]):
    _implementation: Implementation = Implementation.IBIS

    def __init__(self, *, version: Version) -> None:
        self._version = version

    @property
    def selectors(self) -> IbisSelectorNamespace:
        return IbisSelectorNamespace.from_namespace(self)

    @property
    def _expr(self) -> type[IbisExpr]:
        return IbisExpr

    @property
    def _lazyframe(self) -> type[IbisLazyFrame]:
        return IbisLazyFrame

    def _function(self, name: str, *args: ir.Value | PythonLiteral) -> ir.Value:
        return function(name, *args)

    def _lit(self, value: Any) -> ir.Value:
        return lit(value)

    def _when(
        self, condition: ir.Value, value: ir.Value, otherwise: ir.Expr | None = None
    ) -> ir.Value:
        if otherwise is None:
            return ibis.cases((condition, value))
        return ibis.cases((condition, value), else_=otherwise)  # pragma: no cover

    def _coalesce(self, *exprs: ir.Value) -> ir.Value:
        return ibis.coalesce(*exprs)

    def concat(
        self, items: Iterable[IbisLazyFrame], *, how: ConcatMethod
    ) -> IbisLazyFrame:
        if how == "diagonal":
            msg = "diagonal concat not supported for Ibis. Please join instead."
            raise NotImplementedError(msg)

        items = list(items)
        native_items = [item.native for item in items]
        schema = items[0].schema
        if not all(x.schema == schema for x in items[1:]):
            msg = "inputs should all have the same schema"
            raise TypeError(msg)
        return self._lazyframe.from_native(ibis.union(*native_items), context=self)

    def concat_str(
        self, *exprs: IbisExpr, separator: str, ignore_nulls: bool
    ) -> IbisExpr:
        def func(df: IbisLazyFrame) -> list[ir.Value]:
            cols = list(chain.from_iterable(expr(df) for expr in exprs))
            cols_casted = [s.cast("string") for s in cols]

            if not ignore_nulls:
                result = cols_casted[0]
                for col in cols_casted[1:]:
                    result = result + separator + col
            else:
                result = lit(separator).join(cols_casted)

            return [result]

        return self._expr(
            call=func,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            version=self._version,
        )

    def mean_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
        def func(cols: Iterable[ir.Value]) -> ir.Value:
            cols = list(cols)
            return reduce(operator.add, (col.fill_null(lit(0)) for col in cols)) / reduce(
                operator.add, (col.isnull().ifelse(lit(0), lit(1)) for col in cols)
            )

        return self._expr._from_elementwise_horizontal_op(func, *exprs)

    @requires.backend_version((10, 0))
    def when(self, predicate: IbisExpr) -> IbisWhen:
        return IbisWhen.from_expr(predicate, context=self)

    def lit(self, value: Any, dtype: IntoDType | None) -> IbisExpr:
        def func(_df: IbisLazyFrame) -> Sequence[ir.Value]:
            ibis_dtype = narwhals_to_native_dtype(dtype, self._version) if dtype else None
            return [lit(value, ibis_dtype)]

        return self._expr(
            func,
            evaluate_output_names=lambda _df: ["literal"],
            alias_output_names=None,
            version=self._version,
        )

    def len(self) -> IbisExpr:
        def func(_df: IbisLazyFrame) -> list[ir.Value]:
            return [_df.native.count()]

        return self._expr(
            call=func,
            evaluate_output_names=lambda _df: ["len"],
            alias_output_names=None,
            version=self._version,
        )


class IbisWhen(SQLWhen["IbisLazyFrame", "ir.Value", IbisExpr]):
    lit = lit

    @property
    def _then(self) -> type[IbisThen]:
        return IbisThen

    def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]:
        is_expr = self._condition._is_expr
        condition = df._evaluate_expr(self._condition)
        then_ = self._then_value
        then = df._evaluate_expr(then_) if is_expr(then_) else lit(then_)
        other_ = self._otherwise_value
        if other_ is None:
            result = ibis.cases((condition, then))
        else:
            otherwise = df._evaluate_expr(other_) if is_expr(other_) else lit(other_)
            result = ibis.cases((condition, then), else_=otherwise)
        return [result]


class IbisThen(SQLThen["IbisLazyFrame", "ir.Value", IbisExpr], IbisExpr): ...
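Illustrative sketch (not part of the committed files): `IbisNamespace.when` (gated on Ibis >= 10.0 by the decorator above) backs the public `nw.when(...).then(...).otherwise(...)` chain, which `IbisWhen.__call__` compiles to `ibis.cases`. Assuming hypothetical data:

import ibis
import narwhals as nw

tbl = ibis.memtable({"x": [1, 5, 10]})

expr = nw.when(nw.col("x") > 4).then(nw.col("x")).otherwise(0).alias("clipped")
out = nw.from_native(tbl).select(expr).collect()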
lib/python3.11/site-packages/narwhals/_ibis/selectors.py (new file, 32 lines)
@@ -0,0 +1,32 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from narwhals._compliant import CompliantSelector, LazySelectorNamespace
from narwhals._ibis.expr import IbisExpr

if TYPE_CHECKING:
    import ibis.expr.types as ir  # noqa: F401

    from narwhals._ibis.dataframe import IbisLazyFrame  # noqa: F401
    from narwhals._ibis.expr import IbisWindowFunction


class IbisSelectorNamespace(LazySelectorNamespace["IbisLazyFrame", "ir.Value"]):
    @property
    def _selector(self) -> type[IbisSelector]:
        return IbisSelector


class IbisSelector(  # type: ignore[misc]
    CompliantSelector["IbisLazyFrame", "ir.Value"], IbisExpr
):
    _window_function: IbisWindowFunction | None = None

    def _to_expr(self) -> IbisExpr:
        return IbisExpr(
            self._call,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )
lib/python3.11/site-packages/narwhals/_ibis/series.py (new file, 41 lines)
@@ -0,0 +1,41 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, NoReturn

from narwhals._ibis.utils import native_to_narwhals_dtype
from narwhals.dependencies import get_ibis

if TYPE_CHECKING:
    from types import ModuleType

    from typing_extensions import Self

    from narwhals._utils import Version
    from narwhals.dtypes import DType


class IbisInterchangeSeries:
    def __init__(self, df: Any, version: Version) -> None:
        self._native_series = df
        self._version = version

    def __narwhals_series__(self) -> Self:
        return self

    def __native_namespace__(self) -> ModuleType:
        return get_ibis()

    @property
    def dtype(self) -> DType:
        return native_to_narwhals_dtype(
            self._native_series.schema().types[0], self._version
        )

    def __getattr__(self, attr: str) -> NoReturn:
        msg = (
            f"Attribute {attr} is not supported for interchange-level dataframes.\n\n"
            "If you would like to see this kind of object better supported in "
            "Narwhals, please open a feature request "
            "at https://github.com/narwhals-dev/narwhals/issues."
        )
        raise NotImplementedError(msg)
lib/python3.11/site-packages/narwhals/_ibis/utils.py (new file, 270 lines)
@@ -0,0 +1,270 @@
from __future__ import annotations

from functools import lru_cache
from typing import TYPE_CHECKING, Any, Literal, cast, overload

import ibis
import ibis.expr.datatypes as ibis_dtypes

from narwhals._utils import Version, isinstance_or_issubclass

if TYPE_CHECKING:
    from collections.abc import Mapping
    from datetime import timedelta

    import ibis.expr.types as ir
    from ibis.common.temporal import TimestampUnit
    from ibis.expr.datatypes import DataType as IbisDataType
    from typing_extensions import TypeAlias, TypeIs

    from narwhals._duration import IntervalUnit
    from narwhals._ibis.dataframe import IbisLazyFrame
    from narwhals._ibis.expr import IbisExpr
    from narwhals.dtypes import DType
    from narwhals.typing import IntoDType, PythonLiteral

Incomplete: TypeAlias = Any
"""Marker for upstream issues."""


@overload
def lit(value: bool, dtype: None = ...) -> ir.BooleanScalar: ...  # noqa: FBT001
@overload
def lit(value: int, dtype: None = ...) -> ir.IntegerScalar: ...
@overload
def lit(value: float, dtype: None = ...) -> ir.FloatingScalar: ...
@overload
def lit(value: str, dtype: None = ...) -> ir.StringScalar: ...
@overload
def lit(value: PythonLiteral | ir.Value, dtype: None = ...) -> ir.Scalar: ...
@overload
def lit(value: Any, dtype: Any) -> Incomplete: ...
def lit(value: Any, dtype: Any | None = None) -> Incomplete:
    """Alias for `ibis.literal`."""
    literal: Incomplete = ibis.literal
    return literal(value, dtype)


BucketUnit: TypeAlias = Literal[
    "years",
    "quarters",
    "months",
    "days",
    "hours",
    "minutes",
    "seconds",
    "milliseconds",
    "microseconds",
    "nanoseconds",
]
TruncateUnit: TypeAlias = Literal[
    "Y", "Q", "M", "W", "D", "h", "m", "s", "ms", "us", "ns"
]

UNITS_DICT_BUCKET: Mapping[IntervalUnit, BucketUnit] = {
    "y": "years",
    "q": "quarters",
    "mo": "months",
    "d": "days",
    "h": "hours",
    "m": "minutes",
    "s": "seconds",
    "ms": "milliseconds",
    "us": "microseconds",
    "ns": "nanoseconds",
}

UNITS_DICT_TRUNCATE: Mapping[IntervalUnit, TruncateUnit] = {
    "y": "Y",
    "q": "Q",
    "mo": "M",
    "d": "D",
    "h": "h",
    "m": "m",
    "s": "s",
    "ms": "ms",
    "us": "us",
    "ns": "ns",
}

FUNCTION_REMAPPING = {
    "starts_with": "startswith",
    "ends_with": "endswith",
    "regexp_matches": "re_search",
    "str_split": "split",
    "dayofyear": "day_of_year",
    "to_date": "date",
}


def evaluate_exprs(df: IbisLazyFrame, /, *exprs: IbisExpr) -> list[tuple[str, ir.Value]]:
    native_results: list[tuple[str, ir.Value]] = []
    for expr in exprs:
        native_series_list = expr(df)
        output_names = expr._evaluate_output_names(df)
        if expr._alias_output_names is not None:
            output_names = expr._alias_output_names(output_names)
        if len(output_names) != len(native_series_list):  # pragma: no cover
            msg = f"Internal error: got output names {output_names}, but only got {len(native_series_list)} results"
            raise AssertionError(msg)
        native_results.extend(zip(output_names, native_series_list))
    return native_results


@lru_cache(maxsize=16)
def native_to_narwhals_dtype(ibis_dtype: IbisDataType, version: Version) -> DType:  # noqa: C901, PLR0912
    dtypes = version.dtypes
    if ibis_dtype.is_int64():
        return dtypes.Int64()
    if ibis_dtype.is_int32():
        return dtypes.Int32()
    if ibis_dtype.is_int16():
        return dtypes.Int16()
    if ibis_dtype.is_int8():
        return dtypes.Int8()
    if ibis_dtype.is_uint64():
        return dtypes.UInt64()
    if ibis_dtype.is_uint32():
        return dtypes.UInt32()
    if ibis_dtype.is_uint16():
        return dtypes.UInt16()
    if ibis_dtype.is_uint8():
        return dtypes.UInt8()
    if ibis_dtype.is_boolean():
        return dtypes.Boolean()
    if ibis_dtype.is_float64():
        return dtypes.Float64()
    if ibis_dtype.is_float32():
        return dtypes.Float32()
    if ibis_dtype.is_string():
        return dtypes.String()
    if ibis_dtype.is_date():
        return dtypes.Date()
    if is_timestamp(ibis_dtype):
        _unit = cast("TimestampUnit", ibis_dtype.unit)
        return dtypes.Datetime(time_unit=_unit.value, time_zone=ibis_dtype.timezone)
    if is_interval(ibis_dtype):
        _time_unit = ibis_dtype.unit.value
        if _time_unit not in {"ns", "us", "ms", "s"}:  # pragma: no cover
            msg = f"Unsupported interval unit: {_time_unit}"
            raise NotImplementedError(msg)
        return dtypes.Duration(_time_unit)
    if is_array(ibis_dtype):
        if ibis_dtype.length:
            return dtypes.Array(
                native_to_narwhals_dtype(ibis_dtype.value_type, version),
                ibis_dtype.length,
            )
        return dtypes.List(native_to_narwhals_dtype(ibis_dtype.value_type, version))
    if is_struct(ibis_dtype):
        return dtypes.Struct(
            [
                dtypes.Field(name, native_to_narwhals_dtype(dtype, version))
                for name, dtype in ibis_dtype.items()
            ]
        )
    if ibis_dtype.is_decimal():  # pragma: no cover
        return dtypes.Decimal()
    if ibis_dtype.is_time():
        return dtypes.Time()
    if ibis_dtype.is_binary():
        return dtypes.Binary()
    return dtypes.Unknown()  # pragma: no cover


def is_timestamp(obj: IbisDataType) -> TypeIs[ibis_dtypes.Timestamp]:
    return obj.is_timestamp()


def is_interval(obj: IbisDataType) -> TypeIs[ibis_dtypes.Interval]:
    return obj.is_interval()


def is_array(obj: IbisDataType) -> TypeIs[ibis_dtypes.Array[Any]]:
    return obj.is_array()


def is_struct(obj: IbisDataType) -> TypeIs[ibis_dtypes.Struct]:
    return obj.is_struct()


def is_floating(obj: IbisDataType) -> TypeIs[ibis_dtypes.Floating]:
    return obj.is_floating()


dtypes = Version.MAIN.dtypes
NW_TO_IBIS_DTYPES: Mapping[type[DType], IbisDataType] = {
    dtypes.Float64: ibis_dtypes.Float64(),
    dtypes.Float32: ibis_dtypes.Float32(),
    dtypes.Binary: ibis_dtypes.Binary(),
    dtypes.String: ibis_dtypes.String(),
    dtypes.Boolean: ibis_dtypes.Boolean(),
    dtypes.Date: ibis_dtypes.Date(),
    dtypes.Time: ibis_dtypes.Time(),
    dtypes.Int8: ibis_dtypes.Int8(),
    dtypes.Int16: ibis_dtypes.Int16(),
    dtypes.Int32: ibis_dtypes.Int32(),
    dtypes.Int64: ibis_dtypes.Int64(),
    dtypes.UInt8: ibis_dtypes.UInt8(),
    dtypes.UInt16: ibis_dtypes.UInt16(),
    dtypes.UInt32: ibis_dtypes.UInt32(),
    dtypes.UInt64: ibis_dtypes.UInt64(),
    dtypes.Decimal: ibis_dtypes.Decimal(),
}
# Enum support: https://github.com/ibis-project/ibis/issues/10991
UNSUPPORTED_DTYPES = (dtypes.Int128, dtypes.UInt128, dtypes.Categorical, dtypes.Enum)


def narwhals_to_native_dtype(dtype: IntoDType, version: Version) -> IbisDataType:
    dtypes = version.dtypes
    base_type = dtype.base_type()
    if ibis_type := NW_TO_IBIS_DTYPES.get(base_type):
        return ibis_type
    if isinstance_or_issubclass(dtype, dtypes.Datetime):
        return ibis_dtypes.Timestamp.from_unit(dtype.time_unit, timezone=dtype.time_zone)
    if isinstance_or_issubclass(dtype, dtypes.Duration):
        return ibis_dtypes.Interval(unit=dtype.time_unit)  # pyright: ignore[reportArgumentType]
    if isinstance_or_issubclass(dtype, dtypes.List):
        inner = narwhals_to_native_dtype(dtype.inner, version)
        return ibis_dtypes.Array(value_type=inner)
    if isinstance_or_issubclass(dtype, dtypes.Struct):
        fields = [
            (field.name, narwhals_to_native_dtype(field.dtype, version))
            for field in dtype.fields
        ]
        return ibis_dtypes.Struct.from_tuples(fields)
    if isinstance_or_issubclass(dtype, dtypes.Array):
        inner = narwhals_to_native_dtype(dtype.inner, version)
        return ibis_dtypes.Array(value_type=inner, length=dtype.size)
    if issubclass(base_type, UNSUPPORTED_DTYPES):
        msg = f"Converting to {base_type.__name__} dtype is not supported for Ibis."
        raise NotImplementedError(msg)
    msg = f"Unknown dtype: {dtype}"  # pragma: no cover
    raise AssertionError(msg)


def timedelta_to_ibis_interval(td: timedelta) -> ibis.expr.types.temporal.IntervalScalar:
    return ibis.interval(days=td.days, seconds=td.seconds, microseconds=td.microseconds)


def function(name: str, *args: ir.Value | PythonLiteral) -> ir.Value:
    # Workaround SQL vs Ibis differences.
    if name == "row_number":
        return ibis.row_number() + lit(1)
    if name == "least":
        return ibis.least(*args)
    if name == "greatest":
        return ibis.greatest(*args)
    expr = args[0]
    if name == "var_pop":
        return cast("ir.NumericColumn", expr).var(how="pop")
    if name == "var_samp":
        return cast("ir.NumericColumn", expr).var(how="sample")
    if name == "stddev_pop":
        return cast("ir.NumericColumn", expr).std(how="pop")
    if name == "stddev_samp":
        return cast("ir.NumericColumn", expr).std(how="sample")
    if name == "substr":
        # Ibis is 0-indexed here, SQL is 1-indexed
        return cast("ir.StringColumn", expr).substr(args[1] - 1, *args[2:])  # type: ignore[operator]  # pyright: ignore[reportArgumentType]
    return getattr(expr, FUNCTION_REMAPPING.get(name, name))(*args[1:])
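Illustrative sketch (not part of the committed files): the two dtype converters above are internal, but they can be exercised directly for a round trip, e.g. a timezone-aware narwhals Datetime mapping to an Ibis Timestamp and back. `Version.MAIN` is the enum member already used in this module:

import narwhals as nw
from narwhals._ibis.utils import narwhals_to_native_dtype, native_to_narwhals_dtype
from narwhals._utils import Version

ibis_dt = narwhals_to_native_dtype(nw.Datetime(time_unit="us", time_zone="UTC"), Version.MAIN)
nw_dt = native_to_narwhals_dtype(ibis_dt, Version.MAIN)
# nw_dt should compare equal to the original Datetime("us", "UTC")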