done
This commit is contained in:
290
lib/python3.11/site-packages/narwhals/_pandas_like/series_dt.py
Normal file
290
lib/python3.11/site-packages/narwhals/_pandas_like/series_dt.py
Normal file
@ -0,0 +1,290 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from narwhals._compliant.any_namespace import DateTimeNamespace
|
||||
from narwhals._constants import (
|
||||
EPOCH_YEAR,
|
||||
MS_PER_SECOND,
|
||||
NS_PER_SECOND,
|
||||
SECONDS_PER_DAY,
|
||||
US_PER_SECOND,
|
||||
)
|
||||
from narwhals._duration import Interval
|
||||
from narwhals._pandas_like.utils import (
|
||||
ALIAS_DICT,
|
||||
UNITS_DICT,
|
||||
PandasLikeSeriesNamespace,
|
||||
calculate_timestamp_date,
|
||||
calculate_timestamp_datetime,
|
||||
get_dtype_backend,
|
||||
int_dtype_mapper,
|
||||
is_dtype_pyarrow,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from datetime import timedelta
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from narwhals._pandas_like.series import PandasLikeSeries
|
||||
from narwhals.typing import TimeUnit
|
||||
|
||||
|
||||
class PandasLikeSeriesDateTimeNamespace(
|
||||
PandasLikeSeriesNamespace, DateTimeNamespace["PandasLikeSeries"]
|
||||
):
|
||||
def date(self) -> PandasLikeSeries:
|
||||
result = self.with_native(self.native.dt.date)
|
||||
if str(result.dtype).lower() == "object":
|
||||
msg = (
|
||||
"Accessing `date` on the default pandas backend "
|
||||
"will return a Series of type `object`."
|
||||
"\nThis differs from polars API and will prevent `.dt` chaining. "
|
||||
"Please switch to the `pyarrow` backend:"
|
||||
'\ndf.convert_dtypes(dtype_backend="pyarrow")'
|
||||
)
|
||||
raise NotImplementedError(msg)
|
||||
return result
|
||||
|
||||
def year(self) -> PandasLikeSeries:
|
||||
return self.with_native(self.native.dt.year)
|
||||
|
||||
def month(self) -> PandasLikeSeries:
|
||||
return self.with_native(self.native.dt.month)
|
||||
|
||||
def day(self) -> PandasLikeSeries:
|
||||
return self.with_native(self.native.dt.day)
|
||||
|
||||
def hour(self) -> PandasLikeSeries:
|
||||
return self.with_native(self.native.dt.hour)
|
||||
|
||||
def minute(self) -> PandasLikeSeries:
|
||||
return self.with_native(self.native.dt.minute)
|
||||
|
||||
def second(self) -> PandasLikeSeries:
|
||||
return self.with_native(self.native.dt.second)
|
||||
|
||||
def millisecond(self) -> PandasLikeSeries:
|
||||
return self.microsecond() // 1000
|
||||
|
||||
def microsecond(self) -> PandasLikeSeries:
|
||||
if self.backend_version < (3, 0, 0) and self._is_pyarrow():
|
||||
# crazy workaround for https://github.com/pandas-dev/pandas/issues/59154
|
||||
import pyarrow.compute as pc # ignore-banned-import()
|
||||
|
||||
from narwhals._arrow.utils import lit
|
||||
|
||||
arr_ns = self.native.array
|
||||
arr = arr_ns.__arrow_array__()
|
||||
result_arr = pc.add(
|
||||
pc.multiply(pc.millisecond(arr), lit(1_000)), pc.microsecond(arr)
|
||||
)
|
||||
result = type(self.native)(type(arr_ns)(result_arr), name=self.native.name)
|
||||
return self.with_native(result)
|
||||
|
||||
return self.with_native(self.native.dt.microsecond)
|
||||
|
||||
def nanosecond(self) -> PandasLikeSeries:
|
||||
return self.microsecond() * 1_000 + self.native.dt.nanosecond
|
||||
|
||||
def ordinal_day(self) -> PandasLikeSeries:
|
||||
year_start = self.native.dt.year
|
||||
result = (
|
||||
self.native.to_numpy().astype("datetime64[D]")
|
||||
- (year_start.to_numpy() - EPOCH_YEAR).astype("datetime64[Y]")
|
||||
).astype("int32") + 1
|
||||
dtype = "Int64[pyarrow]" if self._is_pyarrow() else "int32"
|
||||
return self.with_native(
|
||||
type(self.native)(result, dtype=dtype, name=year_start.name)
|
||||
)
|
||||
|
||||
def weekday(self) -> PandasLikeSeries:
|
||||
# Pandas is 0-6 while Polars is 1-7
|
||||
return self.with_native(self.native.dt.weekday) + 1
|
||||
|
||||
def _is_pyarrow(self) -> bool:
|
||||
return is_dtype_pyarrow(self.native.dtype)
|
||||
|
||||
def _get_total_seconds(self) -> Any:
|
||||
if hasattr(self.native.dt, "total_seconds"):
|
||||
return self.native.dt.total_seconds()
|
||||
return ( # pragma: no cover
|
||||
self.native.dt.days * SECONDS_PER_DAY
|
||||
+ self.native.dt.seconds
|
||||
+ (self.native.dt.microseconds / US_PER_SECOND)
|
||||
+ (self.native.dt.nanoseconds / NS_PER_SECOND)
|
||||
)
|
||||
|
||||
def total_minutes(self) -> PandasLikeSeries:
|
||||
s = self._get_total_seconds()
|
||||
# this calculates the sign of each series element
|
||||
s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
|
||||
s_abs = s.abs() // 60
|
||||
if ~s.isna().any():
|
||||
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
|
||||
return self.with_native(s_abs * s_sign)
|
||||
|
||||
def total_seconds(self) -> PandasLikeSeries:
|
||||
s = self._get_total_seconds()
|
||||
# this calculates the sign of each series element
|
||||
s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
|
||||
s_abs = s.abs() // 1
|
||||
if ~s.isna().any():
|
||||
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
|
||||
return self.with_native(s_abs * s_sign)
|
||||
|
||||
def total_milliseconds(self) -> PandasLikeSeries:
|
||||
s = self._get_total_seconds() * MS_PER_SECOND
|
||||
# this calculates the sign of each series element
|
||||
s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
|
||||
s_abs = s.abs() // 1
|
||||
if ~s.isna().any():
|
||||
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
|
||||
return self.with_native(s_abs * s_sign)
|
||||
|
||||
def total_microseconds(self) -> PandasLikeSeries:
|
||||
s = self._get_total_seconds() * US_PER_SECOND
|
||||
# this calculates the sign of each series element
|
||||
s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
|
||||
s_abs = s.abs() // 1
|
||||
if ~s.isna().any():
|
||||
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
|
||||
return self.with_native(s_abs * s_sign)
|
||||
|
||||
def total_nanoseconds(self) -> PandasLikeSeries:
|
||||
s = self._get_total_seconds() * NS_PER_SECOND
|
||||
# this calculates the sign of each series element
|
||||
s_sign = 2 * (s > 0).astype(int_dtype_mapper(s.dtype)) - 1
|
||||
s_abs = s.abs() // 1
|
||||
if ~s.isna().any():
|
||||
s_abs = s_abs.astype(int_dtype_mapper(s.dtype))
|
||||
return self.with_native(s_abs * s_sign)
|
||||
|
||||
def to_string(self, format: str) -> PandasLikeSeries:
|
||||
# Polars' parser treats `'%.f'` as pandas does `'.%f'`
|
||||
# PyArrow interprets `'%S'` as "seconds, plus fractional seconds"
|
||||
# and doesn't support `%f`
|
||||
if not self._is_pyarrow():
|
||||
format = format.replace("%S%.f", "%S.%f")
|
||||
else:
|
||||
format = format.replace("%S.%f", "%S").replace("%S%.f", "%S")
|
||||
return self.with_native(self.native.dt.strftime(format))
|
||||
|
||||
def replace_time_zone(self, time_zone: str | None) -> PandasLikeSeries:
|
||||
de_zone = self.native.dt.tz_localize(None)
|
||||
result = de_zone.dt.tz_localize(time_zone) if time_zone is not None else de_zone
|
||||
return self.with_native(result)
|
||||
|
||||
def convert_time_zone(self, time_zone: str) -> PandasLikeSeries:
|
||||
if self.compliant.dtype.time_zone is None: # type: ignore[attr-defined]
|
||||
result = self.native.dt.tz_localize("UTC").dt.tz_convert(time_zone)
|
||||
else:
|
||||
result = self.native.dt.tz_convert(time_zone)
|
||||
return self.with_native(result)
|
||||
|
||||
def timestamp(self, time_unit: TimeUnit) -> PandasLikeSeries:
|
||||
s = self.native
|
||||
dtype = self.compliant.dtype
|
||||
mask_na = s.isna()
|
||||
dtypes = self.version.dtypes
|
||||
if dtype == dtypes.Date:
|
||||
# Date is only supported in pandas dtypes if pyarrow-backed
|
||||
s_cast = s.astype("Int32[pyarrow]")
|
||||
result = calculate_timestamp_date(s_cast, time_unit)
|
||||
elif isinstance(dtype, dtypes.Datetime):
|
||||
fn = (
|
||||
s.view
|
||||
if (self.implementation.is_pandas() and self.backend_version < (2,))
|
||||
else s.astype
|
||||
)
|
||||
s_cast = fn("Int64[pyarrow]") if self._is_pyarrow() else fn("int64")
|
||||
result = calculate_timestamp_datetime(s_cast, dtype.time_unit, time_unit)
|
||||
else:
|
||||
msg = "Input should be either of Date or Datetime type"
|
||||
raise TypeError(msg)
|
||||
result[mask_na] = None
|
||||
return self.with_native(result)
|
||||
|
||||
def truncate(self, every: str) -> PandasLikeSeries:
|
||||
interval = Interval.parse(every)
|
||||
multiple, unit = interval.multiple, interval.unit
|
||||
native = self.native
|
||||
if self.implementation.is_cudf():
|
||||
if multiple != 1:
|
||||
msg = f"Only multiple `1` is supported for cuDF, got: {multiple}."
|
||||
raise NotImplementedError(msg)
|
||||
return self.with_native(self.native.dt.floor(ALIAS_DICT.get(unit, unit)))
|
||||
dtype_backend = get_dtype_backend(native.dtype, self.compliant._implementation)
|
||||
if unit in {"mo", "q", "y"}:
|
||||
if self.implementation.is_cudf():
|
||||
msg = f"Truncating to {unit} is not supported yet for cuDF."
|
||||
raise NotImplementedError(msg)
|
||||
if dtype_backend == "pyarrow":
|
||||
import pyarrow.compute as pc # ignore-banned-import
|
||||
|
||||
ca = native.array._pa_array
|
||||
result_arr = pc.floor_temporal(ca, multiple, UNITS_DICT[unit])
|
||||
else:
|
||||
if unit == "q":
|
||||
multiple *= 3
|
||||
np_unit = "M"
|
||||
elif unit == "mo":
|
||||
np_unit = "M"
|
||||
else:
|
||||
np_unit = "Y"
|
||||
arr = native.values # noqa: PD011
|
||||
arr_dtype = arr.dtype
|
||||
result_arr = arr.astype(f"datetime64[{multiple}{np_unit}]").astype(
|
||||
arr_dtype
|
||||
)
|
||||
result_native = type(native)(
|
||||
result_arr, dtype=native.dtype, index=native.index, name=native.name
|
||||
)
|
||||
return self.with_native(result_native)
|
||||
return self.with_native(
|
||||
self.native.dt.floor(f"{multiple}{ALIAS_DICT.get(unit, unit)}")
|
||||
)
|
||||
|
||||
def offset_by(self, by: str) -> PandasLikeSeries:
|
||||
native = self.native
|
||||
pdx = self.compliant.__native_namespace__()
|
||||
if self._is_pyarrow():
|
||||
import pyarrow as pa # ignore-banned-import
|
||||
|
||||
compliant = self.compliant
|
||||
ca = pa.chunked_array([compliant.to_arrow()]) # type: ignore[arg-type]
|
||||
result = (
|
||||
compliant._version.namespace.from_backend("pyarrow")
|
||||
.compliant.from_native(ca)
|
||||
.dt.offset_by(by)
|
||||
.native
|
||||
)
|
||||
result_pd = native.__class__(
|
||||
result, dtype=native.dtype, index=native.index, name=native.name
|
||||
)
|
||||
else:
|
||||
interval = Interval.parse_no_constraints(by)
|
||||
multiple, unit = interval.multiple, interval.unit
|
||||
if unit == "q":
|
||||
multiple *= 3
|
||||
unit = "mo"
|
||||
offset: pd.DateOffset | timedelta
|
||||
if unit == "y":
|
||||
offset = pdx.DateOffset(years=multiple)
|
||||
elif unit == "mo":
|
||||
offset = pdx.DateOffset(months=multiple)
|
||||
elif unit == "ns":
|
||||
offset = pdx.Timedelta(multiple, unit=UNITS_DICT[unit])
|
||||
else:
|
||||
offset = interval.to_timedelta()
|
||||
dtype = self.compliant.dtype
|
||||
datetime_dtype = self.version.dtypes.Datetime
|
||||
if unit == "d" and isinstance(dtype, datetime_dtype) and dtype.time_zone:
|
||||
native_without_timezone = native.dt.tz_localize(None)
|
||||
result_pd = native_without_timezone + offset
|
||||
result_pd = result_pd.dt.tz_localize(dtype.time_zone)
|
||||
else:
|
||||
result_pd = native + offset
|
||||
|
||||
return self.with_native(result_pd)
|
Reference in New Issue
Block a user